# Import Required Libraries
Import the necessary libraries: pandas, numpy, PyTorch, scikit-learn, and matplotlib.

In [6]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import gc
import matplotlib.pyplot as plt

# Load and Preprocess Data
Load the train_data.csv file and preprocess the data, including handling missing values and scaling features.

In [7]:
# Read the raw training table, then turn the -1 placeholder into NaN in both
# its numeric (-1.0) and string ('-1') forms.
train_df = pd.read_csv('data/train_data.csv')
train_df = train_df.replace(-1.0, np.nan).replace('-1', np.nan)

In [8]:
# Names of all numeric-dtype columns; the bare expression below displays them.
numeric_cols = train_df.select_dtypes(include='number').columns
numeric_cols

Index(['che_pc_usd', 'che_perc_gdp', 'insurance_perc_che', 'population',
       'prev_perc', 'price_month', 'price_unit', 'public_perc_che', 'target'],
      dtype='object')

In [9]:
# Display every column name of the loaded frame (numeric and non-numeric).
train_df.columns

Index(['brand', 'che_pc_usd', 'che_perc_gdp', 'cluster_nl', 'corporation',
       'country', 'launch_date', 'date', 'drug_id', 'ind_launch_date',
       'indication', 'insurance_perc_che', 'population', 'prev_perc',
       'price_month', 'price_unit', 'public_perc_che', 'therapeutic_area',
       'target'],
      dtype='object')

In [10]:
# Fill gaps in each numeric column with that column's median.
# NOTE(review): numeric_cols includes 'target', so missing labels are imputed
# as well — confirm this is intended.
numeric_medians = train_df[numeric_cols].median()
train_df[numeric_cols] = train_df[numeric_cols].fillna(numeric_medians)

In [11]:
# Parse the three date columns; values that cannot be parsed become NaT.
date_columns = ['launch_date', 'date', 'ind_launch_date']
train_df = train_df.assign(
    **{col: pd.to_datetime(train_df[col], errors='coerce') for col in date_columns}
)

In [12]:
# Derive year/month features from the launch date and the observation date.
# NOTE(review): these columns are created after the median imputation above,
# so any NaT date leaves a NaN here that is never filled — confirm the dates
# are always present.
launch_dt = train_df['launch_date'].dt
obs_dt = train_df['date'].dt
train_df = train_df.assign(
    launch_year=launch_dt.year,
    launch_month=launch_dt.month,
    date_year=obs_dt.year,
    date_month=obs_dt.month,
)

In [13]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the categorical identifier columns. One fitted encoder is kept
# per column in `label_encoders` so the same mapping can be looked up later.
label_enc_columns = ['brand', 'corporation', 'country', 'therapeutic_area', 'drug_id']
label_encoders = {col: LabelEncoder() for col in label_enc_columns}
for col, encoder in label_encoders.items():
    # Cast to str first so every value (including NaN) is encoded as a string.
    as_text = train_df[col].astype(str)
    train_df[col] = encoder.fit_transform(as_text)

In [14]:
# Temporal split: rows dated before 2022 form the training part, rows from
# 2022-01-01 onwards form the hold-out part. Rows whose 'date' is NaT satisfy
# neither comparison and end up in neither frame.
split_date = pd.Timestamp('2022-01-01')
train_data = train_df.loc[train_df['date'] < split_date]
test_data = train_df.loc[train_df['date'] >= split_date]

In [15]:
# Columns that are not fed to the network: the label itself, the raw datetime
# columns (year/month of launch_date and date are kept as separate features),
# and the text columns that were not label-encoded.
drop_cols = ['target', 'cluster_nl', 'launch_date', 'date', 'ind_launch_date', 'indication']

# Fit on the pre-2022 rows only. Building X_train from the full train_df would
# also include every row of test_data, so the hold-out metrics would be
# computed on data the model had already been trained on.
X_train = train_data.drop(columns=drop_cols)
y_train = train_data['target']

X_test = test_data.drop(columns=drop_cols)
y_test = test_data['target']

# Define Model Architecture
Define the architecture of the model using a suitable neural network for time series prediction.

In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AE_MLP(nn.Module):
    """Autoencoder-augmented MLP with a reconstruction head and two label heads.

    The input is batch-normalised, passed through a noisy encoder, and then used
    three ways:

    * ``decoder``  - reconstruction of the normalised input from the encoding;
    * ``out_ae``   - label logits predicted from the reconstruction;
    * ``out``      - label logits predicted from the normalised input
      concatenated with the encoding (the main branch).

    Both label heads return **raw logits** (no sigmoid). Pair them with
    ``nn.BCEWithLogitsLoss`` and apply ``torch.sigmoid`` only when a
    probability is needed; applying a sigmoid inside ``forward`` as well would
    squash the values twice.

    Args:
        num_columns: Number of input features.
        num_labels: Number of outputs produced by each label head.
        hidden_units: Layer widths. ``hidden_units[0]`` is the encoder width,
            ``hidden_units[1]`` the width of the AE label head, and
            ``hidden_units[2:]`` the widths of the main-branch hidden layers.
        dropout_rates: Dropout probabilities. Indices 0-3 are used for the
            encoder noise, decoder, AE head and post-concatenation dropout;
            main-branch hidden layer ``i`` (``i >= 2``) uses index ``i + 2``,
            so at least ``len(hidden_units) + 2`` entries are needed when
            there are main-branch hidden layers.
    """

    def __init__(self, num_columns, num_labels, hidden_units, dropout_rates):
        super().__init__()

        # Initial batch normalization
        self.batch_norm0 = nn.BatchNorm1d(num_columns)

        # Encoder (dropout acts as input noise)
        self.encoder_noise = nn.Dropout(dropout_rates[0])
        self.encoder_dense = nn.Linear(num_columns, hidden_units[0])
        self.encoder_batch_norm = nn.BatchNorm1d(hidden_units[0])

        # Decoder
        self.decoder_dropout = nn.Dropout(dropout_rates[1])
        self.decoder_dense = nn.Linear(hidden_units[0], num_columns)

        # AE branch: predicts the labels from the reconstructed input
        self.x_ae_dense = nn.Linear(num_columns, hidden_units[1])
        self.x_ae_batch_norm = nn.BatchNorm1d(hidden_units[1])
        self.x_ae_dropout = nn.Dropout(dropout_rates[2])
        self.out_ae_dense = nn.Linear(hidden_units[1], num_labels)

        # Concatenation of normalised input and encoding feeds the main branch
        concat_input_dim = num_columns + hidden_units[0]
        self.concat_batch_norm = nn.BatchNorm1d(concat_input_dim)
        self.concat_dropout = nn.Dropout(dropout_rates[3])

        # Main-branch hidden layers: Linear -> BatchNorm -> Dropout triples,
        # stored flat so that forward() can iterate over them in order.
        self.hidden_layers = nn.ModuleList()
        input_dim = concat_input_dim
        for i in range(2, len(hidden_units)):
            self.hidden_layers.append(nn.Linear(input_dim, hidden_units[i]))
            self.hidden_layers.append(nn.BatchNorm1d(hidden_units[i]))
            self.hidden_layers.append(nn.Dropout(dropout_rates[i + 2]))
            input_dim = hidden_units[i]  # width seen by the next layer

        # Output layer
        self.out_dense = nn.Linear(input_dim, num_labels)

    def forward(self, x):
        """Run the network.

        Args:
            x: Float tensor whose second dimension is ``num_columns``.

        Returns:
            Tuple ``(decoder, out_ae, out)``: the reconstruction of the
            normalised input, the AE-branch label logits, and the main-branch
            label logits.
        """
        x0 = self.batch_norm0(x)

        # Encoder
        encoder = self.encoder_noise(x0)
        encoder = self.encoder_dense(encoder)
        encoder = self.encoder_batch_norm(encoder)
        encoder = F.silu(encoder)

        # Decoder
        decoder = self.decoder_dropout(encoder)
        decoder = self.decoder_dense(decoder)

        # AE branch (logits; the loss applies the sigmoid)
        x_ae = self.x_ae_dense(decoder)
        x_ae = self.x_ae_batch_norm(x_ae)
        x_ae = F.silu(x_ae)
        x_ae = self.x_ae_dropout(x_ae)
        out_ae = self.out_ae_dense(x_ae)

        # Main branch
        x_concat = torch.cat([x0, encoder], dim=1)
        x = self.concat_batch_norm(x_concat)
        x = self.concat_dropout(x)

        for layer in self.hidden_layers:
            x = layer(x)
            # The activation follows each Linear layer, before its BatchNorm.
            if isinstance(layer, nn.Linear):
                x = F.silu(x)

        # Logits; the loss applies the sigmoid
        out = self.out_dense(x)

        return decoder, out_ae, out

# Example usage:
# model = AE_MLP(num_columns, num_labels, hidden_units, dropout_rates)
# optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# criterion_decoder = nn.MSELoss()
# criterion_ae_action = nn.BCEWithLogitsLoss()
# criterion_action = nn.BCEWithLogitsLoss()

In [26]:
import os
# Ask CUDA to run kernels synchronously so that a device-side assert is
# reported at the call that caused it rather than at a later API call.
# NOTE(review): this is set after torch was imported in earlier cells; it
# presumably only takes effect if CUDA has not been initialised yet — confirm,
# or set it before the first CUDA call and restart the kernel.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

# Train the Model
Train the model on the entire dataset without using cross-validation. Save the best model using ModelCheckpoint.

In [28]:
batch_size = 4096
epochs = 50

# Build the network and move it to the target device.
model = AE_MLP(
    num_columns=X_train_tensor.shape[1],
    num_labels=1,
    hidden_units=[256, 256, 256],
    dropout_rates=[0.2] * 7,
)
model.to(device)

# Reconstruction loss for the decoder, logit-based BCE for both label heads.
# NOTE(review): BCEWithLogitsLoss expects raw logits — confirm the model's
# label heads do not already apply a sigmoid.
criterion_decoder = nn.MSELoss()
criterion_ae_action = nn.BCEWithLogitsLoss()
criterion_action = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


def _combined_loss(X_batch, y_batch):
    """Forward one batch; return (sum of the three losses, main-head output)."""
    decoder_out, out_ae, out = model(X_batch)
    total = (
        criterion_decoder(decoder_out, X_batch)
        + criterion_ae_action(out_ae, y_batch)
        + criterion_action(out, y_batch)
    )
    return total, out


# Per-epoch history, used by the plots at the end of the cell.
train_losses, val_losses, val_maes = [], [], []

for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        loss, out = _combined_loss(X_batch, y_batch)
        loss.backward()
        optimizer.step()

        # Weight by batch size so the epoch value is a per-sample average.
        train_loss += loss.item() * X_batch.size(0)

    train_loss /= len(train_loader.dataset)
    train_losses.append(train_loss)

    # ---- validation pass ----
    model.eval()
    val_loss = 0.0
    val_mae = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            loss, out = _combined_loss(X_batch, y_batch)

            val_loss += loss.item() * X_batch.size(0)
            # MAE is measured on sigmoid(out) against the targets.
            val_mae += (torch.sigmoid(out) - y_batch).abs().sum().item()

    val_loss /= len(val_loader.dataset)
    val_mae /= len(val_loader.dataset)
    val_losses.append(val_loss)
    val_maes.append(val_mae)

    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation MAE: {val_mae:.4f}")

# Training vs. validation loss per epoch
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train_losses, label='Training Loss')
ax.plot(val_losses, label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

# Validation MAE per epoch
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(val_maes, label='Validation MAE')
ax.set_xlabel('Epochs')
ax.set_ylabel('MAE')
ax.legend()
plt.show()

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Drop the notebook's reference to the model, run the garbage collector, and
# ask PyTorch to release its cached CUDA memory.
# NOTE(review): `del model` raises NameError if this cell is run twice or
# before the training cell has defined `model`.
del model
gc.collect()
torch.cuda.empty_cache()

# Evaluate the Model
Evaluate the model's performance on a validation set or using other suitable metrics.