In [None]:
import os

import torch
import pandas as pd

from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

from SurvTRACE.survtrace.utils import set_random_seed
from SurvTRACE.survtrace.config import STConfig


from utils.covariate_data_processing import pbc2_proccess_covariate, padded_mask_processing

In [None]:
# Setup: start from SurvTRACE's METABRIC configuration to save time; everything
# that differs for PBC2 is changed afterwards by our own helper functions.
STConfig['data'] = 'metabric'

# Apply the seed stored in the config through SurvTRACE's seeding helper.
set_random_seed(STConfig['seed'])

# Optimisation hyperparameters for this experiment.
hparams = dict(
    batch_size=64,
    weight_decay=1e-4,
    learning_rate=1e-3,
    epochs=20,
)

In [None]:
# Location of the pre-processed PBC2 covariate table. Set the PBC2_COVARIATE_CSV
# environment variable to point at a local copy of the file; when it is unset,
# the original hard-coded path is used so existing setups keep working.
PBC2_COVARIATE_CSV = os.environ.get(
    "PBC2_COVARIATE_CSV",
    "/Users/davidlee/Documents/GitHub/Surtimesurvival/Data Project/Pycox Lib/PBC2 Convariate Data/pbc2_data_proccessed_auton_covariate.csv",
)
df = pd.read_csv(PBC2_COVARIATE_CSV)

In [None]:
# Column groups that are set aside before the covariate frame is processed.
id_columns = ['seq_id', 'seq_time_id']
outcome_columns = ['event', 'time']

# Keep the sequence identifiers and the event/time columns in their own frames.
df_temp = df.loc[0:, id_columns]
df_event_time_temp = df.loc[0:, outcome_columns]

# Remove the identifier columns from the working frame.
df = df.drop(columns=id_columns)

In [None]:
# Preview the first rows of the sequence-identifier frame (seq_id, seq_time_id).
df_temp.head()

In [None]:
# Print the column summary of the working frame after the identifier columns
# were dropped.
df.info()

In [None]:
# Non-auton variant of the data (kept for reference, not executed):
# df.rename(columns = {'seq_out_time_to_event':'duration', 'seq_out_event':'event'}, inplace = True)

# Auton variant: the time column is named 'time'; rename it to 'duration'.
df = df.rename(columns={'time': 'duration'})

In [None]:
# Run the project's PBC2 covariate preprocessing on the renamed frame and unpack
# its four return values. Note that `df` is rebound to the second return value,
# so re-running this cell feeds the already-processed frame back into the helper.
# NOTE(review): the exact contents of `y`, `df_train` and `df_y_train` are
# defined in utils.covariate_data_processing — confirm there.
y, df, df_train, df_y_train = pbc2_proccess_covariate(df)

In [None]:
# Preview the first rows of the feature frame returned by the preprocessing step.
df_train.head()

In [None]:
# Preview the first rows of the label frame returned by the preprocessing step.
df_y_train.head()

In [None]:
# Re-attach the sequence identifier to both the feature and the label frame.
# The inner join along axis=1 keeps only the row labels present in both inputs.
seq_ids = df_temp['seq_id']
df_train = pd.concat([df_train, seq_ids], join='inner', axis=1)
df_y_train = pd.concat([df_y_train, seq_ids], join='inner', axis=1)

In [None]:
# Build the padded patient tensor and its masks from the feature frame, which
# carries the seq_id column at this point.
# NOTE(review): presumably rows are grouped by seq_id and padded to a common
# sequence length — confirm in utils.covariate_data_processing.
masks, padded_patients = padded_mask_processing(df_train)

In [None]:
# Show the shapes of the padded inputs and of the masks, one per line.
print(padded_patients.shape, masks.shape, sep='\n')

In [None]:
# Collapse the labels to one row per sequence: keep the last row of each
# seq_id, renumber the index, then discard the identifier column.
df_y_train = (
    df_y_train
    .drop_duplicates(subset='seq_id', keep='last')
    .reset_index(drop=True)
    .drop(columns=['seq_id'])
)
df_y_train

In [None]:
# Labels: convert the per-sequence label frame into a tensor.
Y_labels_data_tensor = torch.tensor(df_y_train.values)

# Features: the padded patient tensor is used as-is under a more explicit name.
X_features_data_tensor = padded_patients

In [None]:
# Hold out 10% of the samples for validation. Features, labels and masks are
# passed to the same call so the three stay aligned row-for-row.
# random_state is pinned to the configured seed so that re-running this cell
# yields the same partition instead of depending on how much global RNG state
# was consumed beforehand.
X_train, X_val, y_train, y_val, masks_train, masks_val = train_test_split(
    X_features_data_tensor,
    Y_labels_data_tensor,
    masks,
    test_size=0.1,
    random_state=STConfig['seed'],
)

# Bundle each partition so a DataLoader yields (features, labels, mask) triples.
train_data = TensorDataset(X_train, y_train, masks_train)
val_data = TensorDataset(X_val, y_val, masks_val)

In [None]:
from model.survtimesurvival_model import TransformerClassifier

# Architecture hyperparameters passed to TransformerClassifier below.
embed_dim, num_heads = 32, 2
ffn_hidden_dim, num_layers = 64, 2

# Loaders over the train/validation datasets; only the training loader shuffles.
batch_size = 1
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_data, shuffle=False, batch_size=batch_size)

# Input geometry is read from the feature tensor: dimension 1 is used as the
# sequence length and dimension 2 as the per-step feature width.
seq_length = X_features_data_tensor.shape[1]
input_dim = X_features_data_tensor.shape[2]

model = TransformerClassifier(
    input_dim,
    seq_length,
    embed_dim,
    num_heads,
    ffn_hidden_dim,
    num_layers,
)

In [None]:
# Sanity check: echo the values passed to the model constructor, one per line.
print(
    input_dim,
    seq_length,
    embed_dim,
    num_heads,
    ffn_hidden_dim,
    num_layers,
    sep='\n',
)

In [None]:
# Smoke test: run a single training sample through the untrained model.
# The mask of sample 0 gets a leading dimension added via unsqueeze(0).
# NOTE(review): X_train[0] is passed WITHOUT a matching leading dimension, so
# the input and mask ranks differ here — confirm this is what
# TransformerClassifier.forward expects, or whether X_train[0].unsqueeze(0)
# was intended.
a = masks_train[0].unsqueeze(0)
output = model(X_train[0], a)

In [None]:
# Inspect the shape of the model output from the single-sample forward pass.
output.shape