In [182]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix,accuracy_score

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Bidirectional, BatchNormalization, LeakyReLU, Conv1D
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from livelossplot import PlotLossesKeras

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torch.utils.data import TensorDataset,DataLoader

%matplotlib inline

In [183]:
df = pd.read_csv('../../data/main_process_preprocessed_data.csv')

In [184]:
df.sex = [1 if v == 'M' else 0 for v in df.sex]

# Prepare tensors

In [185]:
max_length = 2200

In [186]:
X = df[['sex', 'age_in', 'PC1', 'PC2']].values
Y = df.time_event.values

In [187]:
x = np.repeat(X,max_length, axis = 0)
x = x.reshape(2916,max_length,-1)

In [188]:
y = np.zeros(shape = (2916,max_length,1))
for i in range(1,len(Y)):
    t = Y[i]
    y[i,t:,:] = 1

In [189]:
x.shape,y.shape

((2916, 2200, 4), (2916, 2200, 1))

In [190]:
indexes = np.arange(len(x))
np.random.shuffle(indexes)
idx_validation = np.random.choice(indexes,size = int(len(x)*0.1),replace=False)
idx_training = list(set(indexes)-set(idx_validation))

In [191]:
# create training set
x_training = x[idx_training]
y_training = y[idx_training]

# create validation set
x_validation = x[idx_validation]
y_validation = y[idx_validation]

In [192]:
x_training.shape,y_training.shape

((2625, 2200, 4), (2625, 2200, 1))

# Keras

In [117]:
# create a model
model = Sequential()
model.add(LSTM(1, return_sequences=True,input_shape = (2200,4), activation = 'sigmoid'))

# create optimizer
adam = Adam(lr=5e-2)

# compile model
model.compile(optimizer=adam, loss='mse', metrics=['mse'])

# add early stopping callback
early_stopping = EarlyStopping(patience=10, monitor='val_loss', mode='auto', restore_best_weights=True)

In [119]:
# fit the model
#model.fit(x_training, y_training, validation_data=[x_validation, y_validation],\
#          epochs=10, shuffle=True, batch_size=128, callbacks=[PlotLossesKeras(), early_stopping])

In [106]:
y_pred = model.predict(x_training)

In [111]:
y_pred.min(),y_pred.max()

(0.5, 0.5)

# Pytorch

In [193]:
# prepare dataloader
x_training = torch.tensor(x_training, dtype=torch.float)
y_training = torch.tensor(y_training, dtype=torch.float)

training_set = TensorDataset(x_training,y_training)
trainloader = DataLoader(training_set, batch_size=32, shuffle=True, num_workers=8)

In [228]:
# define net architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.lstm = nn.LSTM(input_size = 4,hidden_size = 1,num_layers = 1)

    def forward(self, x):
        x = self.lstm(x)
        #x = torch.sigmoid(x)
        return x
net = Net()

In [229]:
# set criterion, optimizer:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [230]:
# train
for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data    

        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

AttributeError: 'tuple' object has no attribute 'log_softmax'