In [1]:
import sys, os
if os.path.abspath(os.pardir) not in sys.path:
    sys.path.insert(0, os.path.abspath(os.pardir))
import CONFIG
%reload_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import pydicom
import random
from sklearn import preprocessing

In [3]:
ROOT = CONFIG.CFG.DATA.BASE
BATCH_SIZE = 64

In [4]:
train_df = pd.read_csv(os.path.join(ROOT, "train.csv"))
train_df.head()

Unnamed: 0,Patient,Weeks,FVC,Percent,Age,Sex,SmokingStatus
0,ID00007637202177411956430,-4,2315,58.253649,79,Male,Ex-smoker
1,ID00007637202177411956430,5,2214,55.712129,79,Male,Ex-smoker
2,ID00007637202177411956430,7,2061,51.862104,79,Male,Ex-smoker
3,ID00007637202177411956430,9,2144,53.950679,79,Male,Ex-smoker
4,ID00007637202177411956430,11,2069,52.063412,79,Male,Ex-smoker


In [16]:
min_max_scaler = preprocessing.MinMaxScaler()

In [20]:
lstm_input = train_df[['Weeks', 'FVC', 'Percent', 'Age']].values
lstm_input_scaled = torch.tensor(min_max_scaler.fit_transform(lstm_input)).float()
train_df[['Weeks', 'FVC', 'Percent', 'Age']] = lstm_input_scaled

In [21]:
train_df.head()

Unnamed: 0,Patient,Weeks,FVC,Percent,Age,Sex,SmokingStatus
0,ID00007637202177411956430,0.007246,0.26705,0.236393,0.769231,Male,Ex-smoker
1,ID00007637202177411956430,0.072464,0.248923,0.215941,0.769231,Male,Ex-smoker
2,ID00007637202177411956430,0.086957,0.221464,0.18496,0.769231,Male,Ex-smoker
3,ID00007637202177411956430,0.101449,0.23636,0.201767,0.769231,Male,Ex-smoker
4,ID00007637202177411956430,0.115942,0.2229,0.18658,0.769231,Male,Ex-smoker


In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.manual_seed(1)

<torch._C.Generator at 0x7fbeb99e46a8>

In [22]:
class OSICLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(OSICLSTM, self).__init__()
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.lc = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        out, _ = self.lstm(X)
        out = self.lc(out)
        return out

In [23]:
model = OSICLSTM(4, 100, 1, 4)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [24]:
len(list(model.parameters()))

6

In [25]:
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())

torch.Size([400, 4])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([4, 100])
torch.Size([4])


In [26]:
print(model)

OSICLSTM(
  (lstm): LSTM(4, 100, batch_first=True)
  (lc): Linear(in_features=100, out_features=4, bias=True)
)


In [27]:
ALL_PATIENTS = train_df["Patient"].unique().tolist()

In [28]:
model.train()

OSICLSTM(
  (lstm): LSTM(4, 100, batch_first=True)
  (lc): Linear(in_features=100, out_features=4, bias=True)
)

In [29]:
for patient in ALL_PATIENTS:
    patient_data = train_df[train_df["Patient"] == patient][['Weeks', 'FVC', 'Percent', 'Age']].values
    patient_data = torch.tensor(patient_data).float()

    for data in patient_data:
        out = model(data.view(1,1,-1))
        print(out)
        break
    break

tensor([[[0.0356, 0.1050, 0.0795, 0.0066]]], grad_fn=<AddBackward0>)


In [30]:
for epoch in range(1000):
    total_loss = 0
    for patient in ALL_PATIENTS:
        patient_loss = 0
        model.zero_grad()

        patient_data = train_df[train_df["Patient"] == patient][['Weeks', 'FVC', 'Percent', 'Age']].values
        patient_data = torch.tensor(patient_data).float()

        loss = 0
        for i in range(patient_data.size()[0] - 1):
            out = model(patient_data[i].view(1,1,-1))
            loss += loss_function(out.view(4), patient_data[i+1])
            patient_loss += loss.item()
        # for data in patient_data:
        #     out = model(data.view(1, 1, -1))
        #     print(out)
        loss.backward()
        optimizer.step()
        total_loss += patient_loss/patient_data.shape[0]
    print(f"Epoch {epoch}, loss={total_loss}")

Epoch 0, loss=10.578166382103568
Epoch 1, loss=1.5046061009539213
Epoch 2, loss=0.957016112834201
Epoch 3, loss=0.9199114482127925
Epoch 4, loss=0.9353469789592571
Epoch 5, loss=0.9530735072787914
Epoch 6, loss=0.9601929409023906
Epoch 7, loss=0.9747644018967867
Epoch 8, loss=0.9714935864186964
Epoch 9, loss=0.9686685320556521
Epoch 10, loss=0.9671902290449976
Epoch 11, loss=0.9711561967032164
Epoch 12, loss=0.990335028531835
Epoch 13, loss=0.9853241236027809
Epoch 14, loss=0.9256156483030857
Epoch 15, loss=0.9088940953749489
Epoch 16, loss=0.9159720649784098
Epoch 17, loss=0.9150007020322749
Epoch 18, loss=0.9050621580945494
Epoch 19, loss=0.9019594548205898
Epoch 20, loss=0.9044298249824816
Epoch 21, loss=0.9051997109260853
Epoch 22, loss=0.9042383937033599
Epoch 23, loss=0.9044177300667796
Epoch 24, loss=0.9060268095016895
Epoch 25, loss=0.9077113932861678
Epoch 26, loss=0.9083101831431877
Epoch 27, loss=0.9074147396293282
Epoch 28, loss=0.9053198047854336
Epoch 29, loss=0.902650616

In [42]:
sample = torch.tensor([[-12, 3020,   70,   73]])
sample = torch.tensor(min_max_scaler.transform(sample)).float()
sample

tensor([[-0.0507,  0.3936,  0.3309,  0.6154]])

In [44]:
with torch.no_grad():
    for i in range(15):
        out = model(sample.view(1,1,-1))
        print(min_max_scaler.inverse_transform(out.squeeze(dim=0)))
        sample = out.squeeze(dim=0)

[[  12.72775212 3351.45580626   79.05865674   72.65787369]]
[[  17.55453753 3323.5759058    78.1780903    72.70940733]]
[[  23.72204721 3317.9380523    77.80429176   72.75171006]]
[[  31.15028667 3333.3524363    77.96112257   72.79025859]]
[[  40.49702799 3357.50930429   78.53850451   72.77853805]]


In [None]:
for i in range(5):
    print(sample)
    out = model(sample.view(1, 1, -1))
    print(out)
    sample = out

In [None]:
with torch.no_grad():
    input = torch.randn(2, 5, 4)
    print(input)
    print()
    out = model(input)
    print(out)
    print(out.shape)