In [1]:
import sys, os
if os.path.abspath(os.pardir) not in sys.path:
    sys.path.insert(0, os.path.abspath(os.pardir))
import CONFIG
%reload_ext autoreload
%autoreload 2

In [15]:
import numpy as np
import pandas as pd
import pydicom
import random
from sklearn import preprocessing

In [3]:
ROOT = CONFIG.CFG.DATA.BASE
BATCH_SIZE = 64

In [4]:
train_df = pd.read_csv(os.path.join(ROOT, "train.csv"))
train_df.head()

Unnamed: 0,Patient,Weeks,FVC,Percent,Age,Sex,SmokingStatus
0,ID00007637202177411956430,-4,2315,58.253649,79,Male,Ex-smoker
1,ID00007637202177411956430,5,2214,55.712129,79,Male,Ex-smoker
2,ID00007637202177411956430,7,2061,51.862104,79,Male,Ex-smoker
3,ID00007637202177411956430,9,2144,53.950679,79,Male,Ex-smoker
4,ID00007637202177411956430,11,2069,52.063412,79,Male,Ex-smoker


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.manual_seed(1)

<torch._C.Generator at 0x7f13583899c0>

In [6]:
class OSICLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(OSICLSTM, self).__init__()
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.lc = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        out, _ = self.lstm(X)
        out = self.lc(out)
        return out

In [7]:
model = OSICLSTM(4, 100, 1, 4)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [8]:
len(list(model.parameters()))

6

In [9]:
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())

torch.Size([400, 4])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([4, 100])
torch.Size([4])


In [10]:
print(model)

OSICLSTM(
  (lstm): LSTM(4, 100, batch_first=True)
  (lc): Linear(in_features=100, out_features=4, bias=True)
)


In [11]:
ALL_PATIENTS = train_df["Patient"].unique().tolist()

In [12]:
model.train()

OSICLSTM(
  (lstm): LSTM(4, 100, batch_first=True)
  (lc): Linear(in_features=100, out_features=4, bias=True)
)

In [32]:
min_max_scaler = preprocessing.MinMaxScaler()
for patient in ALL_PATIENTS:
    patient_data = train_df[train_df["Patient"] == patient][['Weeks', 'FVC', 'Percent', 'Age']].values
    patient_data_scaled = min_max_scaler.fit_transform(patient_data)
    print(patient_data_scaled)
    patient_data_scaled = torch.tensor(patient_data_scaled).float()

    for data in patient_data_scaled:
        out = model(data.view(1,1,-1))
        print(out)
        break
    break

[[0.         1.         1.         0.        ]
 [0.14754098 0.67936508 0.67936508 0.        ]
 [0.18032787 0.19365079 0.19365079 0.        ]
 [0.21311475 0.45714286 0.45714286 0.        ]
 [0.24590164 0.21904762 0.21904762 0.        ]
 [0.3442623  0.32063492 0.32063492 0.        ]
 [0.54098361 0.         0.         0.        ]
 [0.73770492 0.2031746  0.2031746  0.        ]
 [1.         0.18095238 0.18095238 0.        ]]
tensor([[[ 0.0617,  1.2652,  1.2256, -0.0626]]], grad_fn=<AddBackward0>)


In [33]:
for epoch in range(1000):
    total_loss = 0
    for patient in ALL_PATIENTS:
        model.zero_grad()

        patient_data = train_df[train_df["Patient"] == patient][['Weeks', 'FVC', 'Percent', 'Age']].values
        patient_data_scaled = torch.tensor(min_max_scaler.transform(patient_data)).float()

        loss = 0
        for i in range(patient_data_scaled.size()[0] - 1):
            out = model(patient_data_scaled[i].view(1,1,-1))
            loss += loss_function(out.view(4), patient_data_scaled[i+1])
            total_loss += loss.item()
        # for data in patient_data:
        #     out = model(data.view(1, 1, -1))
        #     print(out)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch}, loss={total_loss}")

Epoch 0, loss=2448.1237071836367
Epoch 1, loss=1991.956739502959
Epoch 2, loss=1880.1801336413482
Epoch 3, loss=1899.47520223842
Epoch 4, loss=1909.5620239567943
Epoch 5, loss=1857.9278167719021
Epoch 6, loss=1837.1983676236123
Epoch 7, loss=1798.5436229465995
Epoch 8, loss=1811.2486738935113
Epoch 9, loss=1810.1519283092348
Epoch 10, loss=1851.6391951371916
Epoch 11, loss=1953.0552522069775
Epoch 12, loss=1887.6583381248638
Epoch 13, loss=1850.3717936307658
Epoch 14, loss=1833.3231678070151
Epoch 15, loss=1775.3431258443743
Epoch 16, loss=1828.5670242896304
Epoch 17, loss=1937.7820549034514
Epoch 18, loss=1800.965662198374
Epoch 19, loss=1734.5729330130853
Epoch 20, loss=1741.7523203770397
Epoch 21, loss=2019.845281823771
Epoch 22, loss=1726.4203623384237
Epoch 23, loss=1791.2901400900446
Epoch 24, loss=1688.2257511655334
Epoch 25, loss=1690.395838774275
Epoch 26, loss=1657.9669929859228
Epoch 27, loss=1722.2837787307799
Epoch 28, loss=1653.4935976522975
Epoch 29, loss=1832.9618692302

In [None]:
sample = torch.tensor([-8, 2698.674521,   60,   67.7956])
model(sample.view(1,1,-1))

In [None]:
for i in range(5):
    print(sample)
    out = model(sample.view(1, 1, -1))
    print(out)
    sample = out

In [None]:
with torch.no_grad():
    input = torch.randn(2, 5, 4)
    print(input)
    print()
    out = model(input)
    print(out)
    print(out.shape)