In [1]:
import pandas as pd
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader,IterableDataset
from sklearn.model_selection import train_test_split

In [2]:
oppScrData = pd.read_excel (r'sample_data/OppScrData.xlsx')

In [3]:
def clean_ct_data(oppScrData):
    # Delete rows with empty values
    ct_data= oppScrData[["L1_HU_BMD", "TAT Area (cm2)", 'Total Body                Area EA (cm2)',
       'VAT Area (cm2)', 'SAT Area (cm2)', 'VAT/SAT     Ratio', 'Muscle HU',
       ' Muscle Area (cm2)', 'L3 SMI (cm2/m2)', 'AoCa        Agatston',
       'Liver HU    (Median)', 'Age at CT']]
    n = ct_data.shape[0]
    preprocessed_ct_data = []
    for i in range(n):
        row = ct_data.loc[i]
        ignore = False
        for j in row:
          if pd.isna(j) or j == ' ': # There is an empty string somewhere in Liver column
            ignore = True
            break
        if not ignore:
          preprocessed_ct_data.append(row)
    return np.array(preprocessed_ct_data, dtype=np.float32)


In [4]:
def normalize_ct_data(ct_data):
    n = ct_data.shape[1]    
    for i in range(n-1):
      ct_data[:,i] = (ct_data[:,i] - np.min(ct_data[:,i]))/(np.max(ct_data[:,i])- np.min(ct_data[:,i]))
    return ct_data

In [5]:
oppScrData = pd.read_excel (r'sample_data/OppScrData.xlsx')  
ct_data = clean_ct_data(oppScrData)
ct_data= normalize_ct_data(ct_data)

In [6]:
y = ct_data[:,-1]
x = (ct_data[:,:-1])

In [7]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.10, random_state=42)

In [11]:
class CT_Dataset(Dataset):

    def __init__(self, X, y):
        self.ct_data = X
        self.age = y 

    def __len__(self):
        return len(self.ct_data)

    def __getitem__(self, idx):
        ct_data = self.ct_data[idx]     
        age = np.array(self.age[idx], dtype=np.float32)
        return torch.from_numpy(ct_data), torch.from_numpy(age)

In [31]:
a = CT_Dataset(X=X_train, y = y_train)
train_set = DataLoader(a, batch_size=64, shuffle=True)
# train_set = DataLoader(a)

In [65]:
def build_model():
  return nn.Sequential(
    nn.Linear(11, 64),
    nn.BatchNorm1d(64),
    nn.ReLU(),
    nn.Linear(64, 32),
    nn.BatchNorm1d(32),
    nn.ReLU(),
    nn.Linear(32, 10),
    nn.BatchNorm1d(10),
    nn.ReLU(),
    nn.Linear(10, 1)
    )

model = build_model()

In [67]:
criterion = nn.MSELoss()

def train_model(model, train_loader, criterion, T):
  model.train()
  opt = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.1)
  # opt = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
  for epoch in range(T): 
      running_loss = 0
      for data in enumerate(train_loader):
          ct_data, actual_age = data[1][0], data[1][1]
          opt.zero_grad()
          predicted_age = model(ct_data)      
          loss = criterion(predicted_age, actual_age)
          running_loss += loss.item()*train_loader.batch_size
          loss.backward() 
          opt.step()
          
      print("​Train Epoch: "+str(epoch) + " Loss =", str(running_loss))

  model.train(mode=False)

model = build_model()
train_model(model, train_set, criterion, T = 100)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


​Train Epoch: 0 Loss = 26202353.234375
​Train Epoch: 1 Loss = 25431270.9375
​Train Epoch: 2 Loss = 24184200.078125
​Train Epoch: 3 Loss = 22284124.78125
​Train Epoch: 4 Loss = 19834438.171875
​Train Epoch: 5 Loss = 16999667.078125
​Train Epoch: 6 Loss = 13991026.46875
​Train Epoch: 7 Loss = 10995382.1328125
​Train Epoch: 8 Loss = 8280743.13671875
​Train Epoch: 9 Loss = 5977162.56640625
​Train Epoch: 10 Loss = 4089965.994140625
​Train Epoch: 11 Loss = 2719650.5361328125
​Train Epoch: 12 Loss = 1756240.5205078125
​Train Epoch: 13 Loss = 1157539.4174804688
​Train Epoch: 14 Loss = 847777.6672363281
​Train Epoch: 15 Loss = 641880.0187988281
​Train Epoch: 16 Loss = 560993.1391601562
​Train Epoch: 17 Loss = 511217.82958984375
​Train Epoch: 18 Loss = 493452.88623046875
​Train Epoch: 19 Loss = 485972.6652832031
​Train Epoch: 20 Loss = 480865.49755859375
​Train Epoch: 21 Loss = 476567.89306640625
​Train Epoch: 22 Loss = 474310.6611328125
​Train Epoch: 23 Loss = 474095.91955566406
​Train Epoch: 2

In [42]:
b = CT_Dataset(X=X_test, y = y_test)
test_set = DataLoader(b, batch_size=64)
# train_set = DataLoader(a)

In [48]:
def evaluate_model(model, test_loader, criterion):
  model.eval()
  running_loss = 0
  with torch.no_grad():
    for data in enumerate(test_loader):
          ct_data, actual_age = data[1][0], data[1][1]
          predicted_age = model(ct_data)
          loss = criterion(predicted_age, actual_age)
          running_loss += loss.item()
  return running_loss

evaluate_model(model, test_set, criterion)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


911.4797096252441