In [1]:
import pandas as pd
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader,IterableDataset
from sklearn.model_selection import train_test_split

In [2]:
!pip install torchvision

Collecting torchvision
  Downloading torchvision-0.12.0-cp38-cp38-macosx_10_9_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 5.9 MB/s eta 0:00:01
Collecting torch==1.11.0
  Downloading torch-1.11.0-cp38-none-macosx_10_9_x86_64.whl (129.9 MB)
[K     |████████████████████████████████| 129.9 MB 140 kB/s eta 0:00:012
Installing collected packages: torch, torchvision
  Attempting uninstall: torch
    Found existing installation: torch 1.10.2
    Uninstalling torch-1.10.2:
      Successfully uninstalled torch-1.10.2
Successfully installed torch-1.11.0 torchvision-0.12.0


In [2]:
oppScrData = pd.read_excel ('OppScrData.xlsx')

In [3]:
def clean_ct_data(oppScrData):
    # Delete rows with empty values
    ct_data= oppScrData[["L1_HU_BMD", "TAT Area (cm2)", 'Total Body                Area EA (cm2)',
       'VAT Area (cm2)', 'SAT Area (cm2)', 'VAT/SAT     Ratio', 'Muscle HU',
       ' Muscle Area (cm2)', 'L3 SMI (cm2/m2)', 'AoCa        Agatston',
       'Liver HU    (Median)', 'Age at CT']]
    n = ct_data.shape[0]
    preprocessed_ct_data = []
    for i in range(n):
        row = ct_data.loc[i]
        ignore = False
        for j in row:
          if pd.isna(j) or j == ' ': # There is an empty string somewhere in Liver column
            ignore = True
            break
        if not ignore:
          preprocessed_ct_data.append(row)
    return np.array(preprocessed_ct_data, dtype=np.float32)


In [4]:
def normalize_ct_data(ct_data):
    n = ct_data.shape[1]    
    for i in range(n-1):
      ct_data[:,i] = (ct_data[:,i] - np.min(ct_data[:,i]))/(np.max(ct_data[:,i])- np.min(ct_data[:,i]))
    return ct_data

In [7]:
oppScrData = pd.read_excel ('OppScrData.xlsx')  
ct_data = clean_ct_data(oppScrData)
ct_data= normalize_ct_data(ct_data)

In [8]:
y = ct_data[:,-1]
x = (ct_data[:,:-1])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.10, random_state=42)

In [10]:
class CT_Dataset(Dataset):

    def __init__(self, X, y):
        self.ct_data = X
        self.age = y 

    def __len__(self):
        return len(self.ct_data)

    def __getitem__(self, idx):
        ct_data = self.ct_data[idx]     
        age = np.array(self.age[idx], dtype=np.float32)
        return torch.from_numpy(ct_data), torch.from_numpy(age)

In [11]:
a = CT_Dataset(X=X_train, y = y_train)
train_set = DataLoader(a, batch_size=64, shuffle=True)
# train_set = DataLoader(a)

In [12]:
def build_model():
  return nn.Sequential(
    nn.Linear(11, 64),
    nn.BatchNorm1d(64),
    nn.ReLU(),
    nn.Linear(64, 32),
    nn.BatchNorm1d(32),
    nn.ReLU(),
    nn.Linear(32, 10),
    nn.BatchNorm1d(10),
    nn.ReLU(),
    nn.Linear(10, 1)
    )

model = build_model()

In [13]:
criterion = nn.MSELoss()

def train_model(model, train_loader, criterion, T):
  model.train()
  opt = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.1)
  # opt = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
  for epoch in range(T): 
      running_loss = 0
      for data in enumerate(train_loader):
          ct_data, actual_age = data[1][0], data[1][1]
          opt.zero_grad()
          predicted_age = model(ct_data)      
          loss = criterion(predicted_age, actual_age)
          running_loss += loss.item()*train_loader.batch_size
          loss.backward() 
          opt.step()
          
      print("​Train Epoch: "+str(epoch) + " Loss =", str(running_loss))

  model.train(mode=False)

model = build_model()
train_model(model, train_set, criterion, T = 100)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


​Train Epoch: 0 Loss = 25902001.859375
​Train Epoch: 1 Loss = 24753189.46875
​Train Epoch: 2 Loss = 22962756.171875
​Train Epoch: 3 Loss = 20613063.890625
​Train Epoch: 4 Loss = 17866483.15625
​Train Epoch: 5 Loss = 14928023.3046875
​Train Epoch: 6 Loss = 11915736.921875
​Train Epoch: 7 Loss = 9124103.42578125
​Train Epoch: 8 Loss = 6708469.98046875
​Train Epoch: 9 Loss = 4743699.642578125
​Train Epoch: 10 Loss = 3219426.6279296875
​Train Epoch: 11 Loss = 2134055.00390625
​Train Epoch: 12 Loss = 1433756.3276367188
​Train Epoch: 13 Loss = 977726.8159179688
​Train Epoch: 14 Loss = 728022.423828125
​Train Epoch: 15 Loss = 602901.9213867188
​Train Epoch: 16 Loss = 532882.2216796875
​Train Epoch: 17 Loss = 503741.3361816406
​Train Epoch: 18 Loss = 492512.28515625
​Train Epoch: 19 Loss = 485053.87658691406
​Train Epoch: 20 Loss = 481532.30859375
​Train Epoch: 21 Loss = 477646.4372558594
​Train Epoch: 22 Loss = 475444.59411621094
​Train Epoch: 23 Loss = 474510.5021972656
​Train Epoch: 24 Loss

In [14]:
b = CT_Dataset(X=X_test, y = y_test)
test_set = DataLoader(b, batch_size=64)
# train_set = DataLoader(a)

In [15]:
def evaluate_model(model, test_loader, criterion):
  model.eval()
  running_loss = 0
  with torch.no_grad():
    for data in enumerate(test_loader):
          ct_data, actual_age = data[1][0], data[1][1]
          predicted_age = model(ct_data)
          loss = criterion(predicted_age, actual_age)
          running_loss += loss.item()
  return running_loss

evaluate_model(model, test_set, criterion)

  return F.mse_loss(input, target, reduction=self.reduction)


910.386360168457