Three types: Chronic Lymphocytic Leukemia (CLL), Follicular Lymphoma (FL), and Mantle Cell Lymphoma (MCL). 

Data: 
- Samples prepared by different pathologists at different sites.
- 374 images (113 CLL, 139 FL, 122 MCL) of size 1388 x 1040. 
- The idea is that this is a real-world type of sample.
- Potential issues: the original (OG) authors assume (but are not sure) that the different sources contributed similar amounts of each class, and that each sample is from a unique patient.

OG pre-processing:
- Use MATLAB
- Break each image into patches of 36x36 pixels
- Perform 90 degree rotations on half the patches
- Break into 5-fold training and test sets using sets of files (we can probably do this within the code?)

OG Model: AlexNet
- Specific NN architecture 
- Description https://towardsdatascience.com/alexnet-the-architecture-that-challenged-cnns-e406d5297951
- OG paper https://dl.acm.org/doi/10.1145/3065386

Other models to try
- VGG https://blog.paperspace.com/vgg-from-scratch-pytorch/
- ResNet 



In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
import pickle

### Train

In [5]:
import importlib 
import utils
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.model_selection import train_test_split
importlib.reload(utils)

# ---------------------------------------------------------------------------
# Training cell: configure the run, (optionally) build the data loaders,
# train the network, save its weights, then reload them and evaluate on the
# held-out test set.
# ---------------------------------------------------------------------------

# Run configuration
Net_name = 'VGG1' # 'ResNet1'
in_channels = 1
n_classes = 3
num_epochs = 3 # 20 
channels = []
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE: when False the loader setup below is skipped, so train_loader,
# val_loader and test_loader must already exist in the notebook session
# (e.g. from an earlier run of this cell with load_data = True).
load_data = False

# Get the data
batch_size = 16
# The recorded run of this cell failed with
# "OSError: [Errno 12] Cannot allocate memory" right at the start of epoch 0,
# which is presumably where the DataLoader worker processes are started.
# Loading batches in the main process (num_workers=0) avoids creating them;
# raise this again only if the machine has memory to spare.
num_workers = 0
if load_data:
    # axes=(0,3,1,2) moves the last axis to position 1
    # (presumably channels-last -> channels-first; confirm against the .npy files).
    X_train = np.transpose(np.load('../Data/X_train.npy'), axes=(0,3,1,2))
    y_train = np.load('../Data/y_train.npy')
    X_test = np.transpose(np.load('../Data/X_test.npy'), axes=(0,3,1,2))
    y_test = np.load('../Data/y_test.npy')

    # X_val is carved out of the already-transposed X_train, so it must NOT be
    # transposed a second time (doing so would scramble its axes).
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=1)

    loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)
    train_ds = utils.MyDataset(X_train, y_train, norm=False)
    train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
    val_ds = utils.MyDataset(X_val, y_val, norm=False)
    val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)
    test_ds = utils.MyDataset(X_test, y_test, norm=False)
    test_loader = DataLoader(test_ds, shuffle=False, **loader_kwargs)
    utils.check_inputs(train_ds, train_loader, savefig=False, name=Net_name)

# Define model, optimizer, and transforms
model = utils.MyVGG16(num_classes=n_classes).to(device) # utils.ResNet(ResidualBlock, layers=[3, 4, 6, 3]).to(device)
learning_rate = 0.005 # 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.001, momentum=0.9)  
loss_fn = nn.CrossEntropyLoss()

# Train
print('Training:')
for epoch in range(num_epochs):
    print(f'\tEpoch {epoch}')

    # Train (not saving snapshot)
    utils.train(train_loader, model, loss_fn, optimizer, device)

    # check accuracy (utils.validate returns a 1-element sequence, hence the ", =")
    accuracy, = utils.validate(val_loader, model, device)
    print(f"\tGot validation accuracy {accuracy:.2f}")
    model.train() # set model back into train mode

# Save model 
model_path = f'../NN_storage/{Net_name}.pth'
torch.save(model.state_dict(), model_path)
print(f'Saving trained model as {Net_name}.pth')

# Load it back in and compute results on test set.
# The model is rebuilt exactly as it was for training (same constructor
# arguments, same device) so the saved state_dict matches, and map_location
# lets weights saved on a GPU be restored on a CPU-only machine.
model = utils.MyVGG16(num_classes=n_classes).to(device)
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval() # evaluation mode for the test pass
# Same call form as the validation step above, so the test pass runs on
# `device` and the single-element result is unpacked to a number.
accuracy, = utils.validate(test_loader, model, device)
print(f"Got test accuracy {accuracy:.2f}")



Training:
	Epoch 0


OSError: [Errno 12] Cannot allocate memory

In [None]:
# class VGG16(nn.Module):
#     def __init__(self, num_classes=10):
#         super(VGG16, self).__init__()
#         self.layer1 = nn.Sequential(
#             nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(64),
#             nn.ReLU())
#         self.layer2 = nn.Sequential(
#             nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(64),
#             nn.ReLU(), 
#             nn.MaxPool2d(kernel_size = 2, stride = 2))
#         self.layer3 = nn.Sequential(
#             nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(128),
#             nn.ReLU())
#         self.layer4 = nn.Sequential(
#             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(128),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size = 2, stride = 2))
#         self.layer5 = nn.Sequential(
#             nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(256),
#             nn.ReLU())
#         self.layer6 = nn.Sequential(
#             nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(256),
#             nn.ReLU())
#         self.layer7 = nn.Sequential(
#             nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(256),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size = 2, stride = 2))
#         self.layer8 = nn.Sequential(
#             nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(512),
#             nn.ReLU())
#         self.layer9 = nn.Sequential(
#             nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(512),
#             nn.ReLU())
#         self.layer10 = nn.Sequential(
#             nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(512),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size = 2, stride = 2))
#         self.layer11 = nn.Sequential(
#             nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(512),
#             nn.ReLU())
#         self.layer12 = nn.Sequential(
#             nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(512),
#             nn.ReLU())
#         self.layer13 = nn.Sequential(
#             nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(512),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size = 2, stride = 2))
#         self.fc = nn.Sequential(
#             nn.Dropout(0.5),
#             nn.Linear(7*7*512, 4096),
#             nn.ReLU())
#         self.fc1 = nn.Sequential(
#             nn.Dropout(0.5),
#             nn.Linear(4096, 4096),
#             nn.ReLU())
#         self.fc2= nn.Sequential(
#             nn.Linear(4096, num_classes))
        
#     def forward(self, x):
#         out = self.layer1(x)
#         out = self.layer2(out)
#         out = self.layer3(out)
#         out = self.layer4(out)
#         out = self.layer5(out)
#         out = self.layer6(out)
#         out = self.layer7(out)
#         out = self.layer8(out)
#         out = self.layer9(out)
#         out = self.layer10(out)
#         out = self.layer11(out)
#         out = self.layer12(out)
#         out = self.layer13(out)
#         out = out.reshape(out.size(0), -1)
#         out = self.fc(out)
#         out = self.fc1(out)
#         out = self.fc2(out)
#         return out

