In [1]:
import glob
import os
import timm 
import torch

# I'm adding this in
from torch.autograd import Variable

import numpy as np
import pandas as pd
from PIL import Image
import efficientnet.keras as efn

from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Subset
from torchvision import transforms, models, datasets
from torchvision.transforms import Compose, ToTensor, Resize
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import train_test_split
from sklearn import preprocessing


METADATA_SUBSET_PATH = "/Users/franceskoback/Documents/research/pytorch_1/metadata_100subset_df.csv"

def get_manufacturer_labels(encoder, target_variable = "(0008, 0070) Manufacturer"):
    df = pd.read_csv(METADATA_SUBSET_PATH)
    df["id"] = df["id"].astype("str").str.zfill(8)
    df["code"] = encoder.fit_transform(df[target_variable])
    
    return {row["id"]: row["code"] for i, row in df.iterrows()}

class CustomImageDataset(Dataset):
    def __init__(self):
        #self.img_dir = "/Users/franceskoback/Documents/research/pytorch_1/imagezz"
        self.img_dir = "/Users/franceskoback/Documents/research/pytorch_1/xray_subsets"

        self.images = glob.glob(os.path.join(self.img_dir, "*.npy")) 
        self.le = preprocessing.LabelEncoder()
        self.label_map = get_manufacturer_labels(self.le)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.fromarray(np.load(img_path)).convert("RGB")
        image = transforms.ToTensor()(image)
        xray_id = os.path.basename(img_path).replace(".npy", "")
        
        return {"image": image, "label": self.label_map[xray_id]}

def train_val_dataset(dataset, val_split=0.25):
    train_idx, val_idx = train_test_split(list(range(len(dataset))), test_size=val_split)
    datasets = {}
    datasets['train'] = Subset(dataset, train_idx)
    datasets['val'] = Subset(dataset, val_idx)
    return datasets

In [2]:
dataset = CustomImageDataset()
datasets = train_val_dataset(dataset)
print(len(datasets['train'].dataset)) #6
#datasets['train'].dataset, batch_size=params["batch_size"], shuffle=True
train_loader = DataLoader(
    datasets['train'].dataset, batch_size=3, shuffle=True
)
valid_loader = DataLoader(
    datasets['val'].dataset, batch_size=3, shuffle=True
)
print(len(train_loader.dataset)) #6 
len_train=len(datasets['train'])
len_val= len(datasets['val'])
print("Training length", len(datasets['train']))
print("Validation length", len(datasets['val']))

6
6
Training length 4
Validation length 2


In [6]:
def Net(num_classes):
    model = models.resnet50(pretrained=True)
    #model=timm.create_model(model,pretrained= True)
    #model = EfficientNet.from_pretrained('efficientnet-b0')
    #model= efn.EfficientNetB0(weights='imagenet')
    # Freeze parameters so we don't backprop through them
    for param in model.parameters():
        param.requires_grad = False

    from collections import OrderedDict
    classifier = nn.Sequential(OrderedDict([
                              ('fc1', nn.Linear(2048, 1024)),
                              ('relu', nn.ReLU()),
                              ('fc2', nn.Linear(1024, 256)),
                              ('relu', nn.ReLU()),
                              ('fc3', nn.Linear(256, num_classes)),
                              ('output', nn.LogSoftmax(dim=1))
                              ]))

    model.fc = classifier
    return model

params = {
    "model": "resnet50",
    #"device": "cuda",
    "lr": 0.001,
    "batch_size": 3, #64
    "num_workers": 1, #20
    "n_epochs": 50, #100
    "image_size": 224, 
    "in_channels": 3, #3
    "num_classes": 3, #12
    "device": "cpu"
}

model = Net(params['num_classes'])
model.to(params["device"])
#loss_fn = nn.NLLLoss() # we want MSE loss i think 
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = params['lr'])

In [24]:
def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, device = "cpu"):
    #put model in training state
    model.train()
    #i=0
    train_loss = 0.0
    

    for batch_idx, img_dicts in enumerate(train_loader,0):  #used to be enumerate(train_loader)
        inputs = img_dicts["image"] #ORIGINAL ONE
        labels = img_dicts["label"]  # ORIGINAL ONE
        
        inputs = Variable(inputs.to(device).float())
        labels = Variable(labels.to(device).float())
        #print(inputs.shape, labels.shape)
        
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad() # do we need this or not? sets all grads to None 

        # print statistics
        #running_loss += loss.item()
        #if i % 2000 == 1999:    # print every 2000 mini-batches
         #   print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            #running_loss=0
          #  i=i+1

        
        
        #
    
        train_loss+= ((1 / (batch_idx + 1)) * (loss.data.item() - train_loss))
        if batch_idx%5==0:
            print('train loss', train_loss)
    
    #print('Epoch {} avg Training loss: {:.3f}'.format(epoch+1, train_loss))
    
    return model, train_loss

def test_one_epoch(epoch, model, loss_fn, loader, len_val, device = "cpu"):
    model.eval()
    
    #pbar = tqdm(enumerate(test_loader), total = len(test_loader))
    running_loss = 0
    actual_labels = []
    pred_labels = []
    
    #for step, (imgs, labels) in pbar:
    for batch_idx, img_dicts in enumerate(loader,0):    
        inputs = img_dicts["image"] #ORIGINAL ONE
        labels = img_dicts["label"]  # ORIGINAL ONE
        
        inputs = Variable(inputs.to(device).float())
        labels = Variable(labels.to(device).float())
        
        log_preds = model(inputs)
        loss = loss_fn(log_preds, labels)
        
        preds = torch.exp(log_preds)
        running_loss+=((1 / (batch_idx + 1)) * (loss.data.item() - running_loss))
        
        #calculate accuracy
        top_prob, top_class = preds.topk(1, dim=1)
        pred_labels+= list((top_class.view(-1)).cpu().numpy())
        actual_labels+= list(labels.cpu().numpy())
        
        
    
    accuracy = ((np.array(pred_labels)==np.array(actual_labels)).sum())/len_val #size of test set
    correct = ((np.array(pred_labels)==np.array(actual_labels)).sum())
    total = len_val
    
    
    return running_loss, accuracy, correct, total

## Training Loop

In [25]:
train_losses = []
valid_losses = []

for epoch in range(params['n_epochs']):
    train_loss = train_one_epoch(epoch, model, loss_fn, optimizer, train_loader)
    train_losses+= [train_loss]
    valid_loss, accuracy, correct, total = test_one_epoch(epoch, model, loss_fn, valid_loader, len_val)
    valid_losses+=[valid_loss]
    print('Epoch {} avg Valid loss: {:.3f}'.format(epoch+1, valid_loss))
    print('Epoch {} Valid accuracy: {:.1%} ({} of {} right)\n'.format(epoch+1, accuracy, correct, total))
    if len(valid_losses)>1 and (valid_loss<min(valid_losses[:-1])):
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss_fn,
            }, 'checkpoint.tar')

    


train loss 10.457435607910156
Epoch 1 avg Valid loss: 10.429
Epoch 1 Valid accuracy: 50.0% (1 of 2 right)

train loss 6.120249271392822
Epoch 2 avg Valid loss: 10.388
Epoch 2 Valid accuracy: 50.0% (1 of 2 right)

train loss 13.340846061706543
Epoch 3 avg Valid loss: 10.411
Epoch 3 Valid accuracy: 50.0% (1 of 2 right)

train loss 10.504226684570312
Epoch 4 avg Valid loss: 10.351
Epoch 4 Valid accuracy: 100.0% (2 of 2 right)

train loss 10.633596420288086
Epoch 5 avg Valid loss: 10.448
Epoch 5 Valid accuracy: 100.0% (2 of 2 right)

train loss 12.187188148498535
Epoch 6 avg Valid loss: 10.362
Epoch 6 Valid accuracy: 100.0% (2 of 2 right)

train loss 3.3581321239471436
Epoch 7 avg Valid loss: 10.369
Epoch 7 Valid accuracy: 100.0% (2 of 2 right)

train loss 5.065195560455322
Epoch 8 avg Valid loss: 10.473
Epoch 8 Valid accuracy: 100.0% (2 of 2 right)

train loss 13.272343635559082
Epoch 9 avg Valid loss: 10.420
Epoch 9 Valid accuracy: 100.0% (2 of 2 right)

train loss 13.418072700500488
Epo

In [26]:
# load the model that got the best validation accuracy
checkpoint = torch.load('checkpoint.tar')
loaded_model = Net(params['num_classes'])
loaded_model.to(params["device"])
loaded_model.load_state_dict(checkpoint['model_state_dict'])

loaded_criterion = checkpoint['loss']

#optimizer = optim.Adam(model.parameters(), lr = 0.003)
#optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

last_epoch = checkpoint['epoch']+1

test_loss, accuracy, correct, total = test_one_epoch(None, loaded_model, loaded_criterion, valid_loader, len_val)

print('Test loss: {:.3f}'.format(test_loss))
print('Test accuracy: {:.1%} ({} of {} right)\n'.format(accuracy, correct, total))
    

Test loss: 10.360
Test accuracy: 100.0% (2 of 2 right)

