In [1]:
import glob
import os
import timm 
import torch
from torch.autograd import Variable

import numpy as np
import pandas as pd
from PIL import Image
import efficientnet.keras as efn

from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Subset
from torchvision import transforms, models, datasets
from torchvision.transforms import Compose, ToTensor, Resize
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import train_test_split
from sklearn import preprocessing


METADATA_SUBSET_PATH = "/Users/franceskoback/Documents/research/pytorch_1/metadata_100subset_df.csv"

def get_manufacturer_labels(encoder, target_variable = "(0008, 0070) Manufacturer"):
    df = pd.read_csv(METADATA_SUBSET_PATH)
    df["id"] = df["id"].astype("str").str.zfill(8)
    df["code"] = encoder.fit_transform(df[target_variable])
    
    return {row["id"]: row["code"] for i, row in df.iterrows()}

class CustomImageDataset(Dataset):
    def __init__(self):
        self.img_dir = "/Users/franceskoback/Documents/research/pytorch_1/xray_subsets"

        self.images = glob.glob(os.path.join(self.img_dir, "*.npy")) 
        self.le = preprocessing.LabelEncoder()
        self.label_map = get_manufacturer_labels(self.le)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.fromarray(np.load(img_path)).convert("RGB")
        image = transforms.ToTensor()(image)
        xray_id = os.path.basename(img_path).replace(".npy", "")
        
        return {"image": image, "label": self.label_map[xray_id]}

def train_val_test_dataset(dataset, val_split=0.20):
    train_idx, rem_idx = train_test_split(list(range(len(dataset))), test_size=val_split)

    Test_size=0.5 # split equally between validataion and test sets 

    val_idx, test_idx  = train_test_split(list(range(len(rem_idx))), test_size=Test_size)



    datasets = {}
    datasets['train'] = Subset(dataset, train_idx)
    datasets['val'] = Subset(dataset, val_idx)
    datasets['test'] = Subset(dataset, test_idx)
    return datasets

In [2]:
dataset = CustomImageDataset()
datasets = train_val_test_dataset(dataset)
print(len(datasets['train'].dataset)) #6
#datasets['train'].dataset, batch_size=params["batch_size"], shuffle=True
train_loader = DataLoader(
    datasets['train'].dataset, batch_size=3, shuffle=True
)
valid_loader = DataLoader(
    datasets['val'].dataset, batch_size=3, shuffle=True
)
test_loader = DataLoader(
    datasets['test'].dataset, batch_size=3, shuffle=True
)
print(len(train_loader.dataset)) #6 
len_train=len(datasets['train'])
len_val= len(datasets['val'])
len_test= len(datasets['test'])
print("Training length", len(datasets['train']))
print("Validation length", len(datasets['val']))
print("Testing length", len(datasets['test']))

100
100
Training length 80
Validation length 10
Testing length 10


In [3]:
def Net(num_classes):
    model = models.resnet50(pretrained=True)
    
    # Freeze parameters so we don't backprop through them
    for param in model.parameters():
        param.requires_grad = False

    from collections import OrderedDict
    classifier = nn.Sequential(OrderedDict([
                              ('fc1', nn.Linear(2048, 1024)),
                              ('relu', nn.ReLU()),
                              ('fc2', nn.Linear(1024, 256)),
                              ('relu', nn.ReLU()),
                              ('fc3', nn.Linear(256, num_classes)),
                              ('output', nn.LogSoftmax(dim=1))
                              ]))

    model.fc = classifier
    return model

params = {
    "model": "resnet50",
    #"device": "cuda",
    "lr": 0.001,
    "batch_size": 3, #64
    "num_workers": 1, #20
    "n_epochs": 50, #100
    "image_size": 224, 
    "in_channels": 3, #3
    "num_classes": 3, #12
    "device": "cpu"
}

model = Net(params['num_classes'])
model.to(params["device"])
#loss_fn = nn.NLLLoss() 
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = params['lr'])



In [7]:
def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, device = "cpu"):
    #put model in training state
    model.train()
    train_loss = 0.0
    

    for batch_idx, img_dicts in enumerate(train_loader,0):  
        inputs = img_dicts["image"] 
        labels = img_dicts["label"]  
        
        inputs = Variable(inputs.to(device).float())
        labels = Variable(labels.to(device).float())
        
        
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad() # sets all grads to None 

        # print statistics
        #running_loss += loss.item()
        #if i % 2000 == 1999:    # print every 2000 mini-batches
         #   print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            #running_loss=0
          #  i=i+1

        
        
        #
    
        train_loss+= ((1 / (batch_idx + 1)) * (loss.data.item() - train_loss))
        if batch_idx%5==0:
            print('train loss', train_loss)
    
    print('Epoch {} avg Training loss: {:.3f}'.format(epoch+1, train_loss))
    
    return model, train_loss

def test_one_epoch(epoch, model, loss_fn, loader, len_val, device = "cpu"):
    model.eval()
    
    #pbar = tqdm(enumerate(test_loader), total = len(test_loader))
    running_loss = 0
    actual_labels = []
    pred_labels = []
    
    #for step, (imgs, labels) in pbar:
    for batch_idx, img_dicts in enumerate(loader,0):    
        inputs = img_dicts["image"] 
        labels = img_dicts["label"] 
        
        inputs = Variable(inputs.to(device).float())
        labels = Variable(labels.to(device).float())
        
        log_preds = model(inputs)
        loss = loss_fn(log_preds, labels)
        
        preds = torch.exp(log_preds)
        running_loss+=((1 / (batch_idx + 1)) * (loss.data.item() - running_loss))
        
        #calculate accuracy
        top_prob, top_class = preds.topk(1, dim=1)
        pred_labels+= list((top_class.view(-1)).cpu().numpy())
        actual_labels+= list(labels.cpu().numpy())
        
        
    
    accuracy = ((np.array(pred_labels)==np.array(actual_labels)).sum())/np.array(actual_labels).size #size of test set
    correct = ((np.array(pred_labels)==np.array(actual_labels)).sum())
    total = np.array(actual_labels).size
    
    
    return running_loss, accuracy, correct, total

## Training Loop

In [8]:
train_losses = []
valid_losses = []

for epoch in range(params['n_epochs']):
    train_loss = train_one_epoch(epoch, model, loss_fn, optimizer, train_loader)
    train_losses+= [train_loss]
    valid_loss, accuracy, correct, total = test_one_epoch(epoch, model, loss_fn, valid_loader, len_val)
    valid_losses+=[valid_loss]
    print('Epoch {} avg Valid loss: {:.3f}'.format(epoch+1, valid_loss))
    print('Epoch {} Valid accuracy: {:.1%} ({} of {} right)\n'.format(epoch+1, accuracy, correct, total))
    if len(valid_losses)>1 and (valid_loss<min(valid_losses[:-1])):
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss_fn,
            }, 'checkpoint.tar')

    


  return F.mse_loss(input, target, reduction=self.reduction)


train loss 10.60793685913086
train loss 40.53252601623535
train loss 30.5295701677149
train loss 35.41282321512699
train loss 31.946093025661654
train loss 30.82361612870143
train loss 32.510853267485096


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1 avg Training loss: 31.430
Epoch 1 avg Valid loss: 31.409
Epoch 1 Valid accuracy: 7.0% (7 of 10 right)

train loss 44.95121765136719
train loss 35.008849779764816
train loss 32.57229449532249
train loss 33.6308827996254
train loss 33.07962376730782
train loss 32.16398428036616
train loss 32.17995156011273
Epoch 2 avg Training loss: 32.952
Epoch 2 avg Valid loss: 31.827
Epoch 2 Valid accuracy: 7.0% (7 of 10 right)

train loss 76.69700622558594
train loss 37.7627002398173
train loss 32.31963539123535
train loss 34.810983300209045
train loss 33.83393788337707
train loss 30.850416889557465
train loss 30.734654003574
Epoch 3 avg Training loss: 32.940
Epoch 3 avg Valid loss: 31.401
Epoch 3 Valid accuracy: 7.0% (7 of 10 right)

train loss 37.65026092529297
train loss 31.745958646138508
train loss 30.26412864164872
train loss 31.627083897590634
train loss 32.8355773062933
train loss 32.766398888367874
train loss 32.200712896162464
Epoch 4 avg Training loss: 31.414
Epoch 4 avg Valid loss

KeyboardInterrupt: 

In [19]:
# load the model that got the best validation accuracy
checkpoint = torch.load('checkpoint.tar')
loaded_model = Net(params['num_classes'])
loaded_model.to(params["device"])
loaded_model.load_state_dict(checkpoint['model_state_dict'])

loaded_criterion = checkpoint['loss']

#optimizer = optim.Adam(model.parameters(), lr = 0.003)
#optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

last_epoch = checkpoint['epoch']+1

test_loss, accuracy, correct, total = test_one_epoch(None, loaded_model, loaded_criterion, test_loader, len_val)

print('Test loss: {:.3f}'.format(test_loss))
print('Test accuracy: {:.1%} ({} of {} right)\n'.format(accuracy, correct, total))
    

Test loss: 31.823
Test accuracy: 70.0% (7 of 10 right)

