<a href="https://colab.research.google.com/github/alehpineda/xray_pneumonia_detection/blob/master/Week_04_Transfer_Learning_with_Skorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Transfer Learning with Skorch

Skorch makes it possible to use PyTorch with scikit-learn (sklearn).

[Skorch documentation](https://skorch.readthedocs.io/en/stable/)

[Skorch - Transferlearning tutorial](https://github.com/skorch-dev/skorch/blob/master/notebooks/Transfer_Learning.ipynb)

## Goals:

- Retrain the 'inception v3' image classifier on the pneumonia dataset using __Skorch__. 

- Create a jupyter notebook of your script and be sure to show the training process as well as 2 examples of it making predictions on images from the testing dataset. 

In [10]:
# Install torchvision and skorch
# The `[ ! -z "$COLAB_GPU" ]` guard lets pip run only when the COLAB_GPU
# environment variable is set to a non-empty value; otherwise nothing is installed.
# NOTE(review): pillow is pinned to 4.1.1 while torch, torchvision and skorch
# are unpinned — confirm the pin is still wanted.
! [ ! -z "$COLAB_GPU" ] && pip install torch torchvision pillow==4.1.1 skorch




In [0]:
# Import all the libraries
import os
from urllib import request
from zipfile import ZipFile
import gc, os, sys, shutil
import json

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from torchvision import datasets, models, transforms

from skorch import NeuralNetClassifier
from skorch.helper import predefined_split

torch.manual_seed(360);


In [12]:
# Load your kaggle key json
# Calls Colab's upload helper and keeps its return value in `uploaded`.
# The credentials file is expected to be named 'kaggle.json', the name opened later on.
from google.colab import files
uploaded = files.upload()


In [0]:
# Read the json
# Parse 'kaggle.json' from the working directory into `kaggle`;
# its 'username' and 'key' entries are read afterwards.
file_name = 'kaggle.json'
with open(file_name, 'r') as f:
    kaggle = json.load(f)


In [0]:
# Load your id from the kaggle.json
# Export the credentials as environment variables — presumably where the
# `kaggle` command-line tool looks for them (verify against the Kaggle API docs).
os.environ['KAGGLE_USERNAME'] = kaggle['username']
os.environ['KAGGLE_KEY'] = kaggle['key']


In [0]:
# Delete folders if exist
def delete_folder(folder_path, folder_name):
  """Recursively remove ``folder_name`` located inside ``folder_path``.

  Nothing happens when the joined path is missing or is not a directory.
  """
  target = os.path.join(folder_path, folder_name)
  # isdir() is already False for a path that does not exist
  if not os.path.isdir(target):
    return
  shutil.rmtree(target)

# Remove the '__MACOSX' and 'chest_xray' folders from the working directory
# (presumably leftovers of an earlier extraction) so the notebook can be re-run.
delete_folder('','__MACOSX')
delete_folder('','chest_xray')

# Delete file if exist
# NOTE(review): only 'chest_xray.zip' is removed here; 'chest-xray-pneumonia.zip',
# which the unzip cell also reads, is left in place — confirm that is intended.
try:
    os.remove('chest_xray.zip')
except OSError:
    # Best effort: any OSError (e.g. the archive is absent) is deliberately ignored
    pass


In [19]:
# Download the dataset
# NOTE(review): the output recorded for this cell is "404 - Not Found", so no
# archive was fetched in that run — confirm the dataset slug and the Kaggle credentials.
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia


404 - Not Found


In [17]:
# Unzip all the files
# Two archives are extracted in order; chest_xray.zip is presumably nested inside
# chest-xray-pneumonia.zip — TODO confirm.
# NOTE(review): the recorded output reports that neither archive could be found,
# so every later cell that reads ./chest_xray had no data in that run.
!unzip -q chest-xray-pneumonia.zip
!unzip -q chest_xray.zip


unzip:  cannot find or open chest-xray-pneumonia.zip, chest-xray-pneumonia.zip.zip or chest-xray-pneumonia.zip.ZIP.
unzip:  cannot find or open chest_xray.zip, chest_xray.zip.zip or chest_xray.zip.ZIP.


In [18]:
# auxiliary functions to move the pictures to new classes
# Rearrange the pneumonia pictures to create another 2 classes
# (PNEUMONIA_BACTERIA, PNEUMONIA_VIRUS)

# Function to create dir
def create(dirname, destpath):
    """Return the path ``destpath/dirname``, creating that directory when absent.

    Only the last path component is created (``os.mkdir``), so ``destpath``
    must already exist; otherwise ``os.mkdir`` raises.
    """
    target = os.path.join(destpath, dirname)
    # Create the directory only when it is not there yet
    if not os.path.isdir(target):
        os.mkdir(target)
    return target

def create_dir_mv_files(srcpath, destpath):
  """Redistribute the entries of ``srcpath`` into ``PNEUMONIA_<KIND>`` folders.

  ``<KIND>`` is the second ``_``-separated token of each entry name, upper-cased,
  so a name such as ``x_virus_1.jpeg`` is moved to ``destpath/PNEUMONIA_VIRUS``.
  Entries whose name has no second token are left where they are and reported;
  ``srcpath`` itself is removed only once it is empty.

  Args:
      srcpath: Directory whose entries are moved.
      destpath: Existing directory in which the class folders are created.

  Returns:
      0 when ``srcpath`` is not a directory (nothing is done), otherwise None.
  """
  # Check if srcpath exist
  if not os.path.isdir(srcpath):
    return 0

  # Delete ds_store
  try:
    os.remove(os.path.join(srcpath, '.DS_Store'))
  except OSError:
    print('.DS_Store already deleted')

  # Class folder path per upper-cased token, created on first use
  targets = {}
  skipped = []

  # Single pass over the source entries: the destination is derived from each
  # entry's own token, so tokens written in mixed case are moved as well.
  for filename in os.listdir(srcpath):
    tokens = filename.split('_')
    if len(tokens) < 2:
      # No class token in the name: keep the entry instead of raising IndexError
      skipped.append(filename)
      continue
    folder = 'PNEUMONIA_' + tokens[1].upper()
    if folder not in targets:
      targets[folder] = create(folder, destpath)
    shutil.move(os.path.join(srcpath, filename), targets[folder])

  if skipped:
    # os.rmdir only works on an empty directory, so srcpath is kept in this case
    print('{} entries without a class token were left in {}'.format(len(skipped), srcpath))
    return None

  os.rmdir(srcpath)

# Source PNEUMONIA folder and destination folder for each split
srcpath_train, destpath_train = './chest_xray/train/PNEUMONIA', './chest_xray/train/'
srcpath_test, destpath_test = './chest_xray/test/PNEUMONIA', './chest_xray/test/'
srcpath_val, destpath_val = './chest_xray/val/PNEUMONIA', './chest_xray/val/'

# Move the train set, then the test set, then the validation set
for _src, _dest in (
    (srcpath_train, destpath_train),
    (srcpath_test, destpath_test),
    (srcpath_val, destpath_val),
):
    create_dir_mv_files(_src, _dest)

# So that the Validation set has also 3 classes
create('PNEUMONIA_VIRUS', destpath_val)


FileNotFoundError: ignored

In [0]:
# Save the dir routes
# One path per split of the extracted dataset.
# NOTE(review): none of these three names is referenced again in the visible
# cells; the dataset cell builds its paths from `data_dir` instead.
train_dir = './chest_xray/train'
test_dir = './chest_xray/test'
val_dir = './chest_xray/val'

In [0]:
# Create the training and validation datasets
# Root folder of the extracted archive — the same './chest_xray' tree the
# preceding cells rearrange (the previous value './chest_ray' did not exist).
data_dir = './chest_xray'

# torchvision's Inception v3 expects 299x299 inputs, so images are cropped to
# 299 (evaluation images are first resized to 342 on the short side).
# The Normalize constants are the ImageNet channel means / standard deviations
# that the pretrained weights were trained with.

# Train: random crop + horizontal flip as augmentation
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(299),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])
# Validation: deterministic resize + center crop
val_transforms = transforms.Compose([
    transforms.Resize(342),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

train_ds = datasets.ImageFolder(
    os.path.join(data_dir, 'train'), train_transforms)
# NOTE(review): the validation dataset is read from the 'test' split, not 'val'
# — confirm this is intentional.
val_ds = datasets.ImageFolder(
    os.path.join(data_dir, 'test'), val_transforms)


In [0]:
# Load pretrained model
class PretrainedModel(nn.Module):
    """Pretrained Inception v3 whose final ``fc`` layer is replaced.

    Args:
        output_features: Number of units (classes) of the new final layer.
    """

    def __init__(self, output_features):
        super().__init__()
        model = models.inception_v3(pretrained=True)
        # Swap the classification head for one sized to this task
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, output_features)
        self.model = model

    def forward(self, x):
        """Return the main logits for the batch ``x`` as a single tensor."""
        out = self.model(x)
        # In training mode Inception v3 returns (logits, aux_logits); a loss such
        # as nn.CrossEntropyLoss needs a tensor, so keep only the main logits.
        if isinstance(out, tuple):
            out = out[0]
        return out


In [0]:
# Create a LRScheduler Callback
# 'StepLR' with step_size=7 and gamma=0.1: per PyTorch's StepLR, the learning
# rate is multiplied by gamma every step_size epochs.
from skorch.callbacks import LRScheduler

lrscheduler = LRScheduler(
    policy='StepLR', step_size=7, gamma=0.1)


In [0]:
# Create a Checkpoint callback
# Parameters are written to 'best_model.pt'. monitor='valid_acc_best' presumably
# limits saving to epochs where validation accuracy improved — see the skorch
# Checkpoint documentation.
from skorch.callbacks import Checkpoint

checkpoint = Checkpoint(
    f_params='best_model.pt', monitor='valid_acc_best')


In [0]:
# Create a Freezer to freeze all the layers besides the final layer, named model.fc
# The predicate is given a name and is True for every name that does not start
# with 'model.fc', i.e. everything except the replaced final layer is selected.
from skorch.callbacks import Freezer

freezer = Freezer(lambda x: not x.startswith('model.fc'))


In [0]:
# Define the NeuralNetClassifier
net = NeuralNetClassifier(
    PretrainedModel,
    criterion=nn.CrossEntropyLoss,
    lr=0.001,
    batch_size=64,
    max_epochs=10,
    # Forwarded to PretrainedModel(output_features=3): one unit per class folder
    module__output_features=3,
    optimizer=optim.SGD,
    optimizer__momentum=0.9,
    iterator_train__shuffle=True,
    iterator_train__num_workers=4,
    iterator_valid__shuffle=True,
    iterator_valid__num_workers=4,
    # Validate on the fixed val_ds instead of splitting the training data
    train_split=predefined_split(val_ds),
    callbacks=[lrscheduler, checkpoint, freezer],
    # Use the GPU when one is present, otherwise fall back to the CPU
    device='cuda' if torch.cuda.is_available() else 'cpu'
)


In [0]:
# Fit the model
# y=None: the targets are presumably taken from train_ds itself, which yields
# (image, label) pairs — see the skorch dataset documentation.
# The trailing semicolon suppresses the cell's output repr.
net.fit(train_ds, y=None);