In [None]:
!pip install pretrainedmodels

Collecting pretrainedmodels
[?25l  Downloading https://files.pythonhosted.org/packages/84/0e/be6a0e58447ac16c938799d49bfb5fb7a80ac35e137547fc6cee2c08c4cf/pretrainedmodels-0.7.4.tar.gz (58kB)
[K     |█████▋                          | 10kB 24.5MB/s eta 0:00:01[K     |███████████▏                    | 20kB 1.7MB/s eta 0:00:01[K     |████████████████▊               | 30kB 2.3MB/s eta 0:00:01[K     |██████████████████████▎         | 40kB 2.5MB/s eta 0:00:01[K     |███████████████████████████▉    | 51kB 2.0MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 1.8MB/s 
Collecting munch
  Downloading https://files.pythonhosted.org/packages/cc/ab/85d8da5c9a45e072301beb37ad7f833cd344e04c817d97e0cc75681d248f/munch-2.5.0-py2.py3-none-any.whl
Building wheels for collected packages: pretrainedmodels
  Building wheel for pretrainedmodels (setup.py) ... [?25l[?25hdone
  Created wheel for pretrainedmodels: filename=pretrainedmodels-0.7.4-cp36-none-any.whl size=60962 sha256=3d53c

In [None]:
import numpy as np
import torch
import torchvision
from torchvision import datasets, models, transforms
import torch.utils.data as data
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import time, os, argparse
from copy import copy , deepcopy
import multiprocessing
from torchsummary import summary
from matplotlib import pyplot as plt
from fastai.vision import *

import pretrainedmodels

import warnings
# Silences every warning for the rest of the session (deprecation notices from
# the imported libraries included).
warnings.filterwarnings('ignore')

import ssl
# SECURITY: swaps ssl's default HTTPS context factory for the unverified one, so
# certificate verification is skipped for HTTPS requests that use that default.
# NOTE(review): presumably added to get pretrained-weight downloads working —
# confirm it is still needed before keeping it.
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
# Root folder of the plant-pathology data; the CSV files and the train/valid/test
# image folders used below are all resolved relative to it.
# NOTE(review): absolute path under /content/drive — presumably a mounted Google Drive; confirm.
path = Path('/content/drive/My Drive/data/plant_pathology/')

In [None]:
# Read the training label table (train.csv in the data folder).
# NOTE(review): `pd` is never imported explicitly in this notebook; presumably it
# arrives through `from fastai.vision import *` — confirm.
df = pd.read_csv(path/'train.csv')
# Last expression of the cell: display the first rows.
df.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Train_0,0,0,0,1
1,Train_1,0,1,0,0
2,Train_2,1,0,0,0
3,Train_3,0,0,1,0
4,Train_4,1,0,0,0


In [None]:
# Image folders for the three splits, all located directly under `path`.
# The test images have to sit inside an extra sub-folder of `test`.
train_directory, valid_directory, test_directory = (
    path/split_name for split_name in ('train', 'valid', 'test')
)

In [None]:
# Run configuration:
#   bs          - samples per mini-batch
#   num_epochs  - number of epochs
#   num_classes - number of target classes
bs, num_epochs, num_classes = 32, 8, 4
# One data-loading worker per available CPU core.
num_cpu = multiprocessing.cpu_count()

In [None]:
# Applying transforms to the data

# Side length (pixels) of the square images handed to the network.
# NOTE(review): 122 is well below the input sizes the pretrainedmodels
# architectures are published with — confirm this value is intended.
image_size = 122

# Per-channel mean / std normalisation, applied after ToTensor() in both pipelines.
normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])

image_transforms = {
    # Training: random augmentation. RandomResizedCrop already yields an
    # image_size x image_size image and rotation/flip keep that size, so the
    # CenterCrop that used to follow them was a no-op and has been dropped.
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size=image_size, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]),
    # Evaluation: deterministic resize of the shorter side, then a centre crop.
    'valid': transforms.Compose([
        transforms.Resize(size=image_size),
        transforms.CenterCrop(size=image_size),
        transforms.ToTensor(),
        normalize,
    ]),
}

In [None]:
# One ImageFolder per split. Each entry maps split -> (image root, transform key);
# the test split reuses the deterministic 'valid' pipeline.
split_sources = {
    'train': (train_directory, 'train'),
    'valid': (valid_directory, 'valid'),
    'test': (test_directory, 'valid'),
}
dataset = {
    split_name: datasets.ImageFolder(root=root_dir, transform=image_transforms[pipeline])
    for split_name, (root_dir, pipeline) in split_sources.items()
}

In [None]:
# Number of samples in each split, keyed like `dataset`.
dataset_sizes = {
    split_name: len(dataset[split_name])
    for split_name in ('train', 'valid', 'test')
}

In [None]:
# Batch iterators for every split; only the training split is shuffled.
dataloaders = {
    split_name: data.DataLoader(
        dataset[split_name],
        batch_size=bs,
        shuffle=(split_name == 'train'),
        num_workers=num_cpu,
    )
    for split_name in ('train', 'valid', 'test')
}

In [None]:
# Target labels as discovered by ImageFolder for the training split.
class_names = dataset['train'].classes
print(f"Classes: {class_names}")

Classes: ['healthy', 'multiple', 'rust', 'scab']


In [None]:
# Report how many samples each split contains.
size_report = (
    f"Training-set size: {dataset_sizes['train']} "
    f"\nValidation-set size: {dataset_sizes['valid']} "
    f"\nTest-set size: {dataset_sizes['test']}"
)
print(size_report)

Training-set size: 1457 
Validation-set size: 364 
Test-set size: 1821


In [None]:
# Prefer the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Load an ImageNet-pretrained CaffeResNet-101 from the pretrainedmodels package
#https://github.com/Cadene/pretrained-models.pytorch#torchvision
# NOTE(review): pretrainedmodels publishes the preprocessing each network expects
# (model.input_size / input_space / input_range / mean / std) — confirm that
# `image_transforms` matches what this architecture was trained with.
model = pretrainedmodels.cafferesnet101(num_classes=1000,pretrained='imagenet')
# Width of the features feeding the stock 1000-class classifier.
in_features = model.last_linear.in_features
# Replace the classifier with a fresh head that has one output per target class.
model.last_linear = nn.Linear(in_features, num_classes)
# Test few more pretrained model - PolyNet, InceptionV4

Downloading: "http://data.lip6.fr/cadene/pretrainedmodels/cafferesnet101-9d633cc0.pth" to /root/.cache/torch/checkpoints/cafferesnet101-9d633cc0.pth


HBox(children=(FloatProgress(value=0.0, max=178691422.0), HTML(value='')))




In [None]:
# Multi-class classification loss applied to the raw network outputs.
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# SGD with momentum over all parameters of the current `model`.
# The optimizer keeps references to exactly these parameters.
optimizer = optim.SGD(params=model.parameters(), lr=1e-3, momentum=0.9)

In [None]:
# Learning-rate decay: multiply the rate by 0.1 after every 7 scheduler steps.
step_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

A typical PyTorch pipeline consists of 3 steps:



1.   Design model (input & output size, forward pass)
2.   Construct loss & optimizer
3.   Training loop
  *   Forward pass: compute prediction
  *   Backward pass: gradients
  *   update weights (iterate until we're done)

In [None]:
import copy
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Fine-tune ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' pass followed by a 'valid' pass over the
    notebook-level ``dataloaders``; per-sample averages use the notebook-level
    ``dataset_sizes``.

    Args:
        model: network to train (updated in place). Batches are moved to the
            device its parameters live on, so placement is the caller's choice.
        criterion: loss, called as ``criterion(outputs, labels)``.
        optimizer: stepped once per training batch; must hold parameters of
            ``model``.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training pass.
        num_epochs: number of train + valid epochs to run.

    Returns:
        ``model`` carrying the weights of the epoch with the highest
        validation accuracy.

    Raises:
        ValueError: if ``optimizer`` holds none of ``model``'s parameters.
    """
    since = time.time()

    # An optimizer built for a different model would turn every step() into a
    # silent no-op for this one, so refuse to start in that situation.
    model_param_ids = {id(p) for p in model.parameters()}
    optimizer_param_ids = {id(p) for group in optimizer.param_groups for p in group['params']}
    if model_param_ids and not (model_param_ids & optimizer_param_ids):
        raise ValueError("optimizer does not hold any parameter of the model being trained; "
                         "create the optimizer from this model's parameters")

    # Follow the model's placement instead of assuming CPU tensors.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0  # plain int, so an empty loader cannot break the maths below

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(model_device)
                labels = labels.to(model_device)

                # forward; gradients are tracked only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics: the loss is a batch mean, so weight it by batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            # Python float, so it prints as a bare number and compares cleanly.
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss} Acc: {epoch_acc}')

            # remember the weights of the best validation epoch so far
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print() #empty line

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60} min, {time_elapsed % 60} secs')
    print(f'Best valid Acc: {best_acc}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Fine-tune the current `model` for 30 epochs.
# The output recorded for this cell stops in epoch 1 with a RuntimeError.
# NOTE(review): possibly the small input crops are incompatible with this
# architecture's pooling layers — confirm against the full traceback.
train_model(model, criterion, optimizer, scheduler=step_lr_scheduler, num_epochs=30)

Epoch 1/30
----------


RuntimeError: ignored

In [None]:
# Load the sample submission; its class columns are overwritten with predictions below.
submissions_df = pd.read_csv(path/'sample_submission.csv')

In [None]:
def prediction(model,dataloader):
    """Run ``model`` over ``dataloader`` and return all raw outputs on the CPU.

    Args:
        model: network to evaluate; it is switched to eval mode.
        dataloader: iterable of ``(images, labels)`` batches; labels are ignored.

    Returns:
        A tensor with the per-batch outputs concatenated along dim 0, in
        iteration order, or ``None`` when ``dataloader`` yields no batch.
    """
    model.eval()
    # Feed each batch on the device the model's parameters live on.
    first_param = next(model.parameters(), None)
    batch_outputs = []

    with torch.no_grad():
        for image, _ in dataloader:
            if first_param is not None:
                image = image.to(first_param.device)
            batch_outputs.append(model(image).cpu())

    if not batch_outputs:
        return None
    # Concatenate once at the end instead of re-copying the result on every batch.
    return torch.cat(batch_outputs, dim=0)


In [None]:
# Raw (pre-softmax) outputs of `model` for every test image, in loader order.
test_pred = prediction(model=model, dataloader=dataloaders['test'])

In [None]:
# ImageFolder lists files in lexicographic order (Test_0, Test_1, Test_10, ...),
# which need not be the row order of the submission table, so the probabilities
# are matched to rows by image id rather than by position.
# Assumes sample_submission.csv has an `image_id` column holding the test file
# names without extension — TODO confirm.
label_columns = ['healthy', 'multiple_diseases', 'rust', 'scab']
test_image_ids = [os.path.splitext(os.path.basename(file_path))[0]
                  for file_path, _ in dataset['test'].samples]
test_probs = pd.DataFrame(torch.softmax(test_pred, dim=1).numpy(),
                          index=test_image_ids, columns=label_columns)
# .loc raises KeyError if a submission row has no matching test image.
submissions_df[label_columns] = test_probs.loc[submissions_df['image_id']].values

In [None]:
# Name the file after the network held in `model` (cafferesnet101). The previous
# name, "pytorch_polynet_30epochs.csv", is also written by the PolyNet cell
# further down, which would overwrite this submission.
submissions_df.to_csv(path/"pytorch_cafferesnet101_30epochs.csv",index=False)
#Accuracy 52.09%
#train more epochs, unfreeze + train
#try another models

In [None]:
# Load an ImageNet-pretrained PolyNet from the pretrainedmodels package
#https://github.com/Cadene/pretrained-models.pytorch#torchvision
# NOTE(review): pretrainedmodels publishes the preprocessing each network expects
# (model.input_size / mean / std) — confirm `image_transforms` is compatible.
model_polynet = pretrainedmodels.polynet(num_classes=1000,pretrained='imagenet')
# Width of the features feeding the stock 1000-class classifier.
in_features = model_polynet.last_linear.in_features
# Replace the classifier with a fresh head that has one output per target class.
model_polynet.last_linear = nn.Linear(in_features, num_classes)
# Test few more pretrained model - PolyNet, InceptionV4

In [None]:
# Train the PolyNet that is used for prediction further down. An optimizer only
# updates the parameters it was constructed with, so this model gets its own
# optimizer and scheduler instead of reusing the ones created for `model`.
optimizer_polynet = optim.SGD(model_polynet.parameters(), lr=0.001, momentum=0.9)
scheduler_polynet = lr_scheduler.StepLR(optimizer_polynet, step_size=7, gamma=0.1)
model_polynet = train_model(model_polynet, criterion, optimizer_polynet, scheduler_polynet, 30)

In [None]:
# Run 4 more epochs on whatever `model` currently is, with the shared optimizer/scheduler.
# NOTE(review): the output recorded for this cell ends with an Xception(...) repr,
# yet in notebook order the Xception model is only created in a later cell — the
# recorded run apparently came from out-of-order execution; confirm which model
# this cell is meant to train.
train_model(model, criterion, optimizer, scheduler=step_lr_scheduler, num_epochs=4)

Epoch 1/4
----------
train Loss: 1.4007999069001793 Acc: 0.21002059025394645
valid Loss: 1.425513431266114 Acc: 0.14835164835164835

Epoch 2/4
----------
train Loss: 1.3994295453894359 Acc: 0.2189430336307481
valid Loss: 1.420750202713432 Acc: 0.15934065934065933

Epoch 3/4
----------
train Loss: 1.3967338881430433 Acc: 0.21413864104323954
valid Loss: 1.4091647941987593 Acc: 0.20054945054945056

Epoch 4/4
----------
train Loss: 1.3988279523142577 Acc: 0.2052161976664379
valid Loss: 1.4210141216005598 Acc: 0.15384615384615385

Training complete in 23.0 min, 6.840906858444214 secs
Best valid Acc: 0.20054945054945056


Xception(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (block1): Block(
    (skip): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
    (skipbn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (rep): Sequential(
      (0): SeparableConv2d(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (pointwise): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): SeparableConv2d(
        (conv1): Co

In [None]:
# Raw outputs of the PolyNet model for every test image, in loader order.
test_pred = prediction(model_polynet,dataloaders['test'])

# ImageFolder lists files in lexicographic order (Test_0, Test_1, Test_10, ...),
# which need not be the row order of the submission table, so the probabilities
# are matched to rows by image id rather than by position.
# Assumes sample_submission.csv has an `image_id` column holding the test file
# names without extension — TODO confirm.
label_columns = ['healthy', 'multiple_diseases', 'rust', 'scab']
test_image_ids = [os.path.splitext(os.path.basename(file_path))[0]
                  for file_path, _ in dataset['test'].samples]
test_probs = pd.DataFrame(torch.softmax(test_pred, dim=1).numpy(),
                          index=test_image_ids, columns=label_columns)
# .loc raises KeyError if a submission row has no matching test image.
submissions_df[label_columns] = test_probs.loc[submissions_df['image_id']].values

submissions_df.to_csv(path/"pytorch_polynet_30epochs.csv",index=False)

In [None]:
# Load an ImageNet-pretrained Xception from the pretrainedmodels package
#https://github.com/Cadene/pretrained-models.pytorch#torchvision
# NOTE(review): pretrainedmodels publishes the preprocessing each network expects
# (model.input_size / mean / std) — confirm `image_transforms` is compatible.
model = pretrainedmodels.xception(num_classes=1000,pretrained='imagenet')
# Width of the features feeding the stock 1000-class classifier.
in_features = model.last_linear.in_features
# Replace the classifier with a fresh head that has one output per target class.
model.last_linear = nn.Linear(in_features, num_classes)
# `model` now names a different network, so rebuild the optimizer and scheduler:
# the previous ones still reference the old network's parameters and would leave
# this one untouched when stepped.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
# The new head is randomly initialised: run train_model on `model` before predicting.
# Test few more pretrained model - PolyNet, InceptionV4

Downloading: "http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth" to /root/.cache/torch/checkpoints/xception-43020ad28.pth


HBox(children=(FloatProgress(value=0.0, max=91675053.0), HTML(value='')))




In [None]:
def prediction(model,dataloader):
    """Run ``model`` over ``dataloader`` and return all raw outputs on the CPU.

    Args:
        model: network to evaluate; it is switched to eval mode.
        dataloader: iterable of ``(images, labels)`` batches; labels are ignored.

    Returns:
        A tensor with the per-batch outputs concatenated along dim 0, in
        iteration order, or ``None`` when ``dataloader`` yields no batch.
    """
    model.eval()
    # Feed each batch on the device the model's parameters live on.
    first_param = next(model.parameters(), None)
    batch_outputs = []

    with torch.no_grad():
        for image, _ in dataloader:
            if first_param is not None:
                image = image.to(first_param.device)
            batch_outputs.append(model(image).cpu())

    if not batch_outputs:
        return None
    # Concatenate once at the end instead of re-copying the result on every batch.
    return torch.cat(batch_outputs, dim=0)

# Raw outputs of the current `model` for every test image, in loader order.
test_pred = prediction(model,dataloaders['test'])

# ImageFolder lists files in lexicographic order (Test_0, Test_1, Test_10, ...),
# which need not be the row order of the submission table, so the probabilities
# are matched to rows by image id rather than by position.
# Assumes sample_submission.csv has an `image_id` column holding the test file
# names without extension — TODO confirm.
label_columns = ['healthy', 'multiple_diseases', 'rust', 'scab']
test_image_ids = [os.path.splitext(os.path.basename(file_path))[0]
                  for file_path, _ in dataset['test'].samples]
test_probs = pd.DataFrame(torch.softmax(test_pred, dim=1).numpy(),
                          index=test_image_ids, columns=label_columns)
# .loc raises KeyError if a submission row has no matching test image.
submissions_df[label_columns] = test_probs.loc[submissions_df['image_id']].values

# File name follows the network held in `model` (Xception, per the cell above),
# not the "resnet18" of the previous name.
submissions_df.to_csv(path/"pytorch_xception.csv",index=False)