<a href="https://colab.research.google.com/github/aysunakarsu/kaggle/blob/master/Kaggle_oxford_102_flower_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook demonstrates how to test a flower classifier and produce a submission.csv file for the Kaggle competition 
https://www.kaggle.com/c/oxford-102-flower-pytorch. The notebook can be opened directly in Google Colab. The classifier is trained elsewhere; in this notebook it is loaded from a checkpoint saved on Google Drive. The Kaggle competition uses the same datasets as the final project of the Udacity Facebook PyTorch Challenge. The data originally comes from http://www.robots.ox.ac.uk/~vgg/data/flowers/102/. There are 102 flower categories, and the test dataset contains 819 test images. For this competition, the labels of the test dataset are not provided.

In [2]:

# Install the PyTorch 0.4.1 wheel together with torchvision.
# NOTE(review): IPython interpolates {accelerator} and {platform} from Python variables,
# but no visible cell defines them -- confirm they are set before this cell runs on a
# fresh kernel.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision


tcmalloc: large alloc 1073750016 bytes == 0x585f8000 @  0x7f5a8af902a4 0x591a07 0x5b5d56 0x502e9a 0x506859 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x504c28 0x502540 0x502f3d 0x507641


In [3]:
!pip install --no-cache-dir -I pillow
%matplotlib inline
import time
import os
import json
import copy
from PIL import Image
from collections import OrderedDict
import torch
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import datasets, models, transforms
from google.colab import files
import numpy as np
import pandas as pd

Collecting pillow
[?25l  Downloading https://files.pythonhosted.org/packages/85/5e/e91792f198bbc5a0d7d3055ad552bc4062942d27eaf75c3e2783cf64eae5/Pillow-5.4.1-cp36-cp36m-manylinux1_x86_64.whl (2.0MB)
[K    100% |████████████████████████████████| 2.0MB 46.2MB/s 
[?25hInstalling collected packages: pillow
Successfully installed pillow-5.4.1


In [13]:
import PIL
# Report the Pillow version the kernel actually imported. PIL.__version__ is used
# because the PILLOW_VERSION constant is deprecated and absent from newer releases.
# NOTE(review): the recorded output shows a different version from the one pip
# installed in the previous cell -- presumably the runtime has to be restarted for
# the new install to be picked up; confirm.
print(PIL.__version__)

4.0.0


In [0]:
# Install the Kaggle command-line client, used further down to download the competition data.
!pip install -q kaggle

In [0]:
# Create the directory that the kaggle.json credentials file is copied into (-p: no error if it exists).
!mkdir -p ~/.kaggle

In [0]:
# Copy the Kaggle API token into place; kaggle.json must already be in the working directory.
!cp kaggle.json ~/.kaggle/
# Restrict the token to owner read/write. The same ~/.kaggle path as the copy above is
# used so the two commands cannot drift apart when the home directory is not /root.
!chmod 600 ~/.kaggle/kaggle.json

In [7]:
# Download the competition files into the current working directory.
!kaggle competitions download -c oxford-102-flower-pytorch

Downloading sample_submission.csv to /content
  0% 0.00/15.2k [00:00<?, ?B/s]
100% 15.2k/15.2k [00:00<00:00, 13.3MB/s]
Downloading flower_data.zip to /content
100% 329M/330M [00:13<00:00, 33.4MB/s]
100% 330M/330M [00:13<00:00, 25.0MB/s]


In [8]:
# Extract the dataset. -o overwrites existing files without prompting, so re-running
# this cell does not stall on an interactive "replace?" question.
!unzip -o flower_data.zip

Archive:  flower_data.zip
  inflating: flower_data/cat_to_name.json  
   creating: flower_data/test/
  inflating: flower_data/test/image_00005.jpg  
  inflating: flower_data/test/image_00006.jpg  
  inflating: flower_data/test/image_00024.jpg  
  inflating: flower_data/test/image_00025.jpg  
  inflating: flower_data/test/image_00029.jpg  
  inflating: flower_data/test/image_00050.jpg  
  inflating: flower_data/test/image_00060.jpg  
  inflating: flower_data/test/image_00065.jpg  
  inflating: flower_data/test/image_00092.jpg  
  inflating: flower_data/test/image_00099.jpg  
  inflating: flower_data/test/image_00114.jpg  
  inflating: flower_data/test/image_00116.jpg  
  inflating: flower_data/test/image_00132.jpg  
  inflating: flower_data/test/image_00176.jpg  
  inflating: flower_data/test/image_00177.jpg  
  inflating: flower_data/test/image_00187.jpg  
  inflating: flower_data/test/image_00191.jpg  
  inflating: flower_data/test/image_00202.jpg  
  inflating: flower_data/test/image

In [0]:
# Create the sub-folder that will hold the test images. -p makes the cell safe to re-run
# when the directory already exists.
!mkdir -p ./flower_data/test/testdir

In [0]:
# Copy every test image into the testdir sub-folder.
# Presumably this is because the ImageFolder-based test dataset built later expects images
# to sit inside a sub-directory of ./flower_data/test -- confirm against torchvision docs.
!cp ./flower_data/test/*.jpg ./flower_data/test/testdir/

In [12]:
# Check once whether CUDA is usable and report the outcome.
train_on_gpu = torch.cuda.is_available()
print('Yay! Training on GPU ...' if train_on_gpu
      else 'Not your lucky day! Training on CPU ...')

Yay! Training on GPU ...


In [0]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [0]:
# Install PyDrive and fetch the colab_util.py helper from a gist into the working directory.
# NOTE(review): no visible cell imports colab_util or PyDrive (Drive is mounted via
# google.colab.drive instead) -- this cell may be a leftover; confirm before removing.
!pip install -U -q PyDrive
!git clone https://gist.github.com/dc7e60aa487430ea704a8cb3f2c5d6a6.git /tmp/colab_util_repo
!mv /tmp/colab_util_repo/colab_util.py colab_util.py 
# Remove the temporary clone once the helper file has been moved out of it.
!rm -r /tmp/colab_util_repo

In [0]:
# Mount Google Drive at /content/drive; the model checkpoint is read from there later on.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
def load_model(filepath):
    """Rebuild the DenseNet-161 flower classifier from a saved checkpoint.

    Args:
        filepath: Path to a checkpoint readable by ``torch.load``. The checkpoint
            must provide a ``'classifier'`` entry (installed as ``model.classifier``)
            and a ``'state_dict'`` entry (the weights to restore).

    Returns:
        The model, switched to evaluation mode, with the backbone parameters frozen.
    """
    # On a runtime without CUDA, remap stored tensors onto the CPU so that a checkpoint
    # saved from a GPU can still be deserialized. With CUDA present the original
    # behaviour (restore to the saved device) is kept.
    map_location = None if torch.cuda.is_available() else 'cpu'
    # NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
    checkpoint = torch.load(filepath, map_location=map_location)

    model = models.densenet161(pretrained=False)
    # Freeze the backbone; this notebook only runs inference.
    for param in model.parameters():
        param.requires_grad = False

    # Put the classifier stored in the checkpoint on the network
    model.classifier = checkpoint['classifier']
    # strict=False tolerates key mismatches between checkpoint and model.
    # NOTE(review): it can therefore also hide a genuinely incompatible checkpoint.
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    model.eval()

    return model

In [0]:
# Restore the trained classifier from the checkpoint stored on the mounted Google Drive.
model = load_model(filepath='/content/drive/My Drive/densenet161_checkpoint.pth')

In [0]:
class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose samples also carry the source file path.

    Every item is whatever ``datasets.ImageFolder`` returns for that index,
    with the image's file path appended as one extra trailing element.
    """

    def __getitem__(self, index):
        # Called by the DataLoader for each sample.
        # Element 0 of each ``self.imgs`` record is the image file path.
        image_path = self.imgs[index][0]
        base_item = super().__getitem__(index)
        return base_item + (image_path,)

In [0]:
# Evaluation input pipeline: datasets and loaders for the 'valid' and 'test' folders.
data_dir = './flower_data'
batch_size = 32

# Both phases use the same deterministic preprocessing: resize, centre-crop to 224,
# convert to a tensor, then normalise each channel with the fixed mean/std below.
data_transforms = {
    phase: transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    for phase in ('valid', 'test')
}

# The test split uses ImageFolderWithPaths so each sample also yields its file path.
image_datasets = {
    'valid': datasets.ImageFolder(os.path.join(data_dir, 'valid'),
                                  data_transforms['valid']),
    'test': ImageFolderWithPaths(os.path.join(data_dir, 'test'),
                                 data_transforms['test']),
}

# The test loader is not shuffled and runs in the main process (num_workers=0).
dataloaders = {
    'valid': torch.utils.data.DataLoader(image_datasets['valid'],
                                         batch_size=batch_size,
                                         shuffle=True,
                                         num_workers=4),
    'test': torch.utils.data.DataLoader(image_datasets['test'],
                                        batch_size=batch_size,
                                        shuffle=False,
                                        num_workers=0),
}

dataset_sizes = {
    'valid': len(image_datasets['valid']),
    'test': len(image_datasets['test']),
}

print("Datasets load has finished:")
print("\tNumber of validation images:{}".format(dataset_sizes['valid']))
print("\tNumber of test images:{}".format(dataset_sizes['test']))

In [0]:
def kaggle_prep(k_model,k_device, k_idx_to_class, k_dataloaders, phase):
    """Run the model over one dataloader and collect a prediction per image.

    Args:
        k_model: Model to evaluate; it is put in eval mode and moved to ``k_device``.
        k_device: Device on which inference runs.
        k_idx_to_class: Mapping from the model's predicted output index to the
            class label that is reported.
        k_dataloaders: Mapping from phase name to a dataloader. The selected loader
            must yield ``(data, target, paths)`` batches, i.e. it needs the paths
            component provided by ``ImageFolderWithPaths``.
        phase: Key of the dataloader to use.

    Returns:
        The result of ``np.vstack((image_names, predictions))``: row 0 holds the
        image file names (directory part removed), row 1 the predicted class labels.
    """
    predictions = []
    image_names = []

    # Use the model, device and label mapping that were passed in rather than
    # whatever happens to be bound to notebook globals of similar names.
    k_model.eval()
    k_model.to(k_device)

    with torch.no_grad():
        # The target component is unused: the test images carry no real labels.
        for data, _target, paths in k_dataloaders[phase]:
            output = k_model(data.to(k_device))
            # Index of the highest score along dim 1 is the predicted class index.
            _, pred = torch.max(output, 1)
            for path, class_idx in zip(paths, pred.tolist()):
                image_names.append(os.path.basename(path))
                predictions.append(k_idx_to_class[class_idx])

    return np.vstack((image_names, predictions))

In [0]:
# Invert the validation dataset's class_to_idx so a predicted index can be turned
# back into its class label.
# NOTE(review): this assumes the 'valid' folder yields the same class ordering the
# model was trained with -- confirm against the training notebook.
idx_to_class = {
    index: label
    for label, index in image_datasets['valid'].class_to_idx.items()
}
print(idx_to_class)
# Predict a class for every test image.
kaggle_test_images = kaggle_prep(
    k_model=model,
    k_device=device,
    k_idx_to_class=idx_to_class,
    k_dataloaders=dataloaders,
    phase='test',
)

In [0]:
import pandas as pd

# Row 0 of kaggle_test_images becomes the 'file_name' column and row 1 the 'id'
# column; the frame is then written out as the submission file.
df = pd.DataFrame(dict(file_name=kaggle_test_images[0],
                       id=kaggle_test_images[1]))
print(df)
df.to_csv('submission_02.csv', index=False)
df.describe()