In [1]:
#meta 12/12/2020 Kaggle Cassava Model - Inference
#fast.ai 2020 study group Kaggle Competition
#src https://www.kaggle.com/c/cassava-leaf-disease-classification
#prev competition src https://www.kaggle.com/c/cassava-disease
#references https://arxiv.org/pdf/1908.02900.pdf

#input: df_train.pkl, export.pkl
#output: submission.csv
#pwd: /kaggle/working

#history
#1/10/2020 INFERENCE - ALPHA MODEL (POC)
#      Export saved alpha dataframe & model and create submission.csv file
#      Alpha model - manually tuned model
#      POC export a model and make submissions.
#      Successful alpha submit with 0.652 accuracy

#here 1/10/2020 INFERENCE - ALPHA MODEL (POC), REPLACE LOOP. RUNS FINE ON CPU, ERROR ON GPU.
#      Export saved alpha dataframe & model and create submission.csv file
#      Alpha model - manually tuned model
#      Tried replacing FOR LOOP with a list comprehension
#      Errored out: RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same
#      Possible fix: move model to GPU -> learn_inf.model.cuda()
#src https://forums.fast.ai/t/runtimeerror-input-type-torch-cuda-floattensor-and-weight-type-torch-floattensor-should-be-the-same/77855


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python

import os
#import time #to track performance time
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


#modeling
from fastai.vision.all import *


# Input data files are available in the read-only "../input/" directory
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
#--constants and variables
#SEED and DEBUG are defined here but are not referenced by any cell visible in this notebook
SEED = 24
DEBUG = 0

#set paths (relative to the notebook's working directory)
#path: competition dataset - get_x reads train_images from it; the test images,
#      the label map and the sample submission are read from it further down
path = Path('../input/cassava-leaf-disease-classification')
#path_model: folder holding the exported learner (export.pkl)
path_model = Path('../input/my-cassava-model-alpha')
#path_data: folder holding the pickled training dataframe (df_train.pkl)
path_data = Path('../input/my-cassava-data-alpha')

#--functions
def get_x(row):
    """Return the location of the training image named by `row['image_id']`.

    The file is looked up inside the `train_images` folder of the
    notebook-level `path`.
    """
    train_dir = path/'train_images'
    return train_dir/row['image_id']
def get_y(row):
    """Return the target stored under the `label` key of `row`."""
    label = row['label']
    return label

#--jic
#define label lookup 
def lookup_label(image_id, df=None):
    """Return the integer label recorded for `image_id`.

    Parameters
    ----------
    image_id : str
        Image file name as stored in the `image_id` column, e.g. '100042118.jpg'.
    df : pandas.DataFrame, optional
        Frame with `image_id` and `label` columns. When omitted, the
        notebook-level `df_train` is used (it must already be loaded).

    Returns
    -------
    int
        The label of the single matching row.

    Raises
    ------
    ValueError
        If `image_id` matches no row or more than one row.
    """
    if df is None:
        df = df_train
    #label cells of every row whose image_id matches
    matches = df.loc[df['image_id'] == image_id, 'label']
    if len(matches) != 1:
        raise ValueError(f"expected exactly one row for image_id {image_id!r}, found {len(matches)}")
    #calling int() on a Series is deprecated in pandas; convert the scalar instead
    return int(matches.iloc[0])

#test function
#lookup_label('100042118.jpg')

# Cassava Disease Classification - Inference
##### Objective: Classify pictures of cassava leaves into 1 of 4 disease categories or healthy.

Load the saved data and the exported model, then create a submission.csv file with predictions on the test images.


## 0. Load Prepared Data and Model

- Input(s): df_train.pkl and export.pkl


In [4]:
#start the wall-clock timer; the final cell reports the minutes elapsed since t0
#NOTE(review): `time` is not imported explicitly (the import at the top is commented out);
#              presumably it is provided by the fastai star import - confirm
t0 = time.time()

#load the prepared training dataframe
#torch.load unpickles the file, so it must only be pointed at trusted data
df_train = torch.load(path_data/'df_train.pkl')

#datablock: image input, categorical target; get_x/get_y read each dataframe row
dblock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    splitter=ColSplitter(),
    get_x=get_x,
    get_y=get_y,
    item_tfms=CropPad(500),
    batch_tfms=aug_transforms(mult=2),
)

#dataloaders over the training dataframe, 64 items per batch
dls = dblock.dataloaders(df_train, bs=64)

In [5]:
#load the exported learner; stop immediately if export.pkl is missing,
#because every later cell needs learn_inf
if not (path_model/'export.pkl').exists():
    raise FileNotFoundError(f"No exported model found at {path_model/'export.pkl'}")

print(path_model)
#load_learner defaults to cpu=True, so the model and learn_inf.dls are placed on the CPU;
#a DataLoader handed to learn_inf has to live on the same device as the model, otherwise
#torch raises "Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same"
#src https://forums.fast.ai/t/runtimeerror-input-type-torch-cuda-floattensor-and-weight-type-torch-floattensor-should-be-the-same/77855
learn_inf = load_learner(path_model/'export.pkl')

../input/my-cassava-model-alpha


### 0.1 Images  
1 test image

In [6]:
#collect the image files found in the competition's test_images folder
test_img = get_image_files(path.joinpath('test_images'))

#report how many test files were found
print("Number of files: ", len(test_img))

Number of files:  1


### 0.2 Disease Labels Mapping

In [7]:
#read the label-number -> disease-name lookup that ships with the competition data
target_map = json.loads((path/'label_num_to_disease_map.json').read_text())

print(target_map)

{'0': 'Cassava Bacterial Blight (CBB)', '1': 'Cassava Brown Streak Disease (CBSD)', '2': 'Cassava Green Mottle (CGM)', '3': 'Cassava Mosaic Disease (CMD)', '4': 'Healthy'}


## 3. Predict
Predict on one file

In [8]:
#view test images - only one
#Image.open(test_img[0])

In [9]:
#predict one, returns a tuple, get 0th item
#sanity check on the first test image only; the full test set is predicted further down
y_hat = learn_inf.predict(test_img[0])
#last expression of the cell, so the 0th item is displayed ('4' in the saved output)
y_hat[0]

'4'

Test dataset

In [10]:
#create test images id
#copy the test image paths into a plain list
#(despite the name, each entry is the full path of a test image, not a bare id)
test_img_id = list(test_img)

#images in test files
test_img_id

[Path('../input/cassava-leaf-disease-classification/test_images/2216849948.jpg')]

In [11]:
#build the test DataLoader from the learner's own DataLoaders rather than from a freshly
#built one: it reuses the transforms stored with the exported model and sits on the same
#device as the model, which avoids the cuda-input / cpu-weight RuntimeError on GPU sessions
test_dl = learn_inf.dls.test_dl(test_img_id)
len(test_dl)

1

## 4. Submit


In [12]:
#template - read only for its column layout (image_id, label)
sample_submission = pd.read_csv(path/'sample_submission.csv')

#predict: get_preds returns (predictions, targets); a test set has no targets
preds,_ = learn_inf.get_preds(dl=test_dl)

#submission: one row per test image, built from the test files themselves so the
#row count always matches the predictions regardless of the template's length
#NOTE(review): argmax yields the class index in the learner's vocab; it equals the
#              label only while that vocab is ordered 0..4 - confirm
submission = pd.DataFrame({
    'image_id': [Path(item).name for item in test_img_id],  #file name without the folder
    'label': preds.argmax(dim=-1).numpy(),
})[list(sample_submission.columns)]  #same column order as the template
submission.head()


Unnamed: 0,image_id,label
0,2216849948.jpg,4


In [13]:
#write the predictions to the working directory, without the dataframe index
submission.to_csv('submission.csv', index=False)

#stop the timer once and report the minutes elapsed since t0 was taken
t1 = time.time()
print ("Inference time (in min)", (t1 - t0)/60)

Inference time (in min) 0.135518483320872
