In [1]:
### meta 12/12/2020 Kaggle Cassava Model - Inference
#fast.ai 2020 study group Kaggle Competition
#src https://www.kaggle.com/c/cassava-leaf-disease-classification
#prev competition src https://www.kaggle.com/c/cassava-disease
#references https://arxiv.org/pdf/1908.02900.pdf

#input: export.pkl
#output: submission.csv
#pwd: /kaggle/working
#summary: POC export a model and submit predictions on test data.

#prev versions from 1/10/2020 thru 1/13/2020
#history
#1/13/2020 MODEL - REFACTOR POC INFERENCE (CPU&GPU): TEST IMAGES DATA ONLY. NO TTA FOR POC.
#      DEBUG for testing exported mini model, test images data only -> no need for exported data, no need for dls with train data
#      Score 0.793
#      CPU submission time 1:30, GPU time 1:20

#1/13/2020 MODEL - REFACTORED POC INFERENCE (thought GPU): TEST IMAGES DATA ONLY. ADD TTA.
#      DEBUG for testing exported mini model, test images data only, add TTA(n=3)
#      Score 0.807
#      GPU submission time more than 2:15

#1/14/2020 MODEL - REFACTORED POC INFERENCE (really GPU): TEST IMAGES DATA ONLY. LEARNER CPU=False
#      DEBUG for testing exported mini model, test images data only, with TTA(n=3)
#      learn = load_learner(path, cpu=False)
#      Score 0.805
#      GPU submission time 15 min

#1/14/2020 MODEL - REFACTORED POC INFERENCE (GPU): MOBILE EDIT. MORE TTA. DONE WITH POC INFERENCE.
#      DEBUG for testing exported mini model, test images data only, with TTA(n=15)
#      learn = load_learner(path, cpu=False)
#      Score 0.810
#      GPU submission time: fast (submitted via mobile)
#      Done with POC inference.

#here 1/15/2020 MODEL - REFACTORED POC INFERENCE (GPU): DONE WITH POC INFERENCE.
#      2 modes: a) FULL w/exported full model, b) DEBUG w/exported mini model
#      $params: DEBUG = 0/1
#      $params: learn = load_learner($path_model, cpu=False), $TTA params
#               delta=v1 model exported
#               delta: preds, _ = learn_inf.tta(dl=test_dl, n=4, beta=0.25)
#      Score: .879  GPU submission time: 00:15 Rank: n/a
#      Done with POC inference.


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python

import os
import time  # to track performance time
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch

#modeling
from fastai.vision.all import *

# Input data files are available in the read-only "../input/" directory
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
#--constants and variables
# DEBUG selects which exported model is used: 0 = full model, 1 = mini model.
DEBUG = 0 #$params

#set paths
# Competition data directory; sample_submission.csv and test_images/ are read from here.
path = Path('../input/cassava-leaf-disease-classification')

#$params
# Directory that holds the exported learner for the selected mode.
path_model = Path('../input/my-cassava-mini') if DEBUG else Path('../input/my-cassava-model-v1')

#--functions
def get_x(row):
    """Return the training-image path for `row`: path/'train_images'/<row['image_id']>.

    Not called anywhere in this notebook; presumably it must exist under this
    name so the exported learner can be unpickled by load_learner -- TODO confirm.
    """
    images_dir = path/'train_images'
    return images_dir/row['image_id']
def get_y(row):
    """Return the value stored under the 'label' key of `row`.

    Not called anywhere in this notebook; presumably it must exist under this
    name so the exported learner can be unpickled by load_learner -- TODO confirm.
    """
    label = row['label']
    return label


# Cassava Disease Classification - Inference
##### Objective: Classify pictures of cassava leaves into 1 of 4 disease categories or healthy.

Load the exported model (export.pkl) and create a submission.csv file with predictions on the test images.


## 0. Load Prepared Data and Model

- Input(s): export.pkl


In [4]:
#track time
t0 = time.time()

#load the exported learner
#fail fast when export.pkl is missing: every later cell needs learn_inf, so
#continuing would only surface later as a NameError
model_file = path_model/'export.pkl'
if not model_file.exists():
    raise FileNotFoundError(f"No model to load: {model_file} not found")

print(path_model)
#NOTE: load_learner unpickles the file, so only load exports from a trusted source
#load on the GPU when one is available, otherwise fall back to the CPU
learn_inf = load_learner(model_file, cpu=not torch.cuda.is_available())
#presumably switches the learner back to full precision for inference -- TODO confirm
learn_inf.to_native_fp32()

../input/my-cassava-mini


## 3. Predict

In [5]:
##Predict on one file
#view test images - only one
#test_img = get_image_files(path/'test_images')
#Image.open(test_img[0])

#predict one, returns a tuple, get 0th item
#y_hat = learn_inf.predict(test_img[0])
#y_hat[0]

Test dataset

In [6]:
#template
#read the competition's sample submission, then work on a copy so the
#original frame stays untouched
template_csv = path/'sample_submission.csv'
sample_submission = pd.read_csv(template_csv)
submission = sample_submission.copy(deep=True)

In [7]:
#create test images id: one file path (as a string) per row of the submission template
test_images_dir = path/'test_images'
test_img_id = [os.path.join(test_images_dir, fname) for fname in submission['image_id'].values]

#images in test files: show the count and a short preview only,
#because the full list can be very long
print(len(test_img_id))
test_img_id[:5]

['../input/cassava-leaf-disease-classification/test_images/2216849948.jpg']

In [8]:
#build a DataLoader over the test image paths from the learner's own DataLoaders
test_dl = learn_inf.dls.test_dl(test_img_id)
#number of batches in the test DataLoader
print(len(test_dl))

#predict
#test-time augmentation: n=4 augmented passes; beta=0.25 is the weight given to
#the un-augmented predictions when they are blended with the augmented average
preds, _ = learn_inf.tta(dl=test_dl, n=4, beta=0.25) #$params

#submission
#the index of the highest-scoring class is written as the label
#NOTE(review): assumes class index == label value (learn_inf.dls.vocab is 0..N-1) -- confirm
pred_idx = preds.argmax(dim=-1)
submission['label'] = pred_idx.numpy()
submission.head()

1


Unnamed: 0,image_id,label
0,2216849948.jpg,4


## 4. Submit


In [9]:
#save to csv
submission.to_csv('submission.csv', index=False)

#total inference time, measured from t0 (taken just before the model was loaded)
t1 = time.time()
print ("Inference time (in min)", (t1 - t0)/60)

Inference time (in min) 0.1756665031115214
