In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
# from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc
from tqdm import tqdm_notebook
from fastai import *
from fastai.vision import *
from fastai.callbacks import SaveModelCallback
from torchvision.models import *
import os
import cv2

# List the entries of the input directory so the cell output shows what is available.
print(os.listdir("../input"))

In [None]:
# Image directories. train_path is handed to ImageList.from_df for the training
# table; test_path is listed with os.listdir and later prefixed to test ids,
# so its trailing slash matters.
train_path = '../input/train/train/'
test_path = '../input/test/test/'

In [None]:
# Load the training table; its 'has_cactus' column supplies the labels.
data = pd.read_csv("../input/train.csv")

In [None]:
# File names are the first column of the table (the column ImageList.from_df
# reads by default). Taking `data.values` here would also carry the label
# column along, so select the name column explicitly.
train_names = data.iloc[:, 0].values
# Binary target used for stratifying the split.
train_labels = data['has_cactus'].values

In [None]:
# skf = StratifiedKFold(n_splits=5, random_state=321)

In [None]:
# val = []
# for tr_idx, val_idx in skf.split(train_names, train_labels):
#     val.append(val_idx)

In [None]:
# Stratified hold-out split (30% validation, fixed seed). The row positions are
# split alongside the rows so that val_idx can be passed to split_by_idx.
tr_n, val_n, tr_idx, val_idx = train_test_split(
    train_names,
    range(len(train_names)),
    test_size=0.3,
    stratify=train_labels,
    random_state=321,
)

In [None]:
# Backbone passed to cnn_learner and batch size passed to the data bunch.
arch = resnet18
batch_size = 256
# Prefix for saved checkpoints and the submission file. Read the function's
# name directly instead of parsing its repr ("<function resnet18 at 0x...>"),
# which breaks as soon as `arch` is not a plain function.
model_path = arch.__name__

In [None]:
# Collect the test image file names. os.listdir returns entries in arbitrary
# order, so sort them to get the same test ordering on every run.
test_names = sorted(os.listdir(test_path))

# Single-column frame of test ids, consumed by ImageList.from_df for the test set.
df_test = pd.DataFrame({'id': test_names})

In [None]:
# Assemble the fastai data bunch, step by step:
#   1. image items come from the training table, resolved against train_path
#   2. the rows at val_idx form the validation split
#   3. labels are read from the 'has_cactus' column
#   4. the test images (df_test under test_path) are attached as the test set
#   5. get_transforms augmentations with flip_vert=True, images sized to 32
#   6. batches of batch_size, normalized with imagenet_stats
imgDataBunch = (ImageList.from_df(data, train_path)
    .split_by_idx(val_idx)
    .label_from_df(cols='has_cactus')
    .add_test(ImageList.from_df(df_test, test_path))
    .transform(tfms=get_transforms(flip_vert=True), size=32)
    .databunch(bs=batch_size)
    .normalize(imagenet_stats)
)

In [None]:
# Visual check: display a small grid (2 rows) of samples from the data bunch.
imgDataBunch.show_batch(rows=2, figsize=(4, 4))

In [None]:
def getLearner():
    """Build the CNN learner for `imgDataBunch` on top of the `arch` backbone.

    Returns:
        The object returned by `cnn_learner`, created with pretrained weights,
        `accuracy` as metric, `ps=0.5`, the `ShowGraph` callback and the
        current directory as its working path.
    """
    learner_kwargs = dict(
        pretrained=True,
        path='.',
        metrics=accuracy,
        ps=0.5,
        callback_fns=ShowGraph,
    )
    return cnn_learner(imgDataBunch, arch, **learner_kwargs)

learner = getLearner()

In [None]:
# Run the learning-rate range test before plotting: on a fresh kernel the
# recorder is only attached to the learner (and only has data) once
# lr_find/fit has run, so plotting first fails under Restart & Run All.
learner.lr_find()
learner.recorder.plot(suggestion=True)

In [None]:
# Stage 1: one-cycle training for 5 epochs with a maximum learning rate of 1e-2.
learner.fit_one_cycle(5, 1e-2)

In [None]:
# Plot the losses recorded during the stage-1 fit.
learner.recorder.plot_losses()

In [None]:
# Confusion matrix of the learner's predictions after stage 1.
interp = ClassificationInterpretation.from_learner(learner)
interp.plot_confusion_matrix(title='Confusion matrix')

In [None]:
def cal_auc_and_plot(learner):
    """Compute the ROC AUC of `learner` for the positive class and plot its ROC curve.

    Args:
        learner: object whose `get_preds()` returns `(preds, y)`; column 1 of
            `preds` is used as the positive-class score and `y` as the true labels.

    Returns:
        The area under the ROC curve, as computed by `sklearn.metrics.auc`.
    """
    preds, y = learner.get_preds()
    # get_preds() already applies the final activation, so column 1 is the
    # positive-class probability. Exponentiating it again (as if it were a
    # log-probability) would yield values outside [0, 1].
    probs = preds[:, 1]
    fpr, tpr, thresholds = roc_curve(y, probs, pos_label=1)
    roc_auc = auc(fpr, tpr)
    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', label='ROC curve (area = %0.2f)' % roc_auc)
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
    # Slight margins so the curve is not drawn on top of the axes.
    plt.xlim([-0.01, 1.0])
    plt.ylim([0.0, 1.01])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    return roc_auc

In [None]:
# AUC after stage 1; kept so it can be compared with the stage-2 AUC later.
stage1_auc = cal_auc_and_plot(learner)
stage1_auc

In [None]:
# Checkpoint the stage-1 model under the name '<model_path>_stage1'.
learner.save(model_path + '_stage1')

In [None]:
# Reload the stage-1 checkpoint saved in the previous cell.
learner.load(model_path + '_stage1')

In [None]:
# Unfreeze the model ahead of the second training stage.
learner.unfreeze()

In [None]:
# Learning-rate range test for the unfrozen model.
learner.lr_find()

In [None]:
# Plot the range-test result, with a suggested learning rate marked.
learner.recorder.plot(suggestion=True)

In [None]:
# Stage 2: one-cycle training for 10 epochs with learning rates sliced between
# 1e-5 and 1e-4. SaveModelCallback writes its checkpoint under the name 'stage2'.
learner.fit_one_cycle(10, max_lr=slice(1e-5, 1e-4), callbacks=[SaveModelCallback(learner, name='stage2')])

In [None]:
# Load the 'stage2' checkpoint written by SaveModelCallback during the fit above.
learner.load('stage2')

In [None]:
# Plot the losses recorded during the stage-2 fit.
learner.recorder.plot_losses()

In [None]:
# Confusion matrix of the learner's predictions after stage 2.
interp = ClassificationInterpretation.from_learner(learner)
interp.plot_confusion_matrix(title='Confusion matrix')

In [None]:
# AUC after stage 2 (the ROC curve is drawn as a side effect of the call).
stage2_auc = cal_auc_and_plot(learner)

In [None]:
# Fall back to the stage-1 checkpoint when stage 2 ended with a lower AUC.
if stage2_auc < stage1_auc:
    learner.load(model_path + '_stage1')

## Test-time augmentation (TTA)

In [None]:
# Predict on the test set with the learner's TTA method.
preds_test, y_test = learner.TTA(ds_type=DatasetType.Test)

In [None]:
# Column 1 of the test predictions (submitted as the has_cactus score),
# converted to a plain Python list in one step.
cactus_preds = preds_test[:, 1].tolist()

In [None]:
# Sample submission, read from the same relative input directory as every
# other file in this notebook (previously an absolute '/kaggle/input' path,
# which only resolves on Kaggle itself).
SAMPLE_SUB = '../input/sample_submission.csv'
sample_df = pd.read_csv(SAMPLE_SUB)
# Ids in the order the submission file must follow.
sample_list = sample_df['id'].tolist()

In [None]:
pred_list = list(cactus_preds)

# Pair every item of the learner's test dataset with its prediction so that
# predictions can be looked up by item afterwards.
pred_dic = dict(zip(learner.data.test_ds.items, pred_list))

In [None]:
# Reorder the predictions to follow the sample submission: each id is looked
# up in pred_dic under the key test_path + id.
pred_list_cor = [pred_dic[test_path + idx] for idx in sample_list]

In [None]:
# Build the submission frame: ids in sample-submission order, each paired with
# its predicted has_cactus score.
df_sub = pd.DataFrame({'id':sample_list,'has_cactus':pred_list_cor})

# Write '<model_path>_submission.csv' with a header row and without the index column.
df_sub.to_csv('{0}_submission.csv'.format(model_path), header=True, index=False)

In [None]:
# Preview the first rows of the submission frame.
df_sub.head()