## Lesson 1 of fastai

In [1]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os

from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

from matplotlib import patches, patheffects
import pandas as pd

In [3]:
# Report what torch says about GPU support before any training starts:
# the value of torch.cuda.is_available() and the torch.backends.cudnn.enabled flag.
print('cuda available: ', torch.cuda.is_available())
print('cudnn enabled : ',torch.backends.cudnn.enabled)

cuda available:  True
cudnn enabled :  True


In [4]:
# Locate the dataset below the current user's home directory.
# NOTE(review): the sub-path is hard-coded; adjust it if the data lives elsewhere.
homedir = os.path.expanduser('~')
# datapath = Path(os.path.join(homedir,'data/kaggle/dogs-vs-cats-redux-kernels-edition'))
datapath = Path(os.path.join(homedir,'data/kaggle/dogs-vs-cats-redux-kernels-edition/dogscats'))
# Every entry found directly inside <datapath>/train, as pathlib paths.
train_files = list(datapath.joinpath('train').iterdir())


"""moves the images to 'classname/filename'

Arguments:
    src: source directory (string)
    dst: destination directory (string)
"""
def move_to_folders(src,dst,classes):
    """Move the files in `src` into per-class sub-folders of `dst`.

    A file belongs to the first entry of `classes` whose name occurs anywhere
    in the file name and is moved to `dst/<class_name>/<file name>`. Files that
    match no class, and entries of `src` that are not regular files, are left
    where they are.

    Arguments:
        src: source directory (string)
        dst: destination directory (string); may be the same as `src`
        classes: class names (strings), e.g. ['cat', 'dog']
    """
    # Local import so the function does not depend on a star-import having
    # put `shutil` into the notebook namespace.
    import shutil

    # `classes` is walked once per file below, so pin it down as a list.
    classes = list(classes)

    #create the class folders where the files are going to land (dst, not src)
    for class_name in classes:
        class_folder = os.path.join(dst, class_name)
        if not os.path.isdir(class_folder):
            os.makedirs(class_folder)
            print('created folder - ', class_name)

    #loop through files
    for name in os.listdir(src):
        file_path = os.path.join(src, name)
        #ignore if not file (this also skips class folders when src == dst)
        if not os.path.isfile(file_path):
            continue

        #move to the first matching class folder
        for class_name in classes:
            if class_name in name:
                shutil.move(file_path,os.path.join(dst, class_name, name))
                break
                

""" Create validation data

Arguments:
    datadir: base directory of type pathlib.path
    k: no of files in each class
"""
def create_val_set(datadir,k,classes):
    """Make sure every class has `k` files under `datadir/valid`.

    For each class the files already present in `valid/<class>` are counted.
    If there are fewer than `k`, the missing number is picked at random from
    `train/<class>` and moved across. Classes that already hold `k` or more
    validation files are left alone, so calling the function a second time
    does not move any further files.

    Arguments:
        datadir: base directory of type pathlib.Path holding `train/<class>` folders
        k: number of files wanted in each validation class folder
        classes: class (sub-folder) names

    Raises:
        ValueError: a `train/<class>` folder holds fewer files than are still
            needed. Raised before any file has been moved.
        FileNotFoundError: a `train/<class>` folder that is needed does not exist.
    """
    import random
    import shutil

    traindir = datadir.joinpath('train')
    valdir = datadir.joinpath('valid')

    #create validation directory
    os.makedirs(str(valdir), exist_ok=True)

    #check number of files for every class first, so a failure leaves the data untouched
    planned_moves = []
    for class_name in classes:
        val_class_dir = valdir.joinpath(class_name)
        if val_class_dir.is_dir():
            already_there = sum(1 for p in val_class_dir.iterdir() if p.is_file())
        else:
            already_there = 0

        missing = k - already_there
        if missing <= 0:
            continue

        train_class_dir = traindir.joinpath(class_name)
        # sorted() gives random.sample a stable population to draw from
        candidates = sorted(p for p in train_class_dir.iterdir() if p.is_file())
        if len(candidates) < missing:
            raise ValueError(
                'class %r needs %d more validation files but only %d are in %s'
                % (class_name, missing, len(candidates), train_class_dir))
        planned_moves.append((val_class_dir, random.sample(candidates, missing)))

    #move the chosen files
    for val_class_dir, chosen in planned_moves:
        os.makedirs(str(val_class_dir), exist_ok=True)
        for path in chosen:
            shutil.move(str(path), str(val_class_dir.joinpath(path.name)))

#move train images to class folders
# traindir = datapath.joinpath('train')
# classes = ['cat','dog']
# move_to_folders(str(traindir), str(traindir.as_posix()),classes)

#create validation set
# valid_dir = datapath.joinpath('valid')

In [5]:
def show_image(image, figsize=None, ax=None):
    """Draw `image` on an axes with both axis rulers hidden.

    Arguments:
        image: anything accepted by the axes' `imshow`
        figsize: size handed to `plt.subplots` when a new figure is created;
            not used when `ax` is supplied
        ax: axes to draw on; a new figure/axes pair is created when falsy

    Returns:
        the axes that was drawn on
    """
    target = ax if ax else plt.subplots(figsize=figsize)[1]

    target.imshow(image)
    # hide the x ruler first, then the y ruler
    for get_ruler in (target.get_xaxis, target.get_yaxis):
        get_ruler().set_visible(False)
    return target

def show_images(images,nrows=3,ncols=3,figsize=(3,3)):
    """Show `images` on an `nrows` x `ncols` grid, filled row by row.

    Arguments:
        images: sequence of images accepted by `show_image`
        nrows: number of grid rows
        ncols: number of grid columns
        figsize: size of the whole figure, handed to `plt.subplots`

    Images beyond `nrows * ncols` are not shown; grid cells without an image
    are blanked.
    """
    # figsize has to go to the call that creates the figure. squeeze=False
    # keeps `axes` a 2-D array, so `.flat` also works for a 1x1 grid.
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize, squeeze=False)
    for idx, ax in enumerate(axes.flat):
        if idx < len(images):
            show_image(images[idx], ax=ax)
        else:
            # no image left for this cell: hide its empty frame and ticks
            ax.axis('off')

# imgs = [open_image(o.as_posix()) for o in random.choices(train_files,k=9)]
# show_images(imgs,figsize=(8,8))

In [6]:
# Model architecture and the size `sz` handed to tfms_from_model below.
arch = resnet34
sz=224
print(datapath)

# Build the data object from the folder layout under `datapath`, using the
# transforms that tfms_from_model derives for this architecture and size.
data = ImageClassifierData.from_paths(datapath, tfms=tfms_from_model(arch, sz))
# NOTE(review): precompute=True presumably caches the activations of the
# pretrained layers so that only the new head is trained - confirm in fastai docs.
learn = ConvLearner.pretrained(arch, data, precompute=True)
# First argument is presumably the learning rate (TODO confirm); n_cycle=2
# matches the two epoch rows in the recorded output.
learn.fit(0.01,n_cycle=2)

/home/achinta/data/kaggle/dogs-vs-cats-redux-kernels-edition/dogscats


HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                     
    0      0.059089   0.028947   0.99      
    1      0.041674   0.02709    0.9905                        



[array([0.02709]), 0.9905]

In [None]:
# Display the class labels held by the data object.
data.classes

In [None]:
# Predictions from the learner; called without is_test, so presumably on the
# validation set (the helper cells index data.val_ds with them). The name and
# the later np.exp() suggest log-probabilities - TODO confirm.
log_preds = learn.predict()
log_preds.shape

In [None]:
# Predicted class index per row = position of the largest value along axis 1.
preds = np.argmax(log_preds,axis=1)
# exp() of column 1 turns the log value of class index 1 back into a plain value.
# NOTE(review): which label has index 1 is given by data.classes - confirm.
probs = np.exp(log_preds[:,1])
len(probs)

In [7]:
#return random image indexes with the class 'mask'
def rand_by_mask(mask):
    """Return up to 4 distinct, randomly chosen indexes at which `mask` is true.

    Arguments:
        mask: 1-D boolean array (or anything `np.where` accepts)

    Returns:
        array of at most 4 indexes; shorter when fewer than 4 entries of
        `mask` are true, empty when none are.
    """
    idxs = np.where(mask)[0]
    # Sampling without replacement cannot return more items than exist, so
    # cap the request instead of letting np.random.choice raise.
    n_samples = min(4, len(idxs))
    if n_samples == 0:
        return idxs
    return np.random.choice(idxs, n_samples, replace=False)

#pick random images whose prediction was right (is_correct=True) or wrong (is_correct=False)
def rand_by_correct(preds, y,is_correct):
    """Return random indexes, via `rand_by_mask`, of rows where `preds == y` equals `is_correct`.

    Arguments:
        preds: predicted values, compared element-wise with `y`
        y: reference values
        is_correct: True selects rows where `preds` equals `y`, False the others
    """
    agrees = preds == y
    wanted = agrees == is_correct
    return rand_by_mask(wanted)

def plot_val_with_title(idxs, title):
    """Print `title` and plot the validation images at `idxs` in a single row.

    Each image is captioned with the matching entry of the notebook-level
    `probs`. The function also reads the notebook-level `data` (for
    `data.val_ds`) and calls `plots`.

    Arguments:
        idxs: indexes into `data.val_ds` / `probs`
        title: text printed above the figure

    Returns:
        whatever `plots` returns, or None when `idxs` is empty
    """
    print(title)
    if len(idxs) == 0:
        # nothing selected: np.stack would fail on an empty list
        return None
    imgs = np.stack([data.val_ds[x][0] for x in idxs])
    title_probs = [probs[x] for x in idxs]
    # One column per selected image; relying on the default cols=3 of plots()
    # would silently leave out every image after the third.
    return plots(data.val_ds.denorm(imgs),rows=1, cols=len(idxs), titles=title_probs)

def draw_outline(o, lw):
    """Give `o` a black stroke of line width `lw` followed by the normal rendering.

    Arguments:
        o: object offering `set_path_effects`
        lw: line width of the black stroke
    """
    stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    regular = patheffects.Normal()
    o.set_path_effects([stroke, regular])

def draw_text(ax, xy, text, size=14):
    """Write bold white `text` on `ax` at position `xy` and outline it via `draw_outline`.

    Arguments:
        ax: axes to write on
        xy: position, unpacked into the leading arguments of `ax.text`
        text: the text to show
        size: font size
    """
    look = dict(verticalalignment='top', color='white', fontsize=size, weight='bold')
    label = ax.text(*xy, text, **look)
    # outline of width 1 around the label
    draw_outline(label, 1)

def plots(imgs,figsize=(12,6),rows=3,cols=3, titles=None):
    """Show `imgs` on a `rows` x `cols` grid, optionally captioned.

    Arguments:
        imgs: sequence of images accepted by `imshow`
        figsize: size of the whole figure, handed to `plt.subplots`
        rows: number of grid rows
        cols: number of grid columns
        titles: optional sequence of captions, one per image, drawn in the
            top-left corner through `draw_text`; None draws no captions

    Images beyond `rows * cols` are not shown; grid cells without an image
    are blanked.
    """
    # squeeze=False keeps `axes` a 2-D array, so `.flat` also works for 1x1.
    fig, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
    for idx, ax in enumerate(axes.flat):
        if idx >= len(imgs):
            # the grid has more cells than there are images
            ax.axis('off')
            continue
        ax.imshow(imgs[idx])
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        # captions are optional and may be fewer than the images
        if titles is not None and idx < len(titles):
            draw_text(ax,(0,0),titles[idx])
        
def most_by_mask(mask, mult):
    """Return the (up to) 4 masked indexes with the smallest `mult * probs`.

    Reads the notebook-level `probs` array.

    Arguments:
        mask: boolean array marking the candidate indexes
        mult: factor applied to `probs` before sorting; a negative factor
            ranks the largest `probs` first
    """
    candidates = np.where(mask)[0]
    ranking = np.argsort(mult * probs[candidates])
    first_four = ranking[:4]
    return candidates[first_four]

def most_by_correct(y, is_correct):
    """Select, through `most_by_mask`, validation rows of class `y` predicted rightly or wrongly.

    Reads the notebook-level `preds` and `data.val_y`.

    Arguments:
        y: class value to restrict to (compared with `data.val_y`)
        is_correct: True keeps rows where `preds` equals `data.val_y`,
            False keeps the others

    The sort factor handed to `most_by_mask` is -1 when `(y == 1)` equals
    `is_correct` and 1 otherwise.
    """
    if (y == 1) == is_correct:
        mult = -1
    else:
        mult = 1

    outcome_matches = (preds == data.val_y) == is_correct
    class_matches = data.val_y == y
    return most_by_mask(outcome_matches & class_matches, mult)

In [None]:
# Random validation images whose prediction equals the label in data.val_y.
plot_val_with_title(rand_by_correct(preds, data.val_y, True),"Correctly Classified")

In [None]:
# Random validation images whose prediction differs from the label in data.val_y.
plot_val_with_title(rand_by_correct(preds, data.val_y, False),"Wrongly Classified")

### Choosing a learning rate

In [None]:
# Start again from a freshly created learner, then run lr_find(); the two
# plotting cells that follow read their data from learn.sched.
learn = ConvLearner.pretrained(arch, data, precompute=True)
lrf = learn.lr_find()

In [None]:
# Presumably plots the learning rate used at each iteration of lr_find() - TODO confirm.
learn.sched.plot_lr()

In [None]:
# Presumably plots loss against learning rate, the curve a rate is chosen from - TODO confirm.
learn.sched.plot()

In [8]:
# Same architecture and size as before, now with the transforms_side_on
# augmentations and max_zoom=1.1 added to the transforms.
tfms = tfms_from_model(resnet34, sz, aug_tfms=transforms_side_on,max_zoom=1.1)
# test_name='test1' points the data object at <datapath>/test1 as well; the
# submission cells rely on it (learn.predict(is_test=True)).
data = ImageClassifierData.from_paths(datapath, tfms=tfms,test_name='test1')
# data = ImageClassifierData.from_paths(datapath, tfms=tfms_from_model(arch, sz))
# No precompute argument here, unlike the earlier learners.
learn = ConvLearner.pretrained(arch, data)

In [None]:
# One fit call with 1e-2 and 1 as arguments (presumably learning rate and
# number of cycles - TODO confirm).
learn.fit(1e-2, 1)

In [None]:
# Switch the precompute flag off before fitting again.
# NOTE(review): presumably needed so the augmented transforms take effect - confirm.
learn.precompute=False
learn.fit(1e-2, 3, cycle_len=1)

In [None]:
# Presumably plots the learning rate over the iterations of the fit above - TODO confirm.
learn.sched.plot_lr()

In [None]:
# Presumably makes all layers trainable, not only the new head - TODO confirm.
learn.unfreeze()

In [None]:
# Three rates, a factor of 10 apart, passed to learn.fit in the next cell.
# NOTE(review): presumably one rate per layer group, earliest layers first - confirm.
lr = np.array([1e-4,1e-3,1e-2])

In [None]:
# Fit with the rate array; cycle_mult=2 presumably doubles the cycle length
# after every cycle - TODO confirm.
learn.fit(lr,3, cycle_len=1,cycle_mult=2)

In [None]:
#learn.save('cats_dogs_diff_lr')

In [9]:
# Load the weights stored under the name 'cats_dogs_diff_lr'.
# NOTE(review): the learn.save(...) call that would write them is commented
# out in this notebook, so on a fresh run this cell needs that file to exist.
learn.unfreeze()
learn.load('cats_dogs_diff_lr')

In [10]:
# Predictions for the test set (is_test=True).
# NOTE: this rebinds `preds`, the name most_by_correct() reads as the
# validation class predictions.
preds = learn.predict(is_test=True)

### Get the test file ids

In [11]:
# Take the ids from the test dataset itself, so that entry i of `files` names
# the same image as row i of the test predictions. os.listdir() returns
# entries in arbitrary order and has no link to the prediction order.
files = [os.path.basename(str(o)) for o in data.test_ds.fnames]
# the id is the part of the file name before the first dot
files = [o.split('.')[0] for o in files]
files[:3]

['3883', '3739', '1044']

In [12]:
# exp() of column 1 of the test predictions gives the value submitted as `label`.
# NOTE: this rebinds `probs`, the name plot_val_with_title() and
# most_by_mask() read as the validation values.
probs = np.exp(preds[:,1])
# One row per test image, columns `id` and `label`.
df = pd.DataFrame({'id':files,'label':probs})
# index=False keeps the DataFrame index out of the csv file.
df.to_csv('submit.csv',index=False)

In [13]:
!kaggle competitions submit -c dogs-vs-cats-redux-kernels-edition -f submit.csv -m 'No float correction'

Successfully submitted to Dogs vs. Cats Redux: Kernels Edition

This resulted in a Kaggle score of 0.06900 (position 161/1314).