In [1]:
# Load/reload the autoreload extension and set mode 2, so imported modules are
# re-imported before each cell runs and library edits take effect without a kernel restart.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [3]:
# Root directory of the dataset. get_data() passes it to ImageClassifierData.from_paths,
# and the submission folder is built from it by string concatenation, so the
# trailing slash is required.
PATH = 'data/plant_classification/'

In [4]:
# Model architecture; used both to derive the transforms (tfms_from_model)
# and to build the learner (ConvLearner.pretrained).
arch = resnet50

In [5]:
def get_data(sz=100, bs=64):
    """Build the ImageClassifierData object for a given image size and batch size.

    The transforms are derived from the module-level `arch` via tfms_from_model,
    using the `transforms_top_down` augmentations and `max_zoom=1.1`. The data is
    read from `PATH`, with the 'test' sub-folder registered as the test set.

    Args:
        sz: Image size handed to tfms_from_model (presumably the side length in
            pixels - confirm against the fastai docs). Defaults to 100.
        bs: Batch size handed to ImageClassifierData.from_paths. Defaults to 64.

    Returns:
        The object returned by ImageClassifierData.from_paths.
    """
    return ImageClassifierData.from_paths(
        PATH,
        bs=bs,
        tfms=tfms_from_model(arch, sz, aug_tfms=transforms_top_down, max_zoom=1.1),
        test_name='test',
    )

In [6]:
# Build the learner for `arch` from the data returned by get_data() with its
# defaults (sz=100, bs=64).
learn = ConvLearner.pretrained(arch, get_data())

In [7]:
# Base learning rate, passed as a single value to the learn.fit calls that do not
# follow learn.unfreeze().
lr = 1e-2
# Three rates (lr/9, lr/3, lr) passed to learn.fit after learn.unfreeze().
# NOTE(review): presumably one rate per layer group, smallest for the earliest
# layers - confirm against the fastai docs.
lrs = np.array([lr/9, lr/3, lr])

In [8]:
# First training pass at the default image size with the single rate `lr`:
# 3 cycles with cycle_len=1 (the log below shows 3 epochs).
learn.fit(lr, 3, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                 
    0      1.643471   1.177781   0.610417  
    1      1.367115   0.987149   0.668229                 
    2      1.199182   0.872503   0.697917                 



[0.8725026, 0.6979166686534881]

In [9]:
# Unfreeze the model and continue training with the three rates in `lrs`.
# With cycle_len=1 and cycle_mult=2 the 3 cycles run 7 epochs in total (see the log below).
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
# NOTE(review): the checkpoint is named '150_v4', but get_data() was called with its
# default sz=100 at this point - confirm which image size the name is meant to reflect.
learn.save('150_v4')

epoch      trn_loss   val_loss   accuracy                  
    0      0.682542   0.449264   0.842708  
    1      0.449983   0.242462   0.914062                  
    2      0.30717    0.177566   0.939062                  
    3      0.297701   0.179533   0.929688                  
    4      0.248524   0.122235   0.957812                  
    5      0.185652   0.099967   0.964062                  
    6      0.164574   0.099029   0.967188                  



In [10]:
# Swap the learner's data for sz=250 images (default bs=64), freeze the model again,
# and fit 3 cycles of cycle_len=1 with the single rate `lr`.
learn.set_data(get_data(250))
learn.freeze()
learn.fit(lr, 3, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                  
    0      0.358936   0.250336   0.916667  
    1      0.285731   0.227374   0.921354                  
    2      0.255433   0.217117   0.927604                  



[0.21711692, 0.9276041686534882]

In [11]:
# Unfreeze and train on the sz=250 data with the three rates in `lrs`
# (7 epochs in the log below), then checkpoint the weights as '250_v4'.
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
learn.save('250_v4')

epoch      trn_loss   val_loss   accuracy                  
    0      0.213299   0.13944    0.954167  
    1      0.171639   0.105602   0.960938                  
    2      0.124713   0.08888    0.976562                  
    3      0.132706   0.091723   0.970312                  
    4      0.103488   0.070506   0.972917                  
    5      0.077904   0.046792   0.9875                     
    6      0.069373   0.051079   0.9875                     



In [8]:
# NOTE(review): the execution counter drops back to In [8] here, so this cell appears to
# have been run in a fresh kernel right after the setup cells; it restores the '250_v4'
# checkpoint rather than relying on the training cells above having been re-run.
learn.load('250_v4')
# Move to sz=400 images with a batch size of 45, freeze the model, and fit
# 3 cycles of cycle_len=1 with the single rate `lr`.
learn.set_data(get_data(400, 45))
learn.freeze()
learn.fit(lr, 3, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                     
    0      0.098995   0.062962   0.987302  
    1      0.085703   0.060021   0.985714                     
    2      0.086866   0.058244   0.987302                     



[0.058244042, 0.9873015880584717]

In [9]:
# Keep sz=400 but rebuild the data with batch size 20 for the unfrozen run.
# NOTE(review): presumably reduced from 45 to fit the unfrozen model in GPU memory - confirm.
learn.set_data(get_data(400, 20))
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
# Checkpoint the weights after the unfrozen sz=400 run.
learn.save('400_v4')

epoch      trn_loss   val_loss   accuracy                    
    0      0.216132   0.120531   0.956667  
    1      0.215294   0.123722   0.963333                    
    2      0.118022   0.074163   0.985                       
    3      0.20882    0.157494   0.951667                    
    4      0.162244   0.056359   0.986667                    
    5      0.086652   0.051035   0.99                         
    6      0.07446    0.042237   0.99                         



In [10]:
# Freeze the model again and fit 3 cycles of cycle_len=3 with the single rate `lr`
# (9 epochs in the log below).
learn.freeze()
learn.fit(lr, 3, cycle_len=3)

epoch      trn_loss   val_loss   accuracy                     
    0      0.045656   0.038337   0.99      
    1      0.055991   0.033197   0.99                         
    2      0.040366   0.034715   0.99                         
    3      0.050763   0.030847   0.99                         
    4      0.044116   0.02999    0.99                         
    5      0.046029   0.031742   0.99                         
    6      0.047259   0.031147   0.99                         
    7      0.042571   0.026043   0.99                         
    8      0.047502   0.025312   0.99                         



[0.02531221, 0.9899999976158143]

In [11]:
# Checkpoint after the final frozen fit; the '.989' suffix mirrors the validation
# accuracy (0.98999...) reported by that fit.
learn.save('400_v4_.989')

In [12]:
# Run learn.TTA against the test set (is_test=True). `y` is not used later in this notebook.
log_preds,y = learn.TTA(is_test=True)
# Exponentiate the log-predictions and average them over axis 0.
# NOTE(review): axis 0 is presumably the augmentation axis - confirm against learn.TTA.
probs = np.mean(np.exp(log_preds),0)

                                             

## Create a CSV with the results

In [13]:
# Class names known to the data object, indexed by class id.
classes = learn.data.classes
# Index of the highest-probability class for each row of `probs`.
choices = np.argmax(probs, axis=1)

# Translate each predicted index into its class name. A plain comprehension over the
# 1-D index array replaces the np.nditer loop and, unlike nditer, also works when
# there are zero predictions.
res = [classes[i] for i in choices]

In [14]:
# Single-column frame holding the predicted class names from `res`, one per row.
df = pd.DataFrame(res)

In [15]:
# Add the bare file name as the first column. os.path.basename drops whatever directory
# prefix precedes the name (e.g. 'test/') instead of assuming it is exactly five characters.
# Note: df.insert raises if a 'file' column already exists, so re-create `df` before re-running.
df.insert(0, 'file', [os.path.basename(n) for n in learn.data.test_ds.fnames])

In [16]:
# Name the two columns: 'file' (the column inserted at position 0) and
# 'species' (the predicted class name).
df.columns = ['file','species']

In [17]:
# Submission directory under PATH; the trailing slash is kept because file names
# are appended to SUBM by plain string concatenation.
SUBM = f'{PATH}subm/'
# Create the directory if it is missing; exist_ok=True makes this safe to re-run.
os.makedirs(SUBM, exist_ok=True)
# Write the predictions without the DataFrame index column.
df.to_csv(f'{SUBM}subm_v4.csv', index=False)

In [18]:
# Shell out to the `kg` command-line tool to submit the CSV to the
# plant-seedlings-classification competition; IPython interpolates {SUBM} from the Python variable.
!kg submit {SUBM}'subm_v4.csv' -c plant-seedlings-classification

0.98236
