In [1]:
# Pick up edits to imported modules automatically (autoreload mode 2) and
# render matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [3]:
from sklearn.metrics import f1_score

# sklearn signature, for reference:
# f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight=None)
def mod_acc_to_f1(preds, targs):
    """Micro-averaged F1 score of the arg-max class predictions.

    preds: per-class scores; the predicted class is the index of the largest
        value along dim 1 (assumes shape (n_samples, n_classes) -- TODO confirm).
    targs: true class indices, passed to sklearn as `y_true`.

    Returns the value of sklearn's `f1_score(..., average='micro')`.
    NOTE: for single-label predictions micro-F1 coincides with plain accuracy,
    which is the metric this function replaced (kept as a comment below).
    """
    _, pred_classes = torch.max(preds, dim=1)
    # Previous accuracy-style metric: (pred_classes == targs).float().mean()
    return f1_score(targs, pred_classes, average='micro')


# Metric list handed to every learn.fit(...) call in this notebook.
metrics = [mod_acc_to_f1]

In [4]:
# Root directory of the dataset; also the base of the submission folder
# created at the end of the notebook.
PATH = 'data/plant_classification/'

In [5]:
# Model architecture used both for building the transforms (get_data) and
# for constructing the pretrained learner.
arch = resnet50

In [6]:
def get_data(sz=150, bs=64):
    """Build the image-classification data object for one image size.

    sz: image size handed to `tfms_from_model`.
    bs: batch size handed to `ImageClassifierData.from_paths`.

    Relies on the notebook-level `arch` and `PATH`. Augmentation uses
    `transforms_top_down` with `max_zoom=1.1`, and the folder named 'test'
    is registered as the test set.
    """
    augmentations = tfms_from_model(arch, sz, aug_tfms=transforms_top_down, max_zoom=1.1)
    return ImageClassifierData.from_paths(PATH, bs=bs, tfms=augmentations, test_name='test')

In [7]:
# Create the learner from the pretrained `arch`, starting with the default
# data settings of get_data() (sz=150, bs=64).
learn = ConvLearner.pretrained(arch, get_data())

In [8]:
# Base learning rate, used by the fit(lr, ...) calls.
lr = 1e-2
# Three graded rates (lr/9, lr/3, lr), used by the fit(lrs, ...) calls that
# follow learn.unfreeze() -- presumably one rate per layer group; TODO confirm.
lrs = np.array([lr/9, lr/3, lr])

In [9]:
# First training pass at the initial 150px data: 3 cycles of 1 epoch each
# (3 epochs in the log below) at the base learning rate.
learn.fit(lr, 3, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1            
    0      1.417702   0.920433   0.705208  
    1      1.077439   0.745122   0.739062                 
    2      0.928245   0.688372   0.755729                  



[0.68837154, 0.7557291666666667]

In [10]:
# Continue with 5 more one-epoch cycles at the same learning rate.
learn.fit(lr, 5, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1             
    0      0.79556    0.625774   0.791667  
    1      0.771172   0.598684   0.795833                  
    2      0.728975   0.536926   0.820833                  
    3      0.717789   0.481029   0.855729                  
    4      0.676266   0.477383   0.843229                  



[0.4773833, 0.8432291666666666]

In [11]:
# Another 5 one-epoch cycles; validation loss is still decreasing in the log.
learn.fit(lr, 5, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1             
    0      0.679756   0.463155   0.847917  
    1      0.647352   0.452207   0.855729                  
    2      0.60796    0.410614   0.863542                  
    3      0.624067   0.419113   0.866667                  
    4      0.595946   0.398256   0.877604                  



[0.39825588, 0.8776041666666666]

In [None]:
# Unfreeze the model and train with the graded learning rates `lrs`, then
# checkpoint as '150_v4' (loaded again by the 250px stage).
# NOTE(review): this cell has no execution count, and its recorded log shows an
# `accuracy` column and only 5 epochs, whereas the other runs with these
# arguments log `mod_acc_to_f1` and 7 epochs. The output looks stale or
# interrupted -- re-run to confirm the saved checkpoint matches this code.
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=metrics)
learn.save('150_v4')

epoch      trn_loss   val_loss   accuracy                  
    0      0.472633   0.273685   0.910417  
    1      0.303626   0.162908   0.953125                  
    2      0.200178   0.145555   0.935417                  
    3      0.212108   0.111992   0.95625                   
    4      0.179488   0.095017   0.96875                   
                                                           

In [9]:
# 250px stage: restore the 150px checkpoint, swap in 250px data (default
# batch size of get_data), freeze again and train 3 one-epoch cycles.
learn.load('150_v4')
learn.set_data(get_data(250))
learn.freeze()
learn.fit(lr, 3, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1             
    0      0.139254   0.128817   0.951562  
    1      0.133365   0.099093   0.967188                  
    2      0.124198   0.102121   0.9625                    



[0.102121055, 0.9625]

In [11]:
# Unfreeze and train at 250px with the graded rates (7 epochs in the log),
# then checkpoint as '250_v4'.
# NOTE(review): the recorded log has an extra `accuracy` column although
# `metrics` only lists mod_acc_to_f1 -- it was apparently produced with a
# different metrics list; the two columns are identical at every epoch.
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=metrics)
learn.save('250_v4')

epoch      trn_loss   val_loss   mod_acc_to_f1 accuracy    
    0      0.1353     0.096606   0.964062   0.964062  
    1      0.129837   0.066773   0.979688   0.979688       
    2      0.102873   0.068519   0.984375   0.984375       
    3      0.086312   0.092635   0.971875   0.971875        
    4      0.079079   0.038812   0.992188   0.992188        
    5      0.064395   0.040512   0.992188   0.992188        
    6      0.055661   0.031987   0.990625   0.990625        



In [9]:
# 350px stage: restore the 250px checkpoint, swap in 350px data with a
# batch size of 50, freeze and train 3 one-epoch cycles.
learn.load('250_v4')
learn.set_data(get_data(350, 50))
learn.freeze()
learn.fit(lr, 3, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1              
    0      0.07502    0.042621   0.993333  
    1      0.070867   0.038838   0.993333                   
    2      0.069196   0.037724   0.993333                   



[0.03772404, 0.9933333333333333]

In [10]:
# Three more one-epoch cycles at 350px, then checkpoint.
# NOTE(review): the name '350_v5' is reused by the save in the next training
# cell, so this checkpoint gets overwritten there.
learn.fit(lr, 3, cycle_len=1, metrics=metrics)
learn.save('350_v5')

epoch      trn_loss   val_loss   mod_acc_to_f1              
    0      0.058769   0.038473   0.991667  
    1      0.054043   0.034966   0.993333                   
    2      0.053156   0.034829   0.991667                   



In [11]:
# Reload 350px data with a smaller batch size (30), unfreeze and train with the
# graded rates (7 epochs in the log). The batch-size reduction is presumably
# for memory headroom while all layers train -- TODO confirm.
# NOTE(review): this save replaces the earlier checkpoint of the same name.
learn.set_data(get_data(350, 30))
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=metrics)
learn.save('350_v5')

epoch      trn_loss   val_loss   mod_acc_to_f1               
    0      0.172275   0.080541   0.977222  
    1      0.1604     0.07582    0.975                       
    2      0.092516   0.052155   0.983333                     
    3      0.127527   0.045056   0.985                       
    4      0.092619   0.060323   0.98                         
    5      0.049538   0.022185   0.996667                     
    6      0.053091   0.021683   0.996667                     



In [12]:
# Freeze again and train 5 one-epoch cycles at the base learning rate.
learn.freeze()
learn.fit(lr, 5, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1                
    0      0.031915   0.018097   0.998333  
    1      0.027791   0.017465   0.996667                     
    2      0.03203    0.018473   0.996667                     
    3      0.035652   0.017342   0.995                        
    4      0.029713   0.017538   0.996667                     



[0.017538367, 0.9966666666666667]

In [24]:
# Three more one-epoch cycles with the same settings.
learn.fit(lr, 3, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1                
    0      0.022338   0.014747   0.996667  
    1      0.026981   0.0149     0.996667                     
    2      0.033308   0.013511   0.998333                     



[0.013510953, 0.9983333333333334]

In [25]:
# Repeat once more; the logged metric stays at ~0.998 while val_loss edges down.
learn.fit(lr, 3, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1                
    0      0.0281     0.012985   0.998333  
    1      0.029147   0.013049   0.995                        
    2      0.025595   0.010607   0.998333                     



[0.010606637, 0.9983333333333334]

In [38]:
# 400px stage: swap in 400px data (batch size 50), freeze and train
# 3 one-epoch cycles. No checkpoint is reloaded here; training continues
# from the learner's current weights.
learn.set_data(get_data(400, 50))
learn.freeze()
learn.fit(lr, 3, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1              
    0      0.021205   0.011788   0.998333  
    1      0.024002   0.01205    0.998333                   
    2      0.019458   0.010173   0.998333                   



[0.010172943, 0.9983333333333334]

In [39]:
# Checkpoint the frozen-stage weights, then reload 400px data with batch size
# 25, unfreeze and train with the graded rates (7 epochs in the log).
# NOTE(review): both saves use the name '400_v6', so the checkpoint written
# before unfreezing is overwritten by the one written after training.
learn.save('400_v6')
learn.set_data(get_data(400, 25))
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=metrics)
learn.save('400_v6')

epoch      trn_loss   val_loss   mod_acc_to_f1                
    0      0.069266   0.041566   0.986667  
    1      0.115684   0.054516   0.978333                     
    2      0.060602   0.032566   0.993333                     
    3      0.116656   0.068586   0.98                         
    4      0.078937   0.019366   0.996667                     
    5      0.037762   0.015878   0.995                        
    6      0.030782   0.01448    0.995                        



In [40]:
# Freeze again and train 3 one-epoch cycles at the base learning rate.
learn.freeze()
learn.fit(lr, 3, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1                
    0      0.024939   0.015117   0.995     
    1      0.023771   0.009587   0.998333                     
    2      0.022548   0.008959   0.998333                     



[0.008958911, 0.9983333333333334]

In [50]:
# Back to batch size 50 at 400px, then 3 one-epoch cycles with the graded rates.
# NOTE(review): the last freeze/unfreeze call visible before this cell is
# learn.freeze(), and no unfreeze() is shown here. Confirm whether `lrs` is
# meant to be used on a frozen model, or whether an unfreeze() ran in a cell
# that is not shown (the execution counts jump from 40 to 50).
learn.set_data(get_data(400, 50))
learn.fit(lrs, 3, cycle_len=1, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1              
    0      0.013984   0.008485   0.996667  
    1      0.016758   0.008078   0.996667                   
    2      0.015298   0.008099   0.996667                   



[0.008099246, 0.9966666666666667]

In [51]:
# Same data; 3 cycles with cycle_mult=2 (7 epochs in the log).
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1              
    0      0.017277   0.006912   0.996667  
    1      0.01183    0.007265   1.0                        
    2      0.013391   0.006397   0.996667                   
    3      0.015631   0.00665    0.998333                   
    4      0.013111   0.007115   0.998333                   
    5      0.018757   0.006864   0.998333                   
    6      0.011383   0.006502   0.998333                   



[0.0065018986, 0.9983333333333334]

In [61]:
# Longest run: 5 cycles with cycle_mult=2.
# NOTE(review): the recorded log below stops at epoch 15 with a truncated row,
# and the final result list does not match that row's val_loss. The output
# appears to be cut off.
learn.fit(lrs, 5, cycle_len=1, cycle_mult=2, metrics=metrics)

epoch      trn_loss   val_loss   mod_acc_to_f1              
    0      0.016362   0.006562   0.998333  
    1      0.014735   0.00711    0.996667                   
    2      0.010748   0.006391   0.998333                   
    3      0.014025   0.005502   0.998333                    
    4      0.014401   0.005514   0.998333                   
    5      0.013779   0.005832   0.998333                   
    6      0.014031   0.005778   0.998333                    
    7      0.010413   0.005432   0.998333                    
    8      0.0092     0.00517    0.998333                    
    9      0.01221    0.005732   0.998333                    
    10     0.012749   0.005797   0.998333                    
    11     0.012733   0.005737   0.998333                   
    12     0.010048   0.006071   0.998333                    
    13     0.010116   0.005976   0.998333                    
    14     0.011272   0.005683   0.998333                    
    15     0.0164     0.005256  

[0.004256842, 0.9983333333333334]

In [62]:
# Checkpoint the final weights that are used for the test-set predictions below.
learn.save('400_v7')

In [63]:
# Run test-time augmentation on the test set (is_test=True). The second return
# value is bound to `y` but is not used in the cells shown in this notebook.
log_preds,y = learn.TTA(is_test=True)
# Exponentiate the log-predictions and take the median over axis 0 --
# presumably axis 0 indexes the augmented passes, giving one probability
# vector per test image; TODO confirm the shape returned by TTA.
probs = np.median(np.exp(log_preds),0)

                                             

## Create the submission CSV

In [64]:
# Class names known to the data object; indexed below by predicted class index.
classes = learn.data.classes
# np.argmax returns the indices of the maximum values along an axis: here, the
# index of the highest-probability class for each row of `probs`.
choices = np.argmax(probs, axis=1)

In [65]:
# Translate every predicted class index into its class name.
# Iterating the index array directly replaces the np.nditer loop: nditer is
# unnecessary for this index array (assumed 1-D, as produced by argmax over
# axis 1 above) and raises on zero-sized input, while this form simply yields
# an empty list.
res = [classes[i] for i in choices]

In [66]:
# One-column frame holding the predicted class name for each test image;
# the file-name column and the column labels are added in the next cells.
df = pd.DataFrame(res)

In [67]:
# Prepend the image file names as the first column. The test file names carry
# their folder prefix (e.g. 'test/'); os.path.basename strips it without
# hard-coding the prefix length, so this keeps working if the test folder
# is renamed.
df.insert(0, 'file', [os.path.basename(n) for n in learn.data.test_ds.fnames])

In [68]:
# Name the two columns: the image file name and the predicted class.
df.columns = ['file','species']

In [81]:
# Submission folder under the dataset root; created if it does not exist yet.
SUBM = f'{PATH}subm/'
os.makedirs(SUBM, exist_ok=True)
# Write the predictions without the DataFrame index, so the file contains
# only the 'file' and 'species' columns.
df.to_csv(f'{SUBM}subm_v8.csv', index=False)

In [82]:
# Upload the submission file with the `kg` command-line tool.
# NOTE(review): the output recorded for this cell is an error message
# ("list indices must be integers or slices, not str"), so this upload does
# not appear to have succeeded -- check the tool's setup before relying on it.
!kg submit {SUBM}'subm_v8.csv' -c plant-seedlings-classification

list indices must be integers or slices, not str
