In [1]:
# Reload the autoreload extension and re-import edited modules before each cell runs,
# so changes to library source are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [3]:
# Root folder of the dataset: get_data() loads images from here and the
# submission CSV is written to a sub-folder of it.
PATH = 'data/plant_classification/'

In [4]:
# Model architecture shared by the transforms (tfms_from_model) and the pretrained learner.
arch = resnet50

In [5]:
def get_data(sz=100, bs=64):
    """Build the image classifier data object for a given image size and batch size.

    sz: image size handed to `tfms_from_model`.
    bs: batch size handed to `ImageClassifierData.from_paths`.

    Relies on the notebook-level `arch` and `PATH`; the 'test' folder under
    PATH is registered as the test set.
    """
    return ImageClassifierData.from_paths(
        PATH,
        bs=bs,
        tfms=tfms_from_model(arch, sz, aug_tfms=transforms_top_down, max_zoom=1.1),
        test_name='test',
    )

In [6]:
# Create the learner from the pretrained `arch`, starting on get_data()'s defaults (sz=100, bs=64).
learn = ConvLearner.pretrained(arch, get_data())

In [7]:
# Base learning rate, plus a three-element array (lr/9, lr/3, lr) used for the
# fits that follow unfreeze() — presumably one rate per layer group; confirm.
lr = 1e-2
lrs = np.array([lr / divisor for divisor in (9, 3, 1)])

In [8]:
# First pass at 100px, before any unfreeze() call: 3 cycles of 1 epoch each at the
# single rate `lr` (three epoch rows are logged below).
learn.fit(lr, 3, cycle_len=1)

[ 0.       1.66727  1.17036  0.5974 ]                     
[ 1.       1.32497  0.92643  0.69375]                     
[ 2.       1.15079  0.86203  0.70573]                     



In [9]:
# Unfreeze, then fine-tune with the rate array `lrs`.
# 3 cycles with cycle_len=1 and cycle_mult=2 run for 1 + 2 + 4 = 7 epochs,
# matching the 7 rows logged below.
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
# Checkpoint the 100px weights under the name '100_v3'.
learn.save('100_v3')

[ 0.       0.65787  0.44487  0.84896]                      
[ 1.       0.41535  0.25907  0.90312]                      
[ 2.       0.28026  0.27876  0.90469]                      
[ 3.       0.25622  0.23478  0.91719]                      
[ 4.       0.21595  0.18584  0.9375 ]                      
[ 5.       0.17124  0.17366  0.94844]                      
[ 6.       0.14073  0.18379  0.94219]                      



In [10]:
# Progressive resizing: switch to 200px images (batch size stays at get_data's default),
# freeze again, and run 3 one-epoch cycles at the base rate.
learn.set_data(get_data(sz=200))
learn.freeze()
learn.fit(lr, 3, cycle_len=1)

[ 0.       0.26205  0.24065  0.9125 ]                      
[ 1.       0.2299   0.22201  0.92188]                      
[ 2.       0.20762  0.21138  0.91719]                      



In [11]:
# Unfreeze and fine-tune at 200px with the rate array `lrs`.
# 3 cycles with cycle_len=1 and cycle_mult=2 run for 1 + 2 + 4 = 7 epochs (7 rows logged below).
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
# Checkpoint the 200px weights under the name '200_v3'; the 350px stage loads this checkpoint.
learn.save('200_v3')

[ 0.       0.21021  0.16802  0.93906]                      
[ 1.       0.18534  0.13727  0.96406]                      
[ 2.       0.1335   0.12197  0.95937]                      
[ 3.       0.12653  0.15249  0.94375]                      
[ 4.       0.1194   0.10446  0.96719]                       
[ 5.       0.08486  0.07002  0.97344]                       
[ 6.       0.06915  0.07907  0.97344]                       



In [8]:
# Resume from the '200_v3' checkpoint and move to 350px images with a batch size of 30
# (presumably lowered so the larger images fit in GPU memory — confirm).
learn.load('200_v3')
learn.set_data(get_data(sz=350, bs=30))
learn.freeze()
learn.fit(lr, 3, cycle_len=1)

[ 0.       0.14266  0.16677  0.945  ]                        
[ 1.       0.12506  0.16288  0.93833]                        
[ 2.       0.11569  0.14238  0.95333]                        



In [9]:
# Unfreeze and fine-tune at 350px with the rate array `lrs`.
# 3 cycles with cycle_len=1 and cycle_mult=2 run for 1 + 2 + 4 = 7 epochs (7 rows logged below).
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
# Checkpoint the final 350px weights under the name '350_v3'.
learn.save('350_v3')

[ 0.       0.16611  0.16695  0.94667]                        
[ 1.       0.12354  0.08984  0.97   ]                        
[ 2.       0.0832   0.06877  0.98   ]                         
[ 3.       0.13605  0.19384  0.93667]                        
[ 4.       0.0884   0.11241  0.96833]                         
[ 5.       0.05303  0.09434  0.97   ]                         
[ 6.       0.04643  0.06867  0.98167]                         



In [10]:
# Test-time augmentation on the test set. The first return value is treated as
# log-probabilities stacked along axis 0 (one slice per augmented pass — assumption, confirm):
# they are exponentiated and then averaged over that axis.
log_preds, y = learn.TTA(is_test=True)
probs = np.exp(log_preds).mean(axis=0)

                                             

## Create the submission CSV from the predictions

In [11]:
# Map each test image's most probable class index to its class name.
classes = learn.data.classes
choices = np.argmax(probs, axis=1)  # index of the highest probability in each row

# Iterate the index array directly: np.nditer yields 0-d arrays rather than plain
# indices and raises ValueError on a zero-sized array, so a comprehension is both
# simpler and safe when there are no predictions.
res = [classes[idx] for idx in choices]

In [12]:
# One row per predicted class name, in a single column that pandas labels 0 for now.
df = pd.DataFrame(data=res)

In [13]:
# Prepend the test file names as the first column. basename() strips the leading
# folder (e.g. 'test/') whatever its length, so nothing depends on a hard-coded offset.
df.insert(0, 'file', [os.path.basename(n) for n in learn.data.test_ds.fnames])

In [14]:
# Name the columns: 'file' first, then the predicted 'species'.
df.columns = ['file','species']

In [15]:
# Write the submission CSV (without the index column) into a 'subm' folder under PATH,
# creating that folder first if it does not exist yet.
SUBM = f'{PATH}subm/'
os.makedirs(SUBM, exist_ok=True)
submission_csv = f'{SUBM}subm_v3.csv'
df.to_csv(submission_csv, index=False)