In [None]:
# This is a Library that has all the built in Utilities that makes things easier

!pip install tez

About Tez:
https://github.com/abhishekkrthakur/tez/tree/main/tez

In [None]:
import os

import albumentations
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd

import tez
from tez.datasets import ImageDataset
from tez.callbacks import EarlyStopping

import torch
import torch.nn as nn
import torchvision  #For using a Pretrained model

from sklearn import metrics, model_selection



In [None]:
# Read the training labels; the image_id and label columns are used below.
dfx = pd.read_csv(
    '../input/cassava-leaf-disease-classification/train.csv'
)
dfx.head()

In [None]:
dfx.label.value_counts()

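This is an imbalanced dataset: the classes are not equally represented, so the train/validation split below is stratified by label.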

In [None]:
dfx.label.values

In [None]:
# Hold out 10% of the rows for validation. Stratifying on the label keeps the
# class proportions the same in both splits, which matters for imbalanced data.
df_train, df_valid = model_selection.train_test_split(
    dfx,
    test_size=0.1,
    random_state=42,
    stratify=dfx.label.values,
)


In [None]:
df_train.head()

In [None]:
# Replace the row index inherited from dfx with a fresh 0..n-1 range
# (drop=True discards the old index instead of keeping it as a column).
df_train=df_train.reset_index(drop=True)
df_train.head()

In [None]:
# Same re-indexing for the validation rows: fresh 0..n-1 index, old one discarded.
df_valid=df_valid.reset_index(drop=True)
df_valid.head()

In [None]:
print(df_train.shape)
print(df_valid.shape)

In [None]:
# Directory holding the training images; each image_id is joined onto it.
image_path = '../input/cassava-leaf-disease-classification/train_images/'

# One full path per row, in the same order as the corresponding dataframe.
train_image_paths = [
    os.path.join(image_path, image_id) for image_id in df_train.image_id.values
]
valid_image_paths = [
    os.path.join(image_path, image_id) for image_id in df_valid.image_id.values
]

In [None]:
train_image_paths[:5]

In [None]:
# Label arrays, row-aligned with train_image_paths / valid_image_paths
# (both are read from the same dataframes in the same order).
train_targets=df_train.label.values
valid_targets=df_valid.label.values

In [None]:
# First pass without augmentations, to look at the images as stored.
train_dataset = ImageDataset(
    image_paths=train_image_paths,
    targets=train_targets,
    augmentations=None,
)

In [None]:
train_dataset[0]

In [None]:
def plot_img(img_dict):
    """Print a dataset item's target and draw its image in a 5x5 figure.

    Parameters
    ----------
    img_dict : mapping
        A dataset item holding an ``'image'`` tensor and a ``'targets'`` entry.
        The image is assumed to be channel-first (C, H, W) with values on a
        0-255 scale -- TODO confirm against ImageDataset.
    """
    pixels, label = img_dict['image'], img_dict['targets']
    print(label)
    plt.figure(figsize=(5, 5))
    # permute reorders the axes (channel-first -> channel-last for imshow);
    # .view() would only reshape. Dividing by 255 rescales the values.
    plt.imshow(pixels.permute(1, 2, 0) / 255)

In [None]:
plot_img(train_dataset[10])

In [None]:
# Training-time augmentation: a random crop resized to 256x256, then a
# transpose, a horizontal flip and a vertical flip, each with probability 0.5.
train_aug = albumentations.Compose(
    [
        albumentations.RandomResizedCrop(256, 256),
        albumentations.Transpose(p=0.5),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.VerticalFlip(p=0.5),
    ]
)

In [None]:
# Rebuild the training dataset, this time with the augmentation pipeline attached.
train_dataset = ImageDataset(
    image_paths=train_image_paths,
    targets=train_targets,
    augmentations=train_aug,
)


In [None]:
plot_img(train_dataset[10])

In [None]:
# Validation uses a deterministic pipeline only (centre crop + resize).
# Random transposes/flips belong to training: applied here, they would make the
# validation accuracy -- the metric early stopping monitors -- change between
# evaluations of the very same weights.
valid_aug = albumentations.Compose([
    albumentations.CenterCrop(256, 256, p=1.0),
    albumentations.Resize(256, 256),
])

# Validation dataset: held-out paths/targets with the deterministic pipeline.
valid_dataset = ImageDataset(
    image_paths=valid_image_paths,
    targets=valid_targets,
    augmentations=valid_aug,
)


## Create the Model

In [None]:
class LeafModel(tez.Model):
    # Preliminary sketch: it only attaches the ResNet-18 backbone (num_classes is
    # not used yet). The class is defined again, in full, later in this notebook,
    # and that later definition replaces this one.
    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        self.convnet= torchvision.models.resnet18(pretrained=pretrained)  #Loading the pretrained Resnet model
        

In [None]:
torchvision.models.resnet18(pretrained=False)

The network ends with an adaptive average-pooling layer, so we can feed it images of different sizes.

That is followed by a fully connected layer with 1000 outputs, so we need to replace this final layer with our own version sized for our classes.

## Redefining the Model

In [None]:
class LeafModel(tez.Model):
    """ResNet-18 image classifier wrapped as a ``tez.Model``.

    The torchvision ResNet-18 is used unchanged except for its final fully
    connected layer, which is replaced by a new ``num_classes``-way linear head.

    Parameters
    ----------
    num_classes : int
        Number of output classes of the replacement head.
    pretrained : bool, default True
        Passed through to ``torchvision.models.resnet18``.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        self.convnet = torchvision.models.resnet18(pretrained=pretrained)
        # Size the new head from the layer it replaces instead of hard-coding 512.
        self.convnet.fc = nn.Linear(self.convnet.fc.in_features, num_classes)
        # Step the LR scheduler once per epoch (the alternative is per batch).
        self.step_scheduler_after = 'epoch'

    def loss(self, outputs, targets):
        """Return the cross-entropy of ``outputs`` vs ``targets``, or None without targets."""
        if targets is None:
            return None
        return nn.CrossEntropyLoss()(outputs, targets)

    def monitor_metrics(self, outputs, targets):
        """Return ``{'accuracy': value}`` for one batch.

        Predictions are the argmax over dim 1 of ``outputs``; both tensors are
        moved to CPU and converted to NumPy before scoring with scikit-learn.
        """
        predicted = torch.argmax(outputs, dim=1).cpu().detach().numpy()
        expected = targets.cpu().detach().numpy()
        return {'accuracy': metrics.accuracy_score(expected, predicted)}

    def fetch_optimizer(self):
        """Return an Adam optimizer over this model's parameters (lr=1e-3)."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

    def fetch_scheduler(self):
        """Return a StepLR scheduler that multiplies the LR by 0.7 at every step."""
        # step_size is an integer number of scheduler steps between decays and
        # gamma is the decay factor. 0.7 is a decay factor, not a step count:
        # passed as step_size it leaves the learning rate effectively constant.
        return torch.optim.lr_scheduler.StepLR(
            self.optimizer, step_size=1, gamma=0.7
        )

    def forward(self, image, targets=None):
        """Run the backbone and, when targets are given, also score the batch.

        Returns
        -------
        tuple
            ``(outputs, loss, metrics)``; ``loss`` and ``metrics`` are both None
            when ``targets`` is None.
        """
        outputs = self.convnet(image)
        if targets is not None:
            loss = self.loss(outputs, targets)
            mon_metrics = self.monitor_metrics(outputs, targets)
            return outputs, loss, mon_metrics

        return outputs, None, None
    

In [None]:
# One output per distinct label value found in the training CSV.
model=LeafModel(num_classes=dfx.label.nunique(), pretrained=True)

In [None]:
model

In [None]:
# Pull one sample and print the shapes of its image and target before
# passing them through the model.
img= train_dataset[0]['image']
y= train_dataset[0]['targets']
print(img.shape)
print(y.shape)

A single sample has no batch dimension, so we unsqueeze `y` (and the image) to add one before passing them to the model.

In [None]:
y.unsqueeze(0).shape

In [None]:
img.unsqueeze(0).shape

In [None]:
model(img.unsqueeze(0),y.unsqueeze(0))

In [None]:
# Early stopping watches valid_accuracy (mode='max', i.e. higher is better)
# with a patience of 2 and uses model.bin as its model path.
es = EarlyStopping(
    monitor='valid_accuracy',
    model_path='model.bin',
    patience=2,
    mode='max',
)

# Train on the GPU with mixed precision for up to 10 epochs.
model.fit(
    train_dataset,
    valid_dataset=valid_dataset,
    train_bs=32,
    valid_bs=64,
    device='cuda',
    callbacks=[es],
    fp16=True,
    epochs=10,
)

In [None]:
# NOTE(review): deliberate halt -- this always raises AssertionError, so a
# "Run All" stops here and the inference cells below only run when executed
# by hand. Remove this cell to make the notebook run end-to-end.
assert False

In [None]:
# sample_submission.csv lists the test image ids. Its label column holds only
# placeholder values, used below as the targets argument of ImageDataset.
test_dfx = pd.read_csv(
    '../input/cassava-leaf-disease-classification/sample_submission.csv'
)
test_targets = test_dfx.label.values

# image_path is re-pointed from the training folder to the test folder here.
image_path = '../input/cassava-leaf-disease-classification/test_images/'
test_image_paths = [
    os.path.join(image_path, image_id) for image_id in test_dfx.image_id.values
]


In [None]:
test_targets

In [None]:
# Inference uses a deterministic pipeline only (centre crop + resize).
# Random transposes/flips here would make the predictions, and therefore the
# submission file, differ from one run to the next for the same weights.
test_aug = albumentations.Compose([
    albumentations.CenterCrop(256, 256, p=1.0),
    albumentations.Resize(256, 256),
])

# Test dataset: test image paths, placeholder targets, deterministic pipeline.
test_dataset = ImageDataset(
    image_paths=test_image_paths,
    targets=test_targets,
    augmentations=test_aug,
)


### Predictions

In [None]:
# Run inference over the test set. Iterating the result yields one chunk of
# predictions at a time; the chunks are stacked row-wise into a single array.
preds = model.predict(test_dataset, batch_size=64, n_jobs=1)

# Collect first and stack once: a single vstack avoids re-copying the growing
# array on every chunk. final_preds stays None if nothing was predicted.
pred_chunks = list(preds)
final_preds = np.vstack(pred_chunks) if pred_chunks else None
        

In [None]:
final_preds

In [None]:
# Reduce each row to the index of its largest value, i.e. the predicted class.
# NOTE(review): this overwrites final_preds, so the cell cannot be re-run
# without first re-running the prediction cell that builds the stacked array.
final_preds=final_preds.argmax(axis=1)
final_preds

In [None]:
# Overwrite the placeholder labels with the predictions and write the
# submission file without the dataframe index.
test_dfx['label'] = final_preds
test_dfx.to_csv('submission.csv', index=False)