<a href="https://colab.research.google.com/github/csnick93/sports_classification/blob/main/SportsClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#TODO
# - get the following to work
#     - mixup, label smoothing and tta
# - mixup, label smoothing
#   - debug either in notebook or in terminal script why we are getting 
#       the weird prediction behavior
#   - also understand using debugging what is happening
# - tta
#   - debug as well to understand why performance becomes so much worse

In [None]:
# Mount Google Drive at /content/drive; the shell cells below copy the
# project archives from this mount.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from pathlib import Path
# Project folder on the mounted Google Drive.
cloud_dir = Path('/content/drive') / 'My Drive' / 'SportsClassification'


In [None]:
!rsync --info=progress2 ./drive/My\ Drive/SportsClassification/data.zip . && unzip data.zip 

In [None]:
!rsync --info=progress2 ./drive/My\ Drive/SportsClassification/mlruns.zip . && unzip mlruns.zip

In [None]:
!mkdir ~/.kaggle
!rsync --info=progress2 ./drive/My\ Drive/SportsClassification/kaggle.json ~/.kaggle

In [None]:
# Locations of the extracted dataset and MLflow store on the Colab VM.
data_dir = Path('/content/data')
mlflow_dir = Path('/content/mlruns')
# Fail early, and say which path is missing, if an archive was not extracted.
assert data_dir.exists(), f'Dataset directory not found: {data_dir}'
assert mlflow_dir.exists(), f'MLflow directory not found: {mlflow_dir}'

# Getting the code repo

In [None]:
!pip install --upgrade pip

In [None]:
!git clone https://github.com/csnick93/sports_classification.git

In [None]:
!pip install -q mlflow kaggle

In [None]:
!pip install fastai==2.1.8 nbdev --upgrade

In [None]:
!pip install onnx onnxruntime

# Imports

In [None]:
from fastai.vision.all import *
from fastai.data.all import *
import pandas as pd
from tqdm.notebook import tqdm
import onnx
import onnxruntime 
import torch

# Experiment Configs

In [None]:
#config
class Config:
  """Hyper-parameters and paths describing one training experiment.

  Attributes mirror the constructor arguments; in addition `train_val_file`
  is derived from `data_dir` and `data_subset`.
  """

  def __init__(self,
               data_dir,
               augmentations = False,
               img_size = 224,
               num_epochs = 5,
               num_freeze_epochs = 1,
               model_arch = resnet18,
               data_subset=False,
               mixup_alpha=0,
               multi_class=True):
    """Store the experiment settings.

    Args:
      data_dir: path-like root of the dataset; must support the `/` operator.
      augmentations: whether batch augmentations are applied.
      img_size: side length (pixels) of the network input.
      num_epochs: epochs for the second (unfrozen) training phase.
      num_freeze_epochs: epochs for the first training phase.
      model_arch: architecture callable passed to the learner.
      data_subset: use `subset_train_val_data.csv` instead of the full CSV.
      mixup_alpha: MixUp alpha; values <= 0 disable MixUp downstream.
      multi_class: treat labels as multi-label (space-delimited) targets.
    """
    self.data_dir = data_dir
    self.augmentations = augmentations
    self.img_size = img_size
    self.num_epochs = num_epochs
    self.num_freeze_epochs = num_freeze_epochs
    self.model_arch = model_arch
    self.data_subset = data_subset
    self.mixup_alpha = mixup_alpha
    self.multi_class = multi_class

    # The subset CSV lives next to the full one inside data_dir.
    csv_name = 'subset_train_val_data.csv' if self.data_subset else 'train_val_data.csv'
    self.train_val_file = self.data_dir/csv_name

  def _arch_name(self):
    """Return a short, file-name friendly name for `model_arch`."""
    name = getattr(self.model_arch, '__name__', None)
    if name:
      return name
    # Fallback for objects without __name__ (e.g. functools.partial):
    # keep the original repr-based extraction, but never raise.
    parts = str(self.model_arch).split(' ')
    return parts[1] if len(parts) > 1 else parts[0]

  def __str__(self):
    """Short tag used to name exported model files.

    Note: `num_freeze_epochs` and `mixup_alpha` are not part of the tag.
    """
    return '%s_%i_%i_%s_%s_%s'%(str(self.augmentations), self.img_size,
                          self.num_epochs, self._arch_name(),
                          self.data_subset, self.multi_class)

  def mlflow_config(self):
    """Return all attributes as a list of `(name, value)` pairs for logging."""
    return list(vars(self).items())
  
# Settings for the current experiment (small data subset, multi-label targets, no MixUp).
config = Config(
    data_dir,
    augmentations=True,
    img_size=224,
    num_epochs=5,
    num_freeze_epochs=5,
    model_arch=resnet18,
    data_subset=True,
    mixup_alpha=0.0,
    multi_class=True,
)

* For data subset:
  * ResNet18:
    * num_freeze_epochs: after epoch 8, starting to overfit

# Get Data and inspect

In [None]:
# Collect the image files under <data_dir>/train and read the train/validation
# CSV selected by the config (subset or full).
train_val_folder = get_image_files(config.data_dir/"train")
train_val_data = pd.read_csv(config.train_val_file)

In [None]:
# Target handling: multi-label targets are space-delimited in CSV column 1.
if config.multi_class:
  category_block, label_reader = MultiCategoryBlock, ColReader(1, label_delim=' ')
else:
  category_block, label_reader = CategoryBlock, ColReader(1)

# Arguments shared by both pipelines; column 0 holds the image path
# (prefixed with data_dir), ColSplitter reads the train/valid split from the frame.
block_kwargs = dict(blocks=(ImageBlock, category_block),
                    splitter=ColSplitter(),
                    get_x=ColReader(0, pref=config.data_dir),
                    get_y=label_reader)

if config.augmentations:
  # Resize to twice the target size first, then augment and crop per batch.
  block_kwargs.update(item_tfms=Resize(2*config.img_size),
                      batch_tfms=aug_transforms(size=config.img_size,
                                                min_scale=0.75))
else:
  block_kwargs.update(item_tfms=Resize(config.img_size))

data_block = DataBlock(**block_kwargs)

In [None]:
# Build the dataloaders from the CSV dataframe using the DataBlock defined above.
dls = data_block.dataloaders(train_val_data)

In [None]:
# Visual sanity check of one batch of images and labels.
dls.show_batch()

In [None]:
# mixup = MixUp(0.5)
# learn = Learner(dls, config.model_arch, loss_func=CrossEntropyLossFlat(), cbs=[mixup])
# learn.epoch,learn.training = 0,True
# learn.dl = dls.train
# b = dls.one_batch()
# learn._split(b)
# learn('before_batch')

# _,axs = plt.subplots(3,3, figsize=(9,9))
# dls.show_batch(b=(mixup.x,mixup.y), ctxs=axs.flatten())

# Start Training

In [None]:
# Metric (and the name SaveModelCallback monitors) depends on the target type.
if config.multi_class:
  metrics, monitor = partial(accuracy_multi, thresh=0.5), 'accuracy_multi'
else:
  metrics, monitor = error_rate, 'error_rate'

# Always keep the best checkpoint; add MixUp only when a positive alpha is configured.
callbacks = [SaveModelCallback(monitor=monitor, fname='best_model')]
if config.mixup_alpha > 0:
  mixup = MixUp(alpha = config.mixup_alpha)
  callbacks.append(mixup)

learn = cnn_learner(dls, config.model_arch, metrics=metrics, cbs=callbacks)

In [None]:
# Learning-rate range test. The two returned suggestions are only kept for
# inspection; the next training cell uses a hard-coded 3e-3.
lr_min, lr_steep = learn.lr_find()

In [None]:
# First training phase: config.num_freeze_epochs epochs with max LR 3e-3.
# NOTE(review): presumably only the head is trained here, since unfreeze() is
# called afterwards — confirm.
learn.fit_one_cycle(config.num_freeze_epochs, 3e-3)

In [None]:
# Loss curves recorded during the first training phase.
learn.recorder.plot_loss()

In [None]:
# Unfreeze the model and rerun the learning-rate range test before the second phase.
learn.unfreeze()
learn.lr_find()

In [None]:
# Second training phase: config.num_epochs epochs with a learning-rate range
# of 3e-6 .. 3e-4 passed as a slice.
learn.fit_one_cycle(config.num_epochs, lr_max=slice(3e-6,3e-4))

In [None]:
# Loss curves recorded during the second training phase.
learn.recorder.plot_loss()

In [None]:
# Pair the first three values of the last recorded epoch with metric names for MLflow.
# NOTE(review): these are last-epoch values; if SaveModelCallback restores the best
# checkpoint after fitting, they may not describe the exported model — confirm.
result_names = ('final_train_loss', 'final_val_loss', f'final_{monitor}')
learning_results = [(name, learn.final_record[idx])
                    for idx, name in enumerate(result_names)]

In [None]:
# Display the (name, value) pairs that will be logged to MLflow.
learning_results

# Inspect results

In [None]:
# Test images; derived from data_dir instead of a second hard-coded copy of the path.
files = get_image_files(data_dir/"test")

In [None]:
# Single-image smoke test of the trained learner.
learn.predict(files[0])

In [None]:
# Show up to nine sample predictions.
learn.show_results(max_n=9)

In [None]:
# Interpretation object used by the top-losses plot below.
interp = Interpretation.from_learner(learn)

In [None]:
# Plot the 16 samples with the highest loss.
interp.plot_top_losses(16, figsize=(15,10))


In [None]:
# The confusion matrix is only produced in the single-label setting.
if not config.multi_class:
  class_interp = ClassificationInterpretation.from_learner(learn)
  class_interp.plot_confusion_matrix(title='Confusion matrix', figsize=(10,10))

In [None]:
# Most-confused class pairs (min_val=5); relies on class_interp from the
# previous cell, which is only defined in the single-label setting.
if not config.multi_class:
  class_interp.most_confused(min_val=5)

# Make prediction on test set

In [None]:
# Dataloader over the images in <data_dir>/test.
test_dl = dls.test_dl(get_image_files(data_dir/"test"))

In [None]:
# Visual sanity check of one test batch.
test_dl.show_batch()

In [None]:
# Predictions for the whole test dataloader; element 0 is used as the
# per-class score tensor in the next cell.
preds = learn.get_preds(dl=test_dl)

In [None]:
# Index of the highest-scoring class per image (a single label per image,
# even when the model was trained with multi-label targets).
pred_ind = preds[0].argmax(dim=1)

In [None]:
# Map the predicted class indices back to class names via the vocab.
pred_cat = [dls.vocab[p] for p in pred_ind]

In [None]:
# Export the learner under a file name derived from the config tag (Config.__str__).
model_path = f'models/{config}.pkl'
learn.export(fname=model_path)

In [None]:
# Take the file names from the dataloader the predictions were computed on, so
# names and predictions are paired by construction instead of relying on a
# second directory scan returning the same order.
# Paths are rewritten relative to the data directory ('./test/...').
test_files = L(test_dl.items).map(lambda x: str(x).replace(str(data_dir), '.'))

In [None]:
# Column layout of the submission file: image path and predicted sport.
predictions = dict(image=test_files, sports=pred_cat)

In [None]:
# One row per test image.
pred_df = pd.DataFrame(predictions)

In [None]:
# Write the predictions to CSV without the index column; the file is later
# logged as an MLflow artifact.
test_prediction_file = 'test_evaluation.csv'
pred_df.to_csv(test_prediction_file, index=False)

# Perform TTA on model

We need to create two test dataloaders, each covering one half of the test dataset (TTA is only applied to the validation part, and we can't have a dataloader that contains only validation data). Run `tta()` over both dataloaders and then concatenate the results.

In [None]:
# Test-time augmentation over the full test set with a single dataloader.
# NOTE(review): the text above describes splitting the test set across two
# dataloaders, but this cell uses one — confirm which is intended.
test_dl = dls.test_dl(get_image_files(data_dir/"test"))
# tta() returns a pair; element 0 holds the predictions.
preds = learn.tta(dl=test_dl)[0]
pred_ind = torch.argmax(preds, axis=1)
pred_cat = [dls.vocab[p] for p in pred_ind] 

In [None]:
# Use the items of the dataloader the TTA predictions were computed on, so
# names and predictions are paired by construction instead of relying on a
# second directory scan returning the same order.
tta_predictions = {'image': list(test_dl.items), 'sports': pred_cat}
tta_pred_df = pd.DataFrame(tta_predictions)
# Store paths relative to the data directory ('./test/...').
tta_pred_df.image = tta_pred_df.image.apply(lambda x : str(x).replace(str(data_dir), '.'))

In [None]:
# Write the TTA predictions to their own CSV, without the index column.
tta_test_prediction_file = 'tta_test_evaluation.csv'
tta_pred_df.to_csv(tta_test_prediction_file, index=False)

# Interpretation using CAM

In [None]:
class Hook:
  """Context manager that captures a module's forward output.

  While active, each forward pass of `mod` stores a detached copy of its
  output in `self.stored`. The hook is removed when the `with` block exits.
  """

  def __init__(self, mod):
    # Keep the handle so the hook can be removed on exit.
    self.hook = mod.register_forward_hook(self.hook_func)

  def __enter__(self, *args):
    return self

  def __exit__(self, *exc_info):
    # Always unregister, so the module does not keep a reference to this object.
    self.hook.remove()

  def hook_func(self, mod, inp, out):
    """Forward-hook callback; the (module, input, output) signature is fixed by torch."""
    captured = out.detach().clone()
    self.stored = captured
  

In [None]:
# Build a one-image test batch from the sixth test file; the trailing comma
# unpacks the single-element batch tuple.
test_img, = first(dls.test_dl([get_image_files(data_dir/"test")[5]]))

In [None]:
# Run one forward pass in eval mode without gradients and capture the output
# of learn.model[0] for the single test image.
with Hook(learn.model[0]) as hook:
  with torch.no_grad():
    output = learn.model.eval()(test_img)
  # Drop the batch dimension of the stored activations.
  act = hook.stored[0]

In [None]:
# Weight the captured activation maps with the weight matrix of the last layer
# of learn.model[1]: one (i, j) map per output row c. The contraction requires
# the weight's second dimension to equal the number of activation channels k.
# NOTE(review): a class activation map assumes this last layer acts directly on
# the pooled activations — confirm that holds for the head of learn.model[1].
cam_map = torch.einsum('ck, kij->cij', learn.model[1][-1].weight, act)

In [None]:
im_dec = TensorImage(dls.train.decode((test_img,))[0][0]) # decoding needed due to normalization of loader
# Stretch the heat-map over the actual image size instead of a hard-coded 224,
# so the overlay stays aligned when config.img_size changes.
img_h, img_w = im_dec.shape[-2:]
_, ax = plt.subplots()
im_dec.show(ctx=ax)
# cam_map[1] is the map for class index 1 of the vocab.
ax.imshow(cam_map[1].detach().cpu(), alpha=0.6, extent=(0, img_w, img_h, 0), interpolation='bilinear', cmap='jet')

# Interpretation using GradCam

In [None]:
# Show the class vocabulary, to look up the class index used for Grad-CAM below.
dls.vocab

In [None]:
class HookBwd:
  """Context manager that captures the gradient flowing out of a module.

  While active, each backward pass through `mod` stores a detached copy of
  the first output gradient in `self.stored`. The hook is removed when the
  `with` block exits.
  """

  def __init__(self, mod):
    # Keep the handle so the hook can be removed on exit.
    self.hook = mod.register_backward_hook(self.hook_func)

  def __enter__(self, *args):
    return self

  def __exit__(self, *exc_info):
    self.hook.remove()

  def hook_func(self, mod, grad_inp, grad_out):
    """Backward-hook callback; only the first output gradient is kept."""
    captured = grad_out[0].detach().clone()
    self.stored = captured

In [None]:
# Grad-CAM inputs: activations of one module of learn.model[0] and the gradient
# of one class score with respect to them.
class_idx = 0 # badminton
layer_idx = -1  # last module of learn.model[0]
with HookBwd(learn.model[0][layer_idx]) as hookg:
  with Hook(learn.model[0][layer_idx]) as hook:
    # Gradients must stay enabled here because backward() is called below.
    output = learn.model.eval()(test_img)
    act = hook.stored
  # Back-propagate only the score of the selected class for the single image.
  output[0, class_idx].backward()
  grad = hookg.stored

In [None]:
# Grad-CAM: weight every activation channel by its spatially averaged gradient
# and sum over the channels.
w = grad[0].mean(dim=[1,2], keepdim=True)
cam_map = (w*act[0]).sum(0)

im_dec = TensorImage(dls.train.decode((test_img,))[0][0]) # decoding needed due to normalization of loader
# Stretch the heat-map over the actual image size instead of a hard-coded 224,
# so the overlay stays aligned when config.img_size changes.
img_h, img_w = im_dec.shape[-2:]
_, ax = plt.subplots()
im_dec.show(ctx=ax)
ax.imshow(cam_map.detach().cpu(), alpha=0.6, extent=(0, img_w, img_h, 0), interpolation='bilinear', cmap='jet')

# Log the results

In [None]:
import mlflow
from mlflow import log_metric, log_param, log_artifacts,log_artifact

In [None]:
# Point MLflow at the local mlruns directory extracted earlier.
mlflow.set_tracking_uri(str(mlflow_dir))

In [None]:
def get_max_run_id(experiment_id):
    """Return the number of runs recorded for `experiment_id`.

    The count doubles as the highest 1-based run ordinal used so far.
    """
    runs = mlflow.search_runs(experiment_ids=[experiment_id])
    return len(runs)


def connect_to_experiment(experiment_name):
    """Look up (or create) an MLflow experiment and number the next run.

    Args:
        experiment_name: name of the MLflow experiment.

    Returns:
        `(experiment_id, run_id)`, where `run_id` is the 1-based ordinal of
        the run about to be started: 1 for a new experiment, otherwise the
        number of existing runs plus one.
    """
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        experiment_id = mlflow.create_experiment(experiment_name)
        run_id = 1
    else:
        experiment_id = experiment.experiment_id
        # +1 so both branches agree: previously an experiment with one
        # existing run also yielded 1, colliding with the new-experiment case.
        run_id = get_max_run_id(experiment_id) + 1
    return experiment_id, run_id

In [None]:
# Open a run in the 'sports_classification' experiment; it stays active until
# mlflow.end_run() is called. run_id is not used by start_run (fixed run name).
experiment_id, run_id = connect_to_experiment('sports_classification')
mlflow.start_run(run_name='sports_classification_run', experiment_id=experiment_id)

In [None]:
# Log every config attribute as an MLflow parameter.
for param_name, param_value in config.mlflow_config():
  log_param(param_name, param_value)

In [None]:
# Log the final training/validation numbers as MLflow metrics.
for metric_name, metric_value in learning_results:
  log_metric(metric_name, metric_value)

In [None]:
# Attach the exported model and the plain test predictions to the run
# (the TTA prediction file is not logged).
log_artifact(model_path)
log_artifact(test_prediction_file)

In [None]:
# Close the run opened above.
mlflow.end_run()

# Uploading the updated mlruns to Google Drive

In [None]:
# Re-pack the local MLflow store (including the run logged above) into mlruns.zip.
!zip -r mlruns.zip mlruns

In [None]:
# Copy the archive into the project folder on Drive.
!rsync --info=progress2 mlruns.zip ./drive/My\ Drive/SportsClassification/ 

# Loading existing model for further work

In [None]:
# Look up the experiment the runs were logged under; it must already exist.
EXPERIMENT_NAME = 'sports_classification'
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
assert experiment is not None

In [None]:
# All runs recorded for the experiment.
runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id]) 

In [None]:
# Display the runs found for the experiment.
runs

In [None]:
# artifact_uri = runs.artifact_uri.iloc[0]

In [None]:
# models = [str(f) for f in Path(artifact_uri).ls() if '.pkl' in str(f)]
# assert(len(models)==1)
# model = models[0]

In [None]:
#learn = load_learner(model)

# ONNX conversion and quality check

## Conversion to ONNX

In [None]:
# Example input for the ONNX export: the first image of the first batch of
# learn.dls[0], kept as a batch of one.
im_tensor = first(learn.dls[0])[0][:1]

In [None]:
# Export the trained network to ONNX. The batch dimension (axis 0) of both the
# input and the output is declared dynamic so any batch size can be fed later.
onnx_dynamic_axes = {'input' : {0 : 'batch_size'},
                     'output' : {0 : 'batch_size'}}
torch.onnx.export(
    learn.model,
    im_tensor,
    "sports_classifier.onnx",
    export_params=True,
    opset_version=10,
    do_constant_folding=True,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes=onnx_dynamic_axes,
)

In [None]:
# Load the exported file back for validation.
onnx_model = onnx.load("sports_classifier.onnx")

In [None]:
# Run the ONNX checker on the loaded model.
onnx.checker.check_model(onnx_model)

## Compute validation accuracy

In [None]:
# ONNX Runtime session used for the accuracy check below.
ort_session = onnxruntime.InferenceSession("sports_classifier.onnx")

In [None]:
# Top-1 accuracy of the exported ONNX model on the VALIDATION split.
# (learn.dls[0] is the training loader; the validation loader is learn.dls.valid.)
input_name = ort_session.get_inputs()[0].name  # looked up once, not per sample
ctr = 0
correct = 0
for im_batch, label_batch in learn.dls.valid:
  for im, label in zip(im_batch, label_batch):
    # The session is fed one image at a time, as a batch of one.
    ort_inputs = {input_name: np.expand_dims(im.cpu().numpy(), 0)}
    ort_outs = ort_session.run(None, ort_inputs)
    predicted_label = int(np.argmax(ort_outs[0]))
    label = label.cpu().numpy()
    # Multi-label targets are multi-hot vectors; compare against the first
    # active class. Single-label targets are already class indices.
    target = int(np.argmax(label)) if config.multi_class else int(label)
    ctr += 1
    # Accumulate a plain int so `correct` never turns into a tensor/array.
    correct += int(predicted_label == target)
print(f'Accuracy: {correct/ctr}')