# Snake Species Identification Challenge

**Authors**:
- Angus Mackenzie (1106817)
- Nathan Michlo (1386161)

------------------------

## Imports & Modules

In [1]:
from pprint import pprint
from tqdm import tqdm

import fastai
import os
import json
import pandas as pd
import pytorch_lightning as pl
import sys
import urllib.request

from ssic import util
from ssic import data

**Add Local Modules to `PYTHON_PATH`**
- This assumes that the Jupyter server was launched from the root directory of the project, since the vendor paths below are relative.

In [2]:
# Save the original python path on the first run, otherwise restore it to that original
util.restore_python_path()

# Vendored third-party repositories to add to the python path, in order
for vendor_dir in (
    # Methods to visualise CNN activations: https://github.com/utkuozbulak/pytorch-cnn-visualizations
    'vendor/pytorch-cnn-visualizations',
    # Mish activation function: https://github.com/digantamisra98/Mish
    'vendor/Mish',
    # Variance of the Adaptive Learning Rate: https://github.com/LiyuanLucasLiu/RAdam
    'vendor/RAdam',
    # Lookahead optimizer: https://github.com/alphadl/lookahead.pytorch
    'vendor/lookahead.pytorch',
    # Ranger=RAdam+Lookahead: https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer
    'vendor/Ranger-Deep-Learning-Optimizer',
):
    util.add_python_path(vendor_dir)

------------------------------

## Environment
Load the `.env` file that stores the project's environment variables.

In [3]:
# Load the environment variables file; the helper prints the path of the file it loaded
util.load_env()

[[92mLOADED[0m]: /home/nmichlo/workspace/snake-id/.env


------------------------

## Load Data

**Paths**

In [4]:
# Root directories -- pretty much the only two settings that need to change
DATASET_DIR = util.get_env_path('DATASET_DIR', 'data')
STORAGE_DIR = util.get_env_path('STORAGE_DIR', 'out')

# Dataset locations, each based off of DATASET_DIR by default
# - class index mapping file
DATASET_SSIC_CLASSES = util.get_env_path(
    'DATASET_SSIC_CLASSES',
    os.path.join(DATASET_DIR, 'class_idx_mapping.csv'),
)
# - train images, path pattern: {DATASET_SSIC_TRAIN}/class-{class_id}/{uuid}.{ext}
DATASET_SSIC_TRAIN = util.get_env_path(
    'DATASET_SSIC_TRAIN',
    os.path.join(DATASET_DIR, 'train'),
)
# - test images, path pattern: {DATASET_SSIC_TEST}/{uuid}.{ext}
DATASET_SSIC_TEST = util.get_env_path(
    'DATASET_SSIC_TEST',
    os.path.join(DATASET_DIR, 'round1'),
)

[[92mDATASET_DIR[0m]: [90m/home/nmichlo/downloads/datasets/ssic[0m
[[92mSTORAGE_DIR[0m]: [90m/home/nmichlo/workspace/snake-id/out[0m
[[92mDATASET_SSIC_CLASSES[0m]: [90m/home/nmichlo/downloads/datasets/ssic/class_idx_mapping.csv[0m
[[92mDATASET_SSIC_TRAIN[0m]: [90m/home/nmichlo/downloads/datasets/ssic/train[0m
[[92mDATASET_SSIC_TEST[0m]: [90m/home/nmichlo/downloads/datasets/ssic/round1[0m


**Classes**

In [5]:
# Lookup tables between class ids and class names, both built from the same mapping file
# (each call reports how many classes it loaded)
CLASS_NAME_MAP = data.get_ssic_class_name_map(DATASET_SSIC_CLASSES) # class id (int) -> class name (str)
NAME_CLASS_MAP = data.get_ssic_name_class_map(DATASET_SSIC_CLASSES) # class name (str) -> class id (int)

[[92mLOADED[0m]: 45 classes from: /home/nmichlo/downloads/datasets/ssic/class_idx_mapping.csv
[[92mLOADED[0m]: 45 classes from: /home/nmichlo/downloads/datasets/ssic/class_idx_mapping.csv


**Image Info**

In [6]:
# Image info for the training set, cached as JSON inside STORAGE_DIR.
# Values are dictionaries of {name: str, path: str, class_id: int}
# ('class_id' is the key read below; name/path are as documented by the authors).
VALID_IMG_INFO, INVALID_IMG_INFO = util.cache_data(
    os.path.join(STORAGE_DIR, 'img_paths.json'),
    lambda: data.get_ssic_train_img_info(DATASET_SSIC_TRAIN)
)

# Make sure that all classes appear in valid data and vice versa.
# The two set differences are named so a failure reports exactly which class ids are at fault.
img_class_ids = {info['class_id'] for info in VALID_IMG_INFO.values()}
known_class_ids = set(CLASS_NAME_MAP)
unknown_class_ids = img_class_ids - known_class_ids
missing_class_ids = known_class_ids - img_class_ids
assert not unknown_class_ids, f'valid images reference unknown class ids: {unknown_class_ids}'
assert not missing_class_ids, f'classes without any valid images: {missing_class_ids}'

print(f'  valid:   {len(VALID_IMG_INFO)}')
print(f'  invalid: {len(INVALID_IMG_INFO)}')

[[92mLOADED[0m]: /home/nmichlo/workspace/snake-id/out/img_paths.json
  valid:   82417
  invalid: 184


**Bounding Boxes**

In [7]:
# source article: https://medium.com/@Stormblessed/2460292bcfb
BOUNDING_BOXES_URL = 'https://drive.google.com/uc?id=18dx_5Ngmc56fDRZ6YZA_elX-0ehtV5U6'


def _download_bounding_boxes(url=BOUNDING_BOXES_URL):
    """Download the bounding box annotations and parse them as JSON.

    Args:
        url: Location of the annotations JSON file.

    Returns:
        The parsed JSON document.

    Raises:
        urllib.error.URLError: If the download fails.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # The context manager closes the HTTP response even if parsing fails
    with urllib.request.urlopen(url) as response:
        return json.load(response)


# FORMAT: [ {class: 'image', filename: '{uuid}.{ext}', annotations: [{class: 'rect', height: float, width: float, x: float, y: float}, ...]}, ... ]
# Cached in STORAGE_DIR; the download callable is passed to util.cache_data as the data source.
BOUNDING_BOXES = util.cache_data(
    os.path.join(STORAGE_DIR, 'annotations.json'),
    _download_bounding_boxes
)

print(f'  bounding boxes: {len(BOUNDING_BOXES)}')

[[92mLOADED[0m]: /home/nmichlo/workspace/snake-id/out/annotations.json
  bounding boxes: 1423


-------------------------

## Train Bounding Box Network

In [10]:
# Seed the random number generators before training so that runs are repeatable
# (presumably covers python/numpy/torch -- confirm in ssic.util)
util.set_random_seed(42)



[SEEDED]: 42
