In [1]:
# Mount Google Drive inside the Colab VM at /content/drive so the project
# files stored on Drive become reachable from this notebook.
# force_remount=True requests a fresh mount even if one already exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# List the working directory as a quick check that the `drive` entry is there.
!ls

Mounted at /content/drive
drive  sample_data


In [2]:
# Move into the project directory on the mounted Drive.
# `$repo_name` in the %cd line is expanded by IPython from the Python variable
# assigned on the next line, so only this one name has to change per project.
repo_name = "crop-damage-classification"
%cd /content/drive/MyDrive/Personal-Projects/$repo_name
# List the project root to confirm the working directory is the expected one.
!ls

/content/drive/MyDrive/Personal-Projects/crop-damage-classification
common	     dataloading  Index.ipynb  output		    project-structure.md  transforms
config.yaml  experiments  index.py     preprocess	    README.md		  visualization
data	     Index_bc.py  models       preprocess_input.py  run.yaml


In [3]:
# Set up the environment.
# The install commands below are disabled by wrapping them in a string
# literal (which is why the cell echoes that string as its output).
# Remove the surrounding triple quotes to actually run the installs.
'''
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install matplotlib numpy pandas pyyaml opencv-python
'''

'\n!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n!pip install matplotlib numpy pandas pyyaml opencv-python\n'

# The following cells download the data

In [4]:
# this cell is for downloading data.
# as of yet data is not hosted and is available in the private data folder
# comment if not required
!pip install boto3
!pip install tqdm



In [4]:
# setup some imports
#custom imports
from transforms.transforms import ToTensor, Resize, CenterCrop
from dataloading.dataset import CropDataset
from common.utils import get_exp_params, init_config, get_config, save2config, get_modelinfo, get_saved_model, read_json
from models.resnet18 import Resnet18
from models.custom_models import get_model
from experiments.experiments import Experiment
from visualization.visualization import Visualization
from experiments.test_model import ModelTester
from preprocess.preprocessor import Preprocessor

#py imports
import random
import numpy as np
import os
import torch
from torchvision import transforms
from torch.utils.data import DataLoader

In [6]:
import boto3
from pathlib import Path
from botocore import UNSIGNED
from botocore.client import Config
from tqdm.notebook import tqdm

def get_file_folders(s3_client, bucket_name, prefix=""):
    """List every object key stored under ``prefix`` in an S3 bucket.

    The listing is paginated: ``list_objects_v2`` is called repeatedly, each
    time passing the ``NextContinuationToken`` of the previous response, until
    a response carries no further token.

    Args:
        s3_client: Object exposing ``list_objects_v2(**kwargs)`` (a boto3 S3
            client in this notebook).
        bucket_name: Name of the bucket to list.
        prefix: Only keys starting with this prefix are listed; the default
            ``""`` lists the whole bucket.

    Returns:
        A ``(file_names, folders)`` tuple of lists. Keys ending in ``"/"`` are
        reported as folders, all other keys as files. Both lists are empty
        when nothing matches.
    """
    file_names = []
    folders = []

    request_kwargs = {"Bucket": bucket_name, "Prefix": prefix}

    while True:
        response = s3_client.list_objects_v2(**request_kwargs)

        # "Contents" is missing from the response when no key matches, so
        # fall back to an empty list instead of iterating over None.
        for entry in response.get("Contents") or []:
            key = entry["Key"]
            if key.endswith("/"):
                folders.append(key)
            else:
                file_names.append(key)

        # A missing (or empty) token means this was the last page.
        next_token = response.get("NextContinuationToken")
        if not next_token:
            break
        request_kwargs["ContinuationToken"] = next_token

    return file_names, folders

def download_files(s3_client, bucket_name, local_path, file_names, folders):
    """Mirror a set of S3 keys underneath a local directory.

    Args:
        s3_client: Object exposing ``download_file(bucket, key, filename)``.
        bucket_name: Bucket the keys belong to.
        local_path: Local root directory; every key is placed relative to it.
        file_names: Object keys to download.
        folders: Folder keys; each one is created as a local directory.

    Returns:
        None. Directories are created and files are written as side effects.
    """
    root = Path(local_path)

    # Recreate the folder keys first, including any missing parents.
    for folder_key in tqdm(folders):
        (root / folder_key).mkdir(parents=True, exist_ok=True)

    for object_key in tqdm(file_names):
        destination = root / object_key
        # A file's directory may not be among `folders`, so make sure it exists.
        destination.parent.mkdir(parents=True, exist_ok=True)
        s3_client.download_file(bucket_name, object_key, str(destination))

# Download the dataset only when the image directory is not present yet.
BUCKET_NAME = 'cgiar-crop-damage-classification-challenge'
data_path = 'data/input/images'
images_dir = Path(os.getcwd()) / data_path

if not images_dir.exists():
    # The bucket is read anonymously, hence the unsigned client.
    client = boto3.client('s3', config=Config(signature_version=UNSIGNED))
    file_names, folders = get_file_folders(client, BUCKET_NAME)
    # Download into the parent of the image directory (<cwd>/data/input) so the
    # location that is checked above and the location that is written to can
    # never drift apart, whatever the project directory is called.
    download_files(
        client,
        BUCKET_NAME,
        str(images_dir.parent),
        file_names,
        folders
    )

In [5]:
# Initialise directories and config data, then load and show the configuration.
init_config()
config = get_config()
# One print call; sep='\n' reproduces the blank line between title and value.
print('Config parameters\n', config, sep='\n')

Config parameters

{'X_key': 'image', 'data_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification/data', 'device': 'cpu', 'img_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification/data/input/images', 'output_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification/output', 'root_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification', 'use_gpu': False, 'y_key': 'label'}


In [6]:
# Load the experiment parameters and show them.
exp_params = get_exp_params()
# One print call; sep='\n' reproduces the blank line between title and value.
print('Experiment parameters\n', exp_params, sep='\n')

Experiment parameters

{'transform': {'resize_dim': 256, 'crop_dim': 224}, 'train': {'shuffle_data': True, 'batch_size': 128, 'val_split_method': 'k-fold', 'k': 5, 'val_percentage': 20, 'loss': 'cross-entropy', 'batch_interval': 512, 'epoch_interval': 1, 'num_epochs': 10}, 'model': {'name': 'resnet18', 'optimizer': 'Adam', 'lr': 0.001, 'weight_decay': 1e-05, 'amsgrad': False, 'momentum': 0.9}, 'test_model': False}


In [7]:
# Seed every random number generator used by the notebook with the same value.
seed = 123
# Order: Python's `random`, NumPy, PyTorch, then PyTorch's CUDA generator.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(seed)
# Ask cuDNN to use deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [8]:
#preprocess data or load preprocessed data

# Class names listed in index order: position i is the integer label of name i.
class_names = ('DR', 'G', 'ND', 'WD', 'other')

# name -> integer label
label_dict = {name: idx for idx, name in enumerate(class_names)}

# integer label -> name (exact inverse of label_dict)
class_dict = {idx: name for name, idx in label_dict.items()}

In [9]:
# Record in the config which keys hold the input (X) and the target (y).
for cfg_key, cfg_value in (('X_key', 'image'), ('y_key', 'label')):
    save2config(cfg_key, cfg_value)

# Transform pipeline: to tensor -> resize -> centre crop, sized from exp_params.
transform_params = exp_params['transform']
data_transforms = transforms.Compose([
    ToTensor(),
    Resize(transform_params['resize_dim']),
    CenterCrop(transform_params['crop_dim']),
])

# Wrap the CSV files as datasets.
# NOTE(review): the third positional argument looks like an "is test set" flag
# (False for Train.csv, True for Test.csv) - confirm against CropDataset.
ftr_dataset = CropDataset('input/Train.csv', label_dict, False, transforms=data_transforms)
test_dataset = CropDataset('input/Test.csv', label_dict, True, transforms=data_transforms)

# Small subset: the first 1% of the training samples.
smlen = int(0.01 * len(ftr_dataset))
smftr_dataset = torch.utils.data.Subset(ftr_dataset, list(range(smlen)))

print('Full train dataset length:', len(ftr_dataset))
print('Test dataset length:', len(test_dataset))
print('Subset train dataset length:', smlen, '\n')



Full train dataset length: 26068
Test dataset length: 8663
Subset train dataset length: 260 



In [12]:
# Disabled cell: the training run on the small subset is wrapped in a string
# literal, so nothing is executed and the cell only echoes the string.
# Remove the surrounding triple quotes to run it.
'''
#running experiment on small subset of the dataset
exp = Experiment(exp_params['model']['name'], smftr_dataset)
model_history = exp.train()
'''

"\n#running experiment on small subset of the dataset\nexp = Experiment(exp_params['model']['name'], smftr_dataset)\nmodel_history = exp.train()\n"

In [16]:
# Disabled cell: the metrics computation on the small subset is wrapped in a
# string literal, so nothing is executed when the cell runs as written.
# Remove the surrounding triple quotes to run it.
'''
print("Getting metrics for small data")
preop = Preprocessor()
all_folds_metrics = preop.get_dataset_metrics(smftr_dataset, exp_params['train']['val_split_method'])
print(all_folds_metrics)
'''

Getting metrics for small data
[52, 104, 156, 208]
	Calculating metric for split 0 starting with 0, ending with 52




si 0
si 52 52
	Calculating metric for split 1 starting with 52, ending with 104




si 52
si 104 104
	Calculating metric for split 2 starting with 104, ending with 156




si 104
si 156 156
	Calculating metric for split 3 starting with 156, ending with 208




si 156
si 208 208
{0: ([0.4691032844499407, 0.4642999397722259, 0.3367527026156445], [0.25391136811674037, 0.25511066727296733, 0.29634439087307474], [0.2539114585104833, 0.2551107573869328, 0.2963444952522965]), 1: ([0.4693464024142023, 0.4647783213452792, 0.33516081830332584], [0.25097124265802273, 0.2509423582253417, 0.2935421279130286], [0.2509713321361989, 0.2509424470412791, 0.2935422314196143]), 2: ([0.46598202827447943, 0.4625296299462866, 0.3345988147630704], [0.25433435014241607, 0.2563203380297623, 0.2973605209886066], [0.2543344406678774, 0.25632042852042414, 0.29736062568425525]), 3: ([0.47045614981483663, 0.46831747207811697, 0.34043276211154105], [0.2549591611211113, 0.2553095468361302, 0.2982085550849592], [0.2549592518411412, 0.2553096370120277, 0.29820866004468904])}


In [None]:
# Compute the dataset metrics for the full training set, using the validation
# split method selected in the experiment parameters.
print("Getting metrics for data")
split_method = exp_params['train']['val_split_method']
preop = Preprocessor()
all_folds_metrics = preop.get_dataset_metrics(ftr_dataset, split_method)
print(all_folds_metrics)

Getting metrics for data
	Calculating metric for split 0 starting with 0, ending with 5213




In [None]:
# Train the configured model on the full training dataset.
model_name = exp_params['model']['name']
exp = Experiment(model_name, ftr_dataset)
# Later cells pass model_history to the visualisation step.
model_history = exp.train()

In [None]:
# Dataset metrics for the full training set.
# NOTE(review): this repeats the metrics cell that precedes the training cell;
# consider keeping only one of the two.
print("Getting metrics for data")
preop = Preprocessor()
all_folds_metrics = preop.get_dataset_metrics(
    ftr_dataset,
    exp_params['train']['val_split_method'],
)
print(all_folds_metrics)

In [None]:
# Rebuild the configured architecture and load the saved model into it.
model_name = exp_params["model"]["name"]
model = get_saved_model(get_model(model_name), '')
model_info = get_modelinfo('')
best_fold = model_info['fold']

# Per-fold metrics are read back from the checkpoints directory.
metric_path = os.path.join(config["root_dir"], "models/checkpoints/all_folds_metrics.json")
all_folds_metrics = read_json(metric_path)
print('All folds metrics', all_folds_metrics, sep='\n')

# NOTE(review): the heading says "validation" but the key printed is
# 'trlosshistory' - confirm which history is intended here.
print("\nModel validation results")
print(model_info['results']['trlosshistory'])

# Visualise the results.
# NOTE(review): model_history is only defined by the training cell, so this
# cell fails on a fresh kernel unless training was run in the same session.
vis = Visualization(model_info, model_history)
vis.get_results()

In [None]:
# Evaluate the saved model on the test dataset and write the results to CSV.
print("\n\nTesting Saved Model")
# NOTE(review): all_folds_metrics is loaded from JSON, where object keys are
# strings - confirm best_fold has a matching key type.
best_fold_metrics = all_folds_metrics[best_fold]
mt = ModelTester(model, test_dataset, best_fold_metrics)
mt.test_and_save_csv(class_dict)