In [1]:
# Mount Google Drive at /content/drive so the project files stored there are reachable.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# List the current directory; a `drive` entry should appear once the mount succeeded.
!ls

Mounted at /content/drive
drive  sample_data


In [2]:
# Move into the project directory on the mounted drive. Later cells import the
# project's local packages (transforms, dataloading, common, models, ...) from here.
repo_name = "crop-damage-classification"
# `$repo_name` is expanded by IPython to the value of the Python variable above.
%cd /content/drive/MyDrive/Personal-Projects/$repo_name
# List the project contents as a quick sanity check.
!ls

/content/drive/MyDrive/Personal-Projects/crop-damage-classification
common	     dataloading  Index.ipynb  preprocess	     run.yaml
config.yaml  experiments  index.py     project-structure.md  transforms
data	     Index_bc.py  models       README.md	     visualization


In [3]:
# Set up the environment (optional).
# The install commands below sit inside a string literal, so they do NOT run;
# remove the surrounding ''' quotes to enable them.
# Because the literal is the cell's last expression, the cell echoes it as output.
'''
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install matplotlib numpy pandas pyyaml opencv-python
'''

'\n!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n!pip install matplotlib numpy pandas pyyaml opencv-python\n'

# Following cells are for downloading data

In [4]:
# this cell is for downloading data.
# as of yet data is not hosted and is available in the private data folder
# comment if not required
!pip install boto3
!pip install tqdm



In [5]:
# setup some imports
#custom imports
from transforms.transforms import ToTensor, Resize, CenterCrop
from dataloading.dataset import CropDataset
from common.utils import get_exp_params, init_config, get_config, save2config
from models.resnet18 import Resnet18
#from experiments.experiments import Experiment
from visualization.visualization import Visualization

#py imports
import random
import numpy as np
import os
import torch
from torchvision import transforms
from torch.utils.data import DataLoader

In [7]:
import boto3
from pathlib import Path
from botocore import UNSIGNED
from botocore.client import Config
from tqdm.notebook import tqdm

def get_file_folders(s3_client, bucket_name, prefix=""):
    """List every object key under ``prefix`` in an S3 bucket.

    Pages through ``list_objects_v2`` with continuation tokens until a
    response no longer carries a ``NextContinuationToken``.

    Args:
        s3_client: Object exposing ``list_objects_v2(**kwargs)``
            (e.g. a boto3 S3 client).
        bucket_name: Name of the bucket to list.
        prefix: Only keys starting with this prefix are returned; the
            default ``""`` lists the whole bucket.

    Returns:
        A tuple ``(file_names, folders)``. Keys ending in ``"/"`` are
        collected in ``folders``; every other key goes to ``file_names``.
        Both lists are empty when nothing matches.
    """
    file_names = []
    folders = []

    request_kwargs = {
        "Bucket": bucket_name,
        "Prefix": prefix
    }

    while True:
        response = s3_client.list_objects_v2(**request_kwargs)

        # "Contents" is missing from the response when a page holds no keys,
        # so fall back to an empty list instead of iterating over None.
        for result in response.get("Contents") or []:
            key = result.get("Key")
            # endswith() also copes with an empty key, unlike key[-1].
            if key.endswith("/"):
                folders.append(key)
            else:
                file_names.append(key)

        next_token = response.get("NextContinuationToken")
        if not next_token:
            # No (or empty) token: this was the last page.
            break
        request_kwargs["ContinuationToken"] = next_token

    return file_names, folders

def download_files(s3_client, bucket_name, local_path, file_names, folders):
    """Mirror the given S3 folders and objects beneath ``local_path``.

    Args:
        s3_client: Object exposing ``download_file(bucket, key, filename)``.
        bucket_name: Bucket the keys belong to.
        local_path: Local root directory the keys are placed under.
        file_names: Object keys to download.
        folders: Folder keys to create as local directories.
    """
    root = Path(local_path)

    # Recreate every listed folder, including intermediate directories.
    for folder_key in tqdm(folders):
        (root / folder_key).mkdir(parents=True, exist_ok=True)

    # Fetch each object, ensuring its parent directory exists first.
    for object_key in tqdm(file_names):
        target = root / object_key
        target.parent.mkdir(parents=True, exist_ok=True)
        s3_client.download_file(bucket_name, object_key, str(target))

# Download the dataset from S3 only when the images directory is not on disk yet.
data_path = 'data/input/images'
bucket_name = 'cgiar-crop-damage-classification-challenge'

# Derive both the existence check and the download target from the working
# directory (assumed to be the project root entered in an earlier cell), so the
# two cannot disagree if the project lives somewhere else.
images_dir = os.path.join(os.getcwd(), data_path)
if not os.path.exists(images_dir):
    # Requests are sent unsigned, i.e. without AWS credentials.
    client = boto3.client('s3', config=Config(signature_version=UNSIGNED))
    file_names, folders = get_file_folders(client, bucket_name)
    # Keys are written beneath <cwd>/data/input, the parent of the images directory.
    download_files(
        client,
        bucket_name,
        os.path.dirname(images_dir),
        file_names,
        folders
    )

In [8]:
# Initialise directories and config data, then load the config and display it.
init_config()
config = get_config()
print('Config parameters\n', config, sep='\n')

Config parameters

{'X_key': 'image', 'data_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification/data', 'img_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification/data/input/images', 'output_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification/output', 'root_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification', 'use_gpu': True, 'y_key': 'label'}


In [9]:
# Load the experiment parameters and display them.
exp_params = get_exp_params()
print('Experiment parameters\n', exp_params, sep='\n')

Experiment parameters

{'transform': {'resize_dim': 256, 'crop_dim': 224}, 'train': {'shuffle_data': True, 'batch_size': 128, 'val_split_method': 'k-fold', 'k': 2, 'val_percentage': 20, 'loss': 'cross-entropy', 'batch_interval': 512, 'epoch_interval': 1, 'num_epochs': 1}, 'model': {'name': 'resnet18', 'optimizer': 'Adam', 'lr': 0.001, 'weight_decay': 1e-05, 'amsgrad': False, 'momentum': 0.9}, 'test_model': False}


In [10]:
# Seed every random number generator in use so that runs are repeatable.
seed = 123
random.seed(seed)                  # Python's stdlib RNG
np.random.seed(seed)               # NumPy's global RNG
torch.manual_seed(seed)            # PyTorch RNG
torch.cuda.manual_seed_all(seed)   # every CUDA device, not only the current one

# cuDNN: request deterministic algorithms and switch off the benchmark
# auto-tuner, which may otherwise pick different algorithms between runs.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [11]:
# Class labels, with lookups in both directions built from one ordered list
# so the two mappings cannot drift apart.
_class_names = ['DR', 'G', 'ND', 'WD', 'other']

# class name -> integer index
label_dict = {name: idx for idx, name in enumerate(_class_names)}

# integer index -> class name
class_dict = dict(enumerate(_class_names))

In [12]:
# Store the X_key / y_key entries in the config.
for cfg_key, cfg_value in (('X_key', 'image'), ('y_key', 'label')):
    save2config(cfg_key, cfg_value)

# Transform pipeline: ToTensor -> Resize -> CenterCrop, sized from the experiment parameters.
transform_params = exp_params['transform']
data_transforms = transforms.Compose([
    ToTensor(),
    Resize(transform_params['resize_dim']),
    CenterCrop(transform_params['crop_dim']),
])

# Wrap the train and test CSV files as datasets sharing the same transforms.
ftr_dataset = CropDataset('input/Train.csv', label_dict, False, transforms=data_transforms)
test_dataset = CropDataset('input/Test.csv', label_dict, True, transforms=data_transforms)

# Small training subset: the first 5% of the training samples, in dataset order.
smlen = int(0.05 * len(ftr_dataset))
subset_indices = list(range(smlen))
smftr_dataset = torch.utils.data.Subset(ftr_dataset, subset_indices)

print('Full train dataset length:', len(ftr_dataset))
print('Test dataset length:', len(test_dataset))
print('Subset train dataset length:', smlen, '\n')



Full train dataset length: 26068
Test dataset length: 8663
Subset train dataset length: 1303 



In [13]:
# Instantiate the model named in the experiment parameters.
# Only 'resnet18' is handled; any other name aborts with an error message.
model_name = exp_params['model']['name']
if model_name != 'resnet18':
    raise SystemExit("Error: Invalid model name passed! Check run.yaml")

model = Resnet18(5, False)


In [None]:
#running experiment on small subset of the dataset
# Experiment is imported here because its import in the setup cell is commented
# out; without it this cell raises NameError on a fresh kernel.
from experiments.experiments import Experiment

exp = Experiment(model, smftr_dataset)
model_info = exp.train()
print("\nModel validation results")

#visualization results
vis = Visualization(model_info)
vis.get_results()

In [None]:
# Disabled: training on the full dataset. The code is kept inside a string
# literal so it does not run; remove the surrounding ''' quotes to enable it.
'''
#model training on full dataset
exp = Experiment(model, ftr_dataset)
model_info = exp.train()
print("\nModel validation results")

#visualization results
vis = Visualization(model_info)
vis.get_results()
'''

In [17]:
#running experiment on small subset of the dataset
# Experiment is imported here because its import in the setup cell is commented
# out; without it this cell raises NameError on a fresh kernel.
from experiments.experiments import Experiment

# Only constructs the experiment object; no training is started in this cell.
exp = Experiment(model, smftr_dataset)

In [19]:
#model testing
# NOTE(review): the message says "Best Model", but the object passed below is
# `model` itself — confirm it actually holds the best trained weights at this point.
print("Testing Best Model")
exp.test(model, test_dataset, class_dict)
# Disabled: testing the last model stored in model_info.
# NOTE(review): the disabled call passes label_dict while the active call above
# passes class_dict — confirm which mapping exp.test expects before re-enabling.
#print("\nTesting Last Model")
#exp.test(model_info["last_model"], test_dataset, label_dict)

Testing Best Model
Running through test dataset




	Running through batch 0


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 1


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 2


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 3


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 4


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 5


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 6


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 7


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 8


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 9


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 10


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 11


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 12


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 13


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 14


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 15


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 16


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 17


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 18


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 19


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 20


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 21


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 22


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 23


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 24


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 25


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 26


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 27


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 28


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 29


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 30


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 31


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 32


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 33


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 34


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 35


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 36


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 37


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 38


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 39


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 40


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 41


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 42


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 43


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 44


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 45


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 46


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 47


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 48


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 49


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 50


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 51


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 52


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 53


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 54


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 55


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 56


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 57


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 58


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 59


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 60


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 61


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 62


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 63


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 64


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 65


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 66


  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 67


  results_df = pd.concat([results_df, batch_df], 0)
