In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')
# List the current working directory as a quick check that the mount point
# ("drive") is visible.
!ls

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
drive  sample_data


In [3]:
# Move into the project directory on Drive so that the project-local imports
# and relative paths used later resolve against the repository root.
repo_name = "crop-damage-classification"
# IPython substitutes $repo_name with the value of the Python variable
# before running the %cd magic.
%cd /content/drive/MyDrive/Personal-Projects/$repo_name
# List the repository contents as a sanity check.
!ls

/content/drive/MyDrive/Personal-Projects/crop-damage-classification
common	     dataloading  Index.ipynb  output		     README.md	 visualization
config.yaml  experiments  index.py     preprocess	     run.yaml
data	     Index_bc.py  models       project-structure.md  transforms


In [4]:
# Set up the environment (disabled by default).
# The install commands are wrapped in a string literal so they do not run;
# remove the surrounding triple quotes to install the dependencies.
# NOTE: because the string is the last expression in the cell, the notebook
# echoes it back as the cell output.
'''
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install matplotlib numpy pandas pyyaml opencv-python
'''

'\n!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n!pip install matplotlib numpy pandas pyyaml opencv-python\n'

# The following cells download the data

In [5]:
# this cell is for downloading data.
# as of yet data is not hosted and is available in the private data folder
# comment if not required
!pip install boto3
!pip install tqdm

Collecting boto3
  Downloading boto3-1.34.16-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting botocore<1.35.0,>=1.34.16 (from boto3)
  Downloading botocore-1.34.16-py3-none-any.whl (11.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.9/11.9 MB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3)
  Downloading s3transfer-0.10.0-py3-none-any.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jmespath, botocore, s3transfer, boto3
Successfully installed boto3-1.34.16 botocore-1.34.16 jmespath-1.0.1 s3transfer-0.10.0


In [4]:
# setup some imports
#custom imports
from transforms.transforms import ToTensor, Resize, CenterCrop
from dataloading.dataset import CropDataset
from common.utils import get_exp_params, init_config, get_config, save2config, get_saved_model
from models.resnet18 import Resnet18
from experiments.experiments import Experiment
from visualization.visualization import Visualization

#py imports
import random
import numpy as np
import os
import torch
from torchvision import transforms
from torch.utils.data import DataLoader

In [6]:
import boto3
from pathlib import Path
from botocore import UNSIGNED
from botocore.client import Config
from tqdm.notebook import tqdm

def get_file_folders(s3_client, bucket_name, prefix=""):
    """List every object key under ``prefix`` in an S3 bucket.

    Pages through ``list_objects_v2`` until the response carries no
    continuation token, splitting the returned keys into files and folder
    placeholders.

    Args:
        s3_client: Object exposing ``list_objects_v2(**kwargs)`` (a boto3 S3
            client in this notebook).
        bucket_name: Name of the bucket to list.
        prefix: Only keys starting with this prefix are listed. The default
            empty string lists the whole bucket.

    Returns:
        A ``(file_names, folders)`` tuple of lists. Keys ending in ``/`` are
        reported as folders, every other key as a file.
    """
    file_names = []
    folders = []

    request_kwargs = {"Bucket": bucket_name, "Prefix": prefix}

    while True:
        response = s3_client.list_objects_v2(**request_kwargs)

        # "Contents" is absent from the response when no key matches, so fall
        # back to an empty list instead of iterating over None.
        for result in response.get("Contents") or []:
            key = result.get("Key")
            # endswith() is also safe for an empty key, unlike key[-1].
            if key.endswith("/"):
                folders.append(key)
            else:
                file_names.append(key)

        next_token = response.get("NextContinuationToken")
        # A missing (or empty) token means this was the last page.
        if not next_token:
            break
        request_kwargs["ContinuationToken"] = next_token

    return file_names, folders

def download_files(s3_client, bucket_name, local_path, file_names, folders):
    """Mirror the given S3 folders and files beneath ``local_path``.

    Args:
        s3_client: Object exposing ``download_file(bucket, key, filename)``
            (a boto3 S3 client in this notebook).
        bucket_name: Bucket the keys belong to.
        local_path: Local directory under which the keys are recreated.
        file_names: Object keys to download; each key is used as the path
            relative to ``local_path``.
        folders: Folder keys to create as (possibly empty) directories.
    """
    root = Path(local_path)

    # Recreate the folder keys first, including any missing parents.
    for folder_key in tqdm(folders):
        (root / folder_key).mkdir(parents=True, exist_ok=True)

    for object_key in tqdm(file_names):
        target = root / object_key
        # The parent directory may not have been listed as a folder key.
        target.parent.mkdir(parents=True, exist_ok=True)
        s3_client.download_file(bucket_name, object_key, str(target))

# Download the dataset only when the images folder is missing.
# The existence check and the download target are both derived from the
# current working directory (the project directory entered earlier via %cd),
# so they cannot point at different locations if the project is moved.
BUCKET_NAME = 'cgiar-crop-damage-classification-challenge'
data_path = 'data/input/images'
images_dir = os.path.join(os.getcwd(), data_path)
# Bucket keys are recreated relative to the parent of the images folder
# (i.e. <project>/data/input), matching the previous hard-coded target.
input_dir = os.path.dirname(images_dir)
if not os.path.exists(images_dir):
    # Unsigned requests: the bucket is read without AWS credentials.
    client = boto3.client('s3', config=Config(signature_version=UNSIGNED))
    file_names, folders = get_file_folders(client, BUCKET_NAME)
    download_files(client, BUCKET_NAME, input_dir, file_names, folders)

In [7]:
# Initialize directories and config data, then display the loaded config.
init_config()
config = get_config()
# A single print with a newline separator yields the same heading, blank
# line and config dump as two consecutive prints.
print('Config parameters\n', config, sep='\n')

Config parameters

{'X_key': 'image', 'data_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification/data', 'img_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification/data/input/images', 'output_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification/output', 'root_dir': '/content/drive/MyDrive/Personal-Projects/crop-damage-classification', 'use_gpu': True, 'y_key': 'label'}


In [8]:
# Load the experiment parameters and display them under a heading.
exp_params = get_exp_params()
print('Experiment parameters\n', exp_params, sep='\n')

Experiment parameters

{'transform': {'resize_dim': 256, 'crop_dim': 224}, 'train': {'shuffle_data': True, 'batch_size': 128, 'val_split_method': 'k-fold', 'k': 5, 'val_percentage': 20, 'loss': 'cross-entropy', 'batch_interval': 512, 'epoch_interval': 1, 'num_epochs': 10}, 'model': {'name': 'resnet18', 'optimizer': 'Adam', 'lr': 0.001, 'weight_decay': 1e-05, 'amsgrad': False, 'momentum': 0.9}, 'test_model': False}


In [9]:
# Seed every random number generator used by the notebook with one value so
# that repeated runs draw the same random numbers.
seed = 123
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(seed)
# Restrict cuDNN to deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [10]:
#preprocess data or load preprocessed data

# Class name -> integer label; labels follow the order of the names below.
label_dict = {
    name: idx for idx, name in enumerate(('DR', 'G', 'ND', 'WD', 'other'))
}

# Integer label -> class name, i.e. the inverse of label_dict.
class_dict = {idx: name for name, idx in label_dict.items()}

In [11]:
# Record in the config which sample keys hold the input and the target.
save2config('X_key', 'image')
save2config('y_key', 'label')

# Preprocessing pipeline: tensor conversion, resize, then centre crop, with
# the sizes taken from the experiment parameters.
transform_params = exp_params['transform']
data_transforms = transforms.Compose([
    ToTensor(),
    Resize(transform_params['resize_dim']),
    CenterCrop(transform_params['crop_dim']),
])

# Build the datasets from their CSV indexes.
# NOTE(review): the third positional argument presumably marks the test
# split - confirm against CropDataset.
ftr_dataset = CropDataset('input/Train.csv', label_dict, False, transforms=data_transforms)
test_dataset = CropDataset('input/Test.csv', label_dict, True, transforms=data_transforms)

# Small subset made of the first 5% of the training samples.
smlen = int(0.05 * len(ftr_dataset))
smftr_dataset = torch.utils.data.Subset(ftr_dataset, [*range(smlen)])

print('Full train dataset length:', len(ftr_dataset))
print('Test dataset length:', len(test_dataset))
print('Subset train dataset length:', smlen, '\n')



Full train dataset length: 26068
Test dataset length: 8663
Subset train dataset length: 1303 



In [12]:
# Build the model named in the experiment parameters; stop the run when the
# name is not recognised.
if exp_params['model']['name'] != 'resnet18':
    raise SystemExit("Error: Invalid model name passed! Check run.yaml")

# NOTE(review): 5 matches the number of entries in label_dict; the meaning
# of the second argument is not visible here - confirm against Resnet18.
model = Resnet18(5, False)


In [None]:
# Disabled trial run: the code below is wrapped in a string literal, so it
# does not execute. Remove the surrounding triple quotes to train on the
# small subset (smftr_dataset) instead of the full training set.
'''
# define experiment
exp = Experiment(model, smftr_dataset)

#running experiment on small subset of the dataset
model_info = exp.train()
print("\nModel validation results")

#visualization results
vis = Visualization(model_info)
vis.get_results()

exp.save_model(model_info["best_model"], model_info, "best_model")
'''

In [13]:
# Train the model on the full training dataset.
# `exp` is reused by the testing cell further down, so this cell has to run
# (at least up to this assignment) before that one.
exp = Experiment(model, ftr_dataset)
# train() returns the training results; the "best_model" entry is the one
# saved at the end of this cell.
model_info = exp.train()
print("\nModel validation results")

# Visualize the results collected during training.
vis = Visualization(model_info)
vis.get_results()

# Save the best model together with its training info under the name
# "best_model".
exp.save_model(model_info["best_model"], model_info, "best_model")

Running split 0
	Running Epoch 0
		Running through training dataset




KeyboardInterrupt: 

In [10]:
# NOTE(review): presumably loads the previously saved best model into
# `model`; the meaning of the empty second argument is not visible here -
# confirm against common/utils.
model = get_saved_model(model, '')

# Model testing on the held-out test dataset.
# NOTE(review): `exp` is created in the training cell, so that cell must have
# been executed in the current kernel before this one.
print("Testing Best Model")
exp.test(model, test_dataset, class_dict)


Testing Best Model
Running through test dataset




	Running through batch 0
	Running through batch 1
	Running through batch 2
	Running through batch 3
	Running through batch 4
	Running through batch 5
	Running through batch 6
	Running through batch 7
	Running through batch 8
	Running through batch 9
	Running through batch 10
	Running through batch 11
	Running through batch 12
	Running through batch 13
	Running through batch 14
	Running through batch 15
	Running through batch 16
	Running through batch 17
	Running through batch 18
	Running through batch 19
	Running through batch 20
	Running through batch 21
	Running through batch 22
	Running through batch 23
	Running through batch 24
	Running through batch 25
	Running through batch 26
	Running through batch 27
	Running through batch 28
	Running through batch 29
	Running through batch 30
	Running through batch 31
	Running through batch 32
	Running through batch 33
	Running through batch 34
	Running through batch 35
	Running through batch 36
	Running through batch 37
	Running through batch

  op = F.softmax(model(batch[self.X_key].float()))
  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 65


  op = F.softmax(model(batch[self.X_key].float()))
  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 66


  op = F.softmax(model(batch[self.X_key].float()))
  results_df = pd.concat([results_df, batch_df], 0)


	Running through batch 67


  op = F.softmax(model(batch[self.X_key].float()))
  results_df = pd.concat([results_df, batch_df], 0)
