# This Colab notebook must be run on a **P100** GPU instance; otherwise it will crash. Use Cell-1 to verify that the runtime has a **P100** GPU.

Cell-1: Verify that the required GPU instance (P100) is attached

In [1]:
# Number of CPU sockets, i.e. available slots for physical processors
!lscpu | grep 'Socket(s):'
# Number of cores per processor
!lscpu | grep 'Core(s) per socket:'
# Number of threads per core
!lscpu | grep 'Thread(s) per core'
# GPU count and name. The listing is captured so it can be checked further down;
# it is printed so the cell still shows the same text as before.
gpu_listing = !nvidia-smi -L
print('\n'.join(gpu_listing))
# Full GPU status table; re-run this command to watch GPU activity during deep learning tasks.
# Both 'nvidia-smi' commands only work on a GPU runtime:
# 'Runtime > Change runtime type > Hardware accelerator > GPU'.
!nvidia-smi
# Make the P100 requirement explicit instead of relying on reading the output by eye.
# This only reports; it does not stop execution.
if any('P100' in gpu_line for gpu_line in gpu_listing):
    print('OK: a P100 GPU is attached to this runtime.')
else:
    print('WARNING: no P100 GPU detected. This notebook is expected to crash on other '
          'instances; change or reconnect the runtime until a P100 is assigned.')

Socket(s):           1
Core(s) per socket:  1
Thread(s) per core:  2
GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-e754b97b-d438-6a96-7c88-b140c0ab1ceb)
Wed Oct  7 08:21:59 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.23.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                      

Cell-2: Mount Google Drive

In [2]:
# Mount Google Drive into the Colab filesystem. The project sources, data and
# weight directories referenced elsewhere in this notebook are all addressed
# through paths that start with /content/gdrive.
from google.colab import drive

drive.mount(mountpoint='/content/gdrive')

Mounted at /content/gdrive


Cell-3: Install Required Dependencies

In [3]:
# Every dependency is pinned to an exact version so the run is reproducible.
!pip install efficientnet_pytorch==0.7.0
!pip install albumentations==0.4.5
# CUDA 10.1 builds of torch/torchvision come from the PyTorch wheel index (-f).
# The URL and the -q flag are written without the stray markdown-escape
# backslashes ("torch\_stable.html", "-q\") that the command used to carry.
!pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html -q

Collecting efficientnet_pytorch==0.7.0
  Downloading https://files.pythonhosted.org/packages/4e/83/f9c5f44060f996279e474185ebcbd8dbd91179593bffb9abe3afa55d085b/efficientnet_pytorch-0.7.0.tar.gz
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.0-cp36-none-any.whl size=16031 sha256=3023528227dbd64561a47d427395b2e7a969dc5248cbd7265fafca0e07a25db8
  Stored in directory: /root/.cache/pip/wheels/e9/c6/e1/7a808b26406239712cfce4b5ceeb67d9513ae32aa4b31445c6
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.0
Collecting albumentations==0.4.5
[?25l  Downloading https://files.pythonhosted.org/packages/8d/40/a343ecacc7e22fe52ab9a16b84dc6165ba05ee17e3729adeb3e2ffa2b37b/albumentations-0.4.5.tar.gz (116kB)
[K     |████████████████████████████████| 122k

Cell-4: Run this cell to train the current fold and generate its weights (estimated training time for this fold is around 1 hour 50 minutes)

In [None]:
import sys
# Put the project's source directory (on the mounted Drive) first on the import
# path so the project modules imported below can be found.
sys.path.insert(0, "/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/src_lq2")

# NOTE(review): wildcard imports hide where each name comes from. The rest of
# this cell uses the following names that are neither defined nor explicitly
# imported here, so they are presumably supplied by these four modules:
# ZCDataset, get_train_transforms, get_val_transforms, CNN_Model, ModelTrainer,
# AverageMeter, PrintMeter, set_random_state. Which module defines which is not
# visible from this notebook -- consider switching to explicit imports.
from dataset import *
from model import *
from trainer import *
from utils import *

# Third-party libraries used directly in this cell.
import numpy as np
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader

# Settings for this training run, gathered in one place so they are easy to tune.
config = dict(
    # Cross-validation: number of stratified folds, the seed shared by the global
    # random state and the fold splitter, and the zero-based fold this run trains.
    n_folds=5,
    random_seed=5400,
    run_fold=3,
    # Arguments forwarded to CNN_Model.
    model_name='efficientnet-b1',
    global_dim=1280,
    # Data loading: batch size and number of DataLoader worker processes.
    batch_size=92,
    n_core=2,
    # Checkpoint locations handed to the trainer.
    weight_saving_path='/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/train_lq2_only_effnet_b1_step1/weights/',
    resume_checkpoint_path=None,
    # Optimisation: learning rate and number of epochs passed to the trainer.
    lr=0.01,
    total_epochs=100,
)


if __name__ == '__main__':
    # Train a single cross-validation fold (config['run_fold']) on the quality-2 samples.

    # Fail fast on a fold index the splitter can never produce. Previously such a
    # value made the cell load all the data and then finish without training.
    if not 0 <= config['run_fold'] < config['n_folds']:
        raise ValueError(
            "config['run_fold'] must be in [0, %d), got %r"
            % (config['n_folds'], config['run_fold'])
        )

    set_random_state(config['random_seed'])

    imgs = np.load('/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_npy_data/train_imgs.npy')
    labels = np.load('/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_npy_data/train_labels.npy')
    labels_quality = np.load('/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_npy_data/train_labels_quality.npy')

    # Keep only the samples whose quality flag equals 2 (mask computed once and
    # applied to both arrays), then shift the labels down by one.
    # NOTE(review): the shift presumably makes the class ids zero-based -- confirm.
    quality_mask = labels_quality == 2
    imgs = imgs[quality_mask]
    labels = labels[quality_mask] - 1

    skf = StratifiedKFold(n_splits=config['n_folds'], shuffle=True, random_state=config['random_seed'])
    # Materialise every split and pick the requested one. Exhausting the generator
    # here (instead of leaving it suspended inside a for-loop) means it no longer
    # references imgs/labels when they are deleted further down.
    fold_number = config['run_fold']
    train_index, val_index = list(skf.split(X=imgs, y=labels))[fold_number]

    train_dataset = ZCDataset(
                        imgs[train_index],
                        labels[train_index],
                        transform=get_train_transforms(),
                        test=False,
                        )
    train_loader = DataLoader(
                        train_dataset,
                        batch_size=config['batch_size'],
                        shuffle=True,
                        num_workers=config['n_core'],
                        drop_last=True,
                        pin_memory=True,
                        )

    val_dataset = ZCDataset(
                        imgs[val_index],
                        labels[val_index],
                        transform=get_val_transforms(),
                        test=True,
                        )
    val_loader = DataLoader(
                        val_dataset,
                        batch_size=config['batch_size'],
                        shuffle=False,
                        num_workers=config['n_core'],
                        pin_memory=True,
                        )

    # Indexing with integer index arrays copies the selected rows, so the datasets
    # do not depend on the full arrays; drop them to release host memory.
    del imgs, labels

    model = CNN_Model(config['model_name'], config['global_dim'])

    args = {
            'model': model,
            'Loaders': [train_loader, val_loader],
            'metrics': {'Loss': AverageMeter, 'f1_score': PrintMeter, 'rmse': PrintMeter},
            'checkpoint_saving_path': config['weight_saving_path'],
            # Previously hard-coded to False, so setting
            # config['resume_checkpoint_path'] could never switch resuming on.
            # With the path left at None this is still False.
            # NOTE(review): assumes ModelTrainer reads this flag as "load the
            # checkpoint at resume_checkpoint_path" -- confirm against trainer.py.
            'resume_train_from_checkpoint': config['resume_checkpoint_path'] is not None,
            'resume_checkpoint_path': config['resume_checkpoint_path'],
            'lr': config['lr'],
            'fold': fold_number,
            'epochsTorun': config['total_epochs'],
            'batch_size': config['batch_size'],
            'test_run_for_error': False,
            'problem_name': 'zindi_cigar',
            }
    # The names 'Trainer', 'model' and the loaders are kept as-is in case other
    # cells of the notebook refer to them.
    Trainer = ModelTrainer(**args)
    Trainer.fit()

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b1-f1951068.pth


HBox(children=(FloatProgress(value=0.0, max=31519111.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b1


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)
(Train) Fold 3 Epoch 1/100: : 25 batches [00:44,  1.78s/ batches, TrainLoss=3.1356, Trainf1_score=0.0000, Trainrmse=0.0000]
(Valid) Fold 3 Epoch 1/100:   0%|          | 0/7 [00:00<?, ? batches/s]




(Valid) Fold 3 Epoch 1/100: : 8 batches [00:02,  2.82 batches/s, ValLoss=3.0627, Valf1_score=0.0060, Valrmse=3.0329]
(Train) Fold 3 Epoch 2/100:   0%|          | 0/24 [00:00<?, ? batches/s]


 Val Loss is improved from 9999.0000 to 3.0627! 
 Val f1 score is improved from -9999.0000 to 0.0060! 


(Train) Fold 3 Epoch 2/100:  46%|████▌     | 11/24 [00:18<00:22,  1.71s/ batches, TrainLoss=3.0143, Trainf1_score=0.0000, Trainrmse=0.0000]