# Upload Data and Import Libraries

In [1]:
from zipfile import ZipFile

# Location of the competition archive on the mounted Google Drive.
file_name = "/content/drive/My Drive/ZINDI_CGIAR/data.zip"

# Open the archive read-only and unpack everything into the current
# working directory; the context manager closes the file afterwards.
with ZipFile(file_name, mode='r') as archive:
    print('Extracting all the files now...')
    archive.extractall()
    print('Done!')

Extracting all the files now...
Done!


In [19]:
# Show the GPU attached to this runtime (model, driver/CUDA version, memory in use).
!nvidia-smi

Tue Oct  6 18:24:15 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.23.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   71C    P0    31W /  70W |   3623MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Copy the training labels and the sample submission from the mounted Drive
# folder into the current working directory, where later cells read them.
!cp "/content/drive/My Drive/ZINDI_CGIAR/Train.csv" . 
!cp "/content/drive/My Drive/ZINDI_CGIAR/SampleSubmission.csv" .

In [3]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading https://files.pythonhosted.org/packages/4e/83/f9c5f44060f996279e474185ebcbd8dbd91179593bffb9abe3afa55d085b/efficientnet_pytorch-0.7.0.tar.gz
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.0-cp36-none-any.whl size=16031 sha256=180528042b6b8b4cea7190394177bdce9e6e1ea734aa1eefc852d3acebf2e83e
  Stored in directory: /root/.cache/pip/wheels/e9/c6/e1/7a808b26406239712cfce4b5ceeb67d9513ae32aa4b31445c6
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.0


In [4]:
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
from torchvision import transforms
import torch
import torchvision
import torchvision.models as models
from efficientnet_pytorch import EfficientNet
from torch.optim.lr_scheduler import MultiStepLR
from torch.optim.lr_scheduler import OneCycleLR
import pandas as pd 
import numpy as np
import sklearn
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score
from tqdm import tqdm_notebook as tqdm 
from sklearn.model_selection import train_test_split
import albumentations
from albumentations import torch as AT
import cv2

In [20]:
import random
import numpy as np
SEED_VAL  = 1000
# Set the seed value all over the place to make this reproducible.
def seed_all(SEED):
  """Seed every random number generator this notebook relies on.

  Seeds Python's `random`, NumPy, and PyTorch (CPU and all CUDA devices),
  exports PYTHONHASHSEED, and switches cuDNN to deterministic mode with
  benchmarking disabled.

  Args:
    SEED: integer seed applied to all generators.
  """
  # Use the argument rather than the module-level SEED_VAL so that callers
  # passing a different seed actually get it.
  random.seed(SEED)
  np.random.seed(SEED)
  torch.manual_seed(SEED)
  torch.cuda.manual_seed_all(SEED)
  os.environ['PYTHONHASHSEED'] = str(SEED)
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False

# Preparing Dataset 

In [6]:
def cv_reader(path):
  """Load the image at `path` with OpenCV and return it in RGB channel order.

  Args:
    path: filesystem path of the image to read.

  Returns:
    The decoded image after a BGR -> RGB conversion.

  Raises:
    FileNotFoundError: if OpenCV could not read an image from `path`.
  """
  img = cv2.imread(path)
  if img is None:
    # cv2.imread reports a missing/unreadable file by returning None; fail
    # here with the offending path instead of a cryptic cvtColor error.
    raise FileNotFoundError(f"Could not read image: {path}")
  return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

class ImageDataset(Dataset):
    """Dataset that loads `<data_path>/<image_name>.jpeg` for each row of a DataFrame.

    Args:
        data_path: directory containing the image files.
        df: DataFrame with an `image_name` column and, when `mode == 'train'`,
            a `target` column.
        transform: optional callable invoked as `transform(image=img)` and
            returning a dict whose `'image'` entry is the transformed image.
        mode: `'train'` to return labels as well; any other value returns
            images only.
    """

    def __init__(self, data_path, df, transform=None,mode='train'):
        self.df = df
        self.loader = cv_reader
        self.transform = transform
        self.dir = data_path
        self.mode = mode

    def __getitem__(self, index):
        """Return a dict with a float `'image'` tensor and, in train mode, a float `'label'`."""
        # Positional lookup: `index` comes from the sampler as 0..len-1, so this
        # stays correct even when `df` does not carry a default RangeIndex.
        image_name = self.df.image_name.iloc[index]
        image = self.loader(os.path.join(self.dir, image_name+'.jpeg'))
        if self.transform is not None:
            image = self.transform(image=image)['image']

        # as_tensor converts arrays and passes tensors through, avoiding the
        # extra copy and UserWarning of torch.tensor() on an existing tensor.
        sample = {'image' : torch.as_tensor(image,dtype=torch.float)}
        if self.mode == 'train':
            label = self.df.target.iloc[index]
            sample['label'] = torch.tensor(label,dtype = torch.float)
        return sample

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return self.df.shape[0]

In [7]:
class Net(nn.Module):
    """Backbone network whose final layer is replaced by a single-output head.

    Supported `name` values:
      * 'b0' .. 'b7'            -> EfficientNet (efficientnet_pytorch), pretrained
      * 'densenet121/161/169/201', 'resnet34/50/101/152',
        'resnext50', 'resnext101', 'mobilenetv2' -> torchvision, pretrained
      * 'alexnet'               -> torchvision, randomly initialised
      * 'rexnetv1'              -> ReXNetV1 (needs a `rexnetv1` module in scope)

    Raises:
        ValueError: if `name` is not one of the supported architectures.
    """

    # EfficientNet variants, loaded as 'efficientnet-<name>'.
    _EFFICIENTNET_NAMES = ('b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7')

    # name -> (torchvision.models constructor, attribute holding the final Linear layer)
    _TORCHVISION_BACKBONES = {
        'densenet121': ('densenet121', 'classifier'),
        'densenet161': ('densenet161', 'classifier'),
        'densenet169': ('densenet169', 'classifier'),
        'densenet201': ('densenet201', 'classifier'),
        'resnet34': ('resnet34', 'fc'),
        'resnet50': ('resnet50', 'fc'),
        'resnet101': ('resnet101', 'fc'),
        'resnet152': ('resnet152', 'fc'),
        'resnext50': ('resnext50_32x4d', 'fc'),
        'resnext101': ('resnext101_32x8d', 'fc'),
    }

    def __init__(self,name):
        super(Net, self).__init__()
        self.name  = name
        if name in self._EFFICIENTNET_NAMES:
            self.arch = EfficientNet.from_pretrained('efficientnet-' + name)
            # Read the head width from the loaded model instead of hard-coding
            # it per variant, as the torchvision branches already do.
            self.arch._fc = nn.Linear(in_features=self.arch._fc.in_features, out_features=1, bias=True)
        elif name in self._TORCHVISION_BACKBONES:
            builder_name, head_attr = self._TORCHVISION_BACKBONES[name]
            self.arch = getattr(models, builder_name)(pretrained=True)
            if name in ('resnext50', 'resnext101'):
                # NOTE(review): this dropout module is attached to the backbone
                # but nothing in this file calls it - confirm it is intended.
                self.arch.drop_out = nn.Dropout(p=0.2)
            num_ftrs = getattr(self.arch, head_attr).in_features
            setattr(self.arch, head_attr, nn.Linear(num_ftrs,1,bias=True))
        elif name == 'mobilenetv2':
            self.arch = models.mobilenet_v2(pretrained=True)
            self.arch.classifier = nn.Sequential(
                nn.Dropout(p=0.1, inplace=False),
                nn.Linear(in_features=1280, out_features=1, bias=True),
            )
        elif name == 'alexnet':
            # Unlike the other torchvision backbones this one is built with
            # pretrained=False, and its whole classifier is replaced.
            self.arch = models.alexnet(pretrained=False)
            self.arch.classifier = nn.Sequential(
                nn.Dropout(p=0.5, inplace=False),
                nn.Linear(in_features=9216, out_features=1, bias=True),
            )
        elif name == 'rexnetv1':
            # Store the backbone on self.arch; previously it was only bound to
            # a local variable, so forward() had no `arch` to call.
            # NOTE(review): `rexnetv1` is not imported in the visible notebook,
            # and whether `output.conv2D` replaces the existing head depends on
            # how ReXNetV1 names its layers - confirm before using this branch.
            self.arch = rexnetv1.ReXNetV1(width_mult=1.0)
            self.arch.output.conv2D = nn.Conv2d(1280, 1, kernel_size=(1, 1), stride=(1, 1))
        else:
            # Fail at construction instead of leaving a model without a
            # backbone that only errors on the first forward pass.
            raise ValueError(f"Unsupported architecture name: {name!r}")

    def forward(self, x):
        """Run `x` through the backbone and return its single-output prediction."""
        return self.arch(x)

In [8]:
class AverageMeter():
    """Running tracker of the latest value and the weighted mean of all values seen."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the latest value, the mean, the weighted sum and the sample count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as observed `n` times and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


In [9]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
def loss_fn_mse(outputs,targets):
  """Mean squared error between `outputs` and `targets` (default `nn.MSELoss`)."""
  return nn.MSELoss()(outputs,targets)

In [11]:
def loss_fn_mae(outputs,targets):
  """Mean absolute error between `outputs` and `targets` (default `nn.L1Loss`)."""
  return nn.L1Loss()(outputs,targets)

In [12]:
def loss_fn_smoothl1(outputs,targets):
  """Smooth L1 loss between `outputs` and `targets` (default `nn.SmoothL1Loss`)."""
  return nn.SmoothL1Loss()(outputs,targets)

In [13]:
def train_fn(train_data_loader,model,optimizer,device,scheduler = None):
  """Train `model` for one pass over `train_data_loader`.

  The loss is selected by the module-level `LOSS` setting
  ('MSE', 'MAE' or 'SMOOTH_L1').

  Args:
    train_data_loader: iterable of batches, each a dict with 'image' and 'label'.
    model: network to optimise; switched to train mode here.
    optimizer: optimizer stepped once per batch.
    device: device the batch tensors are moved to.
    scheduler: optional LR scheduler, stepped once per batch when given.

  Returns:
    The sample-weighted mean of the per-batch loss over the whole pass.

  Raises:
    ValueError: if `LOSS` is not one of the supported names.
  """
  # (loss function, progress-bar key) for each supported LOSS value.
  loss_table = {
      'MSE': (loss_fn_mse, 'loss_mse'),
      'MAE': (loss_fn_mae, 'loss_mae'),
      'SMOOTH_L1': (loss_fn_smoothl1, 'loss_smooth_l1'),
  }
  if LOSS not in loss_table:
    # An unknown name used to skip backward()/step() silently and return 0.
    raise ValueError(f"Unsupported LOSS {LOSS!r}; expected one of {sorted(loss_table)}")
  loss_fn, postfix_key = loss_table[LOSS]

  model.train()
  losses = AverageMeter()
  tk0 = tqdm(train_data_loader, total=len(train_data_loader))
  for bi,d in enumerate(tk0):
    # Send the batch to the target device as float tensors.
    images = d['image'].to(device,dtype=torch.float)
    labels = d['label'].to(device,dtype=torch.float)
    optimizer.zero_grad()

    outputs  = model(images)

    # Labels come in as (batch,); add a trailing axis to line up with the
    # model output before computing the loss.
    loss = loss_fn(outputs,labels.unsqueeze(1))
    loss.backward()
    optimizer.step()
    losses.update(loss.item(), labels.size(0))
    tk0.set_postfix(**{postfix_key: losses.avg})

    if scheduler is not None:
      scheduler.step()
  return losses.avg
  

In [14]:
def eval_fn(valid_data_loader,model,device):
  """Evaluate `model` on `valid_data_loader` and return the validation RMSE.

  The reported loss is selected by the module-level `LOSS` setting
  ('MSE', 'MAE' or 'SMOOTH_L1'). Both the mean loss and the RMSE are printed.

  Args:
    valid_data_loader: iterable of batches, each a dict with 'image' and 'label'.
    model: network to evaluate; switched to eval mode here.
    device: device the batch tensors are moved to.

  Returns:
    Root mean squared error between all predictions and all labels.

  Raises:
    ValueError: if `LOSS` is not one of the supported names.
  """
  loss_table = {
      'MSE': loss_fn_mse,
      'MAE': loss_fn_mae,
      'SMOOTH_L1': loss_fn_smoothl1,
  }
  if LOSS not in loss_table:
    # An unknown name used to report a validation loss of 0 silently.
    raise ValueError(f"Unsupported LOSS {LOSS!r}; expected one of {sorted(loss_table)}")
  loss_fn = loss_table[LOSS]

  model.eval()
  weighted_loss = 0.0
  n_samples = 0
  final_outputs = []
  final_targets = []
  with torch.no_grad():
    for bi,d in enumerate(valid_data_loader):
      # Send the batch to the target device as float tensors.
      images = d['image'].to(device,dtype=torch.float)
      labels = d['label'].to(device,dtype=torch.float)

      outputs  = model(images)
      loss = loss_fn(outputs,labels.unsqueeze(1))

      # Weight each batch by its size so a smaller final batch does not skew
      # the mean (this matches how train_fn averages its loss).
      batch_size = labels.size(0)
      weighted_loss += loss.item() * batch_size
      n_samples += batch_size

      final_outputs.append(outputs.cpu().detach().numpy())
      final_targets.append(labels.cpu().numpy())

  final_targets = np.concatenate(final_targets)
  # Flatten predictions to the labels' shape before the element-wise difference.
  final_outputs = np.concatenate(final_outputs).reshape(final_targets.shape)
  mean_loss_val  = weighted_loss/n_samples
  rmse_score = np.sqrt(((final_targets - final_outputs) ** 2).mean())

  print(f"Validation loss {LOSS} for this epoch: ",mean_loss_val)
  print('Validation rmse for this epoch',rmse_score)
  return rmse_score


In [15]:
input_shape = [128,256]
def get_transforms():
  """Build the train and validation preprocessing pipelines.

  Both pipelines are identical: resize to `input_shape`, normalise with the
  albumentations defaults, then convert to a tensor.

  Returns:
    A `(train_transform, val_transform)` tuple of two separate Compose objects.
  """
  def build_pipeline():
    # Resize -> Normalize -> ToTensor, sized from the module-level input_shape.
    steps = [
        albumentations.Resize(input_shape[0],input_shape[1]),
        albumentations.Normalize(),
        AT.ToTensor(),
    ]
    return albumentations.Compose(steps)

  val_transform = build_pipeline()
  train_transform = build_pipeline()
  return train_transform,val_transform

## Stratified K-Folds

In [16]:
NAME = 'resnext50'
EPOCHS = 20
TRAIN_BATCH_SIZE = 32
LR = 3e-4
LOSS = 'MSE'
# random_state only has an effect when shuffle=True; newer scikit-learn
# releases reject a random_state passed without it.
skf = StratifiedKFold(n_splits=6,shuffle=True,random_state=SEED_VAL)

all_rmse_scores = []
def run_folds():
  """Train one model per stratified fold and keep each fold's best checkpoint.

  Reads 'Train.csv', keeps the rows with `label_quality == 2`, and for every
  fold of `skf` trains `Net(NAME)` for `EPOCHS` epochs with AdamW. Whenever the
  validation RMSE improves, the weights are saved to `best_model_<fold index>`.
  The best score of each fold is also stored in the module-level
  `all_rmse_scores` list.

  Returns:
    A DataFrame with one row per fold: the 1-based fold number ('FODL')
    and that fold's best validation RMSE ('BEST_VAL_RMSE').
  """
  seed_all(SEED_VAL)
  # Start from an empty list so re-running this cell does not average in the
  # scores left over from a previous run.
  all_rmse_scores.clear()
  train_transform,val_transform = get_transforms()
  train = pd.read_csv('Train.csv')
  train = train[train.label_quality == 2]
  train = train.rename(columns = {'UID':'image_name','growth_stage':'target'})
  # Fall back to the CPU when no GPU is available instead of hard-coding "cuda".
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  # Collect one record per fold and build the DataFrame once at the end;
  # DataFrame.append is no longer available in pandas 2.x.
  # NOTE(review): 'FODL' looks like a typo for 'FOLD'; kept as-is because it is
  # the column name of the returned frame.
  fold_records = []
  for i,(train_index,val_index) in enumerate(skf.split(train,y=train.target)):
    print(f"#########################  Fold {i+1}/{skf.n_splits}  #########################")
    train_df = train.iloc[train_index,:].reset_index(drop=True)
    valid_df = train.iloc[val_index,:].reset_index(drop=True)
    train_dataset = ImageDataset('Images',train_df,train_transform)
    valid_dataset = ImageDataset('Images',valid_df,val_transform)
    train_data_loader = DataLoader(dataset=train_dataset,shuffle=True,batch_size=TRAIN_BATCH_SIZE)
    valid_data_loader = DataLoader(dataset=valid_dataset,shuffle=False,batch_size=16)
    model = Net(NAME)
    model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
    # Any finite RMSE beats this, so the first epoch always saves a checkpoint.
    best_rmse = float('inf')

    for epoch in range(EPOCHS):
      print("----------------EPOCH "+str(epoch+1)+"---------------------")
      train_fn(train_data_loader, model, optimizer, device,scheduler=None)
      rmse_val = eval_fn(valid_data_loader ,model, device)
      if rmse_val<best_rmse:
        best_rmse = rmse_val
        torch.save(model.state_dict(),f"best_model_{i}")
    print(f'best VAL_RMSE for fold {i+1}: ',best_rmse)
    fold_records.append({'FODL':1+i,'BEST_VAL_RMSE':best_rmse})
    all_rmse_scores.append(best_rmse)
  print(f"MEAN RMSE: {np.mean(all_rmse_scores)}")
  return pd.DataFrame(fold_records)



In [None]:
# Train every fold; this writes one `best_model_<i>` checkpoint per fold and
# returns the table of best validation scores.
kfold_results = run_folds()

In [None]:
# Display the per-fold best validation RMSE table returned by run_folds().
kfold_results

# Predicting

In [None]:
# Same preprocessing as validation: resize, normalise, convert to tensor.
test_steps = [
    albumentations.Resize(input_shape[0],input_shape[1]),
    albumentations.Normalize(),
    AT.ToTensor(),
]
test_transform = albumentations.Compose(test_steps)

# The sample submission lists the images to predict; rename its id column to
# the `image_name` column that ImageDataset reads.
test = pd.read_csv('SampleSubmission.csv').rename(columns={'UID':'image_name'})
test_dataset = ImageDataset('Images',test,test_transform,mode='test')
test_data_loader = DataLoader(dataset=test_dataset,shuffle=False,batch_size=32)

In [None]:
# Predict the test set with the best checkpoint of every fold; `all_folds`
# ends up holding one prediction array per fold.
all_folds = []
for i in range(skf.n_splits):
  best_model = Net(NAME)
  # map_location lets checkpoints saved during a GPU run be restored when
  # `device` has fallen back to the CPU.
  best_model.load_state_dict(torch.load(f'best_model_{i}', map_location=device))
  best_model.to(device)
  best_model.eval()
  final_outputs = []
  with torch.no_grad():
    tk0 = tqdm(test_data_loader, total=len(test_data_loader))
    for bi,d in enumerate(tk0):
      # Send the batch to the target device as float tensors.
      images = d['image'].to(device,dtype=torch.float)
      outputs  = best_model(images)
      final_outputs.append(outputs.cpu().detach().numpy())
  final_outputs = np.concatenate(final_outputs)
  all_folds.append(final_outputs)

In [None]:
# Read the copy placed in the working directory by the earlier `cp` cell (the
# same relative path the test-set cell uses) rather than an absolute path.
ss = pd.read_csv('SampleSubmission.csv')

In [None]:
# Average the per-fold predictions element-wise, then flatten to one value per
# row so the column assignment does not depend on pandas accepting a 2-D array.
fold_mean = np.mean(all_folds,axis=0).ravel()
ss['growth_stage'] = fold_mean
# Limit the predictions to the [1, 7] interval.
ss['growth_stage'] = ss['growth_stage'].clip(1,7)
ss.head()

In [None]:
# Encode the run configuration in the file name so submissions stay traceable.
submission_name = (
    f'{NAME}_highQ_floats_Folds={skf.n_splits}'
    f'input_shape=[{input_shape[0]}X{input_shape[1]}]'
    f'_lr={LR}_epochs={EPOCHS}_bs={TRAIN_BATCH_SIZE}_SEED={SEED_VAL}.csv'
)
ss.to_csv(submission_name,index=False)