In [1]:
import numpy as np 
import pandas as pd 
import shutil
import os
import zipfile
import torch
import torch.nn as nn
import cv2
import matplotlib.pyplot as plt
import torchvision
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms
import copy
import tqdm
from PIL import Image
from albumentations import pytorch as AT
import albumentations as A
import torchvision.datasets as dataset
from google.colab.patches import cv2_imshow
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset
import natsort
!pip install -U skorch
%matplotlib inline
import time

Collecting skorch
[?25l  Downloading https://files.pythonhosted.org/packages/18/c7/2f6434f9360c91a4bf14ae85f634758e5dacd3539cca4266a60be9f881ae/skorch-0.9.0-py3-none-any.whl (125kB)
[K     |████████████████████████████████| 133kB 5.9MB/s 
Installing collected packages: skorch
Successfully installed skorch-0.9.0


In [2]:
!nvidia-smi

Sun Mar 21 13:22:56 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.56       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   64C    P8    31W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:

import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split

from skorch import NeuralNetClassifier
from skorch.dataset import CVSplit
from skorch.callbacks import LRScheduler, Checkpoint 
from skorch.callbacks import Freezer, EarlyStopping

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score

# for multiprocessing
import multiprocessing as mp


In [4]:
# Mount Google Drive (Colab only) so that the paths under /content/drive used
# by the rest of the notebook are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
# Locations of the image folder and of the label table on the mounted Drive.
train_dir='/content/drive/MyDrive/dataset/dataset'
image_label='/content/drive/MyDrive/last_labels.csv'
# os.listdir returns names in arbitrary order; sorting keeps the mapping from
# dataset index to file identical on every run.
train_files=sorted(os.listdir(train_dir))
# Read image ids as strings: they are matched against file-name stems, which
# are always strings, so dtype inference must not turn them into numbers.
labels=pd.read_csv(image_label, dtype={'image': str})

In [14]:
# Number of entries found in train_dir.
len(train_files)

1552

In [15]:
# Samples per class, to check how balanced the classes are.
labels['class'].value_counts()

9     145
10    136
3     132
12    126
8     126
6     120
4     120
2     120
0     120
11    112
7     108
5      96
1      91
Name: class, dtype: int64

In [16]:
# Display the label table (the columns used later are 'image' and 'class').
labels

Unnamed: 0,image,class
0,500,0
1,500_0,0
2,500_1,0
3,500_2,0
4,501,0
...,...,...
1547,79_0,12
1548,79_1,12
1549,79_2,12
1550,79_3,12


In [21]:
import multiprocessing
class DatasetRetriever(Dataset):
    """Image dataset that reads files from a directory with OpenCV.

    In ``'train'`` mode every item is ``(image, label)``; the label is looked
    up in ``labels`` by the file name up to its first ``'.'``.  In any other
    mode every item is ``(image, stem)`` so that predictions can be matched
    back to their files.

    Args:
        dir: Directory that contains the image files.
        image_list: File names relative to ``dir``; defines the item order.
        labels: DataFrame with an ``'image'`` column (file stems) and a
            ``'class'`` column (integer class ids).  Only read in train mode.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``'image'`` entry.
        mode: ``'train'`` to return labels, anything else to return stems.

    ``__getitem__`` raises:
        FileNotFoundError: the file is missing or OpenCV cannot decode it.
        KeyError: train mode and the stem has no row in ``labels``.
    """

    def __init__(self, dir, image_list, labels, transform=None, mode='train'):
        self.dir = dir
        self.labels = labels
        self.image_list = image_list
        self.mode = mode
        self.transform = transform
        # stem -> class id; built on first use so that test-mode datasets
        # never touch ``labels`` (which may not be a DataFrame there).
        self._label_lookup = None

    def __len__(self):
        return len(self.image_list)

    def _label_for(self, stem):
        """Return the integer class id stored for ``stem``."""
        if self._label_lookup is None:
            lookup = {}
            stems = self.labels['image'].astype(str)
            for key, class_id in zip(stems, self.labels['class']):
                # setdefault keeps the first row when a stem is duplicated.
                lookup.setdefault(key, int(class_id))
            self._label_lookup = lookup
        try:
            return self._label_lookup[stem]
        except KeyError:
            raise KeyError(f"No label found for image '{stem}'") from None

    def __getitem__(self, idx: int):
        image_name = self.image_list[idx]
        full_path = os.path.join(self.dir, image_name)
        stem = image_name.split('.')[0]

        image = cv2.imread(full_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {full_path}")
        # OpenCV loads BGR; the rest of the pipeline works on RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resolve the label before the (possibly expensive) transform.
        label = self._label_for(stem) if self.mode == 'train' else None

        if self.transform:
            image = self.transform(image=image)['image']

        if self.mode == 'train':
            return image, label
        return image, stem



In [22]:
# Mini-batch size shared by the DataLoaders and the skorch nets in this notebook.
# NOTE(review): a recorded batch elsewhere in the notebook holds 20 labels, so
# this value was probably changed after those runs — confirm the intended size.
batch_size = 1
# One loader worker per CPU core reported by the OS.
num_workers = multiprocessing.cpu_count()
# Square image side in pixels — presumably the size the resize step targets; TODO confirm.
img_size=96

In [23]:
# Training pipeline: resize, random geometric and colour augmentation, then
# normalisation and conversion to a tensor.
# Transforms are referenced through the top-level ``A.*`` names (the public
# Albumentations API) rather than the internal ``A.augmentations.transforms``
# module path, and the target size comes from ``img_size`` instead of a
# repeated literal.
data_transforms = A.Compose([
    A.Resize(img_size, img_size),
    A.HorizontalFlip(p=0.5),
    A.Transpose(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.VerticalFlip(p=0.5),
    # NOTE(review): these shift limits are fractional; if the library applies
    # them in integer units on uint8 images, 0.1 is close to a no-op — confirm.
    A.HueSaturationValue(hue_shift_limit=0.1, sat_shift_limit=0.1, val_shift_limit=0.1, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    # Normalize() without arguments uses the library's default mean/std.
    A.Normalize(),
    AT.ToTensor()
    ])

# Evaluation pipeline: same resize/normalise/tensor steps, no random augmentation.
data_transforms_test = A.Compose([
    A.Resize(img_size, img_size),
    A.Normalize(),
    AT.ToTensor()
    ])

In [24]:
# Full labelled dataset; the training augmentations are applied on every access.
trainset = DatasetRetriever(train_dir, train_files,labels, transform = data_transforms)
# Test-set construction is disabled (test_dir / test_files are not defined in the visible cells).
#testset = DatasetRetriever(test_dir, test_files,image_label,transform=data_transforms_test, mode = "test")

In [25]:
# Sanity check: the dataset exposes one item per listed file.
len(trainset)

1552

In [26]:
# Hold out 20% of the images for validation.
valid_size = int(len(train_files) * 0.2)
# A seeded permutation makes the split repeatable for a given file order.
split_rng = torch.Generator().manual_seed(42)
shuffled_indices = torch.randperm(len(train_files), generator=split_rng).tolist()
# Each subset wraps its own freshly built dataset, so that
#  * validation images go through the deterministic test transforms instead of
#    the random training augmentations, and
#  * re-running this cell gives the same result (it no longer splits whatever
#    `trainset` currently happens to be bound to).
trainset = Subset(
    DatasetRetriever(train_dir, train_files, labels, transform=data_transforms),
    shuffled_indices[valid_size:])
validset = Subset(
    DatasetRetriever(train_dir, train_files, labels, transform=data_transforms_test),
    shuffled_indices[:valid_size])


In [33]:

# Reshuffle the training samples every epoch; without shuffle=True the loader
# replays them in the same order each time.
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True,
                         pin_memory=True)
# Validation data is iterated in a fixed order.
validloader = DataLoader(validset, batch_size=batch_size, pin_memory=True)

# Test loader is disabled until a test set is defined:
# testloader = torch.utils.data.DataLoader(testset, batch_size = batch_size,
#                                          num_workers = num_workers)

'\ntestloader = torch.utils.data.DataLoader(testset, batch_size = batch_size,\n                                         num_workers = num_workers)'

In [17]:
# Pull one validation batch. The dataset is in train mode, so the second
# element holds class labels; the name `files` is kept because other cells use it.
samples,files = next(iter(validloader))

In [19]:
# Show the second element of the batch fetched above.
files

tensor([12,  7,  8,  4, 11,  1, 11,  1,  8, 10,  3,  9, 10,  2,  3,  3,  4, 10,
         0,  4])

In [None]:
# Disabled scratch code, kept as a bare string literal so nothing in it runs:
# it averages per-channel mean/std over the first 449 images of `samples`.
# NOTE(review): 449 is hard-coded and presumably matched an earlier batch
# size — confirm before re-enabling.
'''mean0=[]
mean1=[]
mean2=[]
std1=[]
std2=[]
std3=[]

for i in range(449):
  mean0.append(samples[i,0,:,:].mean())
  mean1.append(samples[i,1,:,:].mean())
  mean2.append(samples[i,2,:,:].mean())
  std1.append(samples[i,0,:,:].std())
  std2.append(samples[i,1,:,:].std())
  std3.append(samples[i,2,:,:].std())

print((np.mean(mean0),np.mean(mean1),np.mean(mean2)),(np.mean(std1),np.mean(std2),np.mean(std3)))'''

'mean0=[]\nmean1=[]\nmean2=[]\nstd1=[]\nstd2=[]\nstd3=[]\n\nfor i in range(449):\n  mean0.append(samples[i,0,:,:].mean())\n  mean1.append(samples[i,1,:,:].mean())\n  mean2.append(samples[i,2,:,:].mean())\n  std1.append(samples[i,0,:,:].std())\n  std2.append(samples[i,1,:,:].std())\n  std3.append(samples[i,2,:,:].std())\n\nprint((np.mean(mean0),np.mean(mean1),np.mean(mean2)),(np.mean(std1),np.mean(std2),np.mean(std3)))'

In [18]:
# MobileNetV2 initialised from torchvision's pretrained weights.
model = torchvision.models.mobilenet_v2(pretrained=True,progress=True)

In [19]:
# Replace the final linear layer with a 13-way head (the label table holds classes 0-12).
in_features = model.classifier[1].in_features
model.classifier[1]=nn.Linear(in_features, 13)

In [30]:
# Print the module tree to inspect the network.
model

MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momen

In [31]:
def train_model(model_conv, train_loader, valid_loader, criterion, optimizer, sheduler, n_epochs,
                device=None,
                checkpoint_path='/content/drive/MyDrive/chess_weights/model_1.pt',
                patience=20):
    """Train ``model_conv`` with per-epoch validation, checkpointing and early stopping.

    Args:
        model_conv: Module to train; it is moved to ``device``.
        train_loader: Iterable of ``(data, target)`` training batches.
        valid_loader: Iterable of ``(data, target)`` validation batches.
        criterion: Loss callable, ``criterion(output, target)``.
        optimizer: Optimizer over the model parameters.
        sheduler: Scheduler stepped once per epoch with the mean validation
            loss (``ReduceLROnPlateau``-style ``step(metric)``).
        n_epochs: Maximum number of epochs.
        device: Target device. Defaults to the first CUDA GPU when available,
            otherwise the CPU.
        checkpoint_path: Where the best ``state_dict`` is written; missing
            parent directories are created.
        patience: Training stops once the number of consecutive epochs
            without a new best validation loss exceeds this value.

    Returns:
        ``(model_conv, train_loss, val_loss)`` where the two lists hold the
        per-batch losses of the last epoch that ran (empty if none ran).
    """
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Fail early (before a full epoch of work) if the checkpoint location is unusable.
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    model_conv.to(device)
    valid_loss_min = np.inf
    # Consecutive epochs without a new best validation loss.
    p = 0
    train_loss, val_loss = [], []

    for epoch in range(1, n_epochs + 1):
        print(time.ctime(), 'Epoch:', epoch)

        # Validation below switches the model to eval mode, so training mode
        # must be restored at the start of every epoch; otherwise BatchNorm and
        # Dropout stay in inference mode from the second epoch onwards.
        model_conv.train()
        train_loss = []
        for b_i, (data, target) in enumerate(train_loader, start=1):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model_conv(data)
            loss = criterion(output, target)
            train_loss.append(loss.item())
            loss.backward()
            optimizer.step()
            if b_i % 10 == 0:
                print(b_i)

        # Validation pass: inference mode and no autograd bookkeeping.
        model_conv.eval()
        val_loss = []
        with torch.no_grad():
            for data, target in valid_loader:
                data, target = data.to(device), target.to(device)
                output = model_conv(data)
                loss = criterion(output, target)
                val_loss.append(loss.item())

        print(f"Epoch {epoch}, train loss: {np.mean(train_loss):.4f}, valid loss: {np.mean(val_loss):.4f}, lr: {optimizer.param_groups[0]['lr']}")

        valid_loss = np.mean(val_loss)
        sheduler.step(valid_loss)
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                valid_loss_min,
                valid_loss))
            torch.save(model_conv.state_dict(), checkpoint_path)
            valid_loss_min = valid_loss
            p = 0
        else:
            # No improvement this epoch: count it and stop once patience is exceeded.
            p += 1
            print(f'{p} epochs of increasing val loss')
            if p > patience:
                print('Stopping training')
                break

    return model_conv, train_loss, val_loss

In [33]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [34]:
# Cross-entropy loss for the multi-class classification head.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters (no layers are frozen here).
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
# Alternatives that were tried and left disabled:
#optimizer = torch.optim.SGD(model.parameters(),lr=0.00025,momentum=0.9,nesterov=True)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.2)
# Halve the learning rate when the monitored value (the validation loss passed
# to step()) has not improved for more than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

In [35]:
# Fine-tune for at most 400 epochs; the returned loss lists are the per-batch
# losses of the last epoch that ran.
model_mob, train_loss, val_loss = train_model(model, trainloader, validloader, criterion, 
                              optimizer, scheduler, n_epochs=400)

Sun Mar 21 10:31:58 2021 Epoch: 1
10
20
30
40
50
60
Epoch 1, train loss: 1.1346, valid loss: 0.7476, lr: 0.001
Validation loss decreased (inf --> 0.747617).  Saving model ...
Sun Mar 21 10:41:31 2021 Epoch: 2
10
20
30
40
50
60
Epoch 2, train loss: 2.6539, valid loss: 2.6109, lr: 0.001
1 epochs of increasing val loss
Sun Mar 21 10:41:38 2021 Epoch: 3
10
20
30
40
50
60
Epoch 3, train loss: 2.5740, valid loss: 2.5926, lr: 0.001
2 epochs of increasing val loss
Sun Mar 21 10:41:44 2021 Epoch: 4
10
20
30
40
50
60
Epoch 4, train loss: 2.5682, valid loss: 2.5870, lr: 0.001
3 epochs of increasing val loss
Sun Mar 21 10:41:51 2021 Epoch: 5
10
20
30
40
50
60
Epoch 5, train loss: 2.5656, valid loss: 2.5858, lr: 0.001
Epoch     5: reducing learning rate of group 0 to 5.0000e-04.
4 epochs of increasing val loss
Sun Mar 21 10:41:58 2021 Epoch: 6
10
20
30
40
50
60
Epoch 6, train loss: 2.5596, valid loss: 2.5806, lr: 0.0005
5 epochs of increasing val loss
Sun Mar 21 10:42:04 2021 Epoch: 7
10
20
30
40
5

In [27]:
# Select the compute device (first CUDA GPU if available, else CPU).
# NOTE(review): `device` is also assigned by an identical statement earlier in the notebook.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device 

device(type='cuda', index=0)

In [28]:

# DenseNet-169 wrapper with a replaceable classification head.
class DenseNet169(nn.Module):
    """torchvision DenseNet-169 (built with ``pretrained=False``) with a new linear head.

    Args:
        output_features: Number of outputs of the new classifier layer.
        num_units, drop, num_units1, drop1: Accepted for signature
            compatibility; not used by this class.
    """

    def __init__(self, output_features, num_units=512, drop=0.3,
                 num_units1=256, drop1=0.1):
        super().__init__()
        backbone = torchvision.models.densenet169(pretrained=False)
        head = nn.Linear(backbone.classifier.in_features, output_features)
        # Kept inside a Sequential so parameter names stay `classifier.0.*`.
        backbone.classifier = nn.Sequential(head)
        self.model = backbone

    def forward(self, x):
        """Return the backbone's output for the batch ``x``."""
        logits = self.model(x)
        return logits

# GoogLeNet wrapper with a replaceable classification head.
class GoogleNet(nn.Module):
    """torchvision GoogLeNet (``pretrained=False``, ``aux_logits=False``) with a new linear head.

    Args:
        output_features: Number of outputs of the new ``fc`` layer.
        num_units, drop, num_units1, drop1: Accepted for signature
            compatibility; not used by this class.
    """

    def __init__(self, output_features, num_units=512, drop=0.4,
                 num_units1=256, drop1=0.1):
        super().__init__()
        backbone = torchvision.models.googlenet(pretrained=False,aux_logits=False)
        head = nn.Linear(backbone.fc.in_features, output_features)
        # Kept inside a Sequential so parameter names stay `fc.0.*`.
        backbone.fc = nn.Sequential(head)
        self.model = backbone

    def forward(self, x):
        """Return the backbone's output for the batch ``x``."""
        logits = self.model(x)
        return logits
    
class MobileNet(nn.Module):
    """torchvision MobileNetV2 (built with ``pretrained=False``) with a dropout + linear head.

    Args:
        output_features: Number of outputs of the new linear layer.
        drop1: Dropout probability applied before the linear layer.
        num_units, drop, num_units1: Accepted for signature compatibility;
            not used by this class.
    """

    def __init__(self, output_features, num_units=512, drop=0.5,
                 num_units1=256, drop1=0.2):
        super().__init__()
        backbone = torchvision.models.mobilenet_v2(pretrained=False)
        # Read the feature width from the stock head before replacing it.
        in_features = backbone.classifier[1].in_features
        head_layers = [nn.Dropout(p=drop1), nn.Linear(in_features, output_features)]
        backbone.classifier = nn.Sequential(*head_layers)
        self.model = backbone

    def forward(self, x):
        """Return the backbone's output for the batch ``x``."""
        logits = self.model(x)
        return logits

In [29]:
# --- DenseNet169 callbacks ---
# StepLR learning-rate schedule (step_size=8, gamma=0.15).
lr_scheduler_densenet = LRScheduler(policy='StepLR',
                                  step_size=8,gamma=0.15)
# Save the parameters whenever 'valid_acc_best' is set, i.e. on a new best validation accuracy.
# NOTE(review): this path points into a 'Wheat_Growth_Stage' folder while the
# other checkpoints go to 'chess_weights' — confirm it is intended.
checkpoint_densenet = Checkpoint(f_params='/content/drive/My Drive/Wheat_Growth_Stage/models/best_model4_densenet169.pkl',
                                 monitor='valid_acc_best')
# Layer freezing is disabled; the Freezer below would freeze every parameter
# whose name does not start with 'model.classifier'.
#freezer_densenet = Freezer(lambda x: not x.startswith('model.classifier'))
# Early stopping with patience=10 (uses EarlyStopping's default monitor).
early_stopping_densenet = EarlyStopping(patience=10)

# --- GoogleNet callbacks ---
# StepLR learning-rate schedule (step_size=8, gamma=0.15).
lr_scheduler_googlenet = LRScheduler(policy='StepLR',
                                  step_size=8,gamma=0.15)
# Save the parameters on a new best validation accuracy.
checkpoint_googlenet = Checkpoint(f_params='/content/drive/MyDrive/chess_weights/best_model4_googlenet.pkl',
                            monitor='valid_acc_best')
# Layer freezing is disabled (see the DenseNet169 section).
#freezer_vgg = Freezer(lambda x: not x.startswith('model.classifier'))
# Early stopping with patience=10.
early_stopping_googlenet = EarlyStopping(patience=10)

# --- MobileNet callbacks ---
# StepLR learning-rate schedule (step_size=8, gamma=0.2).
lr_scheduler_mobilenet = LRScheduler(policy='StepLR',
                                  step_size=8,gamma=0.2)
# Save the parameters on a new best validation accuracy.
checkpoint_mobilenet = Checkpoint(f_params='/content/drive/MyDrive/chess_weights/best_model_mobilenet.pkl',
                            monitor='valid_acc_best')
# Layer freezing is disabled (see the DenseNet169 section).
#freezer_vgg = Freezer(lambda x: not x.startswith('model.classifier'))
# Early stopping with patience=10.
early_stopping_mobilenet = EarlyStopping(patience=10)

In [30]:
# skorch classifier built around the MobileNet wrapper class.
mobilenet = NeuralNetClassifier(
    # MobileNet wrapper defined in this notebook (MobileNetV2 + dropout/linear head)
    module=MobileNet,          
    # constructor arguments forwarded to MobileNet; 13 outputs = classes 0-12
    module__output_features=13,
    module__num_units=512,
    module__drop=0.5,
    module__num_units1=512,
    module__drop1=0.5,
    # loss class (instantiated by skorch)
    criterion=nn.CrossEntropyLoss,
    # mini-batch size: the notebook-level batch_size constant
    batch_size=batch_size,
    # maximum number of epochs to train
    max_epochs=100,
    # Adam optimizer and its hyper-parameters
    optimizer=torch.optim.Adam,
    optimizer__lr = 0.0025,
    optimizer__weight_decay=1e-6,
    # reshuffle the training data every epoch
    iterator_train__shuffle=True,
    # number of DataLoader worker processes for training batches
    iterator_train__num_workers=num_workers,
    # stratified internal train/validation split
    # NOTE(review): skorch appears to use random_state only when cv is a float — confirm it has any effect with cv=5
    train_split=CVSplit(cv=5, stratified=True, random_state=42),
    # LR schedule, best-accuracy checkpoint and early stopping for MobileNet
    callbacks=[lr_scheduler_mobilenet, checkpoint_mobilenet, 
                early_stopping_mobilenet],
    # first CUDA GPU when available, otherwise CPU
    device="cuda:0" if torch.cuda.is_available() else "cpu"
)



In [None]:
# skorch classifier built around the GoogleNet wrapper class.
googlenet = NeuralNetClassifier(
    # GoogleNet wrapper defined in this notebook
    module=GoogleNet,
    # one output per class: the label table holds classes 0-12, so 13 outputs
    # are required (a smaller head makes CrossEntropyLoss fail on the higher
    # class ids); this also matches the other models in the notebook
    module__output_features=13,
    # loss class (instantiated by skorch)
    criterion=nn.CrossEntropyLoss,
    # mini-batch size: the notebook-level batch_size constant
    batch_size=batch_size,
    # maximum number of epochs to train
    max_epochs=100,
    # Adam optimizer and its hyper-parameters
    optimizer=torch.optim.Adam,
    optimizer__lr = 0.01,
    optimizer__weight_decay=1e-6,
    # reshuffle the training data every epoch
    iterator_train__shuffle=True,
    # number of DataLoader worker processes for training batches
    iterator_train__num_workers=num_workers,
    # stratified internal train/validation split
    # NOTE(review): skorch appears to use random_state only when cv is a float — confirm it has any effect with cv=10
    train_split=CVSplit(cv=10, stratified=True, random_state=42),
    # LR schedule, best-accuracy checkpoint and early stopping for GoogleNet
    callbacks=[lr_scheduler_googlenet, checkpoint_googlenet,
              early_stopping_googlenet],
    # first CUDA GPU when available, otherwise CPU
    device="cuda:0" if torch.cuda.is_available() else "cpu"
)



In [25]:
# Collect the label of every training sample; passed to fit() as `y`.
# NOTE(review): iterating the dataset loads and transforms every image just to
# read its label — slow; consider deriving the labels from the label table.
y_train = np.array([y for X, y in iter(trainset)])


In [None]:
# Train the MobileNet classifier; `train_split` on the net carves its own
# validation part out of `trainset`.
mobilenet.fit(trainset, y=y_train)

In [31]:
# Initialize the net without training so that saved parameters can be loaded into it.
mobilenet.initialize()

# Restore previously saved weights.
# NOTE(review): checkpoint_mobilenet writes 'best_model_mobilenet.pkl', but this
# loads 'best_model4_mobilenet.pkl' — confirm which file is intended.
mobilenet.load_params(f_params='/content/drive/MyDrive/chess_weights/best_model4_mobilenet.pkl')
