In [50]:
import torch
import torch.nn as nn
from torchvision.transforms import ToTensor, ToPILImage

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from pathlib import Path
import plotly.express as px
import math
from tqdm import tqdm

In [10]:
PATH = Path("E:/user/Notebooks/data/plant-pathology-2020-fgvc7/images/")

In [11]:
img = Image.open(PATH/"Train_1815.jpg")
px.imshow(img);

In [12]:
#train_df_names = pd.read_csv("train.csv")["image_id"].values
#test_df_names = pd.read_csv("test.csv")["image_id"].values
#len(train_df_names), len(test_df_names)

In [13]:
#train_imgs = [np.array(Image.open(PATH/(i+".jpg"))) for i in train_df_names[:1]]
#len(train_imgs) ,train_imgs[0].shape

In [51]:
def transform_image(pth): #Hocanın yazdığı
    img = Image.open(pth)
    return ((ToTensor()(img.resize((256,256)))).type(torch.float32))

In [52]:
class Dataset:
    def __init__(self, df, path):
        self.df = df
        self.path = path
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idxs):
        if isinstance(idxs, int):
            imgs = transform_image(self.path/(self.df["image_id"][idxs]+".jpg"))
            labels = torch.tensor(self.df.iloc[:,1:].iloc[idxs].values, dtype=torch.float32)
            ids = self.df.iloc[:,0].iloc[idxs]
            
            return imgs.cuda(), labels.cuda()
        else:
            sub_df = self.df.iloc[idxs]
            imgs = []
            labels = []
            ids = []
            for i in range(len(idxs)):
                img = transform_image(self.path/(sub_df["image_id"].iloc[i]+".jpg"))
                image_id = sub_df["image_id"].iloc[i]
                ids.append(image_id)
                imgs.append(img)
                label = torch.tensor(sub_df.iloc[:,1:].iloc[i].values, dtype=torch.float32)
                labels.append(label)
                
            return torch.stack(imgs,dim=0).cuda(), torch.stack(labels,dim=0).cuda()

In [53]:
class DataLoader():
    def __init__(self, ds, bs): 
        self.ds, self.bs = ds, bs
    def __iter__(self):
        n = len(self.ds)
        l = torch.randperm(n)

        for i in range(0, n, self.bs): 
            idxs_l = l[i:i+self.bs]
            yield self.ds[idxs_l]

In [54]:
train_last = pd.read_csv("E:/user/Notebooks/data/plant-pathology-2020-fgvc7/train_last_df.csv")
train_aug = pd.read_csv("E:/user/Notebooks/data/plant-pathology-2020-fgvc7/train_aug_df.csv")

In [55]:
print('healthy',len(train_last[train_last['healthy']==1]))
print('multiple_diseases',len(train_last[train_last['multiple_diseases']==1]))
print('rust',len(train_last[train_last['rust']==1]))
print('scab',len(train_last[train_last['scab']==1]))

healthy 413
multiple_diseases 528
rust 498
scab 474


In [56]:
train_df=pd.DataFrame(columns=['image_id','healthy','multiple_diseases','rust','scab'])

In [57]:
frames = [train_last, train_aug]
train_df = pd.concat(frames)

In [58]:
train_df

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Train_886_3,0,1,0,0
1,Train_1794,1,0,0,0
2,Train_592,1,0,0,0
3,Train_980,1,0,0,0
4,Train_631,0,0,0,1
...,...,...,...,...,...
5734,Train_1129_1_0,0,1,0,0
5735,Train_1554_1_2,0,1,0,0
5736,Train_812_1,1,0,0,0
5737,Train_1113_1,0,0,1,0


In [59]:
valid_df = pd.read_csv("E:/user/Notebooks/data/plant-pathology-2020-fgvc7/valid_last_df.csv")
train_df.shape, valid_df.shape

((7652, 5), (363, 5))

In [60]:
TRAIN_PATH = Path('E:/user/Notebooks/data/plant-pathology-2020-fgvc7/train_karisik')
VALID_PATH = Path('E:/user/Notebooks/data/plant-pathology-2020-fgvc7/valid_last')

In [61]:
train_ds = Dataset(train_df,TRAIN_PATH)
train_dl = DataLoader(train_ds, 128)

valid_ds = Dataset(valid_df, VALID_PATH)
valid_dl = DataLoader(valid_ds, 128)

In [62]:
x1,y1= next(iter(valid_dl))
x1.shape, y1.shape

(torch.Size([128, 3, 256, 256]), torch.Size([128, 4]))

In [63]:
def accuracy(predictions, target):
    predictions = predictions.view(-1,4)
    target = target.view(-1,4)
    preds = [torch.argmax(i).item() for i in predictions]
    targs = [torch.argmax(i).item() for i in target]
    return np.mean([preds[i]==targs[i] for i in range(len(preds))])

In [64]:
def validation_acc(model, valid_dl):
    return torch.tensor([accuracy(model(xb), yb) for xb, yb in valid_dl]).mean().item()

In [65]:
x1,y1= next(iter(valid_dl))
x1.shape, y1.shape

(torch.Size([128, 3, 256, 256]), torch.Size([128, 4]))

In [66]:
model = nn.Sequential(
        nn.Conv2d(3,8,5,padding=2,stride=2),nn.ReLU(), nn.BatchNorm2d(8), nn.Dropout2d(0.05),
        nn.Conv2d(8,16,5,padding=1,stride=2), nn.ReLU(), nn.BatchNorm2d(16), nn.Dropout2d(0.1),
        nn.Conv2d(16,32,5,padding=1,stride=2), nn.ReLU(), nn.BatchNorm2d(32), nn.Dropout2d(0.2),
        nn.Flatten(),
        nn.Linear(32*31*31,64), nn.ReLU(), nn.Dropout(0.4),
        nn.Linear(64,4)
).cuda()

In [67]:
validation_acc(model, valid_dl)

0.2643107476635514

Experiment parameters
lr_find_epochs = 2
start_lr = 1e-7
end_lr = 0.1
lr_lambda = lambda x: math.exp(x * math.log(end_lr / start_lr) / (lr_find_epochs* 128))
#scheduler = torch.optim.lr_scheduler.LambdaLR(optim, lr_lambda)

In [68]:
optim = torch.optim.Adam(model.parameters(), lr=0.002)

#from torch_lr_finder import LRFinder
lr_finder = LRFinder(model, optim, criterion, device="cuda")
lr_finder.range_test(train_dl, end_lr=100, num_iter=100)
lr_finder.plot() # to inspect the loss-learning rate graph
lr_finder.reset() # to reset the model and optimizer to their initial state

In [69]:
criterion = nn.BCEWithLogitsLoss() # Ne işe yarıyor?

In [70]:
def train_model(model, loss_criterion, optimizer, data_loader, valid_data_loader, epochs, valid_epoch):
    model = model.train()
    
    for epoch in range(1,epochs+1):
        model = model.train()
        general_loss = 0
        
        for xb, yb in data_loader: #xb image, yb label döndürüyor. İkisi de tensor
            optimizer.zero_grad()
            
            predictions = model(xb)
#             y = predictions.detach().cpu().numpy()
#             np.savetxt("output.csv",y)
            
            loss = loss_criterion(predictions, yb)
            loss.backward()
            
            optimizer.step()
            
            general_loss += loss.item()

        if epoch % valid_epoch == 0:
            model = model.eval()
            with torch.no_grad():
                print("Epoch: [%d] | Loss: %.3f | Train Accuracy: %.2f | Validation Accuracy: %.2f"  % (epoch, general_loss, accuracy(predictions,yb), validation_acc(model,valid_dl))) # accuracy fonksiyonu yaz.

In [71]:
train_model(model, criterion, optim, train_dl, valid_dl, 20, 1)

Epoch: [1] | Loss: 37.610 | Train Accuracy: 0.33 | Validation Accuracy: 0.33
Epoch: [2] | Loss: 33.994 | Train Accuracy: 0.39 | Validation Accuracy: 0.36
Epoch: [3] | Loss: 32.086 | Train Accuracy: 0.48 | Validation Accuracy: 0.34
Epoch: [4] | Loss: 29.672 | Train Accuracy: 0.41 | Validation Accuracy: 0.33
Epoch: [5] | Loss: 25.771 | Train Accuracy: 0.62 | Validation Accuracy: 0.35
Epoch: [6] | Loss: 21.484 | Train Accuracy: 0.62 | Validation Accuracy: 0.42
Epoch: [7] | Loss: 18.626 | Train Accuracy: 0.65 | Validation Accuracy: 0.42
Epoch: [8] | Loss: 15.501 | Train Accuracy: 0.72 | Validation Accuracy: 0.48
Epoch: [9] | Loss: 13.758 | Train Accuracy: 0.80 | Validation Accuracy: 0.38
Epoch: [10] | Loss: 12.011 | Train Accuracy: 0.85 | Validation Accuracy: 0.53
Epoch: [11] | Loss: 11.221 | Train Accuracy: 0.86 | Validation Accuracy: 0.51
Epoch: [12] | Loss: 9.416 | Train Accuracy: 0.91 | Validation Accuracy: 0.52
Epoch: [13] | Loss: 8.854 | Train Accuracy: 0.87 | Validation Accuracy: 0.

In [72]:
!pip install h5py



In [76]:
import h5py
torch.save(model.state_dict(),'E:/user/Notebooks/data/plant-pathology-2020-fgvc7/base_model.h5')
#model.save("E:/user/Notebooks/data/plant-pathology-2020-fgvc7/base_model.h5")