In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
import time
from copy import deepcopy

from PIL import Image
import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split

import torch
from torchvision.models import resnet50

from sklearn.metrics import accuracy_score

In [None]:
def create_dataset(path, train=True):
    """Build a DataFrame of image paths and string labels for one dataset split.

    Parameters
    ----------
    path : str
        Dataset root that contains the ``train`` and ``valid`` sub-directories.
    train : bool, default True
        Read the ``train`` split when true, otherwise the ``valid`` split.

    Returns
    -------
    pandas.DataFrame
        Columns ``images`` (file path) and ``label`` (``'Not Healthy'`` or
        ``'Healthy'``). Late-blight rows come first, then healthy rows; within
        each class the files are sorted by name.

    Raises
    ------
    FileNotFoundError
        If a class directory is missing from the selected split.
    """
    split_dir = os.path.join(path, 'train' if train else 'valid')
    # (class directory name, label), in the order the rows are emitted.
    classes = (("Tomato___Late_blight", 'Not Healthy'),
               ("Tomato___healthy", 'Healthy'))

    imgs, labels = [], []
    for dir_name, label in classes:
        class_dir = os.path.join(split_dir, dir_name)
        if not os.path.isdir(class_dir):
            # Fail with a clear message instead of an IndexError on an empty walk.
            raise FileNotFoundError("Class directory not found: {}".format(class_dir))
        # Take only the non-directory entries directly inside the class folder.
        # Sorting fixes the row order, which keeps any seeded split reproducible.
        with os.scandir(class_dir) as entries:
            names = sorted(entry.name for entry in entries if not entry.is_dir())
        imgs.extend(os.path.join(class_dir, name) for name in names)
        labels.extend([label] * len(names))

    return pd.DataFrame({'images': imgs, 'label': labels})

In [None]:
# Dataset root; `create_dataset` looks for the `train` / `valid` folders below it.
path = "/kaggle/input/lateblighttomatodisease/Tomato_Dataset"
# One frame per split: image paths plus their string labels.
train, validate = (create_dataset(path, train=use_train) for use_train in (True, False))

In [None]:
# Class name -> integer id used as the training target.
label_index = {'Not Healthy': 0,
               'Healthy': 1}
# Inverse lookup (id -> class name), derived so the two maps cannot drift apart.
index_label = {idx: name for name, idx in label_index.items()}

# Encode the string labels as ids. Values that are not class names (for
# example ids produced by an earlier run of this cell) pass through unchanged,
# so re-running the cell no longer turns the whole column into NaN.
train['label'] = train['label'].map(lambda value: label_index.get(value, value))
validate['label'] = validate['label'].map(lambda value: label_index.get(value, value))

In [None]:
# Class balance of the training split.
frq = train['label'].value_counts()
fig, ax = plt.subplots()
# Label the wedges with class names rather than the raw integer ids.
ax.pie(frq, labels=[index_label.get(idx, idx) for idx in frq.index], autopct='%0.2f%%')
ax.set_title("Training set class distribution")
plt.show()

<h1 style='font-size:52px;text-align:center;'>Important features and parameters for data preprocessing and model training 🏋️‍♂️</h1>

In [None]:
EPOCHS = 20        # passes over the training set
BATCH = 16         # samples per mini-batch
STEP = 5           # StepLR: epochs between learning-rate decays
GAMMA = 0.1        # StepLR: multiplicative decay factor
# Adam's documented default step size. The previous value (0.1) is far too
# large for Adam, especially when fine-tuning pretrained weights: the first
# updates overwrite what the backbone has already learned.
LR = 1e-3
SIZE = 224         # images are resized to SIZE x SIZE
NUM_CLASSES = 2    # Healthy / Not Healthy

In [None]:
# Preprocessing applied to every image array, in order:
# ndarray -> PIL image -> tensor -> SIZE x SIZE resize -> per-channel normalisation.
preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Resize((SIZE, SIZE)),
    # Standard ImageNet channel statistics, matching the pretrained ResNet weights.
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]
transform = transforms.Compose(preprocessing_steps)

<h1 style='font-size:52px;text-align:center;'>Dataset chain ⛓</h1>

In [None]:
class ImgDataset(Dataset):
    """Dataset that loads images on demand from a two-column DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows of ``(image path, label)``, in that column order.
    transform : callable or None
        Applied to the RGB ``numpy`` image array; skipped when ``None``.
    """

    def __init__(self, data, transform):
        super().__init__()
        # Keep a plain ndarray so rows can be indexed positionally.
        self.data = data.values
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, x):
        """Return ``(image, label)`` for row ``x``."""
        img_path, label = self.data[x, :]
        # The context manager releases the file handle as soon as the pixels
        # are read. Converting to RGB guarantees three channels, so grayscale,
        # RGBA or palette files still match the 3-channel normalisation.
        with Image.open(img_path) as handle:
            img = np.asarray(handle.convert("RGB"))

        if self.transform is not None:
            img = self.transform(img)

        return img, label

In [None]:
# Hold out 10% of the validation frame as a test set; the fixed seed keeps the shuffle repeatable.
val, test = train_test_split(validate, test_size=0.1, random_state=42)

In [None]:
# Wrap the training and validation frames with the shared preprocessing pipeline.
train_ds, val_ds = (ImgDataset(frame, transform) for frame in (train, val))

In [None]:
# Validation batches keep a fixed order; training batches are reshuffled on every pass.
val_dl = DataLoader(dataset=val_ds, batch_size=BATCH, shuffle=False)
train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, shuffle=True)

<h1 style='font-size:52px;text-align:center;'>Defining model, optimizer 🚀, criterion ✅ and scheduler ⏲</h1>
    
<div style='text-align:center;font-size:26px;'>
    <p style='display:inline-block;'>ResNet50 model architecture:</p>
    <img src='https://miro.medium.com/v2/resize:fit:1400/1*rPktw9-nz-dy9CFcddMBdQ.jpeg'>
</div>

In [None]:
def init_model(n_classes):
    """Create an ImageNet-pretrained ResNet50 with a new ``n_classes``-way head.

    The returned network is placed on the GPU when CUDA is available and on
    the CPU otherwise.
    """
    net = resnet50(weights="IMAGENET1K_V1")
    # Swap the final fully connected layer for one sized to this task.
    net.fc = torch.nn.Linear(net.fc.in_features, n_classes)
    target = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    return net.to(target)

In [None]:
# Network with a NUM_CLASSES-way classification head.
model = init_model(NUM_CLASSES)
criterion = torch.nn.CrossEntropyLoss()
# Adam over every parameter; StepLR multiplies the rate by GAMMA every STEP scheduler steps.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=STEP, gamma=GAMMA)

<h1 style='font-size:52px;text-align:center;'>🏋️‍♀️ Model training 🏋️‍♂️</h1>

In [None]:
# Snapshot of the model with the highest validation accuracy seen so far.
best_model = deepcopy(model)
best_acc = 0

# Per-epoch metrics, appended once per epoch.
train_history = []
val_history = []
acc_history = []

# Batches are moved to whichever device holds the model's weights.
device = next(model.parameters()).device

t_start = time.time()
for i in range(1, EPOCHS+1):
    start = time.time()

    # ---- training pass ----
    model.train()
    train_loss = 0
    train_total = 0
    for image, label in train_dl:
        image, label = image.to(device), label.to(device)

        optimizer.zero_grad()
        out = model(image)
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()

        # `criterion` returns the mean over the batch. Weight it by the batch
        # size so that dividing by the sample count below gives the true
        # per-sample mean (previously the result was ~1/BATCH of the real loss
        # and skewed by a smaller final batch).
        n_samples = out.size(0)
        train_loss += loss.item() * n_samples
        train_total += n_samples

    loss_train = train_loss/train_total

    # ---- validation pass (no gradients, eval-mode layers) ----
    val_loss = 0
    val_total = 0
    val_acc = 0
    model.eval()
    with torch.no_grad():
        for image, label in val_dl:
            image, label = image.to(device), label.to(device)
            out = model(image)
            loss = criterion(out, label)

            n_samples = out.size(0)
            val_loss += loss.item() * n_samples
            val_total += n_samples
            # Count correct predictions (arg-max over the class dimension).
            val_acc += (out.argmax(1) == label).sum().item()

    loss_val = val_loss/val_total
    loss_acc = val_acc/val_total  # validation accuracy, despite the name

    # Keep a copy of the weights whenever validation accuracy improves.
    if loss_acc > best_acc:
        best_acc = loss_acc
        best_model = deepcopy(model)

    end = time.time()
    scheduler.step()

    train_history += [loss_train]
    val_history += [loss_val]
    acc_history += [loss_acc]

    print("Epoch {} || train loss: {} || val loss: {} || acc: {} || time: {}".format(
        i, loss_train, loss_val, loss_acc, end-start))


t_end = time.time()
print("Total time required {}".format(t_end-t_start))

<h1 style='font-size:52px;text-align:center;'>📉 Performance evaluation 📈</h1>

In [None]:
# One x position per recorded epoch. Deriving it from the history (rather than
# EPOCHS) keeps the plot working when training was stopped early.
epochs = list(range(1, len(train_history) + 1))
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
loss_ax, acc_ax = axes

# Left panel: training vs. validation loss.
loss_ax.plot(epochs, train_history, label="Train Loss")
loss_ax.plot(epochs, val_history, label="Validation Loss")
loss_ax.set_title("Train and Validation loss")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.legend()

# Right panel: validation accuracy.
acc_ax.plot(epochs, acc_history)
acc_ax.set_title("Accuracy")
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Validation accuracy")

plt.tight_layout()
plt.show()

<h1 style='font-size:52px;text-align:center;'>Prediction pipeline</h1>

In [None]:
def predict(path):
    """Classify a single image file with ``best_model``.

    Parameters
    ----------
    path : str
        Location of the image file to classify.

    Returns
    -------
    numpy.int64
        Predicted class id (index of the largest model output).
    """
    # Close the file handle promptly and force three channels so the image
    # matches the 3-channel normalisation in `transform`.
    with Image.open(path) as handle:
        img = np.asarray(handle.convert("RGB"))

    image = transform(img).unsqueeze(0)  # add the batch dimension
    if torch.cuda.is_available():
        image = image.cuda()

    best_model.eval()
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        out = best_model(image)

    prediction = out.argmax(1).cpu().numpy()
    return prediction[0]

In [None]:
first = 15   # number of test images to preview
n_cols = 3
# Never index past the end of the test frame.
n_shown = min(first, len(test))
n_rows = max(1, -(-n_shown // n_cols))  # ceiling division, at least one row

fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 10), squeeze=False)
for k, ax in enumerate(axes.flat):
    if k >= n_shown:
        # Blank out panels that have no image to show.
        ax.axis("off")
        continue

    img_path = test.iloc[k, 0]
    prediction = index_label[predict(img_path)]
    expected = index_label[test.iloc[k, 1]]
    title = "Predicted value {}\nExpected value: {}".format(prediction, expected)
    # Read the pixels inside the context manager so the file handle is released.
    with Image.open(img_path) as handle:
        img = np.asarray(handle)
    ax.imshow(img)
    ax.set_title(title)

plt.tight_layout()
plt.show()

<h1 style='font-size:52px;text-align:center;'>Predicting the full test set and evaluating the results</h1>

In [None]:
# Ground-truth ids and model predictions for every row of the held-out test frame.
expect = test.iloc[:, 1].tolist()
predicted = [predict(img_path) for img_path in test.iloc[:, 0]]

# accuracy_score is documented as (y_true, y_pred); pass the arguments in that order.
print(accuracy_score(expect, predicted))