In [95]:
import os
import torch
import pandas as pd
import torchvision.transforms as transforms

from torch import Tensor
from pathlib import Path
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchmetrics import Accuracy

from PIL import Image


In [13]:
DATASET_FOLDER = Path("data/rice_leaf_diseases_dataset")
DATASET_FOLDER.is_dir()

True

In [14]:
df = {"image": [], "label": []}
for root, dir, file in os.walk(DATASET_FOLDER):
    if len(file) > 0:
        for f in file:
            p = Path(root) / f
            df["image"].append(str(p))
            df["label"].append(p.parent.name)

In [15]:
df = pd.DataFrame(df)
y = df.loc[:, ["label"]]
X = df.drop(["label"],axis=1)

print(X.shape, y.shape)

(4684, 1) (4684, 1)


In [16]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(3747, 1) (3747, 1) (937, 1) (937, 1)


In [17]:


class CMYKToRGB(object):
    def __call__(self, img: Image.Image) -> Image.Image:
        if img.mode == 'RGBA' or img.mode == "CMYK":
            img = img.convert('RGB')
        
        
        return img

    def __repr__(self):
        return self.__class__.__name__ + '()'


transform = transforms.Compose([
    CMYKToRGB(),
    transforms.PILToTensor()
])

In [18]:

class CustomImageDataset(Dataset):
    def __init__(self, X: pd.DataFrame, y: pd.DataFrame, transform=transform) -> None:
        self.X = X
        self.y = y
        self.transform = transform

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, index: int) -> tuple[Tensor, str]:
        p = str(self.X.iloc[index, 0])
        label = str(self.y.iloc[index, 0])
        
        img: Tensor = self.transform(Image.open(p)) # type: ignore
        return img, label

In [101]:
from sklearn.preprocessing import LabelEncoder

class_names = ["cat", "dog", "fish", "dog", "cat", "fish"]
lbl = LabelEncoder()

class_indices = lbl.fit_transform(class_names)

type(class_indices)

torch.from_numpy(class_indices)


tensor([0, 1, 2, 1, 0, 2])

In [112]:
IN_CHANNELS = 3
IMAGE_WIDTH = 300
IMAGE_HEIGHT = 300
TRAIN_BATCH_SIZE = 32
TEST_BATCH_SIZE  = 32
TOTAL_TRAIN_BATCHES = y_train.shape[0] // TRAIN_BATCH_SIZE + \
    (1 if y_train.shape[0] % TRAIN_BATCH_SIZE != 0 else 0)
TOTAL_TEST_BATCHES = y_test.shape[0] // TEST_BATCH_SIZE + \
    (1 if y_test.shape[0] % TEST_BATCH_SIZE != 0 else 0)

print((TOTAL_TRAIN_BATCHES, TRAIN_BATCH_SIZE, IN_CHANNELS, IMAGE_WIDTH, IMAGE_HEIGHT))
print((TOTAL_TEST_BATCHES, TEST_BATCH_SIZE, IN_CHANNELS, IMAGE_WIDTH, IMAGE_HEIGHT))

(118, 32, 3, 300, 300)
(30, 32, 3, 300, 300)


In [113]:
train_dataset = CustomImageDataset(X_train, y_train)
test_dataset = CustomImageDataset(X_test, y_test)

train_dataloader = DataLoader(train_dataset, TRAIN_BATCH_SIZE, num_workers=2)
test_dataloader = DataLoader(
    test_dataset, TEST_BATCH_SIZE, shuffle=True, num_workers=2)

In [144]:
class model_cnn(nn.Module):
    def __init__(self):
        super().__init__()

        self.block1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=10, kernel_size=4, stride=2), # 300 * 300
            nn.ReLU(),
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=3, stride=1), # 149 * 149
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 74 * 74
        )

        self.block2 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(10*73*73, 10),
            nn.ReLU(),
            nn.Linear(10, 10),
            nn.ReLU(),
            nn.Linear(10, 3)
        )


    def forward(self, x):
        return self.block2(self.block1(x))


model_0 = model_cnn()
model_0

model_cnn(
  (block1): Sequential(
    (0): Conv2d(3, 10, kernel_size=(4, 4), stride=(2, 2))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=53290, out_features=10, bias=True)
    (2): ReLU()
    (3): Linear(in_features=10, out_features=10, bias=True)
    (4): ReLU()
    (5): Linear(in_features=10, out_features=3, bias=True)
  )
)

In [145]:
import torchinfo

torchinfo.summary(model_0,input_size=[1,IN_CHANNELS,IMAGE_WIDTH,IMAGE_HEIGHT])

Layer (type:depth-idx)                   Output Shape              Param #
model_cnn                                [1, 3]                    --
├─Sequential: 1-1                        [1, 10, 73, 73]           --
│    └─Conv2d: 2-1                       [1, 10, 149, 149]         490
│    └─ReLU: 2-2                         [1, 10, 149, 149]         --
│    └─Conv2d: 2-3                       [1, 10, 147, 147]         910
│    └─ReLU: 2-4                         [1, 10, 147, 147]         --
│    └─MaxPool2d: 2-5                    [1, 10, 73, 73]           --
├─Sequential: 1-2                        [1, 3]                    --
│    └─Flatten: 2-6                      [1, 53290]                --
│    └─Linear: 2-7                       [1, 10]                   532,910
│    └─ReLU: 2-8                         [1, 10]                   --
│    └─Linear: 2-9                       [1, 10]                   110
│    └─ReLU: 2-10                        [1, 10]                   --
│    └─

In [146]:
loss_fn = nn.CrossEntropyLoss()
accuracy = Accuracy(task="multiclass",num_classes=3)
optimizer = torch.optim.Adam(params=model_0.parameters(),lr=0.2)

In [147]:
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder
torch.manual_seed(42)
epochs = 10
lbl = LabelEncoder()


for epoch in tqdm(range(1, epochs+1)):
    train_loss = 0
    train_acc = 0
    for batch, (X, y) in enumerate(train_dataloader):
        model_0.train()

        X = X.type(torch.float32)
        y_indices = torch.from_numpy(lbl.fit_transform(y))

        y_logits = model_0(X)
        y_preds = torch.softmax(y_logits, dim=1)

        l = loss_fn(y_logits, y_indices)
        train_loss += l.item()

        optimizer.zero_grad()

        l.backward()

        optimizer.step()

        train_acc += accuracy(y_preds, y_indices)

    train_loss = train_loss / len(train_dataloader)
    train_acc = train_acc / len(train_dataloader)

    test_loss = 0
    test_acc = 0
    for batch, (X, y) in enumerate(test_dataloader):
        model_0.eval()

        with torch.inference_mode():
            X = X.type(torch.float32)
            y_test_indices = torch.from_numpy(lbl.fit_transform(y))

            y_test_logits = model_0(X)
            y_test_preds = torch.softmax(y_test_logits, dim=1)

            l = loss_fn(y_test_logits, y_test_indices)
            test_loss += l.item()
            test_acc += accuracy(y_test_preds, y_test_indices)

    test_loss_1 = test_loss / len(test_dataloader)
    test_acc_1 = test_acc / len(test_dataloader)

    print(f"epoch: {epoch} | train_loss: {train_loss} | train_acc: {train_acc} | test_loss: {test_loss_1} | test_acc: {test_acc_1}")

  0%|          | 0/10 [00:00<?, ?it/s]

epoch: 1 | train_loss: 22068.753537854907 | train_acc: 0.35478460788726807 | test_loss: 1.1179152806599935 | test_acc: 0.32766202092170715
epoch: 2 | train_loss: 1.1058746626821614 | train_acc: 0.3539901077747345 | test_loss: 1.1181634823481241 | test_acc: 0.33981481194496155
epoch: 3 | train_loss: 1.1062543816485648 | train_acc: 0.3481638431549072 | test_loss: 1.1184303959210713 | test_acc: 0.33564814925193787
epoch: 4 | train_loss: 1.1064071028919544 | train_acc: 0.3481638431549072 | test_loss: 1.118838628133138 | test_acc: 0.33298608660697937
epoch: 5 | train_loss: 1.1064857183876684 | train_acc: 0.3481638431549072 | test_loss: 1.1186490416526795 | test_acc: 0.33298608660697937
epoch: 6 | train_loss: 1.1065325252080367 | train_acc: 0.3478989899158478 | test_loss: 1.1184845964113872 | test_acc: 0.33298608660697937
epoch: 7 | train_loss: 1.1065632690817624 | train_acc: 0.34604519605636597 | test_loss: 1.1187743663787841 | test_acc: 0.33298608660697937
epoch: 8 | train_loss: 1.10658480

In [None]:
import torchvision

torchvision.models.efficientnet_b0()