# Rice Classification with CNN (using CUDA; this is the old version, the new version has a better approach and accuracy)

The dataset contains 75,000 images of 5 different types of rice:

1. Arborio
2. Basmati
3. Ipsala
4. Jasmine
5. Karacadag

15000 images for each class of rice

dataset : https://www.kaggle.com/datasets/muratkokludataset/rice-image-dataset

### choosing device

In [1]:
import torch

# Run on the GPU when PyTorch reports a usable CUDA device, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using {device} device")

Using cuda device


### creating dataset class


In [2]:
from torch.utils.data import Dataset
import os
from torchvision.io import read_image
from math import floor
from torchvision.transforms import Grayscale,Resize


# Integer class id -> rice variety name; the name is also the prefix of the
# image file names ("<name> (<k>).jpg").
labelsDict = dict(
    enumerate(("Arborio", "Basmati", "Ipsala", "Jasmine", "Karacadag"))
)

class Custom_Dataset(Dataset):
    """Map-style dataset that loads rice images by index from one directory.

    Image files are looked up as ``"<ClassName> (<k>).jpg"``: the class name
    comes from ``labelsDict[label]`` and ``k`` is ``idx % images_per_class + 1``.
    Because ``k`` is derived from ``idx`` alone, every index maps to a distinct
    file only when ``labels`` is laid out in consecutive blocks of
    ``images_per_class`` entries per class.

    Both the image and its label are moved to the module-level ``device``
    before they are returned.

    Args:
        labels: Indexable collection of class ids, one per image. Each entry
            must support ``.to(device)`` and ``int()`` (e.g. a 1-D tensor).
        img_dir: Image directory, joined onto the current working directory.
        transform: Optional callable applied to the resized float image.
        images_per_class: Number of consecutive samples that belong to each
            class. Defaults to 15000, the layout of the rice dataset.

    Raises:
        ValueError: If ``images_per_class`` is not positive.
    """

    def __init__(self, labels, img_dir, transform=None, images_per_class=15000):
        if images_per_class <= 0:
            raise ValueError("images_per_class must be a positive integer")
        self.labels = labels
        self.img_dir = img_dir
        self.transform = transform
        self.images_per_class = images_per_class
        # Built once here instead of on every __getitem__ call.
        self._resize = Resize(size=(150, 150), antialias=True)

    def __len__(self):
        """Return the number of samples (one per label)."""
        return len(self.labels)

    @staticmethod
    def _image_filename(class_name, idx, images_per_class):
        """Return the file name of sample ``idx``, e.g. ``"Basmati (1).jpg"``."""
        # File numbering restarts at 1 for every class.
        position = int(idx) % images_per_class + 1
        return f"{class_name} ({position}).jpg"

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)`` on ``device``.

        The image is read from disk, resized to 150x150, converted to float
        and passed through ``transform`` when one was given.
        """
        label = self.labels[idx].to(device)
        img_name = self._image_filename(
            labelsDict[int(label)], idx, self.images_per_class
        )
        img_path = os.path.join(os.getcwd(), self.img_dir, img_name)

        image = read_image(img_path)
        image = self._resize(image).float().to(device)
        if self.transform:
            image = self.transform(image)
        return image, label
    


### creating our dataset

In [3]:
# One float class id per image: five consecutive blocks of 15000 entries
# holding the values 0, 1, 2, 3 and 4 (75000 labels in total).
labels = torch.arange(5.0).repeat_interleave(15000)

# Images are read from the "Rice_Image_Dataset" folder; no extra transform is applied.
dataset = Custom_Dataset(labels=labels, img_dir="Rice_Image_Dataset")

### shuffling and creating train / test datasets

In [4]:
from torch.utils.data import Subset
import numpy as np

# Randomly split the samples 80% / 20% into a training and a test subset.
N = len(dataset)

# permutation(N) returns a shuffled copy of np.arange(N).
indices = np.random.permutation(N)

# Position of the train/test boundary. Deliberately not called `slice`,
# which would shadow the builtin of the same name for the rest of the session.
split_index = int(0.8 * N)

train_indices = indices[:split_index]
test_indices = indices[split_index:]

train_dataset = Subset(dataset, train_indices)
test_dataset = Subset(dataset, test_indices)


### Loading Data

In [5]:
from torch.utils.data import DataLoader

# Both loaders yield mini-batches of 64 samples in a freshly shuffled order
# on every pass.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=True,
)


### creating model

In [40]:
from torch import nn

class NeuralNetwork(nn.Module):
    """CNN classifier: three 11x11 convolutions followed by a three-layer MLP.

    The layer sizes are fixed for 3-channel 150x150 inputs; the network
    returns 5 raw logits per image (no softmax is applied).
    """

    def __init__(self):
        super().__init__()

        # Unpadded, stride-1 11x11 convolutions shrink each side by 10 pixels.
        feature_layers = [
            nn.Conv2d(3, 10, (11, 11)),    # 3 x 150 x 150 -> 10 x 140 x 140
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Conv2d(10, 10, (11, 11)),   # -> 10 x 130 x 130
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Conv2d(10, 10, (11, 11)),   # -> 10 x 120 x 120
            nn.ReLU(),
            nn.MaxPool2d((12, 12)),        # 120 / 12 -> 10 x 10 x 10
        ]

        classifier_layers = [
            nn.Flatten(),                  # 10 * 10 * 10 = 1000 features
            nn.Linear(1000, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 5),             # one logit per rice class
        ]

        self.seq = nn.Sequential(*feature_layers, *classifier_layers)

    def forward(self, X):
        """Return the class logits for a batch of images ``X``."""
        return self.seq(X)

### learning

In [41]:
# Hyper-parameters of the training run.
learning_rate = 0.001
epoches = 5

# The network is placed on `device` and trained with plain SGD on a
# cross-entropy objective.
model = NeuralNetwork().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

def train_loop(train_dataloader, model, loss_fn, optim):
    """Train ``model`` for one pass over ``train_dataloader``.

    For every batch the targets are cast to int64, the loss is
    back-propagated, the optimizer takes a step and the gradients are
    cleared. The current loss and an approximate sample count are printed
    on every 100th batch (starting with the first one).

    Args:
        train_dataloader: Iterable of ``(inputs, targets)`` batches that also
            exposes a ``dataset`` attribute with a length.
        model: Module to train; it is switched to training mode.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar loss.
        optim: Optimizer that updates the parameters of ``model``.
    """
    model.train()
    for step, (inputs, targets) in enumerate(train_dataloader):
        targets = targets.long()
        batch_loss = loss_fn(model(inputs), targets)

        batch_loss.backward()
        optim.step()
        optim.zero_grad()

        # Progress is only reported on every 100th batch.
        if step % 100 != 0:
            continue
        seen = (step + 1) * len(inputs)
        print("loss : {} / [{}/{}]".format(
            batch_loss.item(), seen, len(train_dataloader.dataset)))

def test_loop(test_dataloader,model,loss_fn):
    model.eval()
    test_loss,correct = 0,0

    with torch.no_grad():
        for X,y in test_dataloader:
            y=y.to(torch.int64)
            pred = model(X)
            test_loss += loss_fn(pred,y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    
    size = len(test_dataloader.dataset)
    num_batches = len(test_dataloader)
    correct /= size # acuuracy
    test_loss /= num_batches # avg loss in each batch

    print("Test ERR >> Acc : {} / Avg Loss : {}".format(correct,test_loss))
    return correct

# Placeholder that is only reported when no epoch runs; otherwise it holds
# the accuracy measured after the most recent epoch.
finalAcc = -100

for i in range(epoches):
    print("EPOCH {} -----------------------------".format(i + 1))
    train_loop(
        train_dataloader=train_dataloader,
        model=model,
        loss_fn=loss_fn,
        optim=optimizer,
    )
    # Evaluate after every epoch and keep the latest result.
    acc = test_loop(
        test_dataloader=test_dataloader,
        model=model,
        loss_fn=loss_fn,
    )
    finalAcc = acc

print("Done, Accuracy is : {}".format(finalAcc))


EPOCH 1 -----------------------------
loss : 2.6992545127868652 / [64/60000]
loss : 0.6482333540916443 / [6464/60000]
loss : 0.34981662034988403 / [12864/60000]
loss : 0.18721738457679749 / [19264/60000]
loss : 0.1719854772090912 / [25664/60000]
loss : 0.2527434229850769 / [32064/60000]
loss : 0.2708818316459656 / [38464/60000]
loss : 0.06755897402763367 / [44864/60000]
loss : 0.09046316146850586 / [51264/60000]
loss : 0.12092944234609604 / [57664/60000]
Test ERR >> Acc : 0.9422666666666667 / Avg Loss : 0.1616611862119208
EPOCH 2 -----------------------------
loss : 0.16581037640571594 / [64/60000]
loss : 0.18272382020950317 / [6464/60000]
loss : 0.06095385178923607 / [12864/60000]
loss : 0.038240835070610046 / [19264/60000]
loss : 0.0797572210431099 / [25664/60000]
loss : 0.20200024545192719 / [32064/60000]
loss : 0.09259215742349625 / [38464/60000]
loss : 0.14818055927753448 / [44864/60000]
loss : 0.07410043478012085 / [51264/60000]
loss : 0.16554561257362366 / [57664/60000]
Test ERR

After around **20 minutes** of training (using CUDA), the model reached an accuracy of **98.7%**.<br>