The dataloader is adapted from this [article](https://www.kaggle.com/code/moulibhaskar/pytorch-rice-classification/notebook).

# Summary
* PyTorch
* model : EfficientNet-B0 ([timm](https://www.kaggle.com/datasets/jinmingteo/timm-pytorch-image-models))
* optimizer : SGD
* scheduler : CosineAnnealingLR
* Augmentation : RandomCrop(height=128, width=128)

In [None]:
import sys
sys.path.append("../input/timm-pytorch-image-models")
import timm

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader,Dataset
from torchvision import datasets, models
from torchvision.transforms import ToTensor
from tqdm import tqdm
import os
import pandas as pd
from torchvision.io import read_image
from PIL import Image
import cv2
import torchvision.transforms as transforms
import numpy as np
import timm

import albumentations as A
from albumentations.core.transforms_interface import ImageOnlyTransform
from matplotlib import pyplot as plt

from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Training hyper-parameters shared by the cells below:
#   EPOCHS - number of passes over the training set
#   LR     - initial learning rate handed to the optimizer
#   BATCH  - mini-batch size used by both dataloaders
CFG = dict(
    EPOCHS=15,
    LR=1e-4,
    BATCH=64,
)

In [None]:
# Class names double as the sub-directory names inside the dataset root.
rice_names=['Arborio','Basmati','Ipsala','Jasmine','Karacadag']
DATASET_ROOT = '../input/rice-image-dataset/Rice_Image_Dataset'

# One list of file paths per class, in the same order as `rice_names`.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# keep the path order (and therefore any seeded split of it) reproducible.
image_files = [
    [
        os.path.join(DATASET_ROOT, rice_name, x)
        for x in sorted(os.listdir(os.path.join(DATASET_ROOT, rice_name)))
    ]
    for rice_name in rice_names
]

# Flatten the per-class lists into a single list of image paths; iterating the
# lists directly avoids hard-coding the number of classes.
images_paths = [path for class_files in image_files for path in class_files]

In [None]:
# Maps each rice variety (the image's parent directory name) to the integer
# class index used as the training target.
df_labels = {
    variety: class_index
    for class_index, variety in enumerate(
        ('Arborio', 'Basmati', 'Ipsala', 'Jasmine', 'Karacadag')
    )
}

# Visualization

* Image to which augmentation is applied
* Outlier data (Arborio 11836)

In [None]:
def aug_random_imshow(idx,name,transform):
  """Show one dataset image next to its augmented version.

  Args:
    idx: Number of the image inside the class directory; the file is looked
      up as "<name> (<idx>).jpg".
    name: Rice class name, used both as directory name and file-name prefix.
    transform: Callable invoked as ``transform(image=...)`` that returns a
      mapping whose ``"image"`` entry is the augmented image.

  Raises:
    FileNotFoundError: If the image cannot be read from disk.
  """
  path="../input/rice-image-dataset/Rice_Image_Dataset/{0}/{0} ({1}).jpg".format(name, idx)

  image=cv2.imread(path)
  # cv2.imread signals failure by returning None instead of raising.
  if image is None:
    raise FileNotFoundError(f'Could not read image: {path}')
  # OpenCV decodes to BGR while matplotlib renders RGB; convert so that the
  # displayed colours are correct.
  image=cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

  augmentations = transform(image=image)

  plt.figure(figsize=(10,10))

  plt.subplot(1,2,1)
  plt.imshow(image)
  plt.title(f'{name} original')

  plt.subplot(1,2,2)
  plt.imshow(augmentations["image"])
  plt.title(f'{name} transform')

  plt.show()

In [None]:
# Crop size used for the augmentation preview.
height = 128
width = 128
# Use the variables above instead of repeating the literal 128, so changing
# the crop size only requires editing one place.
transform = A.Compose([
    A.RandomCrop(height=height, width=width),
])

# Preview the augmentation on a regular sample.
aug_random_imshow(10,'Arborio',transform)

In [None]:
# Preview the same augmentation on the outlier sample (Arborio 11836).
aug_random_imshow(idx=11836, name='Arborio', transform=transform)

In [None]:
class CustomDataset(Dataset):
  """Dataset that loads images from disk and labels them by parent directory.

  Args:
    img_path: Sequence of image file paths. The name of each file's parent
      directory is used as the key into ``df_labels``.
    df_labels: Mapping from class (directory) name to integer class index.
    To_tensor: Callable applied to the PIL image to produce the tensor that
      is returned to the caller.
    transforms: Optional callable invoked as ``transforms(image=array)`` and
      returning a mapping with an ``"image"`` entry. When ``None`` the image
      is used unchanged.
  """

  def __init__(self,img_path,df_labels,To_tensor,transforms=None):
    super().__init__()
    self.img_path=img_path
    self.label=df_labels
    self.To_tensor=To_tensor
    self.transforms=transforms

  def __len__(self):
    """Return the number of image paths in the dataset."""
    return len(self.img_path)

  def __getitem__(self,idx):
    """Load, augment and convert the ``idx``-th image.

    Returns:
      A ``(image, label)`` pair: the output of ``To_tensor`` and the class
      index as a ``torch.Tensor``.

    Raises:
      FileNotFoundError: If the image cannot be read from disk.
      KeyError: If the parent directory name is not a key of ``df_labels``.
    """
    path=self.img_path[idx]
    image=cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
      raise FileNotFoundError(f'Could not read image: {path}')
    # OpenCV decodes to BGR; convert to RGB so the channel order matches what
    # PIL and the per-channel normalisation applied afterwards expect.
    image=cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # `transforms` defaults to None, so only apply it when one was supplied.
    if self.transforms is not None:
      image=self.transforms(image=image)['image']
    image=Image.fromarray(np.asarray(image,dtype=np.uint8))

    # The class name is the directory that directly contains the image.
    label_name=os.path.basename(os.path.dirname(path))
    label=torch.tensor(self.label[label_name])

    return self.To_tensor(image),label

Split the dataset into train, validation, and test sets.

In [None]:
# First hold out 20% of all image paths as the test set ...
train, test = train_test_split(
    images_paths,
    test_size=0.2,
    shuffle=True,
    random_state=34,
)
# ... then split 20% of the remaining paths off as the validation set.
train, valid = train_test_split(
    train,
    test_size=0.2,
    shuffle=True,
    random_state=34,
)

Image transforms: random crop for the training images, plus tensor conversion and normalization.

In [None]:
# PIL image -> float tensor, followed by per-channel mean/std normalisation.
To_tensor = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ]
)

# Training images are randomly cropped to 128x128.
transforms_train = A.Compose([A.RandomCrop(height=128, width=128)])

# Validation images go through an empty pipeline (no augmentation).
transforms_valid = A.Compose([])

train_dataset = CustomDataset(
    train,
    df_labels,
    To_tensor,
    transforms=transforms_train,
)
val_dataset = CustomDataset(
    valid,
    df_labels,
    To_tensor,
    transforms=transforms_valid,
)

In [None]:
# Training batches are reshuffled every epoch.
train_dataloader=DataLoader(
    train_dataset,
    pin_memory=True,
    batch_size=CFG['BATCH'],
    num_workers=4,
    shuffle=True
)
# Validation only measures the model, so sample order is irrelevant; keeping
# it fixed makes the evaluation pass deterministic.
valid_dataloader=DataLoader(
    val_dataset,
    pin_memory=True,
    batch_size=CFG['BATCH'],
    num_workers=4,
    shuffle=False
)


Build the EfficientNet-B0 model with the timm library.

In [None]:

class BaseModel(nn.Module):
  """Thin wrapper around a timm ``efficientnet_b0`` classifier.

  Args:
    num_classes: Number of output classes passed to ``timm.create_model``.
  """

  def __init__(self,num_classes=5):
    super().__init__()
    # Forward the argument instead of hard-coding 5 so that the parameter
    # actually controls the size of the classification head.
    self.model=timm.create_model('efficientnet_b0',num_classes=num_classes)

  def forward(self,x):
    """Return the wrapped model's output for the input batch ``x``."""
    return self.model(x)

* Optimizer : SGD (Adam is also a good choice)
* Scheduler : CosineAnnealingLR

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model=BaseModel().to(device)
optimizer=torch.optim.SGD(model.parameters(), lr=CFG['LR'])
criterion=nn.CrossEntropyLoss().to(device)
# The scheduler is stepped once per epoch, so one cosine half-period should
# span the whole run (T_max = number of epochs). eta_min is the floor of the
# schedule and must be below the initial LR; the previous value (0.001) was
# larger than CFG['LR'] (1e-4), which made the schedule raise the learning
# rate instead of annealing it.
scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer=optimizer,
    T_max=CFG['EPOCHS'],
    eta_min=CFG['LR']*0.01,
)

# Train

In [None]:
# Per-epoch mean losses, one entry appended per epoch.
valid_loss_list=[]
train_loss_list=[]
for epoch in range(1,CFG['EPOCHS']+1):
  # ---- training pass -------------------------------------------------------
  model_preds=[]
  true_labels=[]
  loss_sum=0.0
  sample_count=0

  model.train()
  for data,label in tqdm(train_dataloader):
    data,label=data.to(device),label.to(device)

    optimizer.zero_grad()
    output=model(data)
    loss=criterion(output,label)
    loss.backward()
    optimizer.step()

    # Accumulate inside the batch loop so the epoch metrics cover every
    # batch rather than only the last one.
    batch_size=label.size(0)
    loss_sum+=loss.item()*batch_size
    sample_count+=batch_size
    model_preds+=output.argmax(1).detach().cpu().numpy().tolist()
    true_labels+=label.detach().cpu().numpy().tolist()

  # Sample-weighted mean; max(..., 1) guards against an empty dataloader.
  train_epoch_loss=loss_sum/max(sample_count,1)
  train_loss_list.append(train_epoch_loss)
  print(f'{epoch}: "train loss:"{train_epoch_loss:.5f}, "train acc: "{accuracy_score(true_labels,model_preds)}')

  # ---- validation pass -----------------------------------------------------
  model_preds=[]
  true_labels=[]
  loss_sum=0.0
  sample_count=0

  model.eval()
  with torch.no_grad():
    for data,label in tqdm(valid_dataloader):
      data,label=data.to(device),label.to(device)

      output=model(data)
      valid_loss=criterion(output,label)

      batch_size=label.size(0)
      loss_sum+=valid_loss.item()*batch_size
      sample_count+=batch_size
      model_preds+=output.argmax(1).detach().cpu().numpy().tolist()
      true_labels+=label.detach().cpu().numpy().tolist()

  valid_epoch_loss=loss_sum/max(sample_count,1)
  valid_loss_list.append(valid_epoch_loss)
  print(f'epoch {epoch}: "val loss:"{valid_epoch_loss:.5f}, "val acc: "{accuracy_score(true_labels,model_preds)}')

  # Advance the learning-rate schedule once per epoch.
  if scheduler is not None:
    scheduler.step()
