<a href="https://colab.research.google.com/github/chaidosa/Flowers-Species/blob/main/Flower_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Flower Classification
*To accomplish this task we've taken Flower dataset from Kaggel: https://www.kaggle.com/alxmamaev/flowers-recognition*

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [16]:
import os

os.environ['KAGGLE_CONFIG_DIR'] = "/content/gdrive/MyDrive/data/kaggle"

In [14]:
""" Moving to the directory where the kaggle.json file is present (You need API to download dataset directly
from kaggle it's an easy process) """
%cd /content/gdrive/MyDrive/data/kaggle

/content/gdrive/MyDrive/data/kaggle


In [18]:
!kaggle datasets download -d alxmamaev/flowers-recognition

Downloading flowers-recognition.zip to /content/gdrive/MyDrive/data/kaggle
100% 225M/225M [00:11<00:00, 24.3MB/s]
100% 225M/225M [00:11<00:00, 21.3MB/s]


In [None]:
# Unzipping the data files and deleting the zip file
!unzip \*.zip && rm *.zip

In [22]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset,DataLoader
import torchvision.transforms as transforms
import torch.nn.functional as F

In [23]:
mean = np.array([0.4914, 0.4822, 0.4465])
std  = np.array([0.2023, 0.1994, 0.2010])

data_transforms = {
    'train':
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.CenterCrop((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
    'test':
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.CenterCrop((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
}

In [24]:
# Define the hyperparameters
batch_size = 8
learning_rate = 1e-3
num_epochs = 50
num_classes = 5

# If GPU is available choose that, below code is for the same
device = None
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

print(device)


cuda


In [28]:
%pwd

'/content/gdrive/MyDrive/data/kaggle'

In [29]:
Path_data = '/content/gdrive/MyDrive/data/kaggle/flowers/'

In [33]:
# Total datset
data_set = torchvision.datasets.ImageFolder(Path_data, transform=data_transforms['train'])

#size of the total dataset
print(len(data_set))
# shape of the dataset
print(data_set[0][0].shape)
# What are the classes of the dataset
print(data_set.class_to_idx)

4317
torch.Size([3, 224, 224])
{'daisy': 0, 'dandelion': 1, 'rose': 2, 'sunflower': 3, 'tulip': 4}


In [37]:
# Spilt the data between train and the validation sets

Split_ratio = 0.8
train_size = int(Split_ratio * len(data_set))
val_size   = len(data_set) - train_size

print(f'Train size is: {train_size}, Validation size is {val_size}')

train_data, val_data = torch.utils.data.random_split(data_set, [train_size, val_size])
print(f"{len(train_data)}, {len(val_data)}")

Train size is: 3453, Validation size is 864
3453, 864


In [40]:
# dataloaders
train_load = DataLoader(dataset=train_data,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=2)

val_load = DataLoader(dataset=val_data,
                      batch_size=1,
                      shuffle=True,
                      num_workers=2)

In [44]:
# Checking if the data loading is working

eg = iter(train_load)
sample, labels = next(eg)
print(sample.shape, labels.shape)
print(f"{len(train_load)}, {len(val_load)}")


torch.Size([8, 3, 224, 224]) torch.Size([8])
432, 864


In [49]:
# Custom CNN models class
class ConvNet(nn.Module):
  def __init__(self, model, num_classes):
    super(ConvNet, self).__init__()
    self.base_model = nn.Sequential(*list(model.children())[:-1]) #excluding the last FC layer
    self.linear1 = nn.Linear(in_features=2048, out_features=512)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(in_features=512, out_features=num_classes)

  def forward(self, x):
    x = self.base_model(x)
    x = torch.flatten(x, 1)
    lin = self.linear1(x)
    x = self.relu(lin)
    out = self.linear2(x)
    return lin, out

In [50]:
model = torchvision.models.resnet50(pretrained=True) # This our base model

model = ConvNet(model, num_classes)

model = model.to(device)


objective_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)



In [51]:
print(model)

ConvNet(
  (base_model): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(6

In [52]:
# Training on our flower dataset

n_iters = len(train_load)

for epoch in range(num_epochs):
  model.train()
  for ii,(images, labels) in enumerate(train_load):
    images = images.to(device)
    labels = labels.to(device)

    _,outputs = model(images)
    loss = objective_function(outputs, labels)

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if (ii+1)%108 == 0:
      print(f'Epoch [{epoch+1}/{num_epochs}], Step[{ii+1}/{n_iters}], Loss = {loss.item():.6f}')

Epoch [1/50], Step[108/432], Loss = 0.664136
Epoch [1/50], Step[216/432], Loss = 0.222699
Epoch [1/50], Step[324/432], Loss = 0.354514
Epoch [1/50], Step[432/432], Loss = 0.736255
Epoch [2/50], Step[108/432], Loss = 0.928168
Epoch [2/50], Step[216/432], Loss = 0.021836
Epoch [2/50], Step[324/432], Loss = 0.112790
Epoch [2/50], Step[432/432], Loss = 0.110380
Epoch [3/50], Step[108/432], Loss = 0.834857
Epoch [3/50], Step[216/432], Loss = 0.062991
Epoch [3/50], Step[324/432], Loss = 0.101490
Epoch [3/50], Step[432/432], Loss = 0.150484
Epoch [4/50], Step[108/432], Loss = 0.290846
Epoch [4/50], Step[216/432], Loss = 0.159907
Epoch [4/50], Step[324/432], Loss = 0.025264
Epoch [4/50], Step[432/432], Loss = 0.031802
Epoch [5/50], Step[108/432], Loss = 0.567587
Epoch [5/50], Step[216/432], Loss = 0.100825
Epoch [5/50], Step[324/432], Loss = 0.307965
Epoch [5/50], Step[432/432], Loss = 0.326450
Epoch [6/50], Step[108/432], Loss = 0.010951
Epoch [6/50], Step[216/432], Loss = 0.303842
Epoch [6/5

In [53]:
# Evaluating model

def eval_model(model, dataloader, phase):

  with torch.no_grad():
    # for the entire dataset
    n_correct = 0
    n_samples = 0

    model.eval()

    for images, labels in dataloader:

      images = images.to(device)
      labels = labels.to(device)

      _, outputs = model(images)

      _, preds = torch.max(outputs, 1)
      n_samples += labels.size(0)
      n_correct += (preds == labels).sum().item()

    accuracy = n_correct/float(n_samples)

    print(f'Accuracy of model on {phase} set = {(100.0 * accuracy):.4f} %')


In [55]:
train_load = DataLoader(dataset=train_data,
                        batch_size=1,
                        shuffle=False,
                        num_workers=2)

eval_model(model, dataloader=train_load, phase='training')

Accuracy of model on training set = 99.9421 %


In [56]:
eval_model(model, dataloader=val_load, phase='testing')

Accuracy of model on testing set = 93.5185 %
