Article: https://www.embedded-robotics.com/\
Code: Image Classification using a Convolutional Neural Network\
Prepared By: Awais Naeem (awais.naeem@embedded-robotics.com)\
Copyright: www.embedded-robotics.com\
Disclaimer: This code may be redistributed provided that the owner's copyright is properly credited.

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.utils.data as torch_data
import torch.optim as optim
from sklearn.metrics import accuracy_score
from skimage.io import imread
from skimage.transform import resize

Intel Data: https://www.kaggle.com/datasets/puneet6060/intel-image-classification

In [3]:
# Resolve the current user's home directory; the dataset paths are built from it.
home = os.path.expanduser("~")

In [4]:
# Map each class name (also the name of its image folder) to an integer label.
classes_dict = {
    class_name: label
    for label, class_name in enumerate(
        ('buildings', 'forest', 'glacier', 'mountain', 'sea', 'street')
    )
}

In [5]:
# Root folders of the train / test splits, both located under the user's home directory.
train_data_path, test_data_path = (
    os.path.join(home, relative_dir)
    for relative_dir in (
        ".torch/datasets/intel/seg_train/seg_train",
        ".torch/datasets/intel/seg_test/seg_test",
    )
)

In [6]:
# Read every training image, scale it to [0, 1], resize it to 150x150x3 and
# record the integer label of the class folder it came from.
train_X = []
train_y = []
for (out_class, out_label) in classes_dict.items():
    class_path = os.path.join(train_data_path, out_class)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the samples plotted below) reproducible across runs.
    class_img_list = sorted(os.listdir(class_path))

    for img_name in class_img_list:
        img_path = os.path.join(class_path, img_name)
        img = imread(img_path)
        img = img/255.0
        img = resize(img, (150,150,3), anti_aliasing=True)
        # Keep float32 instead of float64: the tensors are cast to float32
        # later anyway, so this halves the memory held by the image list.
        train_X.append(img.astype(np.float32))
        train_y.append(out_label)

In [7]:
# Read every test image, scale it to [0, 1], resize it to 150x150x3 and
# record the integer label of the class folder it came from.
test_X = []
test_y = []
for (out_class, out_label) in classes_dict.items():
    class_path = os.path.join(test_data_path, out_class)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the samples plotted below) reproducible across runs.
    class_img_list = sorted(os.listdir(class_path))

    for img_name in class_img_list:
        img_path = os.path.join(class_path, img_name)
        img = imread(img_path)
        img = img/255.0
        img = resize(img, (150,150,3), anti_aliasing=True)
        # Keep float32 instead of float64: the tensors are cast to float32
        # later anyway, so this halves the memory held by the image list.
        test_X.append(img.astype(np.float32))
        test_y.append(out_label)

In [8]:
# Display the overall shape of the collected training images.
np.asarray(train_X).shape

: 

: 

In [7]:
# Turn the Python lists of images / labels into NumPy arrays.
train_X, train_y = np.array(train_X), np.array(train_y)
test_X, test_y = np.array(test_X), np.array(test_y)

: 

: 

In [None]:
# Report the array shapes of both splits (the test split was previously
# mislabelled as "Train" in the output).
print('Train X:', train_X.shape)
print('Train y:', train_y.shape)
print('Test X:', test_X.shape)
print('Test y:', test_y.shape)

In [None]:
# Show four training images in a 2x2 grid.
plt.figure(figsize=(10, 10))
for grid_position, sample_idx in enumerate((0, 20, 60, 120), start=1):
    plt.subplot(2, 2, grid_position)
    plt.imshow(train_X[sample_idx], cmap='gray')
plt.show()

In [None]:
# Show four test images in a 2x2 grid.
plt.figure(figsize=(10, 10))
for grid_position, sample_idx in enumerate((0, 20, 60, 120), start=1):
    plt.subplot(2, 2, grid_position)
    plt.imshow(test_X[sample_idx], cmap='gray')
plt.show()

In [None]:
# Wrap the NumPy arrays as tensors: float32 for the images, int64 for the labels.
tensor_train_X = torch.from_numpy(train_X).to(torch.float32)
tensor_train_y = torch.from_numpy(train_y).to(torch.int64)
tensor_test_X = torch.from_numpy(test_X).to(torch.float32)
tensor_test_y = torch.from_numpy(test_y).to(torch.int64)

In [None]:
# Move the channel axis from last to second position, i.e.
# (N, 150, 150, 3) -> (N, 3, 150, 150), the layout the conv layers consume.
# NOTE: this cell is not idempotent - running it again permutes the already
# permuted tensors a second time.
tensor_train_X = tensor_train_X.permute(0, 3, 1, 2)
tensor_test_X = tensor_test_X.permute(0, 3, 1, 2)

In [None]:
# Pair each image tensor with its label tensor so a DataLoader can batch them together.
train_tensors = (tensor_train_X, tensor_train_y)
test_tensors = (tensor_test_X, tensor_test_y)
tensor_train_dataset = torch_data.TensorDataset(*train_tensors)
tensor_test_dataset = torch_data.TensorDataset(*test_tensors)

In [None]:
# Training batches are reshuffled every epoch; test batches keep dataset order.
train_dl = torch_data.DataLoader(
    dataset=tensor_train_dataset,
    batch_size=64,
    shuffle=True,
)
test_dl = torch_data.DataLoader(
    dataset=tensor_test_dataset,
    batch_size=16,
    shuffle=False,
)

Using LeNet Architecture for Deep Learning Model

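Output size of a convolution or pooling layer: $\left\lfloor \dfrac{W - K + 2P}{S} \right\rfloor + 1$, where $W$ = width of the input image, $K$ = kernel size, $P$ = padding, $S$ = stride. For example, a 150-pixel-wide input through a 5x5 convolution with no padding and stride 1 gives $(150 - 5 + 0)/1 + 1 = 146$.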

In [None]:
class LeNet(nn.Module):
    """LeNet-style CNN: two conv -> ReLU -> max-pool stages, then three linear layers.

    Args:
        n_channels: number of channels of the input images.
        classes: number of output classes.
        input_size: ``(height, width)`` of the input images. Defaults to
            ``(150, 150)``, which gives the original 50*34*34 = 57800
            flattened features in front of the first linear layer.

    Raises:
        ValueError: if ``input_size`` is too small to survive both
            conv/pool stages (each side must be at least 16 pixels).

    ``forward`` returns raw class scores (logits) of shape ``(batch, classes)``.
    No softmax is applied: ``nn.CrossEntropyLoss`` applies log-softmax itself,
    so feeding it probabilities would apply softmax twice and flatten the
    gradients. ``argmax`` over the logits gives the same predicted class as
    over probabilities; use ``torch.softmax(logits, dim=1)`` when
    probabilities are needed.
    """

    def __init__(self, n_channels, classes, input_size=(150, 150)):
        super(LeNet, self).__init__()

        feat_h, feat_w = (self._feature_extent(side) for side in input_size)
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                'input_size {} is too small for two 5x5 conv + 2x2 pool stages'.format(tuple(input_size))
            )

        # Shape comments below assume the default 3*150*150 input.
        self.conv1 = nn.Conv2d(in_channels=n_channels, out_channels=20, kernel_size=(5,5)) #20*146*146
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)) #20*73*73

        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=(5,5)) #50*69*69
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)) #50*34*34

        self.fc1 = nn.Linear(in_features=50 * feat_h * feat_w, out_features=1024)
        self.relu3 = nn.ReLU()

        self.fc2 = nn.Linear(in_features=1024, out_features=512)
        self.relu4 = nn.ReLU()

        self.fc3 = nn.Linear(in_features=512, out_features=classes)

    @staticmethod
    def _feature_extent(side):
        """Return the length of one spatial side after both conv/pool stages.

        Each 5x5 convolution (no padding, stride 1) removes 4 pixels and each
        2x2 max-pool with stride 2 halves the side, rounding down.
        """
        after_stage1 = (side - 4) // 2
        return (after_stage1 - 4) // 2

    def forward(self, x):
        """Compute class logits of shape ``(batch, classes)`` for a batch of images."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)

        # Flatten everything except the batch axis so the batch size can
        # never be silently reinterpreted by a mismatched feature count.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        x = self.relu3(x)

        x = self.fc2(x)
        x = self.relu4(x)

        # Raw logits; see the class docstring for why softmax is not applied.
        return self.fc3(x)

In [1]:
def train_model(model, train_dl, learning_rate, epochs):
    """Train ``model`` in place with Adam and cross-entropy loss.

    Prints the average training loss once per epoch.

    Args:
        model: the ``nn.Module`` to optimise; called as ``model(inputs)``.
        train_dl: iterable yielding ``(inputs, targets)`` batches, where
            ``targets`` holds integer class indices.
        learning_rate: learning rate passed to ``optim.Adam``.
        epochs: number of passes over ``train_dl``.

    NOTE(review): ``nn.CrossEntropyLoss`` applies log-softmax internally, so
    ``model`` should emit raw class scores rather than softmax probabilities.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):

        model.train()

        total_train_loss = 0.0
        train_steps = 0

        for (inputs, targets) in train_dl:
            optimizer.zero_grad()
            yhat = model(inputs)
            # reshape(-1) instead of squeeze(): squeeze() would collapse a
            # final batch holding a single sample into a 0-d target, which
            # no longer matches the (1, classes) prediction.
            loss = criterion(yhat, targets.reshape(-1))
            loss.backward()
            optimizer.step()

            # .item() yields a plain float, so the running total does not
            # keep every batch's autograd history alive for the whole epoch.
            total_train_loss += loss.item()
            train_steps += 1

        # An empty loader yields no steps; report NaN rather than crashing.
        avg_train_loss = total_train_loss / train_steps if train_steps else float('nan')
        print('[EPOCH {}] -> Training Loss: [{}]'.format(epoch, avg_train_loss))
        

In [None]:
# Three input channels (RGB images), six output classes.
model = LeNet(n_channels=3, classes=6)

In [None]:
# Fit the network on the training batches for 20 epochs.
train_model(model=model, train_dl=train_dl, learning_rate=0.01, epochs=20)

In [None]:
def evaluate_model(model, test_dl):
    """Return the classification accuracy of ``model`` over all batches of ``test_dl``.

    The model is switched to eval mode and run without gradient tracking.
    For each batch the predicted class is the argmax over axis 1 of the
    model output; predictions and targets are gathered as column vectors
    and scored with ``accuracy_score``.
    """
    predicted_columns = []
    actual_columns = []

    with torch.no_grad():
        model.eval()
        for batch_inputs, batch_targets in test_dl:
            batch_scores = model(batch_inputs).detach().numpy()
            predicted_columns.append(np.argmax(batch_scores, axis=1).reshape(-1, 1))
            actual_columns.append(batch_targets.numpy().reshape(-1, 1))

        all_predictions = np.vstack(predicted_columns)
        all_actuals = np.vstack(actual_columns)

    return accuracy_score(all_actuals, all_predictions)
        

In [None]:
# Accuracy on the data the model was trained on.
train_accuracy = evaluate_model(model=model, test_dl=train_dl)
train_accuracy_percent = train_accuracy*100
print('Train Data Accuracy: {}%'.format(train_accuracy_percent))

In [None]:
# Accuracy on the held-out test split.
test_accuracy = evaluate_model(model=model, test_dl=test_dl)
test_accuracy_percent = test_accuracy*100
print('Test Data Accuracy: {}%'.format(test_accuracy_percent))