In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader


In [2]:
# txf = transforms.ToTensor()
txf = transforms.Compose([
    transforms.Grayscale(),
    transforms.ToTensor(),
])
train_data = datasets.CIFAR10(
    root='cifar10/',
    train=True,
    download=True,
    transform=txf
)
test_data = datasets.CIFAR10(
    root='cifar10/',
    train=False,
    download=True,
    transform=txf
)


Files already downloaded and verified
Files already downloaded and verified


In [3]:
torch.manual_seed(9)
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=True)


In [4]:
class Lenet(nn.Module):
    def __init__(self, out_size) -> None:
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=6,
                kernel_size=5,
            ),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(
                in_channels=6,
                out_channels=16,
                kernel_size=5
            ),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(
                in_channels=16,
                out_channels=120,
                kernel_size=5
            )
        )

        self.fc1 = nn.Linear(120, 30)
        self.fc2 = nn.Linear(30, out_size)

    def forward(self, x):
        x = self.conv(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.softmax(x, dim=1)

    def interm_features(self, x):
        with torch.no_grad():
            x = self.conv(x)
            x = torch.flatten(x, 1)
            x = F.relu(self.fc1(x))
            return x


In [5]:
torch.manual_seed(9)
lenet = Lenet(10)
criterion = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(lenet.parameters(), lr=0.0001)


In [6]:
from torchsummary import summary
summary(lenet, (1, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
         MaxPool2d-2            [-1, 6, 14, 14]               0
            Conv2d-3           [-1, 16, 10, 10]           2,416
         MaxPool2d-4             [-1, 16, 5, 5]               0
            Conv2d-5            [-1, 120, 1, 1]          48,120
            Linear-6                   [-1, 30]           3,630
            Linear-7                   [-1, 10]             310
Total params: 54,632
Trainable params: 54,632
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.06
Params size (MB): 0.21
Estimated Total Size (MB): 0.27
----------------------------------------------------------------


In [7]:
try:
    lenet.load_state_dict(torch.load('./lenet_en.pt'))
except:
    n_train = len(train_data)
    n_test = len(test_data)
    e = 500

    train_losses = []
    test_losses = []
    train_accuracy = []
    test_accuracy = []

    for i in range(e):
        trn_corr = 0
        tst_corr = 0

        for b, (X_train, y_train) in enumerate(train_loader):
            y_pred = lenet.forward(X_train)
            loss = criterion(y_pred, y_train)

            predicted = torch.max(y_pred.data, 1)[1]
            batch_corr = (predicted == y_train).sum()
            trn_corr += batch_corr

            optimiser.zero_grad()
            loss.backward()
            optimiser.step()

        train_losses.append(loss.item())
        train_accuracy.append(trn_corr*100/n_train)

        with torch.no_grad():
            for b, (X_test, y_test) in enumerate(test_loader):
                y_val = lenet(X_test)
                predicted = torch.max(y_val.data, 1)[1]
                tst_corr += (predicted == y_test).sum()

        loss = criterion(y_val, y_test)
        test_losses.append(loss.item())
        test_accuracy.append(tst_corr*100/n_test)

        print(
            f'epoch: {i+1}\ttrain loss: {train_losses[i]:.3f} \ttrain accuracy: {train_accuracy[i]:2.3f}%')
        print(
            f"\t\ttest loss:  {test_losses[i]:.3f} \ttest accuracy:  {test_accuracy[i]:2.3f}%\n")

    torch.save(lenet.state_dict(), 'lenet_en.pt')


In [8]:
all_interm_features_train = []
train_labels = []

for b, (x, y) in enumerate(train_loader):
    all_interm_features_train.append(lenet.interm_features(x))
    train_labels.append(y.view(10, 1))

train_features = torch.concatenate(all_interm_features_train, axis=0)
train_labels = torch.concatenate(train_labels, axis=0)

all_interm_features_test = []
test_labels = []
for b, (x, y) in enumerate(test_loader):
    all_interm_features_test.append(lenet.interm_features(x))
    test_labels.append(y.view(10, 1))

test_features = torch.concatenate(all_interm_features_test, axis=0)
test_labels = torch.concatenate(test_labels, axis=0)

train_df = pd.DataFrame(train_features)
train_df['y'] = train_labels
test_df = pd.DataFrame(test_features)
test_df['y'] = test_labels

train_df.to_csv('lenet_train.csv', index=False)
test_df.to_csv('lenet_test.csv', index=False)


## Reading the Dataset

In [11]:
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
df_train = pd.read_csv('lenet_train.csv')
df_test = pd.read_csv('lenet_test.csv')
X_train = df_train.iloc[:, :-1].values
y_train = df_train.iloc[:, -1].values

X_test = df_test.iloc[:, :-1].values
y_test = df_test.iloc[:, -1].values

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

clf = RandomForestClassifier(
    n_estimators=50,
    criterion='entropy',
    random_state=0
)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(f"Accuracy with Random Forest: {round(acc*100, 2)}%")


Accuracy with Random Forest: 51.81%
