<a href="https://colab.research.google.com/github/arpitpatelsitapur/my-py-torch-journey/blob/main/Fashion_MNIST_pytorch_ANN_model_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Trying it on only 6000 training rows and 1000 test rows**

In [None]:
# Fashion-MNIST is fetched through keras here; other loaders exist too.
from keras import datasets
train_split, test_split = datasets.fashion_mnist.load_data()
(X_train, y_train), (X_test, y_test) = train_split, test_split

In [None]:
import pandas as pd

# Flatten every sample into a single row (labels become one-column arrays)
# and wrap each flattened array in a DataFrame in the same step.
X_train = pd.DataFrame(X_train.reshape(len(X_train), -1))
X_test = pd.DataFrame(X_test.reshape(len(X_test), -1))
y_train = pd.DataFrame(y_train.reshape(len(y_train), -1))
y_test = pd.DataFrame(y_test.reshape(len(y_test), -1))

In [None]:
# Preview the first five rows of the flattened training features.
X_train.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,774,775,776,777,778,779,780,781,782,783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0,...,119,114,130,76,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,22,...,0,0,1,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,33,96,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Subset sizes are defined once so the slicing and the log message cannot drift apart.
n_train_subset = 6000
n_test_subset = 1000

print("X_train.shape = ", X_train.shape)
print("X_test.shape = ", X_test.shape)
print("-"*100)
print(f"Keeping only {n_train_subset} in training and {n_test_subset} for testing.")
# head(n) keeps the first n rows; features and labels are cut with the same n
# so every feature row keeps its own label.
X_train=X_train.head(n_train_subset)
X_test=X_test.head(n_test_subset)
y_train=y_train.head(n_train_subset)
y_test=y_test.head(n_test_subset)
print("-"*100)
print("X_train.shape = ", X_train.shape)
print("X_test.shape = ", X_test.shape)


X_train.shape =  (60000, 784)
X_test.shape =  (10000, 784)
----------------------------------------------------------------------------------------------------
Keeping only 6000 in training and 1000 for testing.
----------------------------------------------------------------------------------------------------
X_train.shape =  (6000, 784)
X_test.shape =  (1000, 784)


In [None]:
# If running for the first time, you need to install torchinfo
# !pip install torchinfo

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torchinfo import summary
import torch.optim as optim
# Seed PyTorch's global RNG so repeated runs start from the same random state.
torch.manual_seed(42)

<torch._C.Generator at 0x7b4026b69950>

In [None]:
# Fashion-MNIST pixels are 8-bit intensities (0-255). Feeding them unscaled makes the
# first-layer activations very large and the initial loss far higher than ln(10),
# so the features are rescaled to [0, 1]. Train and test use the same scaling.
PIXEL_MAX = 255.0
X_train_t = torch.tensor(X_train.values, dtype=torch.float32) / PIXEL_MAX
X_test_t = torch.tensor(X_test.values, dtype=torch.float32) / PIXEL_MAX
# Labels stay integer class ids in their (N, 1) column shape; the training and
# evaluation loops squeeze that extra dimension themselves.
y_train_t = torch.tensor(y_train.values, dtype=torch.long)
y_test_t = torch.tensor(y_test.values, dtype=torch.long)

In [None]:
# Sanity check: features and labels of each split should have the same row count.
print(f"X_train_t.shape = {X_train_t.shape}, y_train_t.shape = {y_train_t.shape}")
print(f"X_test_t.shape = {X_test_t.shape}, y_test_t.shape = {y_test_t.shape}")

X_train_t.shape = torch.Size([6000, 784]), y_train_t.shape = torch.Size([6000, 1])
X_test_t.shape = torch.Size([1000, 784]), y_test_t.shape = torch.Size([1000, 1])


In [None]:
# define dataset and dataloader
from torch.utils.data import Dataset,DataLoader

class custom_dataset(Dataset):
  """Map-style dataset over a pair of index-aligned containers.

  Args:
    X: features; must expose ``shape`` and support integer indexing.
    y: targets, indexed in lockstep with ``X``.

  Raises:
    ValueError: if ``X`` and ``y`` do not hold the same number of samples.
  """

  def __init__(self,X,y):
    # Fail early: a length mismatch would otherwise surface later as an
    # IndexError mid-epoch, or silently ignore the surplus targets.
    if len(y) != X.shape[0]:
      raise ValueError(
          f"X and y must have the same number of samples, got {X.shape[0]} and {len(y)}"
      )
    self.X=X
    self.y=y
    self.n_samples=X.shape[0]

  def __len__(self):
    """Return the number of samples."""
    return self.n_samples

  def __getitem__(self,index):
    """Return the ``(features, target)`` pair stored at ``index``."""
    return self.X[index],self.y[index]

train_dataset=custom_dataset(X_train_t,y_train_t)
test_dataset=custom_dataset(X_test_t,y_test_t)

# One batch size shared by both loaders.
batch_size=150
train_loader=DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
# Accuracy does not depend on sample order, so there is nothing to gain from
# shuffling the test loader.
test_loader=DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False)

# Show the shape of every test batch; the final batch may be smaller than batch_size.
for batch_X,batch_y in test_loader:
  print(f"batch_X.shape = {batch_X.shape}, batch_y.shape = {batch_y.shape}")
  print("-"*50)

batch_X.shape = torch.Size([150, 784]), batch_y.shape = torch.Size([150, 1])
--------------------------------------------------
batch_X.shape = torch.Size([150, 784]), batch_y.shape = torch.Size([150, 1])
--------------------------------------------------
batch_X.shape = torch.Size([150, 784]), batch_y.shape = torch.Size([150, 1])
--------------------------------------------------
batch_X.shape = torch.Size([150, 784]), batch_y.shape = torch.Size([150, 1])
--------------------------------------------------
batch_X.shape = torch.Size([150, 784]), batch_y.shape = torch.Size([150, 1])
--------------------------------------------------
batch_X.shape = torch.Size([150, 784]), batch_y.shape = torch.Size([150, 1])
--------------------------------------------------
batch_X.shape = torch.Size([100, 784]), batch_y.shape = torch.Size([100, 1])
--------------------------------------------------


In [None]:
## **ANN model structure**
# - input layer (784)
# - 2 hidden layers (each 128)
# - 1 output layer
# - relu in hidden layers
# - raw logits from the output layer (softmax is applied inside CrossEntropyLoss)

# Define model
class SimpleNN(nn.Module):
    """Fully connected classifier: two ReLU hidden layers, then a linear output layer.

    Args:
        input_size: number of features per sample.
        hidden_size: width shared by both hidden layers.
        output_size: number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return raw logits for ``x``; softmax is left to the loss function."""
        hidden = self.fc1(x).relu()       # hidden layer 1
        hidden = self.fc2(hidden).relu()  # hidden layer 2
        return self.fc3(hidden)           # output layer, no activation

In [None]:
# Hyperparameters
input_size = X_train_t.size(1)   # feature count read off the training tensor
hidden_size = 128
output_size = 10                 # 10 classes for Fashion MNIST
num_epochs = 50
lr = 0.001

# Network, criterion and optimizer
model = SimpleNN(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)

summary(model)

Layer (type:depth-idx)                   Param #
SimpleNN                                 --
├─Linear: 1-1                            100,480
├─Linear: 1-2                            16,512
├─Linear: 1-3                            1,290
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0

In [None]:
# Training: every epoch makes a full pass over train_loader,
# taking one optimizer step per mini-batch.
model.train()  # make the training mode explicit in case this cell is re-run after evaluation
for epoch in range(num_epochs):
    # enumerate(..., start=1) supplies the 1-based batch number used in the log line
    for batch_no, (batch_X, batch_y) in enumerate(train_loader, start=1):
        # forward pass
        outputs = model(batch_X)
        # labels arrive as (batch, 1); the loss is given the squeezed 1-D class ids
        l = loss(outputs, batch_y.squeeze(1))
        # clear stale gradients, then backpropagate
        optimizer.zero_grad()
        l.backward()
        # apply the parameter update
        optimizer.step()
        print(f"Epoch [{epoch+1}/{num_epochs}], Batch {batch_no}, Loss: {l.item():.4f}")
    print("-"*100)

Epoch [1/50], Batch 1, Loss: 18.9197
Epoch [1/50], Batch 2, Loss: 15.6825
Epoch [1/50], Batch 3, Loss: 13.7244
Epoch [1/50], Batch 4, Loss: 17.0933
Epoch [1/50], Batch 5, Loss: 12.7258
Epoch [1/50], Batch 6, Loss: 10.8626
Epoch [1/50], Batch 7, Loss: 9.7363
Epoch [1/50], Batch 8, Loss: 6.4689
Epoch [1/50], Batch 9, Loss: 5.6945
Epoch [1/50], Batch 10, Loss: 4.1110
Epoch [1/50], Batch 11, Loss: 3.8076
Epoch [1/50], Batch 12, Loss: 3.6110
Epoch [1/50], Batch 13, Loss: 1.7109
Epoch [1/50], Batch 14, Loss: 2.4474
Epoch [1/50], Batch 15, Loss: 2.7614
Epoch [1/50], Batch 16, Loss: 2.1721
Epoch [1/50], Batch 17, Loss: 1.0055
Epoch [1/50], Batch 18, Loss: 2.1075
Epoch [1/50], Batch 19, Loss: 2.0032
Epoch [1/50], Batch 20, Loss: 1.7227
Epoch [1/50], Batch 21, Loss: 1.8280
Epoch [1/50], Batch 22, Loss: 1.6201
Epoch [1/50], Batch 23, Loss: 1.1484
Epoch [1/50], Batch 24, Loss: 1.1362
Epoch [1/50], Batch 25, Loss: 1.6635
Epoch [1/50], Batch 26, Loss: 1.0426
Epoch [1/50], Batch 27, Loss: 0.9423
Epoc

In [None]:
# testing
model.eval()  # switch to inference mode before measuring accuracy
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X)
        # predicted class = index of the largest logit in each row
        predicted = outputs.argmax(dim=1)
        n_samples += batch_y.size(0)
        # labels are (batch, 1); squeeze so the comparison is element-wise on 1-D tensors
        n_correct += (predicted == batch_y.squeeze(1)).sum().item()

    acc = 100.0 * n_correct / n_samples
    # report the number of images actually evaluated instead of a hard-coded count
    print(f'Accuracy of the network on the {n_samples} test images: {acc} %')

Accuracy of the network on the 1000 test images: 82.0 %


## **Applying this ANN to the complete data**

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torchinfo import summary
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
from keras import datasets
# Seed PyTorch's global RNG so repeated runs start from the same random state.
torch.manual_seed(7)


# load dataset
(X_train, y_train), (X_test, y_test) =datasets.fashion_mnist.load_data()


# Reshape the arrays to be 2-dimensional
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
y_train = y_train.reshape(y_train.shape[0], -1)
y_test = y_test.reshape(y_test.shape[0], -1)

# convert into Dataframe
X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)
y_train = pd.DataFrame(y_train)
y_test = pd.DataFrame(y_test)

# convert into tensors
# Fashion-MNIST pixels are 8-bit intensities (0-255); rescale the features to [0, 1]
# so the network is not fed values hundreds of times larger than its initial weights.
# Train and test use the same scaling.
X_train_t = torch.tensor(X_train.values, dtype=torch.float32) / 255.0
X_test_t = torch.tensor(X_test.values, dtype=torch.float32) / 255.0
# Labels stay integer class ids in their (N, 1) column shape; the loops below squeeze it.
y_train_t = torch.tensor(y_train.values, dtype=torch.long)
y_test_t = torch.tensor(y_test.values, dtype=torch.long)

# dataset and dataloader
class custom_dataset(Dataset):
  """Map-style dataset over a pair of index-aligned containers.

  Args:
    X: features; must expose ``shape`` and support integer indexing.
    y: targets, indexed in lockstep with ``X``.

  Raises:
    ValueError: if ``X`` and ``y`` do not hold the same number of samples.
  """

  def __init__(self,X,y):
    # Fail early: a length mismatch would otherwise surface later as an
    # IndexError mid-epoch, or silently ignore the surplus targets.
    if len(y) != X.shape[0]:
      raise ValueError(
          f"X and y must have the same number of samples, got {X.shape[0]} and {len(y)}"
      )
    self.X=X
    self.y=y
    self.n_samples=X.shape[0]

  def __len__(self):
    """Return the number of samples."""
    return self.n_samples

  def __getitem__(self,index):
    """Return the ``(features, target)`` pair stored at ``index``."""
    return self.X[index],self.y[index]

# Wrap the tensors, then batch them: shuffled for training, fixed order for testing.
train_dataset = custom_dataset(X=X_train_t, y=y_train_t)
test_dataset = custom_dataset(X=X_test_t, y=y_test_t)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define model
class SimpleNN(nn.Module):
    """Fully connected classifier held in a single ``nn.Sequential`` stack.

    The stack is Linear -> ReLU -> Linear -> ReLU -> Linear, with both hidden
    layers sharing ``hidden_size`` units.

    Args:
        input_size: number of features per sample.
        hidden_size: width of each hidden layer.
        output_size: number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stack and return the output layer's raw scores."""
        logits = self.model(x)
        return logits


# Hyperparameters
input_size = X_train_t.size(1)   # feature count read off the training tensor
hidden_size = 128
output_size = 10                 # 10 classes for Fashion MNIST
num_epochs = 50
lr = 0.001

# Network, criterion and optimizer
model = SimpleNN(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)
print(summary(model))

# Training: each epoch sweeps every mini-batch of train_loader once
# and reports the mean of the per-batch losses.
for epoch in range(num_epochs):
    total_loss=0
    for batch_X, batch_y in train_loader:
        # start the step from clean gradients
        optimizer.zero_grad()
        # forward pass; labels arrive as (batch, 1) and are squeezed to 1-D class ids
        logits = model(batch_X)
        targets = batch_y.squeeze(1)
        batch_loss = loss(logits, targets)
        # backward pass followed by the parameter update
        batch_loss.backward()
        optimizer.step()
        total_loss+=batch_loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")
    print("="*65)


# testing
model.eval()  # switch to inference mode before measuring accuracy
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X)
        # predicted class = index of the largest logit in each row
        predicted = outputs.argmax(dim=1)
        n_samples += batch_y.size(0)
        # labels are (batch, 1); squeeze so the comparison is element-wise on 1-D tensors
        n_correct += (predicted == batch_y.squeeze(1)).sum().item()

    acc = 100.0 * n_correct / n_samples
    # This section evaluates the whole test split, not 1000 rows, so the message
    # reports the number of images actually counted.
    print(f'Accuracy of the network on the {n_samples} test images: {acc} %')

Layer (type:depth-idx)                   Param #
SimpleNN                                 --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       100,480
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       16,512
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       1,290
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
Epoch [1/50], Loss: 0.6414
Epoch [2/50], Loss: 0.4469
Epoch [3/50], Loss: 0.4136
Epoch [4/50], Loss: 0.3947
Epoch [5/50], Loss: 0.3823
Epoch [6/50], Loss: 0.3734
Epoch [7/50], Loss: 0.3610
Epoch [8/50], Loss: 0.3565
Epoch [9/50], Loss: 0.3482
Epoch [10/50], Loss: 0.3463
Epoch [11/50], Loss: 0.3421
Epoch [12/50], Loss: 0.3380
Epoch [13/50], Loss: 0.3285
Epoch [14/50], Loss: 0.3334
Epoch [15/50], Loss: 0.3259
Epoch [16/50], Loss: 0.3214
Epoch [17/50], Loss: 0.3229
Epoch [18/50], Loss: 0.3163
Epoch [19/50], Loss: 0.3180
Epoch [20/50], Loss: 0.3167
Epoch [2

## **Using GPU to Speed Up the Process**