In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torchinfo import summary

In [3]:
# Seed PyTorch's global RNG so runs of this notebook are repeatable.
SEED = 42
torch.manual_seed(SEED)

<torch._C.Generator at 0x135291290>

In [4]:
# Load the dataset CSV; the path is relative to the notebook's working directory.
DATA_PATH = '../2. Dataset/fmnist_small.csv'
df = pd.read_csv(DATA_PATH)


In [5]:
# Column 0 is the target; every remaining column is a feature.
y = df.iloc[:, 0]
# Divide features by 255 (presumably 8-bit pixel intensities -> [0, 1]; confirm against the CSV).
x = df.iloc[:, 1:] / 255.0

In [6]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=20,
)

In [7]:
# Features: pandas -> NumPy -> float32 tensors.
xtrain_tensor = torch.from_numpy(xtrain.values).to(torch.float32)
xtest_tensor = torch.from_numpy(xtest.values).to(torch.float32)

# Labels: converted as-is, so they keep whatever dtype pandas parsed for column 0.
ytrain_tensor = torch.from_numpy(ytrain.values)
ytest_tensor = torch.from_numpy(ytest.values)

In [8]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
  """Map-style dataset that pairs each feature row with its label.

  Args:
    features: Sized, indexable collection of samples (e.g. a 2-D tensor).
    labels: Sized, indexable collection of targets, one per sample.

  Raises:
    ValueError: If ``features`` and ``labels`` differ in length.
  """

  def __init__(self, features, labels):
    # Fail fast: __len__ is driven by features alone, so a mismatch would otherwise
    # surface as an IndexError mid-epoch or silently ignore the extra labels.
    if len(features) != len(labels):
      raise ValueError(
          'features and labels must have the same length, '
          f'got {len(features)} and {len(labels)}'
      )

    self.features = features
    self.labels = labels

  def __len__(self):
    """Return the number of samples."""
    return len(self.features)

  def __getitem__(self, idx):
    """Return the ``(features, label)`` pair stored at position ``idx``."""
    return self.features[idx], self.labels[idx]


In [9]:
# Wrap the tensor splits so DataLoader can batch them.
train_dataset = CustomDataset(xtrain_tensor, ytrain_tensor)
test_dataset = CustomDataset(xtest_tensor, ytest_tensor)

BATCH_SIZE = 32

# Pinned (page-locked) host memory only speeds up host-to-GPU copies on CUDA. On
# CPU or MPS it is unused and DataLoader warns about it, so enable it only when
# CUDA is actually available.
PIN_MEMORY = torch.cuda.is_available()

# Shuffle training batches every epoch; keep the test order fixed, since the
# evaluation metric does not depend on batch order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=PIN_MEMORY)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=PIN_MEMORY)

In [10]:
class MyNN(nn.Module):
  """Fully connected classifier: Linear+ReLU hidden layers, then a linear output layer.

  With the default arguments this builds num_features -> 128 -> 64 -> 10.

  Args:
    num_features: Size of each input sample.
    hidden_dims: Widths of the hidden layers, in order. Defaults to ``(128, 64)``.
    num_classes: Number of output logits. Defaults to ``10``.
  """

  def __init__(self, num_features, hidden_dims=(128, 64), num_classes=10):

    super().__init__()

    layers = []
    in_dim = num_features
    for out_dim in hidden_dims:
      layers.append(nn.Linear(in_dim, out_dim))
      layers.append(nn.ReLU())
      in_dim = out_dim

    # The last layer has no activation: the network returns raw logits.
    layers.append(nn.Linear(in_dim, num_classes))

    # Kept as `self.network` (a Sequential) so state-dict keys stay `network.<idx>.*`.
    self.network = nn.Sequential(*layers)

  def forward(self, features):
    """Run ``features`` through the stack and return the logits."""
    return self.network(features)

In [11]:
# Pick the compute device: CUDA GPU first, then Apple-silicon MPS, otherwise CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
    print("CUDA is available")
# Guard on torch.backends.mps (the attribute actually used below); older PyTorch
# builds may not ship it at all.
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = 'mps'
    print("MPS is available")

MPS is available


In [12]:
# Size the input layer from the training matrix, then move the weights onto `device`.
num_features = xtrain_tensor.shape[1]
model = MyNN(num_features).to(device)

# Pass the device explicitly so torchinfo's forward pass runs where the weights
# live; otherwise it may pick its own default device and fail at runtime.
summary(model, input_size=xtrain_tensor.shape, device=device)

Layer (type:depth-idx)                   Output Shape              Param #
MyNN                                     [4800, 10]                --
├─Sequential: 1-1                        [4800, 10]                --
│    └─Linear: 2-1                       [4800, 128]               100,480
│    └─ReLU: 2-2                         [4800, 128]               --
│    └─Linear: 2-3                       [4800, 64]                8,256
│    └─ReLU: 2-4                         [4800, 64]                --
│    └─Linear: 2-5                       [4800, 10]                650
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 525.05
Input size (MB): 15.05
Forward/backward pass size (MB): 7.76
Params size (MB): 0.44
Estimated Total Size (MB): 23.25

In [None]:
# Loss: cross-entropy between the model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

# Training hyperparameters.
epochs = 10
learning_rate = 0.1

# Plain SGD over every model parameter.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [None]:
# Ensure training mode: a later cell calls model.eval(), so re-running this cell
# afterwards would otherwise train with the model still in evaluation mode.
model.train()

for epoch in range(epochs):

  total_epoch_loss = 0.0

  for batch_features, batch_labels in train_loader:

    # Move the batch to the model's device. .to(device) returns a copy on that
    # device (or the same tensor if it is already there).
    # Alternative: move the full tensors to the device once before building the
    # datasets, so every batch already lives on the GPU.
    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)

    # Forward pass and loss.
    outputs = model(batch_features)
    loss = criterion(outputs, batch_labels)

    # Clear stale gradients, backpropagate, then update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() converts the scalar loss tensor to a Python float.
    total_epoch_loss += loss.item()

  # Mean of the per-batch losses for this epoch.
  avg_loss = total_epoch_loss / len(train_loader)
  print(f'Epoch: {epoch + 1} , Loss: {avg_loss}')


In [None]:
# Put the model in evaluation mode before measuring accuracy. This only changes
# layers that behave differently during training (e.g. dropout, batch-norm).
model.eval()

In [None]:
# Make this cell self-contained: ensure evaluation mode even if it is run on its own.
model.eval()

total = 0
correct = 0

# Gradients are not needed for evaluation.
with torch.no_grad():

  for batch_features, batch_labels in test_loader:

    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    outputs = model(batch_features)

    # Predicted class = index of the largest logit along dim 1 (one per row).
    # Using argmax instead of `_, predicted = torch.max(...)` avoids binding `_`,
    # which in a notebook shadows IPython's last-output variable.
    predicted = outputs.argmax(dim=1)

    total += batch_labels.shape[0]
    correct += (predicted == batch_labels).sum().item()

# Guard against an empty test loader rather than raising ZeroDivisionError.
accuracy = correct / total if total else float('nan')
print(accuracy)
