In [9]:
# Importing the required libraries
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random
import os
import pickle
import time
from sklearn.utils import shuffle
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

In [10]:
# Setup TensorFlow
EPOCHS = 10
BATCH_SIZE = 128
LEARNING_RATE = 0.01
# Importing the MNIST dataset
# Load the MNIST dataset
data = np.load('./mnist.npz')
X_train, y_train = torch.from_numpy(data['x_train']), torch.from_numpy(data['y_train'])
X_test, y_test = torch.from_numpy(data['x_test']), torch.from_numpy(data['y_test'])


In [11]:
# Define the LeNet-5 architecture
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.avg_pool2d(x, 2)
        x = nn.functional.relu(self.conv2(x))
        x = nn.functional.avg_pool2d(x, 2)
        x = x.view(-1, 16 * 4 * 4)
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [12]:
from torch.utils.data import TensorDataset, DataLoader
X_train = X_train.reshape(-1, 1, 28, 28)/255
batch_size = 128
train_dataset = TensorDataset(X_train.float(), y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
#test_dataset
X_test = X_test.reshape(-1, 1, 28, 28)/255
test_dataset = TensorDataset(X_test.float(), y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Initialize the model and optimizer
model = LeNet5()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Train the model
num_epochs = 10

criterion = nn.CrossEntropyLoss()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    # Evaluate the model
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100.0 * correct / len(test_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%")


Epoch 1/10, Test Loss: 0.0023, Accuracy: 91.13%
Epoch 2/10, Test Loss: 0.0013, Accuracy: 94.96%
Epoch 3/10, Test Loss: 0.0009, Accuracy: 96.40%
Epoch 4/10, Test Loss: 0.0007, Accuracy: 97.08%
Epoch 5/10, Test Loss: 0.0006, Accuracy: 97.47%
Epoch 6/10, Test Loss: 0.0006, Accuracy: 97.34%
Epoch 7/10, Test Loss: 0.0006, Accuracy: 97.52%
Epoch 8/10, Test Loss: 0.0005, Accuracy: 98.09%
Epoch 9/10, Test Loss: 0.0004, Accuracy: 98.43%
Epoch 10/10, Test Loss: 0.0004, Accuracy: 98.48%


In [13]:
#save the model
torch.save(model.state_dict(), 'lenet5.pth')

In [None]:
import pickle
 
# Save model
with open("iris-model.pickle", "wb") as fp:
    pickle.dump(model.state_dict(), fp)
    
# Create new model and load states
newmodel = LeNet5()
with open("iris-model.pickle", "rb") as fp:
    newmodel.load_state_dict(pickle.load(fp))

In [72]:
state_dict = model.state_dict()
keys=list(state_dict.keys())
print(keys)
for key in keys:
    split=key.split('.')
    print(state_dict[key].cpu().numpy().shape)
    if split[1]=='weight':
        name="W_"+split[0]+".npy"
    else:
        name="b_"+split[0]+".npy"
    if 'fc' in split[0] and 'weight' in split[1]:
        np.save(name,state_dict[key].cpu().numpy().transpose())
    else:
        np.save(name,state_dict[key].cpu().numpy())
    

['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias']
(6, 1, 5, 5)
(6,)
(16, 6, 5, 5)
(16,)
(120, 784)
(120,)
(84, 120)
(84,)
(10, 84)
(10,)
