In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import glob
import json
import numpy
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
#import matplotlib.pyplot as plt
from numpy import vstack
from sklearn.metrics import accuracy_score
from PIL import Image
from google.colab import drive
# Mount Google Drive at /content/drive (uses google.colab, so Colab only).
# dir_path in the next cell points inside this mount.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root folder of the FEMNIST JSON files on the mounted Drive. The code below
# globs "<dir_path>/train/*.json" and "<dir_path>/test/*.json".
dir_path = "/content/drive/My Drive/Capstone/FEMNIST"

def getUsers(data_dir=None):
  """Collect every client ID listed in the training JSON files.

  Args:
    data_dir: Dataset root containing a ``train`` sub-folder. When omitted,
      the notebook-level ``dir_path`` is used.

  Returns:
    A list holding the ``'users'`` entries of every ``train/*.json`` file,
    concatenated in sorted-filename order.
  """
  root = dir_path if data_dir is None else data_dir
  users = []
  # glob.glob() yields files in filesystem-dependent order; sorting makes
  # positional picks such as users[80] name the same client on every run.
  for file in sorted(glob.glob(root + "/train/*.json")):
    print("processing: " + file)
    with open(file) as f:
      shard = json.load(f)
    users.extend(shard['users'])
  return users

def GetFile(path, clientID):
  """Return the first file matching ``path`` whose 'users' list has ``clientID``.

  ``path`` is a glob pattern. Each matching file is parsed as JSON in the
  order glob yields it. The result is None when no file lists the client.
  """
  for candidate in glob.glob(path):
    with open(candidate) as handle:
      listed_users = json.load(handle)['users']
    if any(user == clientID for user in listed_users):
      return candidate
  return None

# Build the list of client IDs once; later cells pick a client from it by index.
users = getUsers()
# Last expression of the cell, so the notebook displays the client count.
len(users)

In [None]:
#train_data = torchvision.datasets.EMNIST('.', 'mnist', download='True', train='True', transform=transforms.ToTensor())
#test_data = torchvision.datasets.EMNIST('.', 'mnist', download='True', train='False', transform=transforms.ToTensor())


class FEMNISTDataset(Dataset):
  """One FEMNIST client's samples, read from the JSON files under ``dir_path``.

  Each item is an ``(image, label)`` pair: the image is a float32 tensor of
  shape (1, 28, 28) and the label is a tensor built from the stored label.
  """

  def __init__(self, test, clientID):
    """Load the samples of ``clientID``.

    Args:
      test: Truthy to read from ``test/*.json``, falsy for ``train/*.json``.
      clientID: Entry of a file's ``'users'`` list naming the client.

    Raises:
      ValueError: If no file in the chosen split lists ``clientID``.
    """
    self.images = []
    self.labels = []
    # Find file with the client ID, get its data
    split = "test" if test else "train"
    path = dir_path + "/" + split + "/*.json"
    file = GetFile(path, clientID)
    if file is None:
      # GetFile reports "not found" as None; fail here with a readable
      # message instead of letting open(None) raise an opaque TypeError.
      raise ValueError("client " + repr(clientID) + " not found in " + path)
    with open(file) as f:
      shard = json.load(f)
    index = shard['users'].index(clientID)
    self.numSamples = shard['num_samples'][index]
    print(str(clientID) + " has " + str(self.numSamples) + " samples")
    self.images = shard['user_data'][clientID]['x']
    self.labels = shard['user_data'][clientID]['y']

  def __len__(self):
    """Sample count taken from the file's 'num_samples' entry for this client."""
    return self.numSamples

  def __getitem__(self, idx):
    """Return sample ``idx`` as ``(float32 image of shape (1, 28, 28), label tensor)``."""
    image = numpy.array(self.images[idx]).reshape(1,28,28)
    img_tensor = torch.from_numpy(image).float()
    label_id = torch.tensor(self.labels[idx])
    return img_tensor, label_id



# Single-client experiment: take the client at position 80 of the list
# returned by getUsers() (the notebook records no reason for this index).
client_id = users[80]
train_data = FEMNISTDataset(False, client_id)
test_data = FEMNISTDataset(True, client_id)

# Reshuffle the training samples every epoch so SGD does not replay the same
# fixed batch sequence; the evaluation loader keeps its original settings.
trainLoader = DataLoader(train_data, batch_size=64, shuffle=True)
testLoader = DataLoader(test_data, batch_size=1024)

#img, label = train_data[2]
#plt.figure()
#plt.title(label)
#plt.axis("off")
#plt.imshow(img, cmap="gray")
#plt.show()

f0128_00 has 299 samples
f0128_00 has 34 samples


In [None]:
class CustomCNN(nn.Module):
    """Two conv/pool stages followed by a two-layer classifier with 62 outputs.

    Expects input of shape (N, 1, 28, 28). Each unpadded 5x5 convolution
    followed by a 2x2 max-pool takes the spatial size 28 -> 24 -> 12 -> 8 -> 4,
    which is where the 4*4*48 flattened feature size comes from.
    """

    def __init__(self):
        super(CustomCNN, self).__init__()
        self.conv = nn.Sequential(
                    nn.Conv2d(1, 24, kernel_size = (5,5), stride=1, padding=0),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(2, stride=2),
                    nn.Conv2d(24, 48, kernel_size = (5,5), stride=1, padding=0),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(2, stride=2))
        # The layer layout (and so the state_dict keys fc.0.* / fc.1.*) is
        # kept unchanged; the activation between the layers lives in forward().
        self.fc = nn.Sequential(
            nn.Linear(in_features= 4*4*48, out_features= 256),
            nn.Linear(in_features= 256, out_features= 62))

    def forward(self, x):
        """Return raw class scores of shape (N, 62); no softmax is applied."""
        x = self.conv(x)
        x = torch.flatten(x, start_dim=1, end_dim=-1)
        # Two Linear layers applied back to back compose into one affine map,
        # so the 256-unit hidden layer would add no capacity. A ReLU between
        # them makes the classifier head genuinely two-layer.
        x = F.relu(self.fc[0](x))
        x = self.fc[1](x)
        return x
        

In [None]:
def train_model(trainLoader, model, epochs, evalLoader=None, lr=0.01, momentum=0.9):
    """Train ``model`` with SGD on cross-entropy loss, printing accuracy after each epoch.

    Args:
        trainLoader: Iterable of ``(inputs, targets)`` training batches.
        model: Module to optimise in place.
        epochs: Number of passes over ``trainLoader``.
        evalLoader: Batches handed to ``evaluate_model`` after every epoch.
            When omitted, the notebook-level ``testLoader`` is used, which is
            what three-argument calls have always evaluated on.
        lr: SGD learning rate.
        momentum: SGD momentum.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # enumerate epochs
    for epoch in range(epochs):
        print("starting epoch %d" % epoch)
        # Put the model in training mode at the start of every epoch, in case
        # the evaluation step (or the caller) left it in eval mode.
        model.train()
        # enumerate mini batches
        for inputs, targets in trainLoader:
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs)
            # calculate loss
            loss = criterion(yhat, targets)
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()
        # Resolve the fallback only when it is needed, so the global is not
        # touched unless an evaluation actually runs.
        loader = testLoader if evalLoader is None else evalLoader
        acc = evaluate_model(loader, model)
        print('Accuracy: %.3f' % acc)


In [None]:
# evaluate the model
def evaluate_model(testLoader, model):
    """Return the fraction of samples in ``testLoader`` that ``model`` classifies correctly.

    Args:
        testLoader: Iterable of ``(inputs, targets)`` batches, where targets
            hold class indices.
        model: Module whose output has one score per class along dim 1.

    Returns:
        Accuracy as a Python float between 0 and 1.

    Raises:
        ValueError: If ``testLoader`` yields no samples.
    """
    correct = 0
    total = 0
    # Evaluate in eval mode, then restore whatever mode the caller had so a
    # surrounding training loop is not silently left in eval mode.
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for scoring; skipping autograd avoids
        # building a graph for every evaluation batch.
        with torch.no_grad():
            for inputs, targets in testLoader:
                # predicted class = index of the largest score
                predicted = model(inputs).argmax(dim=1)
                flat_targets = targets.reshape(-1)
                correct += (predicted == flat_targets).sum().item()
                total += flat_targets.numel()
    finally:
        model.train(was_training)
    if total == 0:
        raise ValueError("testLoader yielded no samples to evaluate")
    # calculate accuracy
    return correct / total

In [None]:
# Seed PyTorch's global RNG so weight initialisation (and any shuffling the
# data loader performs) is repeatable after Restart & Run All.
torch.manual_seed(0)
model = CustomCNN()
print(model)
train_model(trainLoader, model, 100)
print('Finished')

CustomCNN(
  (conv): Sequential(
    (0): Conv2d(1, 24, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(24, 48, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=768, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=62, bias=True)
  )
)
starting epoch 0
Accuracy: 0.000
starting epoch 1
Accuracy: 0.059
starting epoch 2
Accuracy: 0.059
starting epoch 3
Accuracy: 0.059
starting epoch 4
Accuracy: 0.000
starting epoch 5
Accuracy: 0.059
starting epoch 6
Accuracy: 0.059
starting epoch 7
Accuracy: 0.059
starting epoch 8
Accuracy: 0.059
starting epoch 9
Accuracy: 0.059
starting epoch 10
Accuracy: 0.059
starting epoch 11
Accuracy: 0.059
starting epoch 12
Accuracy: 0.059
starting epoch 13
Accuracy: 0.059
startin