In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import matplotlib.pyplot as plt
from model import CNNModel

%matplotlib inline

In [2]:
# Flag files: the training loop polls for these paths and deletes them once seen.
start_training_file, stop_training_file = (
    'models/start_training1',
    'models/stop_training1',
)

# Weights read at the start of a round / weights written at the end of a round.
global_model_file, local_model_file = (
    'models/global_trainer1.pt',
    'models/trainer1_cnn.pt',
)

# CSV holding this trainer's local data set.
data_file = 'data/trainer1.csv'

In [3]:
# Read the local data set and report which class labels it contains.
df_train = pd.read_csv(data_file)
print('Labels in this training set: ', df_train.loc[:, 'label'].unique())

Labels in this training set:  [0 1 2]


In [4]:
# Column 0 is used as the target; the next 28 * 28 = 784 columns are the
# features (they are reshaped to 1x28x28 images further down).
df_label = df_train.iloc[:, 0]
df_features = df_train.iloc[:, 1:1 + 28 * 28]

In [5]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# identical from run to run.
X_train, X_valid, y_train, y_valid = train_test_split(
    df_features,
    df_label,
    test_size=0.2,
    random_state=1234,
)

In [6]:
# Convert both feature frames to 2-D ndarrays of shape (rows, columns).
X_train, X_valid = (
    np.array(frame).reshape(frame.shape[0], frame.shape[1])
    for frame in (X_train, X_valid)
)

In [7]:
# Training features: (N, 784) ndarray -> float tensor of shape (N, 1, 28, 28).
X_train = torch.from_numpy(X_train.reshape(len(X_train), 1, 28, 28)).float()

# Training targets: pandas Series -> tensor (dtype follows the Series' dtype).
y_train = torch.from_numpy(np.array(y_train))

In [8]:
# Sanity check: features should be (N, 1, 28, 28) and targets (N,).
print(X_train.size(), y_train.size())

torch.Size([14898, 1, 28, 28]) torch.Size([14898])


In [9]:
# Validation features: (N, 784) ndarray -> float tensor of shape (N, 1, 28, 28).
X_valid = torch.from_numpy(X_valid.reshape(len(X_valid), 1, 28, 28)).float()

# Validation targets: pandas Series -> tensor (dtype follows the Series' dtype).
y_valid = torch.from_numpy(np.array(y_valid))

print(X_valid.size(), y_valid.size())

torch.Size([3725, 1, 28, 28]) torch.Size([3725])


In [10]:
batch_size = 100

# Pair each feature tensor with its target tensor.
train, valid = (
    torch.utils.data.TensorDataset(features, targets)
    for features, targets in ((X_train, y_train), (X_valid, y_valid))
)

# Fixed-order mini-batch loaders over both datasets.
# NOTE(review): the training loader does not reshuffle between epochs
# (shuffle=False) -- confirm this is intentional.
train_loader, valid_loader = (
    torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
    for dataset in (train, valid)
)

In [11]:
# Hyperparameters for local training.
# The epoch count is derived from a target number of optimizer steps:
# n_iters divided by the number of batches per epoch, truncated to an int.
n_iters = 1500
num_epochs = int(n_iters / (len(X_train) / batch_size))

print('Total Local Epochs: ', num_epochs)

learning_rate = 0.001

# Model, loss function and optimizer reused by every training round below.
model = CNNModel()
error = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

Total Local Epochs:  10


In [12]:
# Local trainer loop, driven by flag files on disk.
#
# Once per second the loop:
#   * exits if `stop_training_file` exists (the flag is deleted first);
#   * if `start_training_file` exists, deletes the flag, loads the weights in
#     `global_model_file` into `model` (and deletes that file), trains for
#     `num_epochs` passes over `train_loader`, then writes the resulting
#     weights to `local_model_file`.
#
# Every 50 optimizer steps the model is scored on `valid_loader` and the
# loss / accuracy are printed and appended to the *_list variables, which are
# reset at the start of each round.
global_epoch = 1
wait_counter = 0

# Wait for instruction from coordinator
while True:
    if os.path.exists(stop_training_file):
        os.remove(stop_training_file)
        print('Received Stop training from Coordinator. Exiting...')
        break

    if wait_counter % 30 == 0:
        print('Waiting for global model...')

    time.sleep(1)
    wait_counter = wait_counter + 1

    if os.path.exists(start_training_file):
        os.remove(start_training_file)

        # torch.load unpickles the file, so only checkpoints from a trusted
        # source must ever be placed at this path.
        model.load_state_dict(torch.load(global_model_file))
        os.remove(global_model_file)

        print('Received Global Model.')

        count = 0
        loss_list = []
        iteration_list = []
        accuracy_list = []

        # Make sure layers such as dropout / batch-norm are in training mode.
        model.train()

        for epoch in range(num_epochs):
            for images, labels in train_loader:
                # Clear gradients
                optimizer.zero_grad()

                # Forward propagation
                outputs = model(images)

                # Calculate softmax and cross entropy loss
                loss = error(outputs, labels)

                # Calculating gradients
                loss.backward()

                # Update parameters
                optimizer.step()

                count += 1
                if count % 50 == 0:
                    # Calculate accuracy on the validation set.
                    correct = 0
                    total = 0

                    # Evaluation mode + no_grad: inference-time layer
                    # behaviour and no autograd graph while scoring.
                    model.eval()
                    with torch.no_grad():
                        # Distinct names so the training batch and the
                        # `valid` dataset are not overwritten.
                        for valid_images, valid_labels in valid_loader:
                            # Forward propagation
                            valid_outputs = model(valid_images)
                            # Get predictions from the maximum value
                            predicted = torch.max(valid_outputs, 1)[1]

                            # Total number of labels
                            total += len(valid_labels)
                            correct += (predicted == valid_labels).sum().item()
                    # Back to training mode for the next optimizer step.
                    model.train()

                    accuracy = 100 * correct / float(total)
                    loss_value = loss.item()

                    # store loss and iteration (plain Python numbers)
                    loss_list.append(loss_value)
                    iteration_list.append(count)
                    accuracy_list.append(accuracy)

                    # Print Loss
                    print('Global epoch:{} Iteration: {}  Loss: {}  Accuracy: {} %'.format(global_epoch, count, loss_value, accuracy))

        print('Completed global epoch: ', global_epoch)

        global_epoch = global_epoch + 1

        # NOTE(review): the file is written in place; if another process polls
        # for this path it could observe a partially written file -- confirm.
        torch.save(model.state_dict(), local_model_file)
        print('Saved local model file.')

Waiting for global model...
Received Global Model.
Global epoch:1 Iteration: 50  Loss: 0.5738599896430969  Accuracy: 98.17449951171875 %
Global epoch:1 Iteration: 100  Loss: 0.08205222338438034  Accuracy: 98.81879425048828 %
Global epoch:1 Iteration: 150  Loss: 0.0639127641916275  Accuracy: 99.14093780517578 %
Global epoch:1 Iteration: 200  Loss: 0.0026888602878898382  Accuracy: 99.0604019165039 %
Global epoch:1 Iteration: 250  Loss: 0.0034260607790201902  Accuracy: 99.24832153320312 %
Global epoch:1 Iteration: 300  Loss: 0.006135350558906794  Accuracy: 99.19463348388672 %
Global epoch:1 Iteration: 350  Loss: 0.00011745021038223058  Accuracy: 99.19463348388672 %
Global epoch:1 Iteration: 400  Loss: 0.0004404490173328668  Accuracy: 99.08724975585938 %
Global epoch:1 Iteration: 450  Loss: 0.006162415724247694  Accuracy: 99.35570526123047 %
Global epoch:1 Iteration: 500  Loss: 5.341080031939782e-05  Accuracy: 99.11409759521484 %
Global epoch:1 Iteration: 550  Loss: 0.0057860310189425945  

Global epoch:4 Iteration: 100  Loss: 0.0005318419425748289  Accuracy: 99.43624114990234 %
Global epoch:4 Iteration: 150  Loss: 0.07026495784521103  Accuracy: 99.51677703857422 %
Global epoch:4 Iteration: 200  Loss: 0.007347089238464832  Accuracy: 99.70469665527344 %
Global epoch:4 Iteration: 250  Loss: 0.0034401696175336838  Accuracy: 99.51677703857422 %
Global epoch:4 Iteration: 300  Loss: 0.002253333106637001  Accuracy: 99.62416076660156 %
Global epoch:4 Iteration: 350  Loss: 0.0033796983771026134  Accuracy: 99.78523254394531 %
Global epoch:4 Iteration: 400  Loss: 4.572238685796037e-05  Accuracy: 99.43624114990234 %
Global epoch:4 Iteration: 450  Loss: 3.2186148018809035e-05  Accuracy: 99.75839233398438 %
Global epoch:4 Iteration: 500  Loss: 0.000260725908447057  Accuracy: 99.75839233398438 %
Global epoch:4 Iteration: 550  Loss: 0.0006431341171264648  Accuracy: 99.62416076660156 %
Global epoch:4 Iteration: 600  Loss: 0.00015633006114512682  Accuracy: 99.62416076660156 %
Global epoch:

Global epoch:7 Iteration: 200  Loss: 0.003961704205721617  Accuracy: 99.67784881591797 %
Global epoch:7 Iteration: 250  Loss: 0.0006350954063236713  Accuracy: 99.5973129272461 %
Global epoch:7 Iteration: 300  Loss: 0.0004968259017914534  Accuracy: 99.67784881591797 %
Global epoch:7 Iteration: 350  Loss: 1.3600302736449521e-05  Accuracy: 99.67784881591797 %
Global epoch:7 Iteration: 400  Loss: 2.598752644189517e-07  Accuracy: 99.57047271728516 %
Global epoch:7 Iteration: 450  Loss: 4.9724530981620774e-05  Accuracy: 99.75839233398438 %
Global epoch:7 Iteration: 500  Loss: 0.00015791530313435942  Accuracy: 99.75839233398438 %
Global epoch:7 Iteration: 550  Loss: 7.003155769780278e-05  Accuracy: 99.67784881591797 %
Global epoch:7 Iteration: 600  Loss: 4.063384949404281e-06  Accuracy: 99.7315444946289 %
Global epoch:7 Iteration: 650  Loss: 5.786777910543606e-05  Accuracy: 99.75839233398438 %
Global epoch:7 Iteration: 700  Loss: 0.0001171232943306677  Accuracy: 99.78523254394531 %
Global epo

Global epoch:10 Iteration: 300  Loss: 0.0006239270442165434  Accuracy: 99.67784881591797 %
Global epoch:10 Iteration: 350  Loss: 7.39970846552751e-06  Accuracy: 99.67784881591797 %
Global epoch:10 Iteration: 400  Loss: 1.2016057553410064e-06  Accuracy: 99.67784881591797 %
Global epoch:10 Iteration: 450  Loss: 0.0005729268305003643  Accuracy: 99.7315444946289 %
Global epoch:10 Iteration: 500  Loss: 1.5735178749309853e-05  Accuracy: 99.75839233398438 %
Global epoch:10 Iteration: 550  Loss: 0.0006802360876463354  Accuracy: 99.70469665527344 %
Global epoch:10 Iteration: 600  Loss: 3.1169241992756724e-05  Accuracy: 99.70469665527344 %
Global epoch:10 Iteration: 650  Loss: 1.6143900211318396e-05  Accuracy: 99.70469665527344 %
Global epoch:10 Iteration: 700  Loss: 0.00043255608761683106  Accuracy: 99.78523254394531 %
Global epoch:10 Iteration: 750  Loss: 4.5526667236117646e-05  Accuracy: 99.78523254394531 %
Global epoch:10 Iteration: 800  Loss: 3.6428020848688902e-06  Accuracy: 99.81208038330

KeyboardInterrupt: 

In [None]:
# df_train = pd.read_csv('data/trainer1.csv')
# df_features = df_train.iloc[:, 1:785]
# df_label = df_train.iloc[:, 0]
# X_train, X_valid, y_train, y_valid = train_test_split(df_features, df_label, 
#                                                       test_size = 0.2,
#                                                       random_state = 1234)
# X_valid = np.array(X_valid).reshape(X_valid.shape[0], X_valid.shape[1])

In [None]:
# sample = 10
# img = X_valid[sample] #shape (784,)
# img = img.reshape(1, 1, 28, 28) #shape (1,1,28,28)
# img  = torch.from_numpy(img).float() #tensor

# prediction = model(img).detach().numpy()[0].argmax()
# print(prediction)

In [None]:
# fig = plt.figure()
# plt.imshow(X_valid[sample].reshape(28,28), cmap='gray')
# plt.show()