In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import matplotlib.pyplot as plt
from model import CNNModel

%matplotlib inline

In [2]:
# Input data for this trainer.
data_file = "data/trainer2.csv"

# Flag files polled by the training loop: their presence signals "start a
# local round" / "stop and exit" (presumably created by the coordinator).
start_training_file = "models/start_training2"
stop_training_file = "models/stop_training2"

# Checkpoints: the global weights that are loaded before a local round, and
# the locally trained weights that are written once the round has finished.
global_model_file = "models/global_trainer2.pt"
local_model_file = "models/trainer2_cnn.pt"

In [3]:
# Read this trainer's CSV and report which distinct values occur in its
# 'label' column.
df_train = pd.read_csv(data_file)
# df_train.head()
labels_present = df_train['label'].unique()
print('Labels in this training set: ', labels_present)

Labels in this training set:  [5 4 3]


In [4]:
# The first column is taken as the target; the next 784 columns (positions
# 1..784) are taken as the input features.
label_position = 0
feature_positions = slice(1, 785)

df_label = df_train.iloc[:, label_position]
df_features = df_train.iloc[:, feature_positions]

In [5]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split identical between runs.
split_result = train_test_split(
    df_features,
    df_label,
    test_size=0.2,
    random_state=1234,
)
X_train, X_valid, y_train, y_valid = split_result

In [6]:
# Convert both feature sets to plain 2-D NumPy arrays (rows x columns).
n_train_rows, n_train_cols = X_train.shape[0], X_train.shape[1]
X_train = np.array(X_train).reshape(n_train_rows, n_train_cols)

n_valid_rows, n_valid_cols = X_valid.shape[0], X_valid.shape[1]
X_valid = np.array(X_valid).reshape(n_valid_rows, n_valid_cols)

In [7]:
# Lay every flat training row out as a (1, 28, 28) block and convert the
# result to a float tensor.
train_images = X_train.reshape(X_train.shape[0], 1, 28, 28)
X_train = torch.from_numpy(train_images).float()

# The training labels become a tensor as well (dtype follows the NumPy array).
train_targets = np.array(y_train)
y_train = torch.from_numpy(train_targets)

In [8]:
# Sanity check: sizes of the training inputs and targets.
print(X_train.size(), y_train.size())

torch.Size([13915, 1, 28, 28]) torch.Size([13915])


In [9]:
# Same conversion as for the training data: rows -> (1, 28, 28) blocks ->
# float tensor, and labels -> tensor.
valid_images = X_valid.reshape(X_valid.shape[0], 1, 28, 28)
X_valid = torch.from_numpy(valid_images).float()

valid_targets = np.array(y_valid)
y_valid = torch.from_numpy(valid_targets)

# Sanity check: sizes of the validation inputs and targets.
print(X_valid.shape, y_valid.shape)

torch.Size([3479, 1, 28, 28]) torch.Size([3479])


In [10]:
batch_size = 100

# Pair inputs with targets so they can be batched together.
train = torch.utils.data.TensorDataset(X_train, y_train)
valid = torch.utils.data.TensorDataset(X_valid, y_valid)

# Both loaders use the same batch size and iterate in dataset order.
# NOTE(review): shuffle=False also applies to the training loader, so every
# epoch visits the same mini-batches in the same order - confirm this is
# intended.
loader_options = dict(batch_size=batch_size, shuffle=False)
train_loader = torch.utils.data.DataLoader(train, **loader_options)
valid_loader = torch.utils.data.DataLoader(valid, **loader_options)

In [11]:
# Hyperparameters for local training.
n_iters = 1500

# One epoch performs len(train_loader) optimizer steps (a final partial batch
# counts as a full step), so the epoch count is derived from whole batches.
# It is clamped to at least 1: for a large dataset the quotient would
# otherwise truncate to 0 and a local round would silently train nothing.
steps_per_epoch = max(1, len(train_loader))
num_epochs = max(1, n_iters // steps_per_epoch)

print('Total Local Epochs: ', num_epochs)

# Loss used by the training loop (applied to the raw model outputs).
error = nn.CrossEntropyLoss()

learning_rate = 0.001
model = CNNModel()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

Total Local Epochs:  10


In [12]:
def _evaluate_accuracy(net, loader):
    """Return the accuracy of ``net`` on ``loader`` as a percentage (float).

    The pass runs in evaluation mode with gradient tracking disabled, so no
    autograd graph is built and train-only layers (if the model has any) do
    not distort the measurement. The previous train/eval mode is restored
    afterwards. Returns 0.0 when the loader yields no samples.
    """
    was_training = net.training
    net.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for batch_images, batch_labels in loader:
                # Predicted class = index of the largest output per sample.
                predicted = torch.max(net(batch_images), 1)[1]
                total += len(batch_labels)
                correct += (predicted == batch_labels).sum().item()
    finally:
        net.train(was_training)
    return 100.0 * correct / total if total else 0.0


global_epoch = 1
wait_counter = 0

# Wait for instructions from the coordinator. Once per second the loop checks
# for two flag files: the stop flag ends the loop, the start flag triggers one
# round of local training on the freshly received global weights.
while True:
    if os.path.exists(stop_training_file):
        os.remove(stop_training_file)
        print('Received Stop training from Coordinator. Exiting...')
        break

    if wait_counter % 30 == 0:
        print('Waiting for global model...')

    time.sleep(1)
    wait_counter = wait_counter + 1

    if os.path.exists(start_training_file):
        os.remove(start_training_file)

        # NOTE(review): torch.load unpickles the file - only load checkpoints
        # that come from a trusted coordinator.
        model.load_state_dict(torch.load(global_model_file))
        os.remove(global_model_file)

        print('Received Global Model.')

        # Per-round history, sampled every 50 optimizer steps (plain floats/ints).
        count = 0
        loss_list = []
        iteration_list = []
        accuracy_list = []

        for epoch in range(num_epochs):
            for batch_images, batch_labels in train_loader:
                # Clear gradients
                optimizer.zero_grad()

                # Forward propagation
                outputs = model(batch_images)

                # Calculate softmax and cross entropy loss
                loss = error(outputs, batch_labels)

                # Calculating gradients
                loss.backward()

                # Update parameters
                optimizer.step()

                count += 1
                if count % 50 == 0:
                    accuracy = _evaluate_accuracy(model, valid_loader)
                    loss_value = loss.item()

                    # Store loss, iteration and accuracy
                    loss_list.append(loss_value)
                    iteration_list.append(count)
                    accuracy_list.append(accuracy)

                    # Print progress
                    print('Global epoch:{} Iteration: {}  Loss: {}  Accuracy: {} %'.format(global_epoch, count, loss_value, accuracy))

        print('Completed global epoch: ', global_epoch)

        global_epoch = global_epoch + 1

        torch.save(model.state_dict(), local_model_file)
        print('Saved local model file.')

        # Restart the counter so the "Waiting..." notice shows up again on the
        # next pass instead of up to 30 seconds later.
        wait_counter = 0

Waiting for global model...
Received Global Model.
Global epoch:1 Iteration: 50  Loss: 0.12222348153591156  Accuracy: 96.26329040527344 %
Global epoch:1 Iteration: 100  Loss: 0.10073047876358032  Accuracy: 97.58551025390625 %
Global epoch:1 Iteration: 150  Loss: 0.08646978437900543  Accuracy: 98.44783020019531 %
Global epoch:1 Iteration: 200  Loss: 0.012635942548513412  Accuracy: 97.9304428100586 %
Global epoch:1 Iteration: 250  Loss: 0.13974490761756897  Accuracy: 98.67778015136719 %
Global epoch:1 Iteration: 300  Loss: 0.013070127926766872  Accuracy: 98.82150268554688 %
Global epoch:1 Iteration: 350  Loss: 0.0016926564276218414  Accuracy: 98.82150268554688 %
Global epoch:1 Iteration: 400  Loss: 0.09645015746355057  Accuracy: 98.96521759033203 %
Global epoch:1 Iteration: 450  Loss: 0.07250909507274628  Accuracy: 98.96521759033203 %
Global epoch:1 Iteration: 500  Loss: 0.007952871732413769  Accuracy: 99.022705078125 %
Global epoch:1 Iteration: 550  Loss: 0.007553511764854193  Accuracy:

Global epoch:4 Iteration: 300  Loss: 0.0010132527677342296  Accuracy: 99.42512512207031 %
Global epoch:4 Iteration: 350  Loss: 0.0005245365318842232  Accuracy: 99.45386505126953 %
Global epoch:4 Iteration: 400  Loss: 0.07732358574867249  Accuracy: 99.42512512207031 %
Global epoch:4 Iteration: 450  Loss: 0.0225185826420784  Accuracy: 99.3388900756836 %
Global epoch:4 Iteration: 500  Loss: 0.00016476721793878824  Accuracy: 99.54010009765625 %
Global epoch:4 Iteration: 550  Loss: 0.0015218661865219474  Accuracy: 99.56884002685547 %
Global epoch:4 Iteration: 600  Loss: 0.0015186486998572946  Accuracy: 99.5113525390625 %
Global epoch:4 Iteration: 650  Loss: 0.00036665171501226723  Accuracy: 99.48261260986328 %
Global epoch:4 Iteration: 700  Loss: 3.0410688850679435e-05  Accuracy: 99.45386505126953 %
Global epoch:4 Iteration: 750  Loss: 0.003637798363342881  Accuracy: 99.42512512207031 %
Global epoch:4 Iteration: 800  Loss: 3.719244705280289e-05  Accuracy: 99.42512512207031 %
Global epoch:4 

Global epoch:7 Iteration: 550  Loss: 0.00020342347852420062  Accuracy: 99.42512512207031 %
Global epoch:7 Iteration: 600  Loss: 5.043663986725733e-05  Accuracy: 99.5113525390625 %
Global epoch:7 Iteration: 650  Loss: 5.64543079235591e-05  Accuracy: 99.48261260986328 %
Global epoch:7 Iteration: 700  Loss: 3.0208744647097774e-05  Accuracy: 99.48261260986328 %
Global epoch:7 Iteration: 750  Loss: 0.00011117743270006031  Accuracy: 99.56884002685547 %
Global epoch:7 Iteration: 800  Loss: 3.4186471111752326e-06  Accuracy: 99.59758758544922 %
Global epoch:7 Iteration: 850  Loss: 0.00011134427768411115  Accuracy: 99.56884002685547 %
Global epoch:7 Iteration: 900  Loss: 4.92294057039544e-05  Accuracy: 99.56884002685547 %
Global epoch:7 Iteration: 950  Loss: 9.698987923911773e-06  Accuracy: 99.62632751464844 %
Global epoch:7 Iteration: 1000  Loss: 0.0009090485400520265  Accuracy: 99.56884002685547 %
Global epoch:7 Iteration: 1050  Loss: 1.5206230273179244e-05  Accuracy: 99.56884002685547 %
Globa

Global epoch:10 Iteration: 800  Loss: 6.02750078542158e-05  Accuracy: 99.6838150024414 %
Global epoch:10 Iteration: 850  Loss: 1.5454323147423565e-05  Accuracy: 99.65507507324219 %
Global epoch:10 Iteration: 900  Loss: 5.125894517732377e-07  Accuracy: 99.6838150024414 %
Global epoch:10 Iteration: 950  Loss: 2.9692224416066892e-05  Accuracy: 99.6838150024414 %
Global epoch:10 Iteration: 1000  Loss: 1.9023853383259848e-05  Accuracy: 99.65507507324219 %
Global epoch:10 Iteration: 1050  Loss: 3.746981019503437e-05  Accuracy: 99.6838150024414 %
Global epoch:10 Iteration: 1100  Loss: 9.476948616793379e-05  Accuracy: 99.6838150024414 %
Global epoch:10 Iteration: 1150  Loss: 6.934956036275253e-05  Accuracy: 99.6838150024414 %
Global epoch:10 Iteration: 1200  Loss: 3.099422940522345e-07  Accuracy: 99.6838150024414 %
Global epoch:10 Iteration: 1250  Loss: 5.423915467872575e-07  Accuracy: 99.65507507324219 %
Global epoch:10 Iteration: 1300  Loss: 3.118334643659182e-05  Accuracy: 99.6838150024414 

KeyboardInterrupt: 

In [None]:
# df_train = pd.read_csv('data/trainer1.csv')
# df_features = df_train.iloc[:, 1:785]
# df_label = df_train.iloc[:, 0]
# X_train, X_valid, y_train, y_valid = train_test_split(df_features, df_label, 
#                                                       test_size = 0.2,
#                                                       random_state = 1234)
# X_valid = np.array(X_valid).reshape(X_valid.shape[0], X_valid.shape[1])

In [None]:
# sample = 10
# img = X_valid[sample] #shape (784,1)
# img = img.reshape(1, 1, 28, 28) #shape (1,1,28,28)
# img  = torch.from_numpy(img).float() #tensor

# prediction = model(img).detach().numpy()[0].argmax()
# print(prediction)

In [None]:
# fig = plt.figure
# plt.imshow(X_valid[sample].reshape(28,28), cmap='gray')
# plt.show()