In [44]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import matplotlib.pyplot as plt
from model import CNNModel

%matplotlib inline

In [45]:
# --- Input data -------------------------------------------------------------
# CSV shard that this trainer reads its training/validation samples from.
data_file = 'data/trainer2.csv'

# --- Coordinator handshake (sentinel files) ---------------------------------
# The training loop polls for these paths: the start file triggers a local
# training round, the stop file ends the loop. Each is deleted once seen.
start_training_file = 'models/start_training2'
stop_training_file = 'models/stop_training2'

# --- Model checkpoints ------------------------------------------------------
# Global weights are loaded (then deleted) at the start of every round;
# the locally trained weights are written to the local file afterwards.
global_model_file = 'models/global_trainer2.pt'
local_model_file = 'models/trainer2_cnn.pt'

In [46]:
# Load this trainer's shard of the dataset.
df_train = pd.read_csv(data_file)

# Report which classes are present in the shard.
labels_in_shard = df_train['label'].unique()
print('Labels in this training set: ', labels_in_shard)

Labels in this training set:  [5 4 3]


In [47]:
# Column 0 is used as the target; columns 1..784 are used as the inputs
# (784 values per row, later reshaped to 28x28 images).
# NOTE(review): presumably column 0 is the 'label' column printed above — confirm.
label_column = 0
pixel_columns = slice(1, 785)

df_label = df_train.iloc[:, label_column]
df_features = df_train.iloc[:, pixel_columns]

In [48]:
# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
split_parts = train_test_split(
    df_features,
    df_label,
    test_size=0.2,
    random_state=1234,
)
X_train, X_valid, y_train, y_valid = split_parts

In [49]:
# Convert both feature frames to plain 2-D NumPy arrays (rows x columns),
# keeping the shape each frame already has.
X_train = np.array(X_train).reshape(X_train.shape)
X_valid = np.array(X_valid).reshape(X_valid.shape)

In [50]:
# Turn each flat row into a single-channel 28x28 image, then into a float tensor.
train_images = X_train.reshape(len(X_train), 1, 28, 28)
X_train = torch.from_numpy(train_images).float()

# Targets: Series -> NumPy array -> tensor (dtype is whatever the array carries).
train_targets = np.array(y_train)
y_train = torch.from_numpy(train_targets)

In [51]:
# Sanity check: show the training tensor dimensions.
print(X_train.size(), y_train.size())

torch.Size([13915, 1, 28, 28]) torch.Size([13915])


In [52]:
# Same conversion as for the training split: flat rows -> (N, 1, 28, 28) float tensor.
valid_images = X_valid.reshape(len(X_valid), 1, 28, 28)
X_valid = torch.from_numpy(valid_images).float()

# Targets: Series -> NumPy array -> tensor.
valid_targets = np.array(y_valid)
y_valid = torch.from_numpy(valid_targets)

# Sanity check: show the validation tensor dimensions.
print(X_valid.shape, y_valid.shape)

torch.Size([3479, 1, 28, 28]) torch.Size([3479])


In [53]:
# Number of samples per mini-batch (also used to derive the local epoch count).
batch_size = 100

# Pair every image tensor with its label.
train = torch.utils.data.TensorDataset(X_train, y_train)
valid = torch.utils.data.TensorDataset(X_valid, y_valid)

# Both loaders walk their dataset in a fixed order (no shuffling).
loader_options = dict(batch_size=batch_size, shuffle=False)
train_loader = torch.utils.data.DataLoader(train, **loader_options)
valid_loader = torch.utils.data.DataLoader(valid, **loader_options)

In [54]:
# Hyperparameters for local training.
# The epoch count is derived from a target number of optimizer steps:
# epochs = target iterations / batches per epoch, truncated to an integer.
n_iters = 1000
batches_per_epoch = len(X_train) / batch_size
num_epochs = int(n_iters / batches_per_epoch)

print('Total Local Epochs: ', num_epochs)

# Loss function.
error = nn.CrossEntropyLoss()

# Model and its Adam optimizer.
learning_rate = 0.001
model = CNNModel()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

Total Local Epochs:  7


In [None]:
def _validation_accuracy(net, loader):
    """Return the accuracy of ``net`` over all batches of ``loader``, in percent.

    The pass runs in evaluation mode with gradient tracking disabled, so layers
    that behave differently while training do not distort the measurement and
    no autograd graph is built. The mode ``net`` was in beforehand is restored
    on exit, even if the forward pass raises.

    Returns ``nan`` when the loader yields no samples.
    """
    was_training = net.training
    net.eval()
    n_correct = 0
    n_seen = 0
    try:
        with torch.no_grad():
            for batch_images, batch_labels in loader:
                # Predicted class = index of the largest output per sample.
                predicted = torch.max(net(batch_images), 1)[1]
                n_seen += len(batch_labels)
                n_correct += (predicted == batch_labels).sum().item()
    finally:
        net.train(was_training)

    if n_seen == 0:
        return float('nan')
    return 100 * n_correct / float(n_seen)


global_epoch = 1
wait_counter = 0

# Poll once per second for instructions from the coordinator:
#   * stop file present  -> delete it and leave the loop
#   * start file present -> delete it, load the global weights, train locally,
#                           then write the local weights back to disk
while True:
    if os.path.exists(stop_training_file):
        os.remove(stop_training_file)
        print('Received Stop training from Coordinator. Exiting...')
        break

    # Print a heartbeat every 30 polls so the cell output is not flooded.
    if wait_counter % 30 == 0:
        print('Waiting for global model...')

    time.sleep(1)
    wait_counter = wait_counter + 1

    if os.path.exists(start_training_file):
        os.remove(start_training_file)

        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # that come from a trusted coordinator.
        model.load_state_dict(torch.load(global_model_file))
        os.remove(global_model_file)

        print('Received Global Model.')

        # Per-round training history, sampled every 100 iterations.
        count = 0
        loss_list = []
        iteration_list = []
        accuracy_list = []

        for epoch in range(num_epochs):
            for images, labels in train_loader:
                # Clear gradients
                optimizer.zero_grad()

                # Forward propagation
                outputs = model(images)

                # Calculate softmax and cross entropy loss
                loss = error(outputs, labels)

                # Calculating gradients
                loss.backward()

                # Update parameters
                optimizer.step()

                if count % 100 == 0:
                    # Measured after the parameter update; the loss reported
                    # alongside it is the one of the batch just trained on.
                    accuracy = _validation_accuracy(model, valid_loader)
                    loss_value = loss.item()

                    # Store plain Python numbers rather than tensors.
                    loss_list.append(loss_value)
                    iteration_list.append(count)
                    accuracy_list.append(accuracy)

                    # Print Loss
                    print('Global epoch:{} Iteration: {}  Loss: {}  Accuracy: {} %'.format(global_epoch, count, loss_value, accuracy))

                count += 1

        print('Completed global epoch: ', global_epoch)

        global_epoch = global_epoch + 1

        # NOTE(review): the file is written in place; if the coordinator polls
        # for it, it could observe a partially written checkpoint — confirm.
        torch.save(model.state_dict(), local_model_file)
        print('Saved local model file.')

Waiting for global model...
Received Global Model.
Global epoch:1 Iteration: 0  Loss: 18.99418830871582  Accuracy: 39.752803802490234 %
Global epoch:1 Iteration: 100  Loss: 0.22858844697475433  Accuracy: 97.81546783447266 %
Global epoch:1 Iteration: 200  Loss: 0.1640002578496933  Accuracy: 98.82150268554688 %
Global epoch:1 Iteration: 300  Loss: 0.2109578251838684  Accuracy: 99.022705078125 %
Global epoch:1 Iteration: 400  Loss: 0.08795972168445587  Accuracy: 99.08019256591797 %
Global epoch:1 Iteration: 500  Loss: 0.00038287416100502014  Accuracy: 98.90773010253906 %
Global epoch:1 Iteration: 600  Loss: 0.014110403135418892  Accuracy: 98.8502426147461 %
Global epoch:1 Iteration: 700  Loss: 0.00045417368528433144  Accuracy: 99.13768005371094 %
Global epoch:1 Iteration: 800  Loss: 0.0020924850832670927  Accuracy: 99.25265502929688 %
Global epoch:1 Iteration: 900  Loss: 0.015780767425894737  Accuracy: 99.13768005371094 %
Completed global epoch:  1
Saved local model file.
Received Global 

In [None]:
# df_train = pd.read_csv('data/trainer1.csv')
# df_features = df_train.iloc[:, 1:785]
# df_label = df_train.iloc[:, 0]
# X_train, X_valid, y_train, y_valid = train_test_split(df_features, df_label, 
#                                                       test_size = 0.2,
#                                                       random_state = 1234)
# X_valid = np.array(X_valid).reshape(X_valid.shape[0], X_valid.shape[1])

In [None]:
# sample = 10
# img = X_valid[sample] #shape (784,1)
# img = img.reshape(1, 1, 28, 28) #shape (1,1,28,28)
# img  = torch.from_numpy(img).float() #tensor

# prediction = model(img).detach().numpy()[0].argmax()
# print(prediction)

In [None]:
# fig = plt.figure
# plt.imshow(X_valid[sample].reshape(28,28), cmap='gray')
# plt.show()