In [87]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

In [88]:
# Basic configuration
weight_averaging = False # average model weights across parties? If off, each party trains on its local data only
smc = False # when averaging, use the secret-sharing (SMC) routine instead of plain averaging?
n_epochs = 30 # total number of training epochs (rounds)
weight_sharing_every_x_epoch = 5 # average weights every x epochs. 1 = FedSGD, >1 = FedAvg
num_parties = 1 # number of distributed parties. If set to 1, one party holds all data
model = "alex" # "small" for the small CNN, "alex" for AlexNet

# More advanced settings
equal_model = True # after training, overwrite every party's saved checkpoint with its current (last shared) model
only_share_in_last_round = False # average weights only once, in the final epoch

# Iterable of party indices, used throughout the notebook to loop over parties
pa = range(num_parties)



In [89]:
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler


# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20
# fraction of each party's training partition that is held out for validation
valid_size = 0.2

# Preprocessing: convert images to normalized torch.FloatTensor.
# The AlexNet variant additionally resizes the images to 224x224.
if model == "small":
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
elif model == "alex":
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
else:
    # Fail here with a clear message instead of a NameError on `transform` below.
    raise ValueError('Unknown model {!r}: expected "small" or "alex"'.format(model))

# choose the training and test datasets
train_data = datasets.CIFAR10('data', train=True,
                              download=True, transform=transform)
test_data = datasets.CIFAR10('data', train=False,
                             download=True, transform=transform)

# Each party holds an equally sized, contiguous slice of the training set
# (any remainder of the integer division is left unused).
num_train = int(len(train_data)/num_parties)
# number of samples per party reserved for validation
split = int(np.floor(valid_size * num_train))

indices = []
train_idx, valid_idx = [], []
train_sampler, valid_sampler = [], []
train_loader, valid_loader = [], []

# Each party gets separate training and validation data
for i in pa:
    # shuffle the party's slice, then cut it into validation / training parts
    indices.append(list(range(i*num_train, (i+1)*num_train)))
    np.random.shuffle(indices[i])
    train_idx.append(indices[i][split:])
    valid_idx.append(indices[i][:split])

    # samplers that draw batches only from this party's indices
    train_sampler.append(SubsetRandomSampler(train_idx[i]))
    valid_sampler.append(SubsetRandomSampler(valid_idx[i]))

    # data loaders (both read from train_data; the sampler selects the subset)
    train_loader.append(torch.utils.data.DataLoader(
        train_data, batch_size=batch_size,
        sampler=train_sampler[i], num_workers=num_workers))
    valid_loader.append(torch.utils.data.DataLoader(
        train_data, batch_size=batch_size,
        sampler=valid_sampler[i], num_workers=num_workers))

# The 10 classes in the dataset
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# For consistency of results let all parties use the same test dataset
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                          num_workers=num_workers)

Files already downloaded and verified
Files already downloaded and verified


## Build Neural Network

In [90]:
# Build one small CNN per party (only when the "small" model is selected).
if model == "small":
    import torch.nn as nn
    import torch.nn.functional as F

    # check if CUDA is available
    train_on_gpu = torch.cuda.is_available()

    if not train_on_gpu:
        print('CUDA is not available.  Training on CPU ...')
    else:
        print('CUDA is available!  Training on GPU ...')


    # define the CNN architecture
    class Net(nn.Module):
        """Small CNN: three conv + max-pool stages followed by two linear layers."""

        def __init__(self):
            """Create the layers; takes no configuration arguments."""
            super(Net, self).__init__()
            # convolutional layer (sees 32x32x3 image tensor)
            self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
            # convolutional layer (sees 16x16x16 tensor)
            self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
            # convolutional layer (sees 8x8x32 tensor)
            self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
            # max pooling layer (2x2 window, stride 2), reused after every conv layer
            self.pool = nn.MaxPool2d(2, 2)
            # linear layer (64 * 4 * 4 -> 500)
            self.fc1 = nn.Linear(64 * 4 * 4, 500)
            # linear layer (500 -> 10)
            self.fc2 = nn.Linear(500, 10)
            # dropout layer (p=0.25), applied before each linear layer
            self.dropout = nn.Dropout(0.25)

        def forward(self, x):
            """Return the 10 raw output scores of `fc2` for each input image."""
            # add sequence of convolutional and max pooling layers
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = self.pool(F.relu(self.conv3(x)))
            # flatten the feature maps to vectors of length 64 * 4 * 4
            x = x.view(-1, 64 * 4 * 4)
            # add dropout layer
            x = self.dropout(x)
            # add 1st hidden layer, with relu activation function
            x = F.relu(self.fc1(x))
            # add dropout layer
            x = self.dropout(x)
            # output layer: no activation is applied here
            x = self.fc2(x)
            return x

    # create n complete CNNs
    models=[]
    models.append(Net())

    # ensure all models start with the same weights:
    # each new model copies the state of the previous one in the list
    for i in pa[:-1]:
        models.append(Net())
        models[i+1].load_state_dict(models[i].state_dict())
        #print(model)

    for i in pa:    
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            models[i].cuda()

In [91]:
# Build one AlexNet per party (only when the "alex" model is selected).
if model == "alex":
    import torch.nn as nn
    import torch.nn.functional as F

    # check if CUDA is available
    train_on_gpu = torch.cuda.is_available()

    if not train_on_gpu:
        print('CUDA is not available.  Training on CPU ...')
    else:
        print('CUDA is available!  Training on GPU ...')


    # define the CNN architecture
    class AlexNet(nn.Module):
        """AlexNet-style network: a convolutional `features` stack and a linear `classifier`."""

        def __init__(self, num_classes=1000):
            """Create the layers.

            num_classes: size of the final linear layer's output (default 1000).
            """
            super(AlexNet, self).__init__()
            # five conv layers with ReLU, three of them followed by max pooling
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2),
                nn.Conv2d(64, 192, kernel_size=5, padding=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2),
                nn.Conv2d(192, 384, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(384, 256, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(256, 256, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2),
            )
            # two dropout + linear + ReLU stages, then the output layer
            self.classifier = nn.Sequential(
                nn.Dropout(),
                nn.Linear(256 * 6 * 6, 4096),
                nn.ReLU(inplace=True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(inplace=True),
                nn.Linear(4096, num_classes),
            )

        def forward(self, x):
            """Return `num_classes` raw output scores for each input image."""
            x = self.features(x)
            # flatten to (batch, 256 * 6 * 6); this fixes the expected feature-map size
            x = x.view(x.size(0), 256 * 6 * 6)
            x = self.classifier(x)
            return x

    # create n complete CNNs
    # NOTE(review): AlexNet() is built with the default num_classes=1000 although
    # `classes` lists only 10 labels -- confirm whether num_classes=10 was intended.
    models=[]
    models.append(AlexNet())

    # ensure all models start with the same weights:
    # each new model copies the state of the previous one in the list
    for i in pa[:-1]:
        models.append(AlexNet())
        models[i+1].load_state_dict(models[i].state_dict())
        #print(model)

    for i in pa:    
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            models[i].cuda()

CUDA is available!  Training on GPU ...


Specify a loss function and an optimizer, and instantiate the model.

If you use a less common loss function, please note why you chose that loss function in a comment.

In [92]:
if model == "small":
    # One plain-SGD optimizer (lr=0.01) and one cross-entropy criterion per party
    optimizer = [optim.SGD(models[i].parameters(), lr=0.01) for i in pa]
    criterion = [nn.CrossEntropyLoss() for _ in pa]

In [93]:
if model == "alex":
    # One momentum-SGD optimizer (lr=0.001, momentum=0.9) and one
    # cross-entropy criterion per party
    optimizer = [optim.SGD(models[i].parameters(), lr=0.001, momentum=0.9) for i in pa]
    criterion = [nn.CrossEntropyLoss() for _ in pa]

In [94]:
#calculate number of parameters

def pytorch_count_params(model):
    """Return the number of trainable scalar parameters in a PyTorch module.

    Only parameters with `requires_grad` set are counted.
    """
    trainable_sizes = [param.numel() for param in model.parameters() if param.requires_grad]
    return sum(trainable_sizes)

In [95]:
# Number of trainable parameters of the first party's model
pytorch_count_params(models[0])

61100840

In [96]:
# calculate averaged weight of a model without SMC
def share_weights(models):
    """Replace every model's state with the element-wise mean of all states (no SMC).

    For each entry of the state dict the tensors of all models are summed and
    divided by the number of models; the averaged state is then loaded into
    every model.

    models: list of modules with identical state-dict keys and tensor shapes.
            An empty list is a no-op.

    Integer entries (for example step counters kept as buffers) cannot be
    divided in place, so their mean is computed in float64 and cast back to the
    original dtype, which truncates any fractional part.
    """
    if not models:
        return

    state_dicts = [m.state_dict() for m in models]
    n_models = len(state_dicts)

    averaged = {}
    for key, reference in state_dicts[0].items():
        # Accumulate in a copy so model 0's live tensors are not modified
        # until the final load_state_dict call.
        total = reference.clone()
        for other in state_dicts[1:]:
            total += other[key].to(total.device)

        if total.is_floating_point() or total.is_complex():
            total /= n_models
        else:
            total = (total.to(torch.float64) / n_models).to(reference.dtype)
        averaged[key] = total

    for m in models:
        m.load_state_dict(averaged)
    

In [97]:
##Shamir's Secret Sharing implementation###
import random
from math import ceil
from decimal import Decimal
 
# Exclusive upper bound for the randomly drawn polynomial coefficients and
# share x-coordinates. Note: no modular reduction by this value is performed
# anywhere in the functions of this cell.
FIELD_SIZE = 10**15
 
 
def reconstruct_secret(shares):
    """
    Recover the constant term of the sharing polynomial from `shares`
    by evaluating the Lagrange interpolation at x = 0.

    `shares` is a sequence of (x, y) points on the polynomial. The result
    is accumulated with `Decimal` arithmetic; for an empty sequence the
    integer 0 is returned.
    """
    secret = 0
    for j, (xj, yj) in enumerate(shares):
        # Lagrange basis polynomial of point j, evaluated at x = 0
        basis = Decimal(1)
        for i, (xi, _unused) in enumerate(shares):
            if i == j:
                continue
            basis = basis * Decimal(Decimal(xi) / (xi - xj))

        weighted = basis * Decimal(yj)
        secret = secret + Decimal(weighted)
    return secret
 
def polynom(x, coefficients):
    """
    Evaluate a polynomial at `x`.

    `coefficients` lists the coefficients from the highest power down to
    the constant term (the last entry). An empty list evaluates to 0.
    """
    # Reversing the list makes the enumerate index equal to the power of x.
    return sum(
        x ** power * value
        for power, value in enumerate(coefficients[::-1])
    )
 
 
def coeff(t, secret):
    """
    Build the coefficient list of a random polynomial of degree `t` - 1
    whose constant term is `secret`.

    The first `t` - 1 entries are random integers in [0, FIELD_SIZE); the
    last entry is `secret`. Example for t = 4:
        3x^3 + 4x^2 + 18x + 554  ->  [3, 4, 18, 554]
    where 554 is the secret and 4 points are needed to recover it.
    """
    random_terms = [random.randrange(0, FIELD_SIZE) for _ in range(t - 1)]
    return random_terms + [secret]
 
 
def generate_shares(n, m, secret):
    """
    Produce `n` shares of `secret`; `m` shares are needed to recover it.

    Each share is a point (x, y) on a random polynomial of degree `m` - 1
    with `secret` as constant term; x is drawn at random from
    [1, FIELD_SIZE). Note: a later cell of this notebook defines another
    function with the same name.
    """
    coefficients = coeff(m, secret)
    points = []
    for _ in range(n):
        x_coord = random.randrange(1, FIELD_SIZE)
        points.append((x_coord, polynom(x_coord, coefficients)))
    return points
 
 
# Driver code
def shamir(t,n,secret):
    """
    Driver: split `secret` into `n` shares with threshold `t`, then draw
    `t` of them at random. Nothing is reconstructed, printed or returned.
    """
    # Phase I: generation of shares
    all_shares = generate_shares(n, t, secret)

    # Phase II: pick t shares at random (candidates for reconstruction)
    chosen = random.sample(all_shares, t)
    


In [98]:
def generate_shares(n, m, secret, interpolation_points=None):
    """
    Split `secret` into shares with a minimum threshold of `m` shares
    needed to recover it.

    n: number of shares to create when `interpolation_points` is omitted.
    m: threshold; the sharing polynomial has degree `m` - 1.
    secret: constant term of the polynomial.
    interpolation_points: optional sequence of x-coordinates. When given,
        exactly one share per listed point is produced and `n` is not
        consulted. When omitted (None), `n` distinct x-coordinates are
        drawn at random from [1, FIELD_SIZE), so the three-argument call
        form `generate_shares(n, m, secret)` keeps working.

    Returns a list of (x, y) tuples.
    """
    coefficients = coeff(m, secret)

    if interpolation_points is None:
        # random.sample guarantees distinct x values; duplicated x values
        # would cause a division by zero during Lagrange reconstruction.
        interpolation_points = random.sample(range(1, FIELD_SIZE), n)

    return [(x, polynom(x, coefficients)) for x in interpolation_points]
    

In [99]:
   

def share_weights_with_SMC_old(models,num_parties,threshold, precision):
    """Legacy attempt at secret-shared weight averaging on whole tensors.

    For every state-dict key, each model's tensor is scaled by 10**precision,
    split into shares, the shares are summed, the sum is reconstructed and
    divided by (number of models * 10**precision). The result is stored in
    model 0's state dict, which is finally loaded into every model.

    NOTE(review): `generate_shares` is called here with three arguments,
    while the definition in the preceding notebook cell requires a fourth
    (`interpolation_points`) -- this call fails with that definition.
    """
    state_dicts = []
    
    state_dicts = [models[i].state_dict() for i in range(len(models))]
    for key in state_dicts[0]:
        shares = []
        sum_of_shares = []
        aggregated_shares = []
        for i in range(len(models)):
            
            # scale the secret by 10**precision (no integer conversion is performed here)
            secret = state_dicts[i][key]*10**precision 
            
            # split up secret into shares
            
            shares.append(generate_shares(num_parties, threshold, secret, ))
            
            # usually parties would now send secret over network sockets 
            # Party i sends secret[i][j] to Party j
            # -> each party ends up with n secret shares         
            
            
            
        # each party adds up its local shares
        # NOTE(review): this sums shares[i][j] over j, i.e. all shares of
        # party i's own secret, whereas sum_of_secrets_with_SMC sums
        # shares[j][i] -- confirm which indexing was intended.
        for i in range(len(models)):    
            sum_of_shares.append(0)
            aggregated_shares.append(0)
            for j in range(len(models)):
                sum_of_shares[i] += shares[i][j][1]
            # pair the sum with the x-coordinate of party i's first share
            aggregated_shares[i] = (shares[i][0][0], sum_of_shares[i])    
            # usually parties would now share their sums over network sockets 
            # Party i sends sum[i] to all Parties
            # -> each party can reconstruct the secret 
            
        # all parties reconstruct the secret (the same computation is repeated per model)
        for i in range(len(models)):
            reconstructed_aggregation = reconstruct_secret(aggregated_shares)
            state_dicts[0][key] = reconstructed_aggregation
            # undo the scaling and turn the sum into a mean
            state_dicts[0][key] /= (len(models)*10**precision)        
            
        
            #print(0,state_dicts[0][key])
    # load the averaged state into every model
    for i in range(len(models)):
        models[i].load_state_dict(state_dicts[0]) 
    

In [100]:
def sum_of_secrets_with_SMC(secrets, num_parties):
    """
    Compute the sum of `secrets` through additive combination of shares.

    Every secret is split into shares at the fixed x-coordinates
    1..num_parties (threshold = number of secrets). The shares that fall
    on the same x-coordinate are added up, and the polynomial through the
    summed points is reconstructed; its constant term is returned.
    """
    x_coords = list(range(1, num_parties + 1))
    party_count = len(secrets)

    # Row k holds the shares produced from secrets[k]; in a real deployment
    # entry [k][j] would be sent from party k to party j.
    share_matrix = [
        generate_shares(party_count, party_count, secret, x_coords)
        for secret in secrets
    ]

    # Each receiving party adds up the shares it was sent.
    combined = []
    for receiver in range(party_count):
        column_total = Decimal(0)
        for dealer in range(party_count):
            column_total += Decimal(share_matrix[dealer][receiver][1])
        # the x-coordinate is the same for all shares sent to one receiver
        combined.append((share_matrix[0][receiver][0], Decimal(column_total)))

    return reconstruct_secret(combined)
    

        
        

In [101]:

def share_weights_with_SMC_t(models,num_parties,threshold, precision):
    """Experimental variant of secret-shared weight averaging with debug prints.

    Collects every tensor element of every party as a Decimal, runs
    `sum_of_secrets_with_SMC` per element, and finally loads model 0's state
    dict into all models. `threshold` and `precision` are not used.

    NOTE(review): the inner loop only rebinds the local name `x`
    (`x = sum_of_secret`, then `x = 5.05`); it never assigns through
    `x[...]`, so the computed value does not appear to be written back.
    """
    state_dicts = []    
    
    state_dicts = [models[i].state_dict() for i in range(len(models))]
    
    
    # save all weights and biases as secrets
    for key in state_dicts[0]:
        # iterate over all elements in all tensors
        secrets = []
        #elements = []
        for p in pa: # usually performed at each party individually
            secrets.append([])
            # iterating a tensor yields its slices along the first dimension
            for elements in state_dicts[p][key]:                
                for x in np.nditer(elements):
                    secrets[p].append(Decimal(x.item()))
                    
        # engage in secret sharing for each value
        for elements in state_dicts[0][key]: 
            print("before", elements)
            with np.nditer(elements, op_flags=['writeonly']) as it:                
                # NOTE(review): i restarts at 0 for every slice although
                # secrets[p] is one flat list over the whole tensor -- confirm.
                i = 0
                for x in it: # do one shamir's secret sharing per secret
                    ith_secret_of_all_players = []
                    for p in pa: # usually performed at each party individually
                        ith_secret_of_all_players.append(Decimal(secrets[p][i]))                   
                    #print(ith_secret_of_all_players)
                    sum_of_secret = sum_of_secrets_with_SMC(ith_secret_of_all_players, num_parties)
                    # turn the sum into a mean
                    sum_of_secret /=  num_parties
                    x = sum_of_secret
                    # debugging placeholder value
                    x = 5.05
                    #print("after", elements)
                    
                    i+=1   
            print("after", elements)
          

    # load model 0's state into every model
    for i in range(len(models)):
        models[i].load_state_dict(state_dicts[0]) 
    

In [102]:

def share_weights_with_SMC(models,num_parties,threshold, precision):
    """Average the weights of `models` element by element via secret sharing.

    For every entry of the state dict, each scalar element of every model is
    treated as one secret. The secrets that belong to the same element are
    summed with `sum_of_secrets_with_SMC`, divided by the number of models,
    and the averaged state is loaded into every model.

    models: list of modules with identical state-dict keys and shapes.
            An empty list is a no-op.
    num_parties: forwarded to `sum_of_secrets_with_SMC`, which uses it as the
            number of interpolation points.
    threshold, precision: accepted for interface compatibility; not used.

    The parties are taken from the `models` argument rather than from a
    notebook-level variable, and values are read through `tolist()` on a CPU
    copy, so the function does not depend on the tensors being writable
    numpy views. Results are created on each entry's original device.
    """
    if not models:
        return

    state_dicts = [m.state_dict() for m in models]
    n_models = len(state_dicts)

    averaged_state = {}
    for key, reference in state_dicts[0].items():
        # One flat list of Decimal secrets per party
        # (normally done by each party individually).
        per_party = [
            [Decimal(v) for v in sd[key].detach().cpu().reshape(-1).tolist()]
            for sd in state_dicts
        ]

        # One secret-sharing round per tensor element.
        averaged_values = []
        for element_secrets in zip(*per_party):
            total = sum_of_secrets_with_SMC(list(element_secrets), num_parties)
            averaged_values.append(float(total / n_models))

        averaged_state[key] = torch.tensor(
            averaged_values, dtype=reference.dtype, device=reference.device
        ).reshape(reference.shape)

    # load averaged weights and biases
    for m in models:
        m.load_state_dict(averaged_state)

In [103]:
# Sanity check: the secret-shared sum of three values should be close to their
# plain sum (2.2265156165165 + 3.1 + 4.8).
print(sum_of_secrets_with_SMC([Decimal(2.2265156165165),Decimal(3.1),Decimal(4.8)],3))
    


10.12651561652


In [104]:
# Move the first party's model to the GPU; as the last expression of the cell
# the returned module is also displayed.
# NOTE(review): this call is not guarded by `train_on_gpu` -- confirm that a
# CUDA device is always available when this cell is run.
models[0].cuda()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, 

## Running Neural Network

In [105]:
# Best (lowest) validation loss seen so far, per party.
# np.inf is used because the np.Inf alias no longer exists in NumPy 2.
valid_loss_min = [np.inf for _ in pa]

for epoch in range(1, n_epochs+1):
    for i in pa:
        print('Party: {}'.format(i))
        # keep track of training and validation loss
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        models[i].train()
        for data, target in train_loader[i]:
            # move tensors to GPU if CUDA is available
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            # clear the gradients of all optimized variables
            optimizer[i].zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = models[i](data)
            # calculate the batch loss
            loss = criterion[i](output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer[i].step()
            # accumulate the summed loss (batch mean * batch size)
            train_loss += loss.item()*data.size(0)

        ######################
        # validate the model #
        ######################
        models[i].eval()
        # no gradients are needed for validation; skipping them saves memory and time
        with torch.no_grad():
            for data, target in valid_loader[i]:
                # move tensors to GPU if CUDA is available
                if train_on_gpu:
                    data, target = data.cuda(), target.cuda()
                # forward pass: compute predicted outputs by passing inputs to the model
                output = models[i](data)
                # calculate the batch loss
                loss = criterion[i](output, target)
                # accumulate the summed loss (batch mean * batch size)
                valid_loss += loss.item()*data.size(0)

        # calculate average losses over the party's samples
        train_loss = train_loss/len(train_loader[i].sampler)
        valid_loss = valid_loss/len(valid_loader[i].sampler)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, train_loss, valid_loss))

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min[i]:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min[i],
            valid_loss))
            torch.save(models[i].state_dict(), 'model_cifar{}.pt'.format(i))
            valid_loss_min[i] = valid_loss

    # If selected, let participants average their models' weights.
    if weight_averaging:
        if only_share_in_last_round:
            # exactly one averaging step, in the final epoch, even when
            # n_epochs is not a multiple of the sharing interval
            share_now = epoch == n_epochs
        else:
            share_now = epoch % weight_sharing_every_x_epoch == 0

        if share_now:
            print('Secure Aggregation and Weight Averaging ...')
            if smc:
                # the SMC routine is run on CPU tensors
                for i in pa:
                    models[i].cpu()
                share_weights_with_SMC(models,num_parties,num_parties, 10)
                # move back only if the models were on the GPU to begin with
                if train_on_gpu:
                    for i in pa:
                        models[i].cuda()
            else:
                share_weights(models)

# Finally, take the averaged model for all parties if preferred:
# overwrite each party's checkpoint with its current model.
if weight_averaging and equal_model:
    for i in pa:
        torch.save(models[i].state_dict(), 'model_cifar{}.pt'.format(i))
    



Party: 0
Epoch: 1 	Training Loss: 2.561666 	Validation Loss: 1.720176
Validation loss decreased (inf --> 1.720176).  Saving model ...
Party: 0


KeyboardInterrupt: 

Plot the training loss (and validation loss/accuracy, if recorded).

In [106]:
# Restore, for every party, the checkpoint written during training
for i in pa:
    saved_state = torch.load('model_cifar{}.pt'.format(i))
    models[i].load_state_dict(saved_state)

## Testing model

In [107]:
# Per-party accumulators: summed test loss and per-class hit / sample counts
test_loss = []
class_correct = []
class_total = []
for i in pa:
    test_loss.append(0.0)
    class_correct.append([0.0] * len(classes))
    class_total.append([0.0] * len(classes))
    models[i].eval()

# iterate over test data; gradients are not needed for evaluation
with torch.no_grad():
    for data, target in test_loader:
        # move tensors to GPU if CUDA is available (once per batch)
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        labels = target.tolist()

        for i in pa:
            # forward pass: compute predicted outputs by passing inputs to the model
            output = models[i](data)
            # calculate the batch loss
            loss = criterion[i](output, target)
            # accumulate the summed loss (batch mean * batch size)
            test_loss[i] += loss.item()*data.size(0)
            # the predicted class is the index of the largest output score
            _, pred = torch.max(output, 1)
            # compare predictions to true labels
            hits = pred.eq(target.view_as(pred)).tolist()
            # Iterate over the samples actually present in this batch, so a
            # final batch shorter than batch_size (or of size 1) is handled.
            for label, hit in zip(labels, hits):
                class_correct[i][label] += float(hit)
                class_total[i][label] += 1

for i in pa:
    print('Party: {}'.format(i))
    # average test loss over the whole test set
    test_loss[i] = test_loss[i]/len(test_loader.dataset)
    print('Test Loss: {:.6f}\n'.format(test_loss[i]))

    for j in range(len(classes)):
        if class_total[i][j] > 0:
            print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                classes[j], 100 * class_correct[i][j] / class_total[i][j],
                np.sum(class_correct[i][j]), np.sum(class_total[i][j])))
        else:
            print('Test Accuracy of %5s: N/A (no training examples)' % (classes[j]))

    print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(class_correct[i]) / np.sum(class_total[i]),
        np.sum(class_correct[i]), np.sum(class_total[i])))

Party: 0
Test Loss: 0.718176

Test Accuracy of plane: 80% (803/1000)
Test Accuracy of   car: 85% (852/1000)
Test Accuracy of  bird: 62% (626/1000)
Test Accuracy of   cat: 53% (533/1000)
Test Accuracy of  deer: 66% (669/1000)
Test Accuracy of   dog: 66% (660/1000)
Test Accuracy of  frog: 85% (854/1000)
Test Accuracy of horse: 83% (832/1000)
Test Accuracy of  ship: 85% (851/1000)
Test Accuracy of truck: 85% (854/1000)

Test Accuracy (Overall): 75% (7534/10000)


In [108]:
# Write the current state of every party's model to its own checkpoint file
for i in pa:
    final_state = models[i].state_dict()
    torch.save(final_state, 'checkpoint{}.pth'.format(i))