In [None]:
import os
import time
import numpy as np
import pandas as pd
from numpy import random as rd
from matplotlib import pyplot as plt
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from Experiments import Experiment2

# Whether PyTorch reports a usable CUDA device.
cuda = bool(torch.cuda.is_available())

# Float tensor type matching the selected device (GPU when available, CPU otherwise).
if cuda:
    Tensor = torch.cuda.FloatTensor
else:
    Tensor = torch.FloatTensor

# Directory that receives the CSV result files written by this script.
path = './plots/MNIST'
# os.makedirs also creates the missing parent './plots' (os.mkdir would raise
# FileNotFoundError on a fresh checkout); exist_ok=True makes re-runs a no-op
# and removes the check-then-create race.
os.makedirs(path, exist_ok=True)
    
# Fetching dataset.

train_dataset = datasets.MNIST('./data/', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST('./data/', train=False, transform=transforms.ToTensor(), download=True)

# batch_size equals the dataset length, so the first batch holds the whole split.
# The loader is iterated only once and the (inputs, labels) pair is unpacked
# together, instead of collating the full split twice.
train_loader = DataLoader(train_dataset, batch_size=len(train_dataset))
X, Y = (tensor.numpy() for tensor in next(iter(train_loader)))

# Same single-pass extraction for the held-out split.
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))
testX, testY = (tensor.numpy() for tensor in next(iter(test_loader)))

# Training-set sizes over which the experiment is swept.
range_ = [10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
# Number of times a new training set is drawn and a new model is trained.
precision_ = 100
# Number of times the likelihood attack is repeated on each trained model.
precisionAttack = 100

# Performing the experiment.

# Aggregated results: one mean and one standard deviation per training-set size.
gErrList, gErrStdList = [], []
LikeSucList, LikeSucStdList = [], []
AccList, AccStdList = [], []

for n in range_:  # Sweep over the training-set sizes.
    print('------------------------------------------------------------------------------ ')
    print('Size of the training set: ' + str(n))

    # Raw per-run values for the current size.
    gErrList_, LikeSucList_, AccList_ = [], [], []

    # One run per seed; seed**2 is the value handed to Experiment2.
    # NOTE(review): Experiment2 is imported from elsewhere; its three return values are
    # taken here to be (generalization gap, attack success rate, accuracy) - confirm.
    for seed in range(precision_):
        genErr, Suc, Acc = Experiment2(
            n, X, Y, testX, testY, seed**2,
            mode='MNIST', precision=precisionAttack,
        )
        gErrList_.append(genErr)
        AccList_.append(Acc)
        LikeSucList_.append(Suc)

    # Average each quantity over the runs once, and reuse the means for the report below.
    gap_mean = np.mean(gErrList_)
    success_mean = np.mean(LikeSucList_)
    accuracy_mean = np.mean(AccList_)

    gErrList.append(gap_mean)
    gErrStdList.append(np.std(gErrList_))

    LikeSucList.append(success_mean)
    LikeSucStdList.append(np.std(LikeSucList_))

    AccList.append(accuracy_mean)
    AccStdList.append(np.std(AccList_))

    # NOTE(review): the label says "Accuracy Training set"; whether Acc is measured on the
    # training or the test set depends on Experiment2 - verify before relying on the label.
    print("Likelihood Attack Success rate: %f, Generalization Gap: %f, Accuracy Training set: %f"
          % (success_mean, gap_mean, accuracy_mean))

# Lower bound on the attack success probability computed from the mean
# generalization gap of each training-set size: gap / 8 + 0.5.
BoundThm2List = [gap / 8 + .5 for gap in gErrList]
# The bound is an affine function of the gap, so its standard deviation is the
# gap's standard deviation scaled by the same factor 1/8. Dividing by 64 would
# be the scaling of the variance, not of the standard deviation.
BoundThm2StdList = [gap_std / 8 for gap_std in gErrStdList]

# Saving and Plotting the results.

# Accuracy table; columns: a = mean accuracy, b = training-set size,
# c = accuracy standard deviation, d = mean generalization gap.
AccDF = pd.DataFrame({
    'a': AccList,
    'b': range_,
    'c': AccStdList,
    'd': gErrList,
})
AccDF.to_csv(f'{path}/Acc.csv', index=False)

# Figure 0: mean generalization gap plotted against the values in range_.
gap_figure = plt.figure(0)
gap_axes = gap_figure.gca()
gap_axes.plot(range_, gErrList)
gap_axes.set_xlabel('Number of samples in the Universe')
gap_axes.set_ylabel('Generalization Gap')
plt.show()

# Lower-bound table; columns: a = bound value, b = training-set size,
# c = bound standard deviation, d = mean generalization gap.
PSucLB = pd.DataFrame({
    'a': BoundThm2List,
    'b': range_,
    'c': BoundThm2StdList,
    'd': gErrList,
})
PSucLB.to_csv(f'{path}/PSucLB2.csv', index=False)

# Likelihood-attack table; columns: a = mean success rate, b = training-set size,
# c = success-rate standard deviation, d = mean generalization gap.
Psuc = pd.DataFrame({
    'a': LikeSucList,
    'b': range_,
    'c': LikeSucStdList,
    'd': gErrList,
})
Psuc.to_csv(f'{path}/PsucLikihood.csv', index=False)
    
# Figure 1: lower bound and measured attack success rate on the same axes.
success_figure = plt.figure(1)
success_axes = success_figure.gca()
success_axes.plot(range_, BoundThm2List)
success_axes.plot(range_, LikeSucList)
# Legend entries follow the order in which the two curves were drawn.
success_axes.legend(['Lower Bound', 'Success Rate of Likelihood Attacker'])
success_axes.set_xlabel('Number of samples in the Universe')
success_axes.set_ylabel('Prob of Success')
plt.show()