In [4]:
from dataset import get_mnist_dataset
train_set, train_classes = get_mnist_dataset(train=True, shuffle=True, normalize=False)
val_set, val_classes = get_mnist_dataset(train=False, total_data=1000, shuffle=True, normalize=False)


from collections import OrderedDict, namedtuple
from itertools import product
import os
from tqdm import tqdm_notebook as tqdm
from tqdm import trange
from IPython.display import clear_output
import time
import torch.nn as nn
import pandas as pd
import torch
import matplotlib.pyplot as plt
from utils.run_manager import RunBuilder
from models import g_step, MnistAEDGCCA, MnistAutoencoderNoBN
from torchvision import datasets, transforms, utils
from torch.utils.data import DataLoader
import torchvision
from dataset import get_mnist_dataset 
import numpy as np

class RunBuilder():
    @staticmethod
    def get_runs(params):

        Run = namedtuple('Run', params.keys())

        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))

        return runs

if torch.cuda.is_available():
    devices = ['cpu']
else:
    devices = ['cpu']
print('starting')


params = OrderedDict(
    lr = [0.001],
    batch_size = [1000],
    device = devices,
    shuffle = [True],
    num_workers = [5],
    manual_seed = [1265],
    loss_func = [nn.MSELoss],
    quant = [True],
    latent_dim = [10], 
    num_inner_epochs = [8]
)

# layer_sizes_list = 3*[[128, 64, 2]]
# input_size_list = 3*[2]


run_count = 0
models = []


run_data = []

data_load_time = 0
forward_time = 0


for run in RunBuilder.get_runs(params):
#     torch.cuda.set_device(run.device)
    
    run_count += 1
    device = torch.device(run.device)
    
    dgcca = MnistAEDGCCA(output_size=run.latent_dim, network=)
    dgcca = dgcca.to('cpu')
        
    train_views = list(train_set.to('cpu'))
    val_views = list(val_set.to('cpu'))
    
    optimizer = torch.optim.Adam(dgcca.parameters(), lr=run.lr)
    num_batches = len(train_views[0])//run.batch_size
    
    criterion = run.loss_func()

    out = torch.stack(dgcca(train_views))
    G = g_step(out.clone().detach())  
    
    M_serv = out.detach().clone()
    
    I = len(train_views)
    
    for epoch in range(60):
        total_recons_loss = 0
        total_val_loss = 0
        batch_count = 0
        total_dgcca_loss = 0
        
        dgcca.to('cuda')
        
        for _ in tqdm(range(run.num_inner_epochs)):
            for i in tqdm(range(num_batches)):

                optimizer.zero_grad()
                batch = []

                # mini batch gradient
                batch = [view[(i*run.batch_size):((i+1)*run.batch_size), :].to('cuda') for view in train_views]            
                target = G[(i*run.batch_size):((i+1)*run.batch_size), :].to('cuda')

                latent = dgcca(batch)

                ae_loss = (run.latent_dim/(2*28*28*target.shape[0]))*torch.norm(torch.stack(dgcca.decode(latent)) - torch.stack(batch))

                dgcca_loss = 1/2*torch.norm(torch.stack(latent)-target)/target.shape[0] 

                loss = dgcca_loss + ae_loss

                loss.backward()

                optimizer.step()

                total_recons_loss += loss.item()
                total_dgcca_loss += dgcca_loss.item()
#                 print(ae_loss.item(), dgcca_loss.item())
                del batch, target, latent

        dgcca.to('cpu')
        out = torch.stack(dgcca(train_views)).detach().clone()        
        if run.quant:
            for i in range(I):
                diff = out[i] - M_serv[i]
                max_val = diff.abs().max()
                quant = ((1/max_val)*diff[i]).round()*(max_val/1)
                var = M_serv[i] + quant
                M_serv[i] = var
                del max_val, diff, quant, var
            G = g_step(M_serv.clone().detach())
        else:
            G = g_step(out.clone().detach())   
            
        # validation loss
        out_val = dgcca(val_views)
        out_val = torch.stack(out_val)
        
        G_val = g_step(out_val.clone().detach())
        
        loss_val = criterion(out_val, G_val)
        total_val_loss += loss_val.item()
        del out, G_val, out_val

        
        results = OrderedDict()
        results['epoch'] = epoch
        results['train_loss'] = total_recons_loss/(num_batches*run.num_inner_epochs)
        results['dgcca_loss'] = total_dgcca_loss/(num_batches*run.num_inner_epochs)
        results['val_loss'] = total_val_loss
        results['batch_size'] = run.batch_size
        results['lr'] = run.lr
        if results['train_loss'] < 0.00275:
            break
        run_data.append(results)
        df3 = pd.DataFrame.from_dict(run_data, orient='columns')
        clear_output(wait=True)
#         show_latent()
        display(df3)
            
#             m.track_loss(G_adv_loss=losses['beta_kl-divergence'], G_mse_loss=losses[''], D_real_loss=total_D_real, D_fake_loss=total_D_fake, D_real_count=real_count, D_fake_count=fake_count)
#         print(epoch, "total_Gloss:",total_Gloss, "total_Dloss:",total_Dloss, "mse:",total_mse_loss, "adv: ", total_adv_loss)           
#         m.end_epoch()
        torch.save(dgcca, 'trained_models/dgcca_mnist_ae2_fed.model')

Unnamed: 0,epoch,train_loss,dgcca_loss,val_loss,batch_size,lr
0,0,0.003766,0.000414,0.000882,1000,0.001
1,1,0.00349,0.000308,0.000871,1000,0.001
2,2,0.003234,0.000321,0.00089,1000,0.001
3,3,0.003139,0.000323,0.000891,1000,0.001
4,4,0.003084,0.000323,0.000893,1000,0.001
5,5,0.003049,0.000322,0.000895,1000,0.001
6,6,0.003023,0.000322,0.000895,1000,0.001
7,7,0.003004,0.000322,0.000897,1000,0.001
8,8,0.002989,0.000321,0.000898,1000,0.001
9,9,0.002977,0.000321,0.000899,1000,0.001


In [1]:
from dataset import get_mnist_dataset
train_set, train_classes = get_mnist_dataset(train=True, shuffle=True, normalize=False)
val_set, val_classes = get_mnist_dataset(train=False, total_data=1000, shuffle=True, normalize=False)


from collections import OrderedDict, namedtuple
from itertools import product
import os
from tqdm import tqdm_notebook as tqdm
from tqdm import trange
from IPython.display import clear_output
import time
import torch.nn as nn
import pandas as pd
import torch
import matplotlib.pyplot as plt
from utils.run_manager import RunBuilder
from models import g_step, MnistAEDGCCA, MnistAELinear
from torchvision import datasets, transforms, utils
from torch.utils.data import DataLoader
import torchvision
from dataset import get_mnist_dataset 
import numpy as np

class RunBuilder():
    @staticmethod
    def get_runs(params):

        Run = namedtuple('Run', params.keys())

        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))

        return runs

if torch.cuda.is_available():
    devices = ['cpu']
else:
    devices = ['cpu']
print('starting')


params = OrderedDict(
    lr = [0.001],
    batch_size = [1000],
    device = devices,
    shuffle = [True],
    num_workers = [5],
    manual_seed = [1265],
    loss_func = [nn.MSELoss],
    quant = [False],
    latent_dim = [10], 
    num_inner_epochs = [1]
)

# layer_sizes_list = 3*[[128, 64, 2]]
# input_size_list = 3*[2]


run_count = 0
models = []


run_data = []

data_load_time = 0
forward_time = 0


for run in RunBuilder.get_runs(params):
#     torch.cuda.set_device(run.device)
    
    run_count += 1
    device = torch.device(run.device)
    
    dgcca = MnistAEDGCCA(output_size=run.latent_dim, network=MnistAELinear)
    dgcca = dgcca.to('cpu')
        
    train_views = list(train_set.to('cpu'))
    val_views = list(val_set.to('cpu'))
    
    optimizer = torch.optim.Adam(dgcca.parameters(), lr=run.lr)
    num_batches = len(train_views[0])//run.batch_size
    
    criterion = run.loss_func()

    out = torch.stack(dgcca(train_views))
    G = g_step(out.clone().detach())  
    
    M_serv = out.detach().clone()
    
    I = len(train_views)
    
    for epoch in range(60):
        total_recons_loss = 0
        total_val_loss = 0
        batch_count = 0
        total_dgcca_loss = 0
        
        dgcca.to('cuda')
        
        for _ in tqdm(range(run.num_inner_epochs)):
            for i in tqdm(range(num_batches)):

                optimizer.zero_grad()
                batch = []

                # mini batch gradient
                batch = [view[(i*run.batch_size):((i+1)*run.batch_size), :].to('cuda') for view in train_views]            
                target = G[(i*run.batch_size):((i+1)*run.batch_size), :].to('cuda')

                latent = dgcca(batch)

                ae_loss = (run.latent_dim/(2*28*28*target.shape[0]))*torch.norm(torch.stack(dgcca.decode(latent)) - torch.stack(batch))

                dgcca_loss = 1/2*torch.norm(torch.stack(latent)-target)/target.shape[0] 

                loss = dgcca_loss + ae_loss

                loss.backward()

                optimizer.step()

                total_recons_loss += loss.item()
                total_dgcca_loss += dgcca_loss.item()
#                 print(ae_loss.item(), dgcca_loss.item())
                del batch, target, latent

        dgcca.to('cpu')
        out = torch.stack(dgcca(train_views)).detach().clone()        
        if run.quant:
            for i in range(I):
                diff = out[i] - M_serv[i]
                max_val = diff.abs().max()
                quant = ((1/max_val)*diff[i]).round()*(max_val/1)
                var = M_serv[i] + quant
                M_serv[i] = var
                del max_val, diff, quant, var
            G = g_step(M_serv.clone().detach())
        else:
            G = g_step(out.clone().detach())   
            
        # validation loss
        out_val = dgcca(val_views)
        out_val = torch.stack(out_val)
        
        G_val = g_step(out_val.clone().detach())
        
        loss_val = criterion(out_val, G_val)
        total_val_loss += loss_val.item()
        del out, G_val, out_val

        
        results = OrderedDict()
        results['epoch'] = epoch
        results['train_loss'] = total_recons_loss/(num_batches*run.num_inner_epochs)
        results['dgcca_loss'] = total_dgcca_loss/(num_batches*run.num_inner_epochs)
        results['val_loss'] = total_val_loss
        results['batch_size'] = run.batch_size
        results['lr'] = run.lr
        if results['train_loss'] < 0.00290:
            break
        run_data.append(results)
        df4 = pd.DataFrame.from_dict(run_data, orient='columns')
        clear_output(wait=True)
#         show_latent()
        display(df4)
            
#             m.track_loss(G_adv_loss=losses['beta_kl-divergence'], G_mse_loss=losses[''], D_real_loss=total_D_real, D_fake_loss=total_D_fake, D_real_count=real_count, D_fake_count=fake_count)
#         print(epoch, "total_Gloss:",total_Gloss, "total_Dloss:",total_Dloss, "mse:",total_mse_loss, "adv: ", total_adv_loss)           
#         m.end_epoch()
        torch.save(dgcca, 'trained_models/dgcca_mnist_linear_innerit=1.model')

  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "


starting


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/1 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/60 [00:00<?, ?it/s]

RuntimeError: The size of tensor a (784) must match the size of tensor b (28) at non-singleton dimension 4

In [8]:
train_set[0]

tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        ...,


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0.

In [1]:
from dataset_mnist import get_mnist_dataset
train_set, train_classes = get_mnist_dataset(train=True, shuffle=True, normalize=False)
val_set, val_classes = get_mnist_dataset(train=False, total_data=1000, shuffle=True, normalize=False)

train_set = train_set.view(train_set.shape[0], train_set.shape[1], -1)
val_set = val_set.view(val_set.shape[0], val_set.shape[1], -1)


  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "
  "Default grid_sample and affine_grid behavior has changed "


In [5]:
# from dataset_mnist import get_mnist_dataset
# train_set, train_classes = get_mnist_dataset(train=True, shuffle=True, normalize=False)
# val_set, val_classes = get_mnist_dataset(train=False, total_data=1000, shuffle=True, normalize=False)

# train_set = train_set.view(train_set.shape[0], train_set.shape[1], -1)
# val_set = val_set.view(val_set.shape[0], val_set.shape[1], -1)

from collections import OrderedDict, namedtuple
from itertools import product
import os
from tqdm import tqdm, trange
from IPython.display import clear_output
import time
import torch.nn as nn
import pandas as pd
import torch
import matplotlib.pyplot as plt
from dgcca.models import g_step, MnistAEDGCCA, MnistAELinear, MnistAELinearBN
from torchvision import datasets, transforms, utils
from torch.utils.data import DataLoader
from dgcca.utils.compressor import qsgd
import torchvision
import numpy as np
import pprint as pp
import argparse

from torch.optim.lr_scheduler import MultiStepLR

parser = argparse.ArgumentParser(description="MNIST DGCCA")

# Data
parser.add_argument('--model_dest', default='../trained_models/dgcca_mnist_cutemaxvar2.model', help="Destination model path")
parser.add_argument('--random_seed', default=5555, help='')
parser.add_argument('--batch_size', default=1000, help='')
parser.add_argument('--lr', default=0.001, help='')
parser.add_argument('--device', default='cuda', help='')
parser.add_argument('--shuffle', default=True, help='')
parser.add_argument('--num_epochs', default=50, help='')
parser.add_argument('--latent_dim', default=10, help='')
parser.add_argument('--reg_weight', default=0.1, help='')

parser.add_argument('--compress', default=True, help='')
parser.add_argument('--compression_scheme', default='qsgd', help='')
parser.add_argument('--compress_downlink', default=False, help='')
parser.add_argument('--inner_epochs', default=1, help='')
parser.add_argument('--nbits', default=4, help='')

args = vars(parser.parse_args([]))
pp.pprint(args)

# Set the random seed
torch.manual_seed(int(args['random_seed']))

# some special parameters
num_workers = 5
loss_func = nn.MSELoss

run_data = []
device = torch.device(args['device'])

# eval and train models and copy the train params to eval
dgcca = MnistAEDGCCA(output_size=args['latent_dim'], network=MnistAELinearBN)
dgcca = dgcca.to('cpu')

# Get train and validation dataset
train_views = list(train_set.to('cpu'))
val_views = list(val_set.to('cpu'))

optimizer = torch.optim.Adam(dgcca.parameters(), lr=args['lr'])
scheduler = MultiStepLR(optimizer, [30,70], gamma=0.8)
num_batches = len(train_views[0])//args['batch_size']

criterion = loss_func()

out = torch.stack(dgcca(train_views))
G_serv = g_step(out.clone().detach())  

M_serv = out.detach().clone()
G_client = G_serv.clone()

I = len(train_views)

for epoch in range(70):
    total_recons_loss = 0
    total_val_loss = 0
    batch_count = 0
    total_ae_loss = 0
    total_dgcca_loss = 0
    
    dgcca.train()
    dgcca.to(device)
    
    for _ in trange(args['inner_epochs']):
        for i in trange(num_batches):

            optimizer.zero_grad()
            batch = []

            # mini batch gradient
            batch = [view[(i*args['batch_size']):((i+1)*args['batch_size']), :].to(device) for view in train_views]            
            target = G_client[(i*args['batch_size']):((i+1)*args['batch_size']), :].to(device)

            latent = dgcca(batch)
            
            ae_loss = (args['latent_dim']/(2*28*28*target.shape[0]))*torch.norm(torch.stack(dgcca.decode(latent)) - torch.stack(batch))
            
            dgcca_loss = 1/2*torch.norm(torch.stack(latent)-target)/target.shape[0] 

            loss = dgcca_loss + args['reg_weight']*ae_loss

            loss.backward()

            optimizer.step()

            total_recons_loss += loss.item()
            total_ae_loss += ae_loss.item()
            total_dgcca_loss += dgcca_loss.item()
            del batch, target, latent
    
    scheduler.step()
    
    dgcca.eval()
    dgcca.to('cpu')
    out = torch.stack(dgcca(train_views)).detach().clone()        
    if args['compress']:
        for i in range(I):
            diff = out[i] - M_serv[i]
            if args['compression_scheme'] == 'qsgd':
                quant = qsgd(diff, n_bits=args['nbits'])
            elif args['compression_scheme'] == 'deterministic':
                quant = deterministic_quantize(diff, n_bits=args['nbits'])
            M_serv[i] += quant
            M_serv[i] -= M_serv[i].mean(dim=0)
            del diff, quant

        G_serv = g_step(M_serv.clone().detach())
        if args['compress_downlink']:
            if args['compression_scheme'] == 'qsgd':
                G_client = G_client + qsgd(G_serv-G_client, n_bits=args['nbits'])
            elif args['compression_scheme'] == 'deterministic':
                G_client = G_client + deterministic_quantize(G_serv-G_client, n_bits=args['nbits'])
        else:
            G_client = G_serv.clone()
    else:
        out = out - out.mean(dim=1).unsqueeze(dim=1)
        M_serv = out.clone()
        G_serv = g_step(M_serv.clone().detach())  
        G_client = G_serv.clone() 
    del out
        
    # validation loss
    out_val = dgcca(val_views)
    out_val = torch.stack(out_val)    
    G_val = g_step(out_val.clone().detach())
    
    loss_val = 1/2*torch.norm(out_val-G_val)/G_val.shape[0]
    total_val_loss += loss_val.item()
    del G_val, out_val

    
    results = OrderedDict()
    results['epoch'] = epoch
    results['total_loss'] = total_recons_loss/(num_batches*args['inner_epochs'])
    results['ae_loss'] = total_ae_loss/(num_batches*args['inner_epochs'])
    results['dgcca_loss'] = total_dgcca_loss/(num_batches*args['inner_epochs'])
    results['val_fidelity'] = total_val_loss
#     results['diff_norm'] = diff_norm
    results['lr'] = args['lr']
    
    run_data.append(results)
    df3 = pd.DataFrame.from_dict(run_data, orient='columns')
    clear_output(wait=True)
    display(df3)
#     df3.to_pickle('plt/train_curve_mnist_dist.pkl')
    torch.save(dgcca, args['model_dest'])
    

Unnamed: 0,epoch,total_loss,ae_loss,dgcca_loss,val_fidelity,lr
0,0,0.007258,0.003437,0.006915,0.002658,0.001
1,1,0.001905,0.003330,0.001572,0.002734,0.001
2,2,0.001771,0.003277,0.001443,0.002660,0.001
3,3,0.001690,0.003208,0.001369,0.002665,0.001
4,4,0.001656,0.003123,0.001344,0.002565,0.001
...,...,...,...,...,...,...
65,65,0.001841,0.002825,0.001559,0.003005,0.001
66,66,0.001968,0.002820,0.001686,0.002865,0.001
67,67,0.001949,0.002801,0.001669,0.002986,0.001
68,68,0.002149,0.002793,0.001869,0.002827,0.001


In [5]:
import torch
import matplotlib.pyplot as plt
from models import g_step, MnistDGCCA
from dataset import get_mnist_dataset 
import numpy as np

test_set, test_classes = get_mnist_dataset(train=False, normalize=False)
test_views = list(test_set)

model_path = '../trained_models/dgcca_mnist_cutemaxvar_compressed.model'

dgcca = torch.load(model_path)

out = torch.stack(dgcca(test_views))
G = g_step(out.clone().detach())  

import numpy as np
from sklearn.manifold import TSNE

X = G[:10000].numpy()
X_embedded = TSNE(n_components=2).fit_transform(X)

fig, axes = plt.subplots(1,1)

axes.scatter(X_embedded[:3000,0], X_embedded[:3000,1], c=test_classes[:3000].numpy())
axes.set_title('t-SNE plot of latent representation G')

AttributeError: 'list' object has no attribute 'size'

In [7]:
batch[0].shape

torch.Size([1000, 1, 28, 28])