In [1]:
# Shell escape: install the Optuna package (imported below as `optuna`) with pip;
# `--quiet` reduces pip's console output. No version is pinned here.
! pip install optuna --quiet

[K     |████████████████████████████████| 348 kB 8.8 MB/s 
[K     |████████████████████████████████| 81 kB 9.9 MB/s 
[K     |████████████████████████████████| 209 kB 75.6 MB/s 
[K     |████████████████████████████████| 78 kB 8.7 MB/s 
[K     |████████████████████████████████| 147 kB 61.2 MB/s 
[K     |████████████████████████████████| 50 kB 7.4 MB/s 
[K     |████████████████████████████████| 112 kB 69.9 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [13]:
import torch
import optuna
import numpy as np
import matplotlib.pylab as plt
from IPython.display import clear_output
import torch.nn as nn 
import torch.optim as optim

import antidistil
import pipeline
import consts
from importlib import reload
import plot
import json
# Re-import the project-local modules (same order as before) so that edits made
# to their source files are picked up without restarting the kernel.
for _module in (plot, antidistil, pipeline, consts):
    reload(_module)

# Project helper from plot.py; called once up front before any figure is drawn.
plot.prepare_for_plots()

Using cuda device


<Figure size 864x720 with 0 Axes>

In [14]:
# pipeline.get_data() yields two objects; the first is passed to the training
# loops below and the second to the evaluation (test_loop*) calls.
train_dataloader, test_dataloader = pipeline.get_data()

In [15]:
def train_antidistill_loop(l1, l2, l3, l4, antidistil_epochs=10, training_epochs=10):
    """Train and evaluate anti-distilled students for one set of loss weights.

    For each of ``consts.num_repeats`` repetitions this loads the saved teacher
    ``teacher_5cl_{i}.pt``, builds a fresh student, runs the anti-distillation
    phase with the given lambdas, then trains the student with cross-entropy
    and finally runs the FGSM and parameter-noise evaluations.

    Args:
        l1, l2, l3, l4: Weights forwarded, as the list ``[l1, l2, l3, l4]``,
            to ``pipeline.antidistil_loop``.
        antidistil_epochs: Number of anti-distillation epochs per repetition
            (defaults to 10, the previously hard-coded value).
        training_epochs: Number of cross-entropy training epochs per repetition
            (defaults to 10, the previously hard-coded value).

    Returns:
        The last element of ``history['fsgm_noise_acc']`` averaged over axis 0.
        NOTE(review): that key is not created here; presumably
        ``pipeline.test_loop_fsgm`` adds it to ``history`` -- confirm.
    """
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    for i in range(consts.num_repeats):
        clear_output()
        print(f"Model {i+1}\n-------------------------------"
                          "\n-------------------------------")

        # One seed per repetition so each student starts from a repeatable state.
        torch.manual_seed(i)

        teacher = pipeline.make_teacher_model()
        # map_location lets a checkpoint saved on another device (e.g. CUDA)
        # be loaded when consts.device is different (e.g. a CPU-only machine).
        teacher_state = torch.load(pipeline.get_path()+f'/teacher_5cl_{i}.pt',
                                   map_location=consts.device)
        teacher.load_state_dict(teacher_state)
        model = antidistil.make_student_model()

        # Phase 1: anti-distillation with only the first five of ten mask
        # entries enabled (presumably one entry per class -- TODO confirm).
        mask = torch.tensor([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype=torch.float).to(consts.device)

        loss_fn = antidistil.altidistill_loss
        optimizer = optim.Adam(model.parameters(), lr=consts.student_5_antidistil_learning_rate)
        # The scheduler is handed to antidistil_loop; where it is stepped is
        # not visible from this cell.
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.85)

        lambdas = [l1, l2, l3, l4]

        for epoch in range(antidistil_epochs):
            print(f"Epoch {epoch+1}\n-------------------------------")

            pipeline.antidistil_loop(teacher, model, lambdas, mask, train_dataloader,
                                    loss_fn, optimizer, scheduler, noise_dist='uniform', noise_eps=1e-1)

        # Phase 2: ordinary cross-entropy training with a fresh optimizer
        # (no scheduler) and every mask entry enabled.
        optimizer = optim.Adam(model.parameters(), lr=consts.student_5_learning_rate)

        mask = torch.ones(10).to(consts.device)

        loss_fn = nn.CrossEntropyLoss()

        for epoch in range(training_epochs):
            print(f"Epoch {epoch+1}\n-------------------------------")

            pipeline.train_loop(model, history, mask, train_dataloader, loss_fn, optimizer)
            pipeline.test_loop(model, history, mask, test_dataloader, loss_fn)

        # Robustness evaluations, run once per repetition after training.
        pipeline.test_loop_fsgm(model, history, mask, test_dataloader, loss_fn, consts.fsgm_eps)
        pipeline.test_loop_noise(model, history, mask, test_dataloader, consts.noise_eps)

    return np.array(history['fsgm_noise_acc']).mean(axis=0)[-1]

In [None]:
def objective(trial):
    """Optuna objective: sample four weights in [0, 1] and score them.

    The parameters are registered with the trial under the names
    'l1', 'l2', 'l3' and 'l4' (in that order) and passed positionally to
    ``train_antidistill_loop``, whose return value is the objective value.
    """
    weights = [trial.suggest_float(name, 0, 1) for name in ('l1', 'l2', 'l3', 'l4')]
    return train_antidistill_loop(*weights)

# Maximise the value returned by `objective` over 10 trials.
# The sampler is seeded explicitly so the sequence of suggested lambdas is the
# same on every run; TPESampler is also what create_study uses by default for
# a single-objective study, so only the seeding changes.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=0),
)
study.optimize(objective, n_trials=10)

In [None]:
# Display the trial(s) Optuna reports as best for the finished study.
study.best_trials

In [None]:
# Importance scores say how much each lambda influenced the objective; they are
# NOT the tuned values, so they are kept under their own name.
param_importances = optuna.importance.get_param_importances(study)

# The tuned lambda values of the best trial, stored in l1..l4 order because the
# final training cell consumes them positionally.
best_params = {name: study.best_params[name] for name in ('l1', 'l2', 'l3', 'l4')}

param_importances, best_params

In [None]:
# Final run: repeat the anti-distillation + training pipeline with the lambdas
# of the best Optuna trial, collecting metrics in `history` for the plots below.
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

for i in range(consts.num_repeats):
    clear_output()
    print(f"Model {i+1}\n-------------------------------"
                      "\n-------------------------------")

    # One seed per repetition so each student starts from a repeatable state.
    torch.manual_seed(i)

    teacher = pipeline.make_teacher_model()
    # map_location lets a checkpoint saved on another device (e.g. CUDA) be
    # loaded when consts.device is different (e.g. a CPU-only machine).
    teacher_state = torch.load(pipeline.get_path()+f'/teacher_5cl_{i}.pt',
                               map_location=consts.device)
    teacher.load_state_dict(teacher_state)
    model = antidistil.make_student_model()

    # Phase 1: anti-distillation with only the first five of ten mask entries
    # enabled (presumably one entry per class -- TODO confirm).
    mask = torch.tensor([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype=torch.float).to(consts.device)

    loss_fn = antidistil.altidistill_loss
    optimizer = optim.Adam(model.parameters(), lr=consts.student_5_antidistil_learning_rate)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.85)

    # Use the tuned values of the best trial, looked up by name so they line up
    # with the (l1, l2, l3, l4) order used during the search. Taking the values
    # of the importance mapping here would feed importance scores, sorted by
    # importance, into the loss instead of the tuned weights.
    lambdas = [study.best_params[name] for name in ('l1', 'l2', 'l3', 'l4')]

    for epoch in range(consts.student_5_antidistil_epochs):
        print(f"Epoch {epoch+1}\n-------------------------------")

        pipeline.antidistil_loop(teacher, model, lambdas, mask, train_dataloader,
                                loss_fn, optimizer, scheduler, noise_dist='uniform', noise_eps=1e-1)

    # Phase 2: ordinary cross-entropy training with a fresh optimizer
    # (no scheduler) and every mask entry enabled.
    optimizer = optim.Adam(model.parameters(), lr=consts.student_5_learning_rate)

    mask = torch.ones(10).to(consts.device)

    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(consts.student_5_training_epochs):
        print(f"Epoch {epoch+1}\n-------------------------------")

        pipeline.train_loop(model, history, mask, train_dataloader, loss_fn, optimizer)
        pipeline.test_loop(model, history, mask, test_dataloader, loss_fn)

    # Robustness evaluations, run once per repetition after training.
    pipeline.test_loop_fsgm(model, history, mask, test_dataloader, loss_fn, consts.fsgm_eps)
    pipeline.test_loop_noise(model, history, mask, test_dataloader, consts.noise_eps)

In [26]:
# Validation accuracy per training epoch for the final run.
reload(plot)
plot.prepare_for_plots()

# x-axis: epoch numbers 1..student_5_training_epochs.
epoch_axis = np.arange(1, consts.student_5_training_epochs + 1)
plot.plot_variance(
    [history], epoch_axis, ['From scratch'], 'val_acc', 'Epoch', 'Accuracy',
)

your plot


In [27]:
# Accuracy under the FGSM evaluation, plotted against the epsilon values that
# were passed to pipeline.test_loop_fsgm.
reload(plot)
plot.prepare_for_plots()
plot.plot_variance(
    [history], consts.fsgm_eps, ['From scratch'], 'fsgm_noise_acc', 'FSGM eps', 'Accuracy',
)

your plot


In [28]:
# Accuracy under the parameter-noise evaluation.
# The x-axis must be consts.noise_eps, the values passed to
# pipeline.test_loop_noise, rather than the FGSM epsilons used by the
# previous plot.
reload(plot)
plot.prepare_for_plots()
plot.plot_variance([history], consts.noise_eps,
                   ['From scratch'], 'param_noise_acc',
                    'Param noise eps', 'Accuracy')

your plot


In [None]:
# Persist the collected metrics of the final run as a JSON file under the
# project path returned by pipeline.get_path().
history_path = pipeline.get_path()+'/history_best_antidistill_with_L4.json'
with open(history_path, 'w') as sink:
    serialized = json.dumps(history)
    sink.write(serialized)