# Longer runs

_15 August 2021_ <br />
_Chuan-Zheng Lee <czlee@stanford.edu>_ <br />
_Rough working_

Since the neural network plots don't seem to indicate that the digital scheme eventually catches up to the analog scheme, I ran the experiments for 1200 rounds to see whether the longer runs show anything interesting.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt

from plots_utils import plot_averaged_training_charts, plot_comparison, make_axes, show_timestamp_info

In [None]:
def get_comparison_specs(dataset, noise='__all__'):
    """Build the spec dictionaries for the analog and digital experiments.

    Both schemes use the same title specs (which depend on `dataset`) and the
    same series specs (which hold `noise`); only the fixed specs differ.

    Arguments:
        dataset: dataset name. 'cifar10' selects a client learning rate of
            0.001 with client momentum 0.9; any other value selects 0.01
            and 0.0.
        noise: value stored under the 'noise' key of the series specs.

    Returns:
        A pair `(all_analog_specs, all_digital_specs)`, each a tuple
        `(title_specs, fixed_specs, series_specs)`. The title and series
        dictionaries are the very same objects in both tuples.
    """
    is_cifar = dataset == 'cifar10'

    # Settings fixed for both the analog and the digital runs.
    shared_fixed = {
        'rounds': 1200,
        'batch_size': 64,
        'epochs': 1,
        'ema_coefficient': 1 / 3,
        'data_per_client': None,
        'save_squared_error': False,
        'send': 'deltas',
    }

    # Shared settings first, then the scheme-specific ones, so that the key
    # order matches copying the shared dict and updating it.
    analog_fixed = {
        **shared_fixed,
        'power_update_period': 1,
        'power_quantile': 0.9,
        'power_factor': 0.9,
    }
    digital_fixed = {
        **shared_fixed,
        'qrange_update_period': 1,
        'qrange_param_quantile': 0.9,
        'qrange_client_quantile': 0.9,
        'channel_uses': None,
        'rounding_method': 'stochastic',
        'parameter_schedule': 'staggered',
        'zero_bits_strategy': 'read-zero',
    }

    title_specs = {
        'power': 1.0,
        'clients': 20,
        'dataset': dataset,
        'lr_client': 0.001 if is_cifar else 0.01,
        'momentum_client': 0.9 if is_cifar else 0.0,
    }
    series_specs = {'noise': noise}

    return (
        (title_specs, analog_fixed, series_specs),
        (title_specs, digital_fixed, series_specs),
    )

def plot_analog_vs_digital(paths, dataset, noise='__all__', **kwargs):
    """Draw the analog-versus-digital accuracy comparison for `dataset`.

    Arguments:
        paths: iterable that is unpacked into the positional arguments
            passed to `plot_comparison` right after the 'accuracy' field.
        dataset, noise: forwarded to `get_comparison_specs`.
        **kwargs: forwarded unchanged to `plot_comparison`.

    A grid is switched on for the current axes after plotting.
    """
    analog_specs, digital_specs = get_comparison_specs(dataset, noise)
    plot_comparison('accuracy', *paths, analog_specs, digital_specs, **kwargs)
    plt.grid()

In [None]:
# Two result directories per scheme: the "dynpower" runs are used as the
# analog results and the "dynquant" runs as the digital results.
analog_path = [
    Path(directory)
    for directory in (
        "results/20210813-popeye1-cifarmnist-long-dynpower/",
        "results/20210813-popeye2-cifarmnist-long-dynpower/",
    )
]
digital_path = [
    Path(directory)
    for directory in (
        "results/20210813-popeye1-cifarmnist-long-dynquant/",
        "results/20210813-popeye2-cifarmnist-long-dynquant/",
    )
]

# Pair of (analog, digital) directory lists, in that order.
paths = (analog_path, digital_path)

In [None]:
# Timestamp info for the analog (dynpower) result directories.
show_timestamp_info(analog_path)

In [None]:
# Timestamp info for the digital (dynquant) result directories.
show_timestamp_info(digital_path)

In [None]:
# Analog vs digital accuracy on fashion-mnist, all noise levels (default).
plot_analog_vs_digital(paths, 'fashion-mnist')

In [None]:
# Same fashion-mnist comparison, with plot_quartiles=True forwarded to
# plot_comparison.
plot_analog_vs_digital(paths, 'fashion-mnist', plot_quartiles=True)

In [None]:
# Analog vs digital accuracy on cifar10, all noise levels (default).
plot_analog_vs_digital(paths, 'cifar10')

In [None]:
# Same cifar10 comparison, with plot_range=True forwarded to plot_comparison.
plot_analog_vs_digital(paths, 'cifar10', plot_range=True)
# Restrict the y-axis to the 0.40-0.47 band.
plt.ylim((0.40, 0.47))

# Simple (non-federated) learning

This is mostly a check to see what the convergence accuracy for this model is.

In [None]:
# Results of the long simple (non-federated) fashion-mnist run.
results_dir = Path("results/20210813-fmnist-simple-long/")
show_timestamp_info(results_dir)

In [None]:
# Specs for the simple fashion-mnist run. They are passed to
# plot_averaged_training_charts in the order title, fixed, series.
title_specs = dict(dataset='fashion-mnist', learning_rate=0.01, momentum=0.0)
fixed_specs = dict(batch_size=64, epochs=8000)
series_specs = dict()

plot_averaged_training_charts(
    results_dir,
    ['accuracy', 'test_loss', 'train_loss'],
    title_specs,
    fixed_specs,
    series_specs,
)

This does look somewhat like the model overfitted.

In [None]:
# Results of the long simple (non-federated) cifar10 run.
results_dir = Path("results/20210813-cifar10-simple-long/")
# The timestamp listing is disabled for this directory.
# show_timestamp_info(results_dir)

In [None]:
# Specs for the simple cifar10 run. They are passed to
# plot_averaged_training_charts in the order title, fixed, series.
title_specs = dict(dataset='cifar10', learning_rate=0.001, momentum=0.9)
fixed_specs = dict(batch_size=64, epochs=8000)
series_specs = dict()

plot_averaged_training_charts(
    results_dir,
    ['accuracy', 'test_loss', 'train_loss'],
    title_specs,
    fixed_specs,
    series_specs,
)