In [None]:
import numpy as np
import cupy as cp
import pathlib
import csv
import sys

In [None]:
# Put the parent directory on the import path; the `lib` package imported in
# the following cell is presumably located there.
_repo_root = str(pathlib.Path('../').resolve())
if _repo_root not in sys.path:  # keep the cell idempotent when it is re-run
    sys.path.append(_repo_root)

In [None]:
from lib.experiments import train_test_experiment, build_CNN, build_augmentation_pipeline
from lib.utils.enums import NonLinearity, Initialization, Dataset, OptimizerName
from lib.utils.data import load_integer_dataset
from lib.utils.misc import to_cpu

## Experiments config

In [None]:
# Layer specification of the convolutional part of the network; it is stored
# in the experiment config under the 'architecture' key.
# NOTE(review): tuples look like (layer type, out channels, kernel size,
# stride, padding) for Conv2d and (layer type, kernel size, stride) for
# MaxPool2d -- confirm against build_CNN.
architecture = [
    # two convolutions followed by a pooling layer
    ('Conv2d', 128, 3, 1, 1), ('Conv2d', 256, 3, 1, 1), ('MaxPool2d', 2, 2),
    # two convolutions followed by a pooling layer
    ('Conv2d', 256, 3, 1, 1), ('Conv2d', 512, 3, 1, 1), ('MaxPool2d', 2, 2),
    # single convolution followed by a pooling layer
    ('Conv2d', 512, 3, 1, 1), ('MaxPool2d', 2, 2),
    # single convolution followed by a pooling layer
    ('Conv2d', 512, 3, 1, 1), ('MaxPool2d', 2, 2),
]

# Fully connected head settings, stored in the config under the same names.
num_fc_hidden = [1024]
num_fc_layers = 2

In [None]:
# Dataset-related options; merged into the global `config` dictionary.
dataset_config = {
    "dataset": Dataset.CIFAR10.name,
    "subsample_ratio": 0.5,
    # forwarded to load_integer_dataset as its `ohe_values` argument
    "ohe_values": (0, 32),
    "data_augmentation": False,
    "normalize": True,
}

In [None]:
# Network-related options; merged into the global `config` dictionary.
network_config = {
    # topology defined in the architecture cell
    "architecture": architecture,
    "num_fc_layers": num_fc_layers,
    "num_fc_hidden": num_fc_hidden,
    # enum members are stored by name
    "non_linearity": NonLinearity.NITRO_LEAKY_RELU.name,
    "init": Initialization.UNIFORM_KAIMING_LEAKY_RELU.name,
    "pred_decoder_dim": 4096,
    "fe_dropout_rate": 0.0,
    "fc_dropout_rate": 0.0,
    # also used by run_experiment to seed numpy and cupy
    "seed": 42,
    "dtype": 'int32',
    "device": 'cuda',
    "local_loss": 'pred',
    "pooling_type": 'max',
    "bias": False,
    "debug": False,
}

In [None]:
# Training-related options; merged into the global `config` dictionary.
training_config = {
    "batch_size": 64,
    "num_epochs": 30,
    "lr_inv": 512,
    "lr_amp_factor": 640,
    # placeholders: both decay settings are overwritten by the sweep loop
    "fwd_decay_inv": None,
    "subnet_decay_inv": None,
    "optimizer": OptimizerName.INTEGER_SGD.name,
}

In [None]:
# Single flat configuration dictionary handed to every experiment helper.
# On duplicate keys the right-most dictionary wins.
config = {**dataset_config, **network_config, **training_config}

In [None]:
def _notify_telegram(text, timeout=10):
    """Send ``text`` to a Telegram chat without ever raising on delivery problems.

    The bot token and chat id are read from the ``TELEGRAM_BOT_TOKEN`` and
    ``TELEGRAM_CHAT_ID`` environment variables so that no credential is stored
    in the notebook. When either variable is missing the notification is
    skipped.

    Args:
        text: Message body to send.
        timeout: Seconds to wait for the Telegram API before giving up.

    Returns:
        True if the request completed with a success status, False otherwise.
    """
    import os

    api_token = os.environ.get("TELEGRAM_BOT_TOKEN")
    chat_id = os.environ.get("TELEGRAM_CHAT_ID")
    if not api_token or not chat_id:
        print("Telegram notification skipped: TELEGRAM_BOT_TOKEN / TELEGRAM_CHAT_ID not set")
        return False

    import requests

    try:
        response = requests.post(
            f'https://api.telegram.org/bot{api_token}/sendMessage',
            params={"chat_id": chat_id, "text": text},
            timeout=timeout,  # without a timeout a stalled connection blocks the sweep
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        # Report only the exception type: the exception message embeds the
        # request URL, which contains the bot token.
        print(f"Telegram notification failed: {type(exc).__name__}")
        return False
    return True


def run_experiment(exp_config):
    """Train and evaluate one model and log its accuracies and weight magnitudes.

    The function seeds numpy and cupy, loads the dataset, builds the CNN,
    trains it, appends one result row to ``./weight_decay.csv`` and finally
    sends a best-effort Telegram notification.

    Args:
        exp_config: Experiment configuration dictionary. This function reads
            the keys ``fwd_decay_inv``, ``subnet_decay_inv``, ``seed`` and
            ``ohe_values`` directly; the whole dictionary is also forwarded to
            the dataset, augmentation, model and training helpers.

    Returns:
        None. Each CSV row holds: fwd_decay_inv, subnet_decay_inv, best train
        accuracy, best test accuracy, then one mean absolute weight per layer
        returned by ``get_layers_with_parameters()``.
    """
    print(f"{'-' * 64}")
    print(f"Running experiment with fwd_decay_inv = {exp_config['fwd_decay_inv']} and lrn_decay_inv={exp_config['subnet_decay_inv']}")

    # Setup reproducibility
    seed = exp_config["seed"]
    np.random.seed(seed)
    cp.random.seed(seed)

    # Load the dataset; the two validation entries of the returned tuple are unused
    X_tr, _, X_test, y_tr, _, y_test = load_integer_dataset(
        exp_config, ohe_values=exp_config["ohe_values"], val_dim=0,
        show_images=False, show_log=False, data_path='../.data',
    )

    # Setup data augmentation
    augmentation_pipeline = build_augmentation_pipeline(exp_config)

    # Build and train the model
    m = build_CNN(exp_config, X_tr=X_tr, y_tr=y_tr)
    best_train_acc, best_test_acc = train_test_experiment(
        m, exp_config, X_tr=X_tr, X_te=X_test, y_tr=y_tr, y_te=y_test,
        augmentation_pipeline=augmentation_pipeline,
        show_local_accuracies=False, show_progress_bar=False
    )

    # Mean absolute weight of every layer that has parameters
    weights_norm = [
        to_cpu(np.mean(np.abs(layer.weights)))
        for layer in m.get_layers_with_parameters()
    ]

    # Append the results to a CSV file (no header row is written)
    with open('./weight_decay.csv', mode='a', newline='') as f:
        csv.writer(f).writerow([
            exp_config["fwd_decay_inv"], exp_config["subnet_decay_inv"],
            best_train_acc, best_test_acc, *weights_norm,
        ])

    # Best-effort notification: a delivery failure must not abort the sweep
    _notify_telegram("Weight decay effect run complete")

In [None]:
# Grid of values swept over the `fwd_decay_inv` and `subnet_decay_inv` config keys.
# NOTE(review): 0 presumably means "no decay" -- confirm against the optimizer.
fwd_decay_values = [0, 20_000, 15_000, 10_000, 5_000]
subnet_decay_values = [0, 3_000, 1_000]

In [None]:
# Run one experiment per (fwd_decay_inv, subnet_decay_inv) combination.
# The shared `config` dictionary is updated in place before each run, so after
# the loop it holds the last combination.
for fwd_decay_inv, subnet_decay_inv in (
    (fwd, subnet) for fwd in fwd_decay_values for subnet in subnet_decay_values
):
    config.update(fwd_decay_inv=fwd_decay_inv, subnet_decay_inv=subnet_decay_inv)
    run_experiment(config)

In [None]:
# Notify via Telegram that the whole sweep has finished.
# Credentials come from the TELEGRAM_CHAT_ID / TELEGRAM_BOT_TOKEN environment
# variables; secrets must never be stored in the notebook. The notification is
# best-effort: it is skipped when the variables are unset and a delivery
# failure is only reported, not raised.
import os

tg_chat_id = os.environ.get("TELEGRAM_CHAT_ID")
tg_api_token = os.environ.get("TELEGRAM_BOT_TOKEN")
text = "Weight decay effect experiment has finished!"

if tg_chat_id and tg_api_token:
    import requests

    try:
        requests.post(
            f'https://api.telegram.org/bot{tg_api_token}/sendMessage',
            params={"chat_id": tg_chat_id, "text": text},
            timeout=10,  # never let a stalled connection hang the cell
        ).raise_for_status()
    except requests.RequestException as exc:
        # Report only the exception type: the exception message embeds the
        # request URL, which contains the bot token.
        print(f"Telegram notification failed: {type(exc).__name__}")
else:
    print("Telegram notification skipped: TELEGRAM_BOT_TOKEN / TELEGRAM_CHAT_ID not set")