In [None]:
import gc

# Force a garbage-collection pass before the rest of the notebook runs.
gc.collect()


In [None]:
# Download the NIID-Bench repository into the current working directory.
!git clone https://github.com/Xtra-Computing/NIID-Bench


In [None]:
# Change into the cloned repository. The explicit %cd magic is used instead of
# a bare `cd`, which only works while automagic is enabled and no Python
# variable named `cd` exists.
%cd NIID-Bench/


**Removing previous experiment files (if they exist!)**

In [None]:
import os
import glob

# Directory holding the experiment artefacts, relative to the repository root
# the notebook has already changed into (same location the later cells read).
LOG_DIR = 'logs'

# Delete leftover argument dumps (.json) and logs (.log) so that only files
# produced by the upcoming runs are picked up afterwards.
for ext in ('.json', '.log'):
    for stale_path in glob.glob(os.path.join(LOG_DIR, f'*{ext}')):
        os.remove(stale_path)

In [None]:
# Run the same configuration once per federated-learning algorithm. Only --alg
# differs between the runs, so the command is written once and IPython
# substitutes {alg} from the loop variable.
for alg in ['fedprox', 'fedavg', 'scaffold', 'fednova']:
    !python experiments.py --model=simple-cnn --dataset=cifar10 --alg={alg} --lr=0.01 --batch-size=64 --rho=0.5 --epochs=5 --n_parties=10 --mu=0.1 --comm_round=5 --partition=noniid-labeldir --beta=0.5 --device='cuda' --datadir='./data/' --logdir='./logs/' --noise=0 --sample=1 --init_seed=0

In [None]:
import re


def string_to_dict(string):
    """
    Converts a string representing a Namespace object to a dictionary.

    The expected input looks like ``"Namespace(alg='fedavg', beta=0.5)"``; the
    surrounding double quotes and the ``Namespace(...)`` wrapper are both
    optional. All values are returned as strings with surrounding single
    quotes removed (no type conversion is attempted).

    Limitation: the text is split on commas, so a value that itself contains a
    comma (e.g. a list) is truncated at its first comma.

    Args:
        string: The string to be converted.

    Returns:
        A dictionary containing the key-value pairs from the Namespace.
        Fragments without an ``=`` are ignored; empty input yields ``{}``.
    """
    text = string.strip()

    # Drop the double quotes that enclose the whole text, if present.
    if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
        text = text[1:-1]

    # Drop the "Namespace(" prefix together with its closing parenthesis so the
    # last value does not keep a trailing ')'.
    prefix = 'Namespace('
    if text.startswith(prefix) and text.endswith(')'):
        text = text[len(prefix):-1]

    result = {}
    for pair in text.split(','):
        # partition splits on the first '=' only, so values containing '='
        # stay intact and fragments without '=' can be detected and skipped.
        key, separator, value = pair.partition('=')
        if not separator:
            continue
        result[key.strip().strip('"')] = value.strip().strip("'")

    return result

def process_experiment(json_file):
    """
    Processes the arguments json file and respective experiment log

    The arguments file is parsed with string_to_dict, the timestamp embedded in
    its name is used to locate ``experiment_log-<timestamp>.log``, and every
    value logged on a line containing "Test accuracy" (but not "Pre-Training")
    is collected in file order.

    Args:
        json_file: path to an ``experiment_arguments-<timestamp>.json`` file

    Returns:
        Dictionary with the keys 'model', 'dataset', 'alg', 'beta',
        'partition' and 'last_test_accuracy' (list of floats), or None when
        the file name carries no timestamp, no log file is found, or any of
        these values is missing/empty. A message is printed in each None case.
    """
    with open(json_file, 'r') as f:
        args = string_to_dict(f.read())

    partition = args.get('partition', None)
    beta = args.get('beta', None)
    dataset = args.get('dataset', None)
    alg = args.get('alg', None)
    model = args.get('model', None)

    # Extract timestamp from the json file name
    match = re.search(
        r"experiment_arguments-(\d{4}-\d{2}-\d{2}-\d{2}:\d{2}-\d{2})\.json",
        json_file,
    )
    if match is None:
        print(f"Cannot extract timestamp from {json_file}")
        return None
    timestamp = match.group(1)

    # Look for the log next to the arguments file first; fall back to the
    # relative 'logs' directory that was previously hard-coded.
    log_name = f"experiment_log-{timestamp}.log"
    candidates = [
        os.path.join(os.path.dirname(json_file), log_name),
        os.path.join("logs", log_name),
    ]
    log_file = next((path for path in candidates if os.path.isfile(path)), None)
    if log_file is None:
        print(f"No log file found for {json_file}")
        return None

    # NOTE(review): the filter matches any line containing "Test accuracy", so
    # "Global Model Test accuracy" lines and any other such lines end up in
    # the same list -- confirm this mix is intended.
    all_test_accuracies = []
    with open(log_file, 'r') as file:
        for line in file:
            if "Test accuracy" in line and "Pre-Training" not in line:
                try:
                    # The value is whatever follows the last ':' on the line.
                    accuracy = float(line.rsplit(":", 1)[-1].strip())
                except ValueError:
                    # Skip lines whose tail is not a number instead of aborting.
                    continue
                all_test_accuracies.append(accuracy)

    if all([partition, beta, dataset, alg, model, all_test_accuracies]):
        return {
            'model': model,
            'dataset': dataset,
            'alg': alg,
            'beta': beta,
            'partition': partition,
            'last_test_accuracy': all_test_accuracies
        }

    print(f"Missing parameters for {json_file}")
    return None

In [None]:
# Find every experiment arguments file. glob returns paths in arbitrary order,
# so they are sorted to keep the order of all_results (and of anything drawn
# from it) the same on every run.
json_files = sorted(glob.glob('logs/experiment_arguments*'))

# Process every json file, keeping only the experiments that produced a result
# (process_experiment returns None for the ones it cannot handle).
all_results = [
    result
    for result in (process_experiment(json_file) for json_file in json_files)
    if result
]



**Checking the content of the experiment arguments (json file) and experiment logs (log file)**

In [None]:
# Show one arguments file. The path is relative to the repository root (the
# previous '/logs/...' pointed at the filesystem root). The timestamp is
# hard-coded; update it to match a file present under logs/.
!cat logs/experiment_arguments-2024-12-11-14:41-55.json

In [None]:
# Print one experiment log. The timestamp in the file name is hard-coded and
# has to match a file present under logs/.
!cat logs/experiment_log-2024-12-11-14:41-55.log

In [None]:
import matplotlib.pyplot as plt

def plot_results(results_dict):
    """
    Plot the recorded test accuracies of several experiments on one figure.

    One line is drawn per experiment and labelled with its 'alg' value. The
    title is built from the 'partition', 'model' and 'dataset' of the last
    experiment in the list.

    Args:
        results_dict: list of dictionaries, where each dictionary represents an
            experiment and provides the keys 'alg', 'last_test_accuracy',
            'partition', 'model' and 'dataset'.

    Returns:
        None. When the list is empty a message is printed and nothing is drawn.
    """
    results = list(results_dict)
    if not results:
        # The title needs at least one experiment; bail out before plotting.
        print("No experiment results to plot")
        return

    for rslt in results:
        plt.plot(rslt['last_test_accuracy'], label=f"{rslt['alg']}")

    last = results[-1]
    plt.xlabel("Epoch")
    plt.ylabel("Test acc")
    plt.title(f"{last['partition']} {last['model']} on {last['dataset']}")
    plt.legend()
    plt.show()

# Draw the comparison for every experiment stored in all_results.
plot_results(all_results)
