In [1]:
import random
import numpy as np
import os
import sys
import torch
from torchvision.transforms import transforms
from torchvision import datasets
from collections import Counter
from torch import nn
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Subset, ConcatDataset, random_split
import numpy as np
from copy import deepcopy
import numpy as np
import torch
import torch.nn.functional as F
import pickle
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import re
import os
import itertools
from operator import itemgetter

In [2]:
# Detect whether the notebook runs on Google Colab by probing for the
# `google.colab` package. Only ImportError is treated as "not on Colab";
# a bare `except:` would also hide KeyboardInterrupt/SystemExit and any
# unrelated failure raised while importing.
try:
  import google.colab
  IN_COLAB=True
except ImportError:
  IN_COLAB=False

if IN_COLAB:
  from google.colab import drive
  # Connect to Google drive where the training data is located
  drive.mount("/content/gdrive")
  work_dir = "/content/gdrive/My Drive/Colab Notebooks/DL-Project-2024-Experiments/SUBMISSION"
  # All relative paths used later in the notebook are resolved against this directory.
  os.chdir(work_dir)
  print(f"Connected to Google drive, setting working directory to '{work_dir}'")

Mounted at /content/gdrive
Connected to Google drive, setting working directory to '/content/gdrive/My Drive/Colab Notebooks/DL-Project-2024-Experiments/SUBMISSION'


In [3]:
# Create the results folder if it does not exist yet.
# `exist_ok=True` makes the call idempotent and avoids the race between
# a separate existence check and the directory creation.
results_folder_name = 'task_1_task_2_buffer_comparison_plots'
os.makedirs(results_folder_name, exist_ok=True)

In [4]:
def get_file_key(n_epochs, experiment, buffer_pct, goldilocks_rate, mpe_rate, goldilocks_remove_lowest_pct, goldilocks_remove_highest_pct, mpe_highest):
  """Build the `e2e_...` key string that identifies one run configuration.

  Every argument is interpolated into the key with its default string
  formatting, in the order: epochs, experiment, buffer, goldilocks rate,
  MPE rate, goldilocks lowest/highest removal percentages, MPE-highest flag.
  """
  run_part = f'e2e_ep_{n_epochs}_ex_{experiment}_b_{buffer_pct}'
  rate_part = f'_gl_{goldilocks_rate}_mpe_{mpe_rate}'
  cutoff_part = f'_gll_{goldilocks_remove_lowest_pct}_glh_{goldilocks_remove_highest_pct}_mpeh_{mpe_highest}'
  return run_part + rate_part + cutoff_part

def reject(filter_map, value_map, key_list):
  """Return True if any key in `key_list` fails its filter.

  A key fails when `filter_map[key]` is non-empty and `value_map[key]`
  is not contained in it. Keys with an empty filter never cause rejection.
  """
  def fails_filter(name):
    allowed = filter_map[name]
    if len(allowed) == 0:
      # Empty filter: nothing is excluded for this key.
      return False
    return value_map[name] not in allowed

  return any(fails_filter(name) for name in key_list)

def parse_file_key(filename):
  """Extract the run configuration encoded in a result file name.

  Args:
    filename: Base name of a result file (no directory part), e.g.
      'task_accuracies_data_e2e_ep_50_ex_1_b_0.04_gl_1.0_mpe_0.0_gll_0.15_glh_0.45_mpeh_False.pkl'.

  Returns:
    A dict with the keys 'n_epochs', 'experiment', 'buffer_pct',
    'goldilocks_rate', 'mpe_rate', 'goldilocks_remove_lowest_pct',
    'goldilocks_remove_highest_pct' and 'mpe_highest'. All values are the
    raw matched strings (no numeric/bool conversion is performed).
    Returns None when the name does not follow the expected layout.
  """
  pattern = (
    r'task_accuracies_data_e2e'
    r'_ep_(?P<n_epochs>\d+)'
    r'_ex_(?P<experiment>\d+)'
    r'_b_(?P<buffer_pct>[+-]?\d*\.\d+)'
    r'_gl_(?P<goldilocks_rate>[+-]?\d*\.\d+)'
    r'_mpe_(?P<mpe_rate>[+-]?\d*\.\d+)'
    r'_gll_(?P<goldilocks_remove_lowest_pct>[+-]?\d*\.\d+)'
    r'_glh_(?P<goldilocks_remove_highest_pct>[+-]?\d*\.\d+)'
    r'_mpeh_(?P<mpe_highest>True|False)'
    # The dot is escaped so only a literal ".pkl" suffix is accepted.
    r'\.pkl'
  )
  # fullmatch anchors both ends, so trailing text such as ".pkl.bak" is rejected.
  match = re.fullmatch(pattern, filename)
  if match is None:
    return None
  return match.groupdict()

def get_accuracies(file_name, folder=None):
  """Load the two per-task accuracy series stored in a pickled result file.

  Args:
    file_name: Name of the pickle file inside the results folder.
    folder: Directory containing the file. When omitted, the module-level
      variable `folder_path` is used, which keeps existing one-argument
      calls working unchanged.

  Returns:
    A tuple `(test_accuracies_1, test_accuracies_2)` with the values stored
    under those two keys of the unpickled object.

  Raises:
    FileNotFoundError: If the file does not exist.
    KeyError: If one of the expected keys is missing.
  """
  if folder is None:
    # Fall back to the notebook-global folder set by the discovery loop.
    folder = folder_path
  file_path = os.path.join(folder, file_name)
  # NOTE(security): pickle.load executes arbitrary code embedded in the file;
  # only use this on result files produced by trusted runs.
  with open(file_path, 'rb') as f:
    loaded_results = pickle.load(f)
  return loaded_results["test_accuracies_1"], loaded_results["test_accuracies_2"]

def get_gl_filename(epochs, experiment, buffer, gll, glh):
  """Return the result file name for the run with gl=1.0 and mpe=0.0.

  The MPE-highest flag is fixed to False in this name; `gll` and `glh`
  are interpolated as given.
  """
  run_part = f"task_accuracies_data_e2e_ep_{epochs}_ex_{experiment}_b_{buffer}"
  selection_part = f"_gl_1.0_mpe_0.0_gll_{gll}_glh_{glh}_mpeh_False.pkl"
  return run_part + selection_part

def get_mpe_filename(epochs, experiment, buffer, mpeh):
  """Return the result file name for the run with gl=0.0 and mpe=1.0.

  Both goldilocks removal percentages are fixed to 0.0 in this name;
  `mpeh` is interpolated as given.
  """
  run_part = f"task_accuracies_data_e2e_ep_{epochs}_ex_{experiment}_b_{buffer}"
  selection_part = f"_gl_0.0_mpe_1.0_gll_0.0_glh_0.0_mpeh_{mpeh}.pkl"
  return run_part + selection_part

def get_mix_filename(epochs, experiment, buffer, gll, glh, mpeh):
  """Return the result file name for the run with gl=0.5 and mpe=0.5.

  `gll`, `glh` and `mpeh` are interpolated as given.
  """
  run_part = f"task_accuracies_data_e2e_ep_{epochs}_ex_{experiment}_b_{buffer}"
  selection_part = f"_gl_0.5_mpe_0.5_gll_{gll}_glh_{glh}_mpeh_{mpeh}.pkl"
  return run_part + selection_part

def get_buffer_triple(epochs, experiment, buffer, gll, glh, mpeh):
  """Load the accuracy series of the GL, MPE and MIX runs for one configuration.

  Returns a 6-tuple ordered as
  (GL task 1, GL task 2, MPE task 1, MPE task 2, MIX task 1, MIX task 2).
  """
  # Resolve all three file names first, then load them in GL, MPE, MIX order.
  file_names = (
    get_gl_filename(epochs, experiment, buffer, gll, glh),
    get_mpe_filename(epochs, experiment, buffer, mpeh),
    get_mix_filename(epochs, experiment, buffer, gll, glh, mpeh),
  )

  series = []
  for file_name in file_names:
    acc_task_1, acc_task_2 = get_accuracies(file_name)
    series += [acc_task_1, acc_task_2]

  return tuple(series)

def plot_buffer_triple(n_epochs, experiment, buffer, gll, glh, mpeh):
  """Plot task 1/2 accuracy curves of the GL, MPE and MIX runs and save as PNG.

  The six curves returned by `get_buffer_triple` are drawn against epoch
  indices 0 .. 2*n_epochs-1, with a dashed vertical line at `n_epochs`
  labelled 'switch tasks'. The figure is written to
  `{results_folder_name}/buffer_comparison_plot_<key>.png`, where the key
  comes from `get_file_key` with "X" in the goldilocks/MPE rate slots.

  Args:
    n_epochs: Number of epochs per task; the x axis spans 2*n_epochs points.
    experiment: Experiment identifier used in the file names.
    buffer: Buffer value used in the file names.
    gll: Goldilocks "remove lowest" value used in the file names.
    glh: Goldilocks "remove highest" value used in the file names.
    mpeh: MPE-highest flag used in the file names.

  Note: assumes every loaded accuracy series has 2*n_epochs entries;
  matplotlib raises on a length mismatch -- TODO confirm against the
  training code that writes the pickles.
  """
  (gl_buffer_acc_1, gl_buffer_acc_2,
   mpe_buffer_acc_1, mpe_buffer_acc_2,
   mix_buffer_acc_1, mix_buffer_acc_2) = get_buffer_triple(n_epochs, experiment, buffer, gll, glh, mpeh)

  epochs = np.arange(0, 2 * n_epochs)
  switch_epoch = n_epochs

  # Drawing order is kept so colours and legend entries match earlier plots.
  curves = [
    ("Task 1 GL", gl_buffer_acc_1),
    ("Task 2 GL", gl_buffer_acc_2),
    ("Task 1 MPE", mpe_buffer_acc_1),
    ("Task 2 MPE", mpe_buffer_acc_2),
    ("Task 1 MIX", mix_buffer_acc_1),
    ("Task 2 MIX", mix_buffer_acc_2),
  ]

  fig, ax = plt.subplots(figsize=(8, 6))
  try:
    for label, accuracies in curves:
      ax.plot(epochs, accuracies, label=label)

    ax.axvline(x=switch_epoch, color='black', linestyle='--', label='switch tasks')
    ax.set_ylim(0.0, 1.0)
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Task 1/2 Accuracy")
    # The title is left empty, as in the existing plots.
    ax.set_title("")
    ax.legend()

    # "X" fills the rate slots: one plot combines runs with different gl/mpe rates.
    wildcard = "X"
    file_key = get_file_key(n_epochs, experiment, buffer, wildcard, wildcard, gll, glh, mpeh)
    fig.savefig(f"{results_folder_name}/buffer_comparison_plot_{file_key}.png")
  finally:
    # Always release the figure, even if plotting or saving fails, so that
    # figures do not accumulate when this is called in a loop.
    plt.close(fig)

def plot_buffer_triples(experiments):
  """Generate one comparison plot per experiment and parameter combination.

  For every experiment, iterates over the fixed grid of epochs, buffer
  values, (gll, glh) pairs and MPE-highest flags and calls
  `plot_buffer_triple` for each combination.
  """
  epoch_options = [50]
  buffer_options = [0.04, 0.2, 0.4]
  goldilocks_cutoffs = [(0.15, 0.45), (0.30, 0.30), (0.45, 0.15)]
  mpeh_options = [True, False]

  # product() varies the right-most factor fastest, i.e. the same order
  # as nesting the loops experiment -> epochs -> buffer -> cutoffs -> mpeh.
  grid = itertools.product(experiments, epoch_options, buffer_options, goldilocks_cutoffs, mpeh_options)
  for experiment, epochs, buffer, (gll, glh), mpeh in grid:
    plot_buffer_triple(epochs, experiment, buffer, gll, glh, mpeh)


# Collect the experiment ids of all result pickles, then plot each of them.
experiments=set()

folder_paths=["raw_result_data"]

# NOTE: the loop variable must stay named `folder_path`: get_accuracies()
# reads this module-level name when it builds the path of a result file.
for folder_path in folder_paths:
  for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if not (os.path.isfile(file_path) and file_path.endswith('.pkl')):
      continue
    meta=parse_file_key(filename)
    if meta is None:
      # parse_file_key returns None for names that do not follow the
      # expected layout; skip them instead of failing on meta["experiment"].
      continue
    experiments.add(meta["experiment"])

print(f"{experiments} experiments found")

plot_buffer_triples(experiments)


{'1736234827', '1736372945', '1736405581', '1736373405', '1736373208', '1736259682', '1736259645', '1736405620', '1736234835', '1736234924'} experiments found
