# Preparation

### Imports

In [163]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
import os
from matplotlib.ticker import FormatStrFormatter
import itertools as it
from copy import deepcopy

### Experiment loading utilities

In [164]:
def flatten_dict(d, parent_key='', sep='.'):
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are appended to their ancestors' keys using
    ``sep`` (``{"a": {"b": 1}}`` becomes ``{"a.b": 1}``). A nested dictionary
    that is empty contributes no entry to the result.

    Args:
        d: dictionary to flatten.
        parent_key: prefix prepended to every key of ``d``; empty for the root.
        sep: separator placed between the prefix and each key.

    Returns:
        A new flat dictionary.
    """
    flat = {}
    for key, value in d.items():
        # Root-level keys are kept untouched; deeper keys get the prefix.
        full_key = key if not parent_key else parent_key + sep + key
        if not isinstance(value, dict):
            flat[full_key] = value
            continue
        flat.update(flatten_dict(value, full_key, sep=sep))
    return flat

def read_json(file_path):
    """Load and return the JSON document stored at ``file_path``.

    The file is always decoded as UTF-8, so the result does not depend on the
    locale of the machine the notebook runs on.

    Args:
        file_path: path of the JSON file to read.

    Returns:
        The decoded JSON content (whatever ``json.load`` produces).
    """
    with open(file_path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)

In [None]:
def load_all_configurations(exp_name:str='3_giu', folder_name:str= '../out/exp/'):
    """Load every experiment configuration of ``exp_name`` into a DataFrame.

    Each ``<exp_id>.json`` file found in ``<cwd>/<folder_name>/<exp_name>`` is
    read and flattened with ``flatten_dict``; the result has one row per
    experiment id and one column per flattened configuration key.

    Entries that do not end in ``.json`` (sub-folders, checkpoints, ...) are
    ignored instead of being handed to the JSON parser.

    Args:
        exp_name: name of the experiment sub-folder.
        folder_name: folder (relative to the working directory) containing it.

    Returns:
        ``pandas.DataFrame`` indexed by experiment id, sorted by id.
    """
    exp_folder = os.path.join(os.getcwd(), folder_name, exp_name)
    experiments = {}
    for single_config in os.listdir(exp_folder):
        # splitext only strips the trailing extension, whereas
        # str.replace('.json', '') would also eat it in the middle of a name.
        exp_id, extension = os.path.splitext(single_config)
        if extension != '.json':
            continue
        data = read_json(os.path.join(exp_folder, single_config))
        experiments[exp_id] = flatten_dict(data)
    return pd.DataFrame.from_dict(dict(sorted(experiments.items())), "index") #sorted is not actually needed

In [165]:
def load_all_configurations_pruning(exp_name:str='3_giu', folder_name:str= '../out/exp/', target_exp_id:str='WawGcW33HROmuUH1'):
    """Load the configuration of a single experiment into a DataFrame.

    Looks for ``<target_exp_id>.json`` inside
    ``<cwd>/<folder_name>/<exp_name>`` and, if it is listed there, reads and
    flattens it with ``flatten_dict``.

    Args:
        exp_name: name of the experiment sub-folder.
        folder_name: folder (relative to the working directory) containing it.
        target_exp_id: id of the only experiment to load; defaults to the run
            that used to be hard-coded in this function.

    Returns:
        ``pandas.DataFrame`` with one row indexed by ``target_exp_id``, or an
        empty DataFrame when the configuration file is not present.
    """
    exp_folder = os.path.join(os.getcwd(), folder_name, exp_name)
    config_name = f"{target_exp_id}.json"
    experiments = {}
    # The folder is still listed so that a missing folder fails loudly.
    if config_name in os.listdir(exp_folder):
        data = read_json(os.path.join(exp_folder, config_name))
        experiments[target_exp_id] = flatten_dict(data)
    return pd.DataFrame.from_dict(experiments, "index")

In [None]:
def load_metrics(exp_name:str='3_giu', folder_name:str= '../out/log/', version:int=0):
    """Load the per-epoch ``metrics.csv`` of every run of an experiment.

    For each entry of ``<cwd>/<folder_name>/<exp_name>`` the file
    ``<entry>/lightning_logs/version_<version>/metrics.csv`` is read. Entries
    without that file are skipped, mirroring how ``load_emissions`` and
    ``load_flops`` treat missing files, so one incomplete run does not abort
    the whole load.

    Args:
        exp_name: name of the experiment sub-folder.
        folder_name: log folder (relative to the working directory).
        version: index of the ``version_<n>`` folder to read.

    Returns:
        dict mapping experiment id to its metrics ``DataFrame``, sorted by id.
    """
    results = {}
    exp_log_folder = os.path.join(os.getcwd(),folder_name,exp_name)
    for exp_id in os.listdir(exp_log_folder):
        metrics_filepath = os.path.join(exp_log_folder, exp_id, 'lightning_logs', f'version_{version}', 'metrics.csv')
        if not os.path.exists(metrics_filepath): continue
        results[exp_id] = pd.read_csv(metrics_filepath)
    return dict(sorted(results.items()))

In [166]:
######## load metrics for the pruning experiment

def load_metrics_pruning(exp_name:str='3_giu', folder_name:str= '../out/log/', target_exp_id:str='WawGcW33HROmuUH1', version:int=3):
    """Load the ``metrics.csv`` of a single run of an experiment.

    Reads ``<target_exp_id>/lightning_logs/version_<version>/metrics.csv``
    below ``<cwd>/<folder_name>/<exp_name>`` when ``target_exp_id`` is listed
    in that folder.

    Args:
        exp_name: name of the experiment sub-folder.
        folder_name: log folder (relative to the working directory).
        target_exp_id: id of the only run to load; defaults to the run that
            used to be hard-coded in this function.
        version: index of the ``version_<n>`` folder to read (previously
            fixed to 3).

    Returns:
        dict with at most one entry mapping ``target_exp_id`` to its metrics
        ``DataFrame``; empty when the run folder is not present.
    """
    exp_log_folder = os.path.join(os.getcwd(),folder_name,exp_name)
    results = {}
    if target_exp_id in os.listdir(exp_log_folder):
        metrics_filepath = os.path.join(exp_log_folder, target_exp_id, 'lightning_logs', f'version_{version}', 'metrics.csv')
        results[target_exp_id] = pd.read_csv(metrics_filepath)
    return results

In [167]:
def get_metrics_given_best_val(metrics_per_epoch, metrics_names, metrics_prefix = "test", metrics_dataloader_idx=1, val_prefix = "test", val_metric="NDCG@10", val_dataloader_idx=0, fix_rec_at_k=False, target_exp_id='WawGcW33HROmuUH1'):
    """Collect the requested metrics of each experiment into one table.

    NOTE: despite the name, no validation-based selection happens here: the
    value stored at row label 0 of every metrics frame is always returned.
    ``metrics_dataloader_idx``, ``val_prefix``, ``val_metric`` and
    ``val_dataloader_idx`` are accepted for interface compatibility but are
    not used.

    Args:
        metrics_per_epoch: dict mapping experiment id to a metrics DataFrame.
        metrics_names: metric names such as ``"NDCG@10"``.
        metrics_prefix: prefix of the metric columns (``"<prefix>_<metric>"``).
        fix_rec_at_k: when True, ``"@"`` is rewritten to ``"_@"`` in the metric
            names, for logs whose columns are spelled like ``"test_NDCG_@10"``.
        target_exp_id: only this experiment is kept (default: the run that
            used to be hard-coded). Pass ``None`` to keep every experiment.

    Returns:
        ``pandas.DataFrame`` indexed by experiment id with one column per
        (possibly rewritten) metric name.
    """
    if fix_rec_at_k:
        metrics_names = [metric.replace('@','_@') for metric in metrics_names]
    metric_keys = [f"{metrics_prefix}_{metric}" for metric in metrics_names]

    best_val_row = 0
    best_metrics = {}
    for exp_id, metrics in metrics_per_epoch.items():
        if target_exp_id is not None and exp_id != target_exp_id:
            continue
        best_metrics[exp_id] = {metric_name: metrics[metric_key][best_val_row] for metric_name, metric_key in zip(metrics_names,metric_keys)}
    return pd.DataFrame.from_dict(dict(sorted(best_metrics.items())), "index") #sorted is not actually needed

In [168]:
def hotfix_table(table, col, value=0.5):
    """Smooth isolated spikes of ``col`` inside each first-level index group.

    Within every group (rows sharing the same level-0 index value) each
    interior point is compared with the mean of its two neighbours; when the
    absolute difference exceeds ``value`` the point is replaced by that mean.
    The first and last row of a group are never modified. Groups with fewer
    than three rows have no interior point and are left untouched (a
    single-row group previously raised a shape-mismatch error).

    Args:
        table: DataFrame to fix; it is modified in place.
        col: name of the column to smooth.
        value: maximum tolerated distance from the neighbours' mean.

    Returns:
        The same ``table`` object, for convenience.
    """
    first_level = table.index.get_level_values(0)
    for exp_id in first_level.unique():
        exp_loc = np.asarray(first_level == exp_id)
        series = table.loc[exp_loc, col].values
        if len(series) < 3:
            continue
        neighbour_mean = (series[:-2] + series[2:]) / 2
        over_threshold = np.abs(series[1:-1] - neighbour_mean) > value
        if not over_threshold.any():
            continue
        # Expand the interior mask to the whole table: the two border rows of
        # the group are always False, rows of other groups stay False.
        row_mask = exp_loc.copy()
        row_mask[exp_loc] = np.concatenate(([False], over_threshold, [False]))
        table.loc[row_mask, col] = neighbour_mean[over_threshold]
    return table

In [169]:
# Columns whose test value is obtained as (second row - first row).
columns_for_diffs = ['duration', 'emissions', 'emissions_rate', 'cpu_power', 'gpu_power',
                    'ram_power', 'cpu_energy', 'gpu_energy', 'ram_energy',
                    'energy_consumed']
# Columns copied once, from the first row only.
columns_that_are_same = ['project_name', 'run_id', 'country_name',
    'country_iso_code', 'region', 'cloud_provider', 'cloud_region', 'os',
    'python_version', 'codecarbon_version', 'cpu_count', 'cpu_model',
    'gpu_count', 'gpu_model', 'longitude', 'latitude', 'ram_total_size',
    'tracking_mode', 'on_cloud', 'pue']
# Columns kept separately for each split, without differencing.
columns_that_are_different = ['timestamp']

def load_emissions(exp_name:str='3_giu', folder_name:str= '../out/log/', target_exp_id:str='WawGcW33HROmuUH1'):
    """Load train/test emission figures from the runs' ``emissions.csv``.

    The first row of each file is used as the "train" measurement and the
    second row as the "test" one. For ``columns_for_diffs`` the test value is
    the second row minus the first; ``columns_that_are_same`` are taken from
    the first row; ``columns_that_are_different`` are stored for both rows.
    Runs without an ``emissions.csv`` are skipped.

    Args:
        exp_name: name of the experiment sub-folder.
        folder_name: log folder (relative to the working directory).
        target_exp_id: only this run is loaded (default: the run that used to
            be hard-coded). Pass ``None`` to load every run in the folder.

    Returns:
        ``pandas.DataFrame`` indexed by experiment id.
    """
    emissions = {}
    exp_log_folder = os.path.join(os.getcwd(),folder_name,exp_name)
    for exp_id in os.listdir(exp_log_folder):
        if target_exp_id is not None and exp_id != target_exp_id:
            continue
        emissions_filepath = os.path.join(exp_log_folder,exp_id,'emissions.csv')
        if not os.path.exists(emissions_filepath): continue
        data = pd.read_csv(emissions_filepath)

        #TODO: generalize to more splits / rows of emissions
        train_row, test_row = data.iloc[0], data.iloc[1]
        new_data = {}
        for column in columns_for_diffs:
            new_data[f"train_{column}"] = train_row[column]
            new_data[f"test_{column}"] = test_row[column] - train_row[column]
        for column in columns_that_are_same:
            new_data[f"{column}"] = train_row[column]
        for column in columns_that_are_different:
            new_data[f"train_{column}"] = train_row[column]
            new_data[f"test_{column}"] = test_row[column]
        emissions[exp_id] = new_data

    return pd.DataFrame.from_dict(dict(sorted(emissions.items())), "index") #sorted is not actually needed

In [170]:
# Multiplier associated with each magnitude prefix.
modifiers = {"": 1, "K": 1e3, "M": 1e6, "G": 1e9, "T": 1e12, "P": 1e15}
# Text preceding each measure in the flops report, keyed by measure name.
strings_to_search = {
    "num_params": "params per GPU:",
    "flops": "fwd flops per GPU:",
    "latency": "fwd latency:",
    "FLOPS": "fwd FLOPS per GPU = fwd flops per GPU / fwd latency:",
}

def extract_flops_info(complete_string, string_to_search):
    """Parse the number that follows ``string_to_search`` in a report.

    The text between the first occurrence of ``string_to_search`` and the end
    of that line is read as ``"<number>"`` or ``"<number> <suffix>"``. If the
    first character of the suffix is a known magnitude prefix (see
    ``modifiers``) the number is scaled accordingly and the rest of the
    suffix is the unit; otherwise the whole suffix is the unit.

    Returns:
        ``(value, unit)``; ``value`` is NaN when ``string_to_search`` does not
        occur in ``complete_string``.
    """
    if string_to_search not in complete_string:
        raw = "nan"
    else:
        remainder = complete_string.split(string_to_search)[1]
        raw = remainder.split("\n")[0].strip()

    number, suffix = raw, ""
    if " " in raw:
        number, suffix = raw.split(" ")

    prefix = suffix[:1]
    if prefix in modifiers:
        unit = suffix[1:]
    else:
        # Not a magnitude prefix (e.g. "ms"): keep the suffix as the unit.
        prefix = ""
        unit = suffix
    return float(number) * modifiers[prefix], unit

def load_flops(exp_name:str='3_giu', folder_name:str= '../out/log/', split_keys = ["train", "test"]):
    """Collect the flops-report measures of every run into a DataFrame.

    For each run folder and each split in ``split_keys`` the file
    ``<split>_flops.txt`` is read (when it exists) and every entry of
    ``strings_to_search`` is parsed with ``extract_flops_info``. Values are
    stored under ``"<split>_<measure>"``; the parsed unit is discarded.

    Returns:
        ``pandas.DataFrame`` indexed by experiment id, sorted by id.
    """
    exp_log_folder = os.path.join(os.getcwd(), folder_name, exp_name)
    per_experiment = {}
    for exp_id in os.listdir(exp_log_folder):
        measures = {}
        for split_key in split_keys:
            report_path = os.path.join(exp_log_folder, exp_id, f'{split_key}_flops.txt')
            if os.path.exists(report_path):
                with open(report_path, 'r') as report_file:
                    report = report_file.read()
                for measure_name, needle in strings_to_search.items():
                    value, _unit = extract_flops_info(report, needle)
                    measures[f"{split_key}_{measure_name}"] = value
        # A run without any report still gets an (empty) entry.
        per_experiment[exp_id] = measures

    ordered = dict(sorted(per_experiment.items()))
    return pd.DataFrame.from_dict(ordered, "index")

In [171]:
## Add emissions and flops info to metrics_per_epoch
def add_info_single_df(df, exp_data, subset_col, split_name, column_labels):
    """Spread the totals of one split over the matching rows of ``df``.

    The rows considered are those where ``subset_col`` is not NaN or, when
    ``df`` has no such column, only the last row. For every label, the total
    ``exp_data["<split_name>_<label>"]`` is distributed over those rows as an
    evenly spaced ramp ending at the total, written to the column
    ``"<split_name>_<label>"``. ``df`` is modified in place.
    """
    if subset_col in df.columns:
        split_subset = ~df[subset_col].isna()
    else:
        split_subset = np.zeros(len(df), dtype=bool)
        split_subset[-1] = True
    num_split = sum(split_subset)
    for split_label in (f"{split_name}_{label}" for label in column_labels):
        total = exp_data[split_label]
        ramp = np.linspace(total/num_split, total, num_split)
        df.loc[split_subset, split_label] = ramp
    

def add_info_to_metrics_per_epoch(metrics_per_epoch, complete_data, column_labels, split_names = ["train", "test"], repeat_for = ["val_loss/dataloader_idx_0",None]):
    """Annotate every per-epoch frame with the run totals in ``complete_data``.

    For each experiment and each ``(split_name, repeat_col)`` pair,
    ``add_info_single_df`` is applied on the rows selected by
    ``"<split_name>_loss"`` and then, when ``repeat_col`` is not None, again
    on the rows selected by ``repeat_col``. Frames are modified in place.
    """
    for exp_id in metrics_per_epoch:
        epoch_df = metrics_per_epoch[exp_id]
        for split_name, repeat_col in zip(split_names, repeat_for):
            selector_columns = [f"{split_name}_loss"]
            if repeat_col is not None:
                selector_columns.append(repeat_col)
            for selector in selector_columns:
                add_info_single_df(epoch_df, complete_data.loc[exp_id], selector, split_name, column_labels)
        metrics_per_epoch[exp_id] = epoch_df

### Table utils

In [172]:
def subset_data(complete_data, subset_selection=None, subset_keep=None, merge_columns=None):
    """Filter ``complete_data`` and optionally merge columns into labels.

    Args:
        complete_data: DataFrame with one row per experiment. It is never
            modified: merged columns are written to a copy.
        subset_selection: ``{column: value}``; only rows matching every pair
            are kept.
        subset_keep: list of ``(condition1, condition2)`` dict pairs. Rows
            matching all of ``condition1`` are kept only if they also match
            all of ``condition2``; rows not matching ``condition1`` are left
            alone.
        merge_columns: ``{new_column: [columns]}``; the non-null values of
            ``columns`` are joined with ``"_"`` into ``new_column`` and the
            source columns are dropped.

    Returns:
        The filtered (and possibly merged) DataFrame.
    """
    subset_selection = {} if subset_selection is None else subset_selection
    subset_keep = [] if subset_keep is None else subset_keep
    merge_columns = {} if merge_columns is None else merge_columns

    selected = complete_data
    for label, value in subset_selection.items():
        selected = selected[selected[label] == value]

    for condition1, condition2 in subset_keep:
        # Chain the filters so that *every* key of condition1 applies (each
        # key previously restarted from the full frame, keeping only the
        # last one, and an empty condition1 left the name undefined).
        candidates = selected
        for label, value in condition1.items():
            candidates = candidates[candidates[label] == value]
        survivors = candidates
        for label, value in condition2.items():
            survivors = survivors[survivors[label] == value]
        selected = selected.drop(candidates.index.difference(survivors.index))

    if merge_columns:
        # Work on a copy: without any filtering `selected` is still the
        # caller's frame and adding a column would modify it.
        selected = selected.copy()
    for col_name, cols in merge_columns.items():
        selected[col_name] = selected.loc[:,cols].apply(lambda row: '_'.join([str(x) for x in row if pd.notnull(x)]), axis=1)
        selected = selected.drop(columns=cols)

    return selected

def generate_table(subset_data,
                   label_convert_dict,
                   rows_labels, column_labels,
                   pivot_agg = lambda x: x,
                   table_name="results", save_table=False, round_at=4,
                   exp_name:str='3_giu', folder_name:str= '../out/results/',):
    """Pivot ``subset_data`` into a results table and optionally save it.

    Args:
        subset_data: DataFrame containing ``rows_labels`` and
            ``column_labels``.
        label_convert_dict: mapping used to rename the values of the
            ``svd_type`` column, when the table has one. Values missing from
            the mapping are kept unchanged (the same convention
            ``single_plot`` uses for its labels).
        rows_labels: columns used as pivot index.
        column_labels: columns used as pivot values.
        pivot_agg: aggregation function passed to ``pandas.pivot_table``.
        table_name: file name (without extension) used when saving.
        save_table: when True the table is written to
            ``<folder_name>/<exp_name>/<table_name>.csv``.
        round_at: number of decimals kept.
        exp_name, folder_name: location of the saved file.

    Returns:
        The pivoted table, with the row labels as regular columns.
    """
    subset_data = subset_data[rows_labels + column_labels]

    table = pd.pivot_table(subset_data, index=rows_labels, values=column_labels, dropna=False, aggfunc=pivot_agg)

    # Drop if all column_labels are nans
    drop_row = table[column_labels].isnull().all(axis=1)
    table = table[~drop_row]

    table = table.reset_index()

    table = table.round(round_at)

    # 'svd_type' is only present when it is one of the row labels.
    if 'svd_type' in table.columns:
        table['svd_type'] = table['svd_type'].map(lambda label: label_convert_dict.get(label, label))

    if save_table:
        results_path = os.path.join(folder_name,exp_name)
        os.makedirs(results_path, exist_ok=True)
        table.to_csv(os.path.join(results_path,f'{table_name}.csv'), index=False)
    return table

### Plot utils

In [173]:
def to_color_blind(color):
    """Return the hex code this notebook associates with a colour name.

    Raises:
        NotImplementedError: if ``color`` is not one of the known names.
    """
    palette = (
        ('red', '#e41a1c'),
        ('blue', '#377eb8'),
        ('orange', '#ff7f00'),
        ('green', '#4daf4a'),
        ('brown', '#a65628'),
        ('grey', '#999999'),
        ('yellow', '#dede00'),
        ('pink', '#f781bf'),
        ('purple', '#984ea3'),
    )
    for name, hex_code in palette:
        if color == name:
            return hex_code
    raise NotImplementedError("color not available")

def obtain_markers(number_of_markers):
    """Return the first ``number_of_markers`` marker codes (at most 10)."""
    available = ('o', 'v', 's', 'p', 'P', '*', 'X', '+', 'D', 'x')
    return list(available[:number_of_markers])

def obtain_color(number_of_colors):
    """Return the first ``number_of_colors`` hex colours (at most 9)."""
    palette = (
        '#e41a1c', '#377eb8', '#ff7f00', '#4daf4a', '#a65628',
        '#999999', '#dede00', '#f781bf', '#984ea3',
    )
    return list(palette[:number_of_colors])

In [174]:
def create_plot(plot_instructions,
                **kwargs):
    """Create a figure, draw on each of its axes and run pyplot calls.

    ``plot_instructions["subplots"]`` holds the keyword arguments for
    ``plt.subplots``; every other entry is the name of a ``plt`` function
    with its keyword arguments, executed in order after drawing.

    ``kwargs`` are forwarded to ``single_plot``. With several axes, any
    list/tuple argument is indexed by the axis position and every other
    argument is shared by all axes.
    """
    final_fig, final_ax = plt.subplots(**plot_instructions["subplots"])

    if not hasattr(final_ax, '__iter__'):
        # A single Axes object was returned.
        single_plot(final_ax, **kwargs)
    else:
        for position, axis in enumerate(final_ax.flatten()):
            per_axis_kwargs = {}
            for name, value in kwargs.items():
                per_axis_kwargs[name] = value[position] if isinstance(value, (list, tuple)) else value
            single_plot(axis, **per_axis_kwargs)

    for plt_func, plt_kwargs in plot_instructions.items():
        if plt_func == "subplots":
            continue
        run_plot_func(plt, plt_func, plt_kwargs)

def single_plot(ax,
                data,
                label_columns: list,
                x_column: str,
                y_column: str,
                plot_type = "plot",
                plot_kwargs = {},
                ax_instructions: dict = {},
                label_convert_dict: dict = {}):
    """Draw one series per combination of ``label_columns`` values on ``ax``.

    Every combination in the cartesian product of the unique values of
    ``label_columns`` becomes a series: the rows of ``data`` matching it are
    drawn with ``ax.<plot_type>(x, y, ...)``. Legend labels are the
    combinations translated through ``label_convert_dict`` (a single-column
    combination is unwrapped first); series are drawn in sorted label order.
    Finally each ``ax_instructions`` entry is executed on ``ax`` through
    ``run_plot_func``.
    """
    unique_values = [data[col].unique() for col in label_columns]
    labels = list(it.product(*unique_values))

    plot_labels = []
    for combination in labels:
        lookup = combination[0] if len(combination) == 1 else combination
        plot_labels.append(label_convert_dict.get(lookup, lookup))
    # Order the raw combinations consistently with their display labels.
    labels = [combination for _, combination in sorted(zip(plot_labels, labels))]
    plot_labels = sorted(plot_labels)

    colors = obtain_color(len(labels))
    markers = obtain_markers(len(labels))

    for i, values in enumerate(labels):
        selection = data
        for col, value in zip(label_columns, values):
            selection = selection[selection[col] == value]

        x_values = selection[x_column]
        y_values = selection[y_column]
        draw = getattr(ax, plot_type)
        draw(x_values, y_values, label=plot_labels[i], color=colors[i], marker=markers[i], **plot_kwargs)

    for ax_func, ax_kwargs in ax_instructions.items():
        # Pass a copy so the instruction dictionaries can be reused.
        run_plot_func(ax, ax_func, ax_kwargs.copy())

def run_plot_func(obj, func, kwargs):
    """Call ``obj.<func>(**kwargs)``, optionally chaining calls on the result.

    When ``kwargs`` contains the special key ``"__kwargs__"``, its value holds
    the keyword arguments of ``obj.<func>`` and every other entry is treated
    as ``{method_name: kwargs}`` to be executed, recursively, on the object
    returned by the call.

    ``kwargs`` is never modified, so the same instruction dictionary can be
    reused for several plots (popping ``"__kwargs__"`` from it would make a
    second call pass the chained instructions as plain keyword arguments).
    """
    if "__kwargs__" in kwargs:
        call_kwargs = kwargs["__kwargs__"]
        follow_ups = {name: value for name, value in kwargs.items() if name != "__kwargs__"}
    else:
        call_kwargs, follow_ups = kwargs, {}

    result = getattr(obj, func)(**call_kwargs)
    for next_func, next_kwargs in follow_ups.items():
        run_plot_func(result, next_func, next_kwargs)
    
    # for index, (model, data) in enumerate(results.items()):
    #     ablation_values = list(data.keys())
    #     y_values = list(data.values())
    #     plt.plot(ablation_values, y_values, label=model, color=colors[index], marker=markers[index])
    # 
    # plt.xticks(ablation_values, ablation_values)
    # plt.xlabel(names_to_labels[ablation_type])
    # plt.ylabel(metric)
    # plt.title(names_to_labels[dataset_name])
    # plt.legend()
    # plt.show()

In [175]:
def merge_epoch_data(metrics_per_epoch, table, table_columns = [], epoch_column_not_na = "test_loss"):
    """Stack the per-epoch metrics of the experiments listed in ``table``.

    For every experiment id in ``table.index`` the rows of its metrics frame
    where ``epoch_column_not_na`` is not NaN are kept, and each column in
    ``table_columns`` is added with the (constant) value found in ``table``
    for that experiment. The frames in ``metrics_per_epoch`` are not modified.

    Returns:
        The row-wise concatenation of all frames, with the experiment id as
        outer index level; an empty DataFrame when ``table`` has no rows.
    """
    subset_metrics = {}
    for exp_id in table.index:
        epoch_df = metrics_per_epoch[exp_id]
        # Explicit copy: the columns below are written to this frame only,
        # not to a slice of the original one.
        epoch_df = epoch_df.loc[~epoch_df[epoch_column_not_na].isna()].copy()

        for col in table_columns:
            epoch_df.loc[:,col] = table.loc[exp_id,col]
        subset_metrics[exp_id] = epoch_df

    # pd.concat refuses an empty collection.
    if not subset_metrics:
        return pd.DataFrame()

    #concat all dataframes by row
    return pd.concat(subset_metrics)

### Important vars

In [176]:
# Human-readable labels (for titles and axes) of dataset names and of the
# quantities that are varied or measured in the experiments.
names_to_labels = {'ml-1m' : 'MovieLens 1M', 'ml-100k' : 'MovieLens 100k', 'amazon_beauty' : 'Amazon Beauty', 'foursquare-tky' : 'Foursquare TKY', 'foursquare-nyc' : 'Foursquare NYC',
                   'emb_size' : 'Embedding Size', 'lookback' : 'Input sequence length', 'carbon_emissions' : r'CO\textsubscript{2}-eq', 'model_size' : 'Model Size',
                   'ml-20m': 'MovieLens 20M'}

In [177]:
# Alphabetically sorted dataset and model names.
all_datasets = sorted(('ml-1m', 'ml-100k', 'amazon_beauty', 'foursquare-tky', 'foursquare-nyc'))
all_models = sorted(('SASRec', 'NARM', 'GRU4Rec', 'BERT4Rec', 'CORE'))
# Every "<metric>@<k>" combination, grouped by metric.
metrics_names = ["Precision", "Recall", "NDCG", "MAP"]
metrics_ks = [5, 10, 20]
all_metrics_names = ["{}@{}".format(*pair) for pair in it.product(metrics_names, metrics_ks)]

# MAIN

## Load all experiments

### Exp configs

In [178]:
exp_name = "3_giu"

In [179]:
# Flattened configuration of the pruning run only (one row per experiment id).
all_exps = load_all_configurations_pruning(exp_name)

In [180]:
all_exps

Unnamed: 0,data_params.collator_params.lookback,data_params.collator_params.lookforward,data_params.collator_params.num_items,data_params.collator_params.num_negatives.test,data_params.collator_params.num_negatives.train,data_params.collator_params.num_negatives.val,data_params.collator_params.out_seq_len.test,data_params.collator_params.out_seq_len.train,data_params.collator_params.out_seq_len.val,data_params.collator_params.padding_value,...,model.rec_model.emb_size,model.rec_model.lookback,model.rec_model.name,model.rec_model.num_blocks,model.rec_model.num_heads,model.rec_model.num_items,model.rec_model.num_users,model.trainer_params.callbacks,model.trainer_params.max_epochs,model.useSVD
WawGcW33HROmuUH1,200,1,3416,100,1,1,1,,1,0,...,128,200,SASRec,1,1,3416,6040,"[{'ModelCheckpoint': {'mode': 'min', 'monitor'...",600,True


### Load exp metrics

In [181]:
# Load the logged metrics of the pruning run: a dict mapping the experiment id
# to the DataFrame read from its metrics.csv.
metrics_per_epoch = load_metrics_pruning(exp_name)
metrics_per_epoch

{'WawGcW33HROmuUH1':    epoch  step  test_F1_@10  test_F1_@20  test_F1_@5  test_MAP_@10  \
 0      0     0      0.09666     0.067764    0.126043      0.076558   
 
    test_MAP_@20  test_MAP_@5  test_MRR_@10  test_MRR_@20  ...  test_NDCG_@10  \
 0      0.059033     0.093642      0.228742      0.240963  ...       0.297911   
 
    test_NDCG_@20  test_NDCG_@5  test_Precision_@10  test_Precision_@20  \
 0       0.342671      0.246232             0.05245            0.035099   
 
    test_Precision_@5  test_Recall_@10  test_Recall_@20  test_Recall_@5  \
 0           0.072914         0.524503         0.701987         0.36457   
 
    test_loss  
 0   0.597348  
 
 [1 rows x 21 columns]}

In [182]:
# fix_rec_at_k=True because the logged columns are spelled with an underscore
# before "@" (e.g. "test_NDCG_@10").
best_metrics = get_metrics_given_best_val(metrics_per_epoch, all_metrics_names, fix_rec_at_k=True)
best_metrics

Unnamed: 0,Precision_@5,Precision_@10,Precision_@20,Recall_@5,Recall_@10,Recall_@20,NDCG_@5,NDCG_@10,NDCG_@20,MAP_@5,MAP_@10,MAP_@20
WawGcW33HROmuUH1,0.072914,0.05245,0.035099,0.36457,0.524503,0.701987,0.246232,0.297911,0.342671,0.093642,0.076558,0.059033


In [183]:
# Train/test emission figures read from the run's emissions.csv.
emissions = load_emissions(exp_name)
emissions

Unnamed: 0,train_duration,test_duration,train_emissions,test_emissions,train_emissions_rate,test_emissions_rate,train_cpu_power,test_cpu_power,train_gpu_power,test_gpu_power,...,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue,train_timestamp,test_timestamp
WawGcW33HROmuUH1,2157.993973,1.161582,0.081368,3e-05,3.8e-05,-6.29682e-09,95.0,0.0,224.789394,-45.451318,...,3.0,3 x NVIDIA RTX A4000,8.5588,40.7243,62.646286,process,N,1.0,2024-06-03T21:10:58,2024-06-03T21:11:00


In [None]:
# all_flops = load_flops(exp_name)
# all_flops

In [184]:
# One row per experiment id: configuration + metrics + emissions. The inner
# join keeps only the ids present in all three tables.
complete_data = all_exps.join([best_metrics, emissions], how='inner') #all_flops
#join[all_exps, best_metrics, emissions, all_flops], axis=1)
# Keep the per-epoch metrics of the surviving experiments only.
metrics_per_epoch = {exp_id: metrics_per_epoch[exp_id] for exp_id in complete_data.index}
complete_data

Unnamed: 0,data_params.collator_params.lookback,data_params.collator_params.lookforward,data_params.collator_params.num_items,data_params.collator_params.num_negatives.test,data_params.collator_params.num_negatives.train,data_params.collator_params.num_negatives.val,data_params.collator_params.out_seq_len.test,data_params.collator_params.out_seq_len.train,data_params.collator_params.out_seq_len.val,data_params.collator_params.padding_value,...,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue,train_timestamp,test_timestamp
WawGcW33HROmuUH1,200,1,3416,100,1,1,1,,1,0,...,3.0,3 x NVIDIA RTX A4000,8.5588,40.7243,62.646286,process,N,1.0,2024-06-03T21:10:58,2024-06-03T21:11:00


In [185]:
# Emission quantities to attach to the per-epoch frames; the frames are
# modified in place by add_info_to_metrics_per_epoch.
column_labels = ['duration', 'emissions', 'emissions_rate', 'cpu_power','gpu_power', 'ram_power', 'cpu_energy', 'gpu_energy', 'ram_energy', 'energy_consumed'] #, 'flops', 'latency', 'FLOPS']
add_info_to_metrics_per_epoch(metrics_per_epoch, complete_data, column_labels)
metrics_per_epoch

{'WawGcW33HROmuUH1':    epoch  step  test_F1_@10  test_F1_@20  test_F1_@5  test_MAP_@10  \
 0      0     0      0.09666     0.067764    0.126043      0.076558   
 
    test_MAP_@20  test_MAP_@5  test_MRR_@10  test_MRR_@20  ...  test_duration  \
 0      0.059033     0.093642      0.228742      0.240963  ...       1.161582   
 
    test_emissions  test_emissions_rate  test_cpu_power  test_gpu_power  \
 0         0.00003        -6.296820e-09             0.0      -45.451318   
 
    test_ram_power  test_cpu_energy  test_gpu_energy  test_ram_energy  \
 0        0.000455         0.000031         0.000058     1.896585e-07   
 
    test_energy_consumed  
 0              0.000089  
 
 [1 rows x 41 columns]}

## Table generation

In [None]:
#for col in sorted(complete_data.columns): print(col)

In [186]:
# Settings for the results table.
rows_labels = ["svd_type"] #"data_params.name","model.rec_model.name",  "model.trainer_params.max_epochs"
metrics_names = ["Precision", "Recall", "NDCG", "MAP"]
metrics_ks = [5, 10, 20]
# Column names use "_@" to match the metric columns of complete_data.
column_labels = sorted([f"{metric}_@{k}" for metric in metrics_names for k in metrics_ks])
subset_selection = {"model.rec_model.emb_size":128}
subset_keep = []
#merge_columns = {"svd_type": ["model.useSVD","model.freeze_emb"]}
#label_convert_dict = {"False_False": "No SVD", "True_False": "SVD", "True_True": "SVD freeze"}
# "svd_type" is built by joining these two configuration columns with "_".
merge_columns = {"svd_type": ["model.useSVD","model.freeze_emb"]}

# Display names of the merged "svd_type" values.
label_convert_dict = {"False_False": "No SVD", "True_False": "SVD", "True_True": "SVD freeze"}


In [187]:
# Build the table and save it as ../out/results/april_30/results.csv
# (default folder_name and table_name of generate_table).
table = generate_table(subset_data(complete_data, subset_selection, subset_keep, merge_columns), label_convert_dict, rows_labels, column_labels, exp_name = "april_30", save_table=True) #, pivot_agg=list)

table

Unnamed: 0,svd_type,MAP_@10,MAP_@20,MAP_@5,NDCG_@10,NDCG_@20,NDCG_@5,Precision_@10,Precision_@20,Precision_@5,Recall_@10,Recall_@20,Recall_@5
0,SVD,0.0766,0.059,0.0936,0.2979,0.3427,0.2462,0.0525,0.0351,0.0729,0.5245,0.702,0.3646


In [None]:
# # To check which rows are duplicated
# df = complete_data.loc[(complete_data["model.rec_model.name"]=="SASRec") & (complete_data["data_params.name"]=="ml-1m")]
# #Find columns in df with different values
# columns_with_different_values = []
# for column in df.columns:
#     if len(df[column].apply(lambda x: str(x)).unique()) > 1:
#         columns_with_different_values.append(column)
# print(columns_with_different_values)
# complete_data.loc[(complete_data["model.rec_model.name"]=="SASRec") & (complete_data["data_params.name"]=="ml-1m")][columns_with_different_values]

## Plot

In [None]:
# Settings shared by the plots below.
main_rows_labels = ["svd_type"]
metrics_names = ["Precision", "Recall", "NDCG", "MAP"]
metrics_ks = [5, 10, 20]
column_labels = sorted([f"{metric}_@{k}" for metric in metrics_names for k in metrics_ks])
subset_keep = []

#label_convert_dict = {"False_False": "No SVD", "True_False": "SVD", "True_True": "SVD freeze"}

label_convert_dict = {"False_False": "No SVD", "True_False": "SVD", "True_True": "SVD freeze"}
merge_columns = {"svd_type": ["model.useSVD","model.freeze_emb"]}

### Embedding size

In [None]:
main_x_column = "model.rec_model.emb_size"
main_x_label = "Embedding Size"
super_plot_name = "emb_size_NDCG@10"
super_title = "Performance with varying embedding size"

In [None]:
rows_label_to_add = [main_x_column]
rows_labels = main_rows_labels + rows_label_to_add

subset_selection = {}

In [None]:
table = generate_table(subset_data(complete_data, subset_selection, subset_keep, merge_columns), label_convert_dict, rows_labels, column_labels, exp_name = "april_30", save_table=True)
table

In [None]:
y_column = "NDCG_@10"
y_label = y_column.replace("_", "")

# Instructions executed on the Axes by run_plot_func. A dict literal cannot
# hold the same key twice (the later entry silently replaces the earlier
# one), so both minor-tick settings are passed in a single call.
ax_instructions = {"set_xscale": {"value": "log"},
                   "set_ylabel": {"ylabel": y_label},
                   "get_xaxis": {"__kwargs__":{},
                                 "set_tick_params": {"which":"minor", "size":0, "width":0}},
                   "legend": {},
                   "grid":{}}

# "subplots" configures plt.subplots; the other entries are pyplot calls
# executed in order after drawing.
plot_instructions = {"subplots": {"nrows": 1, "ncols": 1, "figsize": (6,4)},
                    "tight_layout":{},
                    "savefig": {"fname": "plot.pdf"},
                    "show":{}}

In [None]:
# Plot y_column against the embedding size, one series per svd_type.
x_column = main_x_column
x_label = main_x_label
plot_name = super_plot_name#+"_"+subset_selection["data_params.name"]

# One tick per value of x_column present in the table.
ax_instructions["set_xticks"] = {"ticks": table[x_column].unique().astype(int), "labels": table[x_column].unique().astype(int)}
ax_instructions["set_xlabel"] = {"xlabel": x_label}
ax_instructions["legend"] = {"loc": "center right"}

plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

label_convert_dict = {"False_False": "No SVD", "True_False": "SVD", "True_True": "SVD + freeze"}
#label_convert_dict = {"False_False": "No SVD", "True_False": "SVD", "True_True": "SVD freeze", "True_False_64.0":"SVD + cutoff", "True_True_64.0":"SVD freeze+cutoff"}


create_plot(plot_instructions, data=table, label_columns=["svd_type"], x_column=x_column, y_column=y_column,
            ax_instructions = ax_instructions, label_convert_dict = label_convert_dict)

In [None]:
x_column = "epoch"
x_label = "Epoch"
y_column = "test_NDCG_@10"
y_label = "NDCG@10"
ax_instructions["set_xlabel"] = {"xlabel": x_label}

### Emb size = 128

In [None]:
subset_selection = {"model.rec_model.emb_size":128}

In [None]:
# Per-epoch rows (those with a test_loss value) of the selected experiments,
# annotated with the rows_labels columns of epoch_table.
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)
sas_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
#sas_table = hotfix_table(sas_table, y_column) #HOTFIX
sas_table

In [None]:
# Draw the same per-epoch curve in several figures that differ only in axis
# limits, x scale and marker spacing.
plot_name = "epochs_weightpruning_128_NDCG@10"#+super_plot_name#+subset_selection["data_params.name"]

#plot_instructions["subplots"] = {"nrows": 3, "ncols": 2, "figsize": (12,12)}
plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

plot_kwargs = {"markevery": 0.1}

ax_instructions.pop("set_xticks",None)
ax_instructions["legend"] = {"loc": "center right"}
#ax_instructions["set_title"] = {"label": super_title}

#label_convert_dict = {"False_False": "No SVD", "True_False": "SVD", "True_True": "SVD + freeze"}
label_convert_dict = {"False_False": "No SVD", "True_False": "SVD", "True_True": "SVD freeze", "True_False_64.0":"SVD + cutoff", "True_True_64.0":"SVD freeze+cutoff"}

# Shallow copies: top-level entries can be replaced independently below, but
# nested dictionaries are still shared between the copies.
repeated_ax_instructions = [ax_instructions.copy() for _ in range(4)]
#repeated_ax_instructions[0]["set_ylim"] = {"bottom":0.525}
repeated_ax_instructions[0]["legend"] = {"loc": "center right"}

repeated_ax_instructions[1]["set_xlim"] = {"left":0,"right":10}
#repeated_ax_instructions[1]["set_ylim"] = {"bottom":0.34,"top":0.65}
repeated_ax_instructions[1].pop("set_xscale",None)
repeated_ax_instructions[1]["legend"] = {"loc": "center right"}

repeated_ax_instructions[2]["set_xlim"] = {"left":0,"right":600}
#repeated_ax_instructions[2]["set_ylim"] = {"bottom":0.64,"top":0.69}
repeated_ax_instructions[2].pop("set_xscale",None)

repeated_ax_instructions[3]["set_xlim"] = {"left":400,"right":600}
#repeated_ax_instructions[2]["set_ylim"] = {"bottom":0.64,"top":0.69}
repeated_ax_instructions[3].pop("set_xscale",None)


# NOTE(review): 6 copies are created but only the first 4 are used, because
# zip() below stops at the shorter list.
repeated_plot_kwargs = [plot_kwargs.copy() for _ in range(6)]
repeated_plot_kwargs[1]["markevery"] = 1
repeated_plot_kwargs[2]["markevery"] = 100
repeated_plot_kwargs[3]["markevery"] = 50

for i,(rep_ax_instr, rep_plot_kwargs) in enumerate(zip(repeated_ax_instructions,repeated_plot_kwargs)):
    # Deep copy so that changing the file name does not alter plot_instructions.
    rep_plot_instr = deepcopy(plot_instructions.copy())
    # Each figure is saved as "<plot_name>_<i>.png".
    rep_plot_instr["savefig"]["fname"] = plot_instructions["savefig"]["fname"].split(".pdf")[0]+"_"+str(i)+".png"
    create_plot(rep_plot_instr, data=([sas_table]*4)[i], label_columns=["svd_type"], x_column=x_column, y_column=y_column,
            ax_instructions = rep_ax_instr, label_convert_dict = label_convert_dict, plot_type="plot", plot_kwargs=rep_plot_kwargs)

#### FINE?

In [None]:
y_column

#### Foursquare TKY

In [None]:
subset_selection["data_params.name"] = "foursquare-tky"
subset_selection.pop("model.rec_model.name",None)

In [None]:
# generate_table expects label_convert_dict as its second positional argument;
# without it every following argument is shifted by one and the call fails.
table = generate_table(subset_data(complete_data, subset_selection, subset_keep, merge_columns), label_convert_dict, rows_labels, column_labels, exp_name = "april_30", save_table=True)
table

In [None]:
y_column = "NDCG_@10"
y_label = y_column.replace("_", "")

# Instructions executed on the Axes by run_plot_func. A dict literal cannot
# hold the same key twice (the later entry silently replaces the earlier
# one), so both minor-tick settings are passed in a single call.
ax_instructions = {"set_xscale": {"value": "log"},
                   "set_ylabel": {"ylabel": y_label},
                   "get_xaxis": {"__kwargs__":{},
                                 "set_tick_params": {"which":"minor", "size":0, "width":0}},
                   "legend": {},
                   "grid":{}}

# "subplots" configures plt.subplots; the other entries are pyplot calls
# executed in order after drawing.
plot_instructions = {"subplots": {"nrows": 1, "ncols": 1, "figsize": (6,4)},
                    "tight_layout":{},
                    "savefig": {"fname": "plot.pdf"},
                    "show":{}}

In [None]:
x_column = main_x_column
x_label = main_x_label
plot_name = super_plot_name+"_"+subset_selection["data_params.name"]

ax_instructions["set_xticks"] = {"ticks": table[x_column].unique().astype(int), "labels": table[x_column].unique().astype(int)}
ax_instructions["set_xlabel"] = {"xlabel": x_label}
ax_instructions["legend"] = {"loc": "upper right"}

plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

label_convert_dict = {}

create_plot(plot_instructions, data=table, label_columns=["model.rec_model.name"], x_column=x_column, y_column=y_column,
            ax_instructions = ax_instructions, label_convert_dict = label_convert_dict)

In [None]:
# NOTE(review): "train_flops" is produced by load_flops, whose call and join
# are commented out earlier in this notebook; confirm the column is present
# in `table` before running this cell.
x_column = "train_flops"
x_label = "Training flops"
plot_name = super_plot_name+"_flops_"+subset_selection["data_params.name"]

ax_instructions.pop("set_xticks",None)
ax_instructions["set_xlabel"] = {"xlabel": x_label}
ax_instructions["legend"] = {"loc": "lower left"}
#ax_instructions["set_title"] = {"label": super_title}

plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

label_convert_dict = {}

create_plot(plot_instructions, data=table, label_columns=["model.rec_model.name"], x_column=x_column, y_column=y_column,
            ax_instructions = ax_instructions, label_convert_dict = label_convert_dict)


In [None]:
x_column = "epoch"
x_label = "Epoch"
y_column = "val_NDCG_@10/dataloader_idx_1"
y_label = "NDCG@10"
ax_instructions["set_xlabel"] = {"xlabel": x_label}

##### SASRec

In [None]:
# Per-epoch table of the SASRec runs within the current selection.
subset_selection["model.rec_model.name"] = "SASRec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)

sas_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
# Replace isolated spikes of y_column with the mean of their two neighbours.
sas_table = hotfix_table(sas_table, y_column) #HOTFIX
sas_table

In [None]:
subset_selection["model.rec_model.name"] = "GRU4Rec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)

gru_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
gru_table = hotfix_table(gru_table, y_column) #HOTFIX
gru_table

In [None]:
# Six per-epoch figures for the current dataset, alternating gru_table / sas_table:
# (a)/(b) unrestricted x range, (c)/(d) epochs 0-10 ("start"), (e)/(f) epochs 100-600 ("end").
plot_name = "epochs_"+super_plot_name+"_GRU_SAS_"+subset_selection["data_params.name"]

#plot_instructions["subplots"] = {"nrows": 3, "ncols": 2, "figsize": (12,12)}
# Base file name; the loop below appends the figure index to it.
plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

# Default marker spacing (presumably forwarded to matplotlib's plot); overridden for the zooms.
plot_kwargs = {"markevery": 0.1}

ax_instructions.pop("set_xticks",None)
ax_instructions["legend"] = {"loc": "lower right"}
#ax_instructions["set_title"] = {"label": super_title}

label_convert_dict = {} #x:f"{x} positive item(s)" for x in [1,2,3,5,10,20,50,100]}

# Shallow copies are enough: only top-level keys are added/replaced/removed below,
# so the nested dicts shared with ax_instructions are never mutated.
repeated_ax_instructions = [ax_instructions.copy() for _ in range(6)]
repeated_ax_instructions[0]["set_ylim"] = {"bottom":0.25}
repeated_ax_instructions[0]["legend"] = {"loc": "upper left"}
repeated_ax_instructions[1]["legend"] = {"loc": "upper left"}


# "start" zoom: hand-tuned limits, and no explicit x-scale entry.
repeated_ax_instructions[2]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[2]["set_ylim"] = {"bottom":0.18,"top":0.43}
repeated_ax_instructions[2].pop("set_xscale",None)
repeated_ax_instructions[3]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[3]["set_ylim"] = {"bottom":0.125,"top":0.26}
repeated_ax_instructions[3].pop("set_xscale",None)


# "end" zoom: hand-tuned limits, and no explicit x-scale entry.
repeated_ax_instructions[4]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[4]["set_ylim"] = {"bottom":0.35,"top":0.77}
repeated_ax_instructions[4].pop("set_xscale",None)
repeated_ax_instructions[5]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[5]["set_ylim"] = {"bottom":0.45,"top":0.80}
repeated_ax_instructions[5].pop("set_xscale",None)


# Marker spacing per figure: 1 for the "start" zooms, 100 for the "end" zooms.
repeated_plot_kwargs = [plot_kwargs.copy() for _ in range(6)]
repeated_plot_kwargs[2]["markevery"] = repeated_plot_kwargs[3]["markevery"] = 1
repeated_plot_kwargs[4]["markevery"] = repeated_plot_kwargs[5]["markevery"] = 100

# Title parts, e.g. "(c) GRU - start".
label_a = ["(a)","(b)","(c)","(d)","(e)","(f)"]
label_b = ["GRU","SAS"]*3
label_c = [" - "+x if x !="" else x for x in ["","","start","start","end","end"]]
for i,(rep_ax_instr, rep_plot_kwargs) in enumerate(zip(repeated_ax_instructions,repeated_plot_kwargs)):
    # Deep copy so that rewriting the nested savefig name leaves plot_instructions untouched.
    rep_plot_instr = deepcopy(plot_instructions.copy())
    # Output file: base name with "_{i}" inserted before the ".pdf" suffix.
    rep_plot_instr["savefig"]["fname"] = plot_instructions["savefig"]["fname"].split(".pdf")[0]+"_"+str(i)+".pdf"
    rep_ax_instr["set_title"] = {"label": f"{label_a[i]} {label_b[i]}{label_c[i]}"}
    # Even indices plot gru_table, odd indices sas_table; one curve per value of main_x_column.
    create_plot(rep_plot_instr, data=([gru_table,sas_table]*3)[i], label_columns=[main_x_column], x_column=x_column, y_column=y_column,
            ax_instructions = rep_ax_instr, label_convert_dict = label_convert_dict, plot_type="plot", plot_kwargs=rep_plot_kwargs)

#### ML-1M

In [None]:
# Switch the dataset filter to ML-1M and clear the per-model filter set by the epoch cells.
subset_selection["data_params.name"] = "ml-1m"
subset_selection.pop("model.rec_model.name",None)

In [None]:
# Summary table for the current selection.
# NOTE(review): save_table=True presumably persists the table under exp_name "april_30";
# every dataset/section uses the same exp_name -- check generate_table for overwrites.
table = generate_table(subset_data(complete_data, subset_selection, subset_keep, merge_columns), rows_labels, column_labels, exp_name = "april_30", save_table=True)
table

In [None]:
# Axis/figure settings for the summary plot: swept parameter on x, NDCG@10 on y.
x_column = main_x_column
x_label = main_x_label
y_column = "NDCG_@10"
y_label = y_column.replace("_", "")  # "NDCG@10"

# NOTE(review): each key is presumably an Axes method that create_plot calls with the
# given kwargs, nested entries chaining a call on the returned object -- confirm there.
ax_instructions = {"set_xscale": {"value": "log"},
                   "set_ylabel": {"ylabel": y_label},
                   # Hide the minor x ticks. A dict literal keeps only the last value of a
                   # repeated key, so both tick settings must live in ONE "get_xaxis" entry.
                   "get_xaxis": {"__kwargs__":{},
                                 "set_tick_params": {"which":"minor", "size":0, "width":0}},
                   "legend": {},
                   "grid":{}}

plot_instructions = {"subplots": {"nrows": 1, "ncols": 1, "figsize": (6,4)},
                    "tight_layout":{},
                    "savefig": {"fname": "plot.pdf"},  # placeholder, overwritten by the plotting cells
                    "show":{}}

In [None]:
# Summary plot for the current dataset: one curve per model over the swept parameter.
plot_name = f'{super_plot_name}_{subset_selection["data_params.name"]}'

# One tick per x value that occurs in the table, labelled with that value.
ax_instructions.update(
    set_xticks=dict(
        ticks=table[x_column].unique().astype(int),
        labels=table[x_column].unique().astype(int),
    ),
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="upper right"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)

In [None]:
# Same table, now plotted against training cost.
x_column, x_label = "train_flops", "Training flops"
plot_name = f'{super_plot_name}_flops_{subset_selection["data_params.name"]}'

# The explicit ticks belonged to the previous x column; remove them and relabel.
ax_instructions.pop("set_xticks", None)
ax_instructions.update(
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="lower left"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)


In [None]:
# Per-epoch view: epochs on the x axis, the logged "val_NDCG_@10/dataloader_idx_1" metric on y.
x_column, x_label = "epoch", "Epoch"
y_column, y_label = "val_NDCG_@10/dataloader_idx_1", "NDCG@10"
ax_instructions["set_xlabel"] = dict(xlabel=x_label)

##### SASRec

In [None]:
# Restrict the selection to SASRec runs; the model filter stays in subset_selection afterwards.
subset_selection["model.rec_model.name"] = "SASRec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)

# NOTE(review): merge_epoch_data presumably joins the per-epoch metrics onto the selected
# runs, and hotfix_table patches y_column in some way -- both live elsewhere, confirm there.
sas_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
sas_table = hotfix_table(sas_table, y_column) #HOTFIX
sas_table

In [None]:
# Same pipeline as for SASRec, now for GRU4Rec runs; the model filter stays set afterwards.
subset_selection["model.rec_model.name"] = "GRU4Rec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)

# NOTE(review): behaviour of merge_epoch_data / hotfix_table is not visible here -- confirm.
gru_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
gru_table = hotfix_table(gru_table, y_column) #HOTFIX
gru_table

In [None]:
# Six per-epoch figures for the current dataset, alternating gru_table / sas_table:
# (a)/(b) unrestricted x range, (c)/(d) epochs 0-10 ("start"), (e)/(f) epochs 100-600 ("end").
plot_name = "epochs_"+super_plot_name+"_GRU_SAS_"+subset_selection["data_params.name"]

#plot_instructions["subplots"] = {"nrows": 3, "ncols": 2, "figsize": (12,12)}
# Base file name; the loop below appends the figure index to it.
plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

# Default marker spacing (presumably forwarded to matplotlib's plot); overridden for the zooms.
plot_kwargs = {"markevery": 0.1}

ax_instructions.pop("set_xticks",None)
ax_instructions["legend"] = {"loc": "upper left"}
#ax_instructions["set_title"] = {"label": super_title}

label_convert_dict = {} #x:f"{x} positive item(s)" for x in [1,2,3,5,10,20,50,100]}

# Shallow copies are enough: only top-level keys are added/replaced/removed below.
repeated_ax_instructions = [ax_instructions.copy() for _ in range(6)]
# "start" zoom: hand-tuned limits, and the log x-scale entry is removed.
repeated_ax_instructions[2]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[2]["set_ylim"] = {"bottom":0.18,"top":0.41}
repeated_ax_instructions[2].pop("set_xscale",None)
repeated_ax_instructions[3]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[3]["set_ylim"] = {"bottom":0.05,"top":0.25}
repeated_ax_instructions[3].pop("set_xscale",None)


# "end" zoom: hand-tuned limits, and the log x-scale entry is removed.
repeated_ax_instructions[4]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[4]["set_ylim"] = {"bottom":0.2,"top":0.605}
repeated_ax_instructions[4].pop("set_xscale",None)
repeated_ax_instructions[5]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[5]["set_ylim"] = {"bottom":0.2,"top":0.60}
repeated_ax_instructions[5].pop("set_xscale",None)


# Marker spacing per figure: 1 for the "start" zooms, 100 for the "end" zooms.
repeated_plot_kwargs = [plot_kwargs.copy() for _ in range(6)]
repeated_plot_kwargs[2]["markevery"] = repeated_plot_kwargs[3]["markevery"] = 1
repeated_plot_kwargs[4]["markevery"] = repeated_plot_kwargs[5]["markevery"] = 100

# Title parts, e.g. "(c) GRU - start".
label_a = ["(a)","(b)","(c)","(d)","(e)","(f)"]
label_b = ["GRU","SAS"]*3
label_c = [" - "+x if x !="" else x for x in ["","","start","start","end","end"]]
for i,(rep_ax_instr, rep_plot_kwargs) in enumerate(zip(repeated_ax_instructions,repeated_plot_kwargs)):
    # Deep copy so that rewriting the nested savefig name leaves plot_instructions untouched.
    rep_plot_instr = deepcopy(plot_instructions.copy())
    # Output file: base name with "_{i}" inserted before the ".pdf" suffix.
    rep_plot_instr["savefig"]["fname"] = plot_instructions["savefig"]["fname"].split(".pdf")[0]+"_"+str(i)+".pdf"
    rep_ax_instr["set_title"] = {"label": f"{label_a[i]} {label_b[i]}{label_c[i]}"}
    # Even indices plot gru_table, odd indices sas_table; one curve per value of main_x_column.
    create_plot(rep_plot_instr, data=([gru_table,sas_table]*3)[i], label_columns=[main_x_column], x_column=x_column, y_column=y_column,
            ax_instructions = rep_ax_instr, label_convert_dict = label_convert_dict, plot_type="plot", plot_kwargs=rep_plot_kwargs)

### Num negatives

In [None]:
# Section-wide settings for the "number of negatives" study:
# the swept column with its axis label, then the file-name stem and title.
main_x_column, main_x_label = (
    "data_params.collator_params.num_negatives.train",
    "Number of negative items",
)
super_plot_name, super_title = (
    "negatives_NDCG@10",
    "Performance with varying number of negative items",
)

In [None]:
# Keep simultaneous_lookforward fixed at 1 (presumably the number of positive items)
# so that only the number of negatives varies in this section.
subset_selection = dict([("data_params.collator_params.simultaneous_lookforward", 1)])

# Rows are identified by the shared labels plus this section's swept column.
rows_label_to_add = [main_x_column]
rows_labels = main_rows_labels + rows_label_to_add

#### Amazon-beauty

In [None]:
# Select the Amazon Beauty dataset and make sure no per-model filter is active.
subset_selection["data_params.name"] = "amazon_beauty"
subset_selection.pop("model.rec_model.name",None)

In [None]:
# Summary table for the current selection.
# NOTE(review): save_table=True presumably persists the table under exp_name "april_30";
# every dataset/section uses the same exp_name -- check generate_table for overwrites.
table = generate_table(subset_data(complete_data, subset_selection, subset_keep, merge_columns), rows_labels, column_labels, exp_name = "april_30", save_table=True)
table

In [None]:
# Axis/figure settings for the summary plot of this dataset (NDCG@10 on the y axis).
y_column = "NDCG_@10"
y_label = y_column.replace("_", "")  # "NDCG@10"

# NOTE(review): each key is presumably an Axes method that create_plot calls with the
# given kwargs, nested entries chaining a call on the returned object -- confirm there.
ax_instructions = {"set_xscale": {"value": "log"},
                   "set_ylabel": {"ylabel": y_label},
                   # Hide the minor x ticks. A dict literal keeps only the last value of a
                   # repeated key, so both tick settings must live in ONE "get_xaxis" entry.
                   "get_xaxis": {"__kwargs__":{},
                                 "set_tick_params": {"which":"minor", "size":0, "width":0}},
                   "legend": {},
                   "grid":{}}

plot_instructions = {"subplots": {"nrows": 1, "ncols": 1, "figsize": (6,4)},
                    "tight_layout":{},
                    "savefig": {"fname": "plot.pdf"},  # placeholder, overwritten by the plotting cells
                    "show":{}}

In [None]:
# Summary plot for the current dataset: one curve per model over the swept parameter.
x_column, x_label = main_x_column, main_x_label
plot_name = f'{super_plot_name}_{subset_selection["data_params.name"]}'

# One tick per x value that occurs in the table, labelled with that value.
ax_instructions.update(
    set_xticks=dict(
        ticks=table[x_column].unique().astype(int),
        labels=table[x_column].unique().astype(int),
    ),
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="upper right"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)

In [None]:
# Same table, now plotted against training cost.
x_column, x_label = "train_flops", "Training flops"
plot_name = f'{super_plot_name}_flops_{subset_selection["data_params.name"]}'

# The explicit ticks belonged to the previous x column; remove them and relabel.
ax_instructions.pop("set_xticks", None)
ax_instructions.update(
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="lower left"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)


In [None]:
# Per-epoch view: epochs on the x axis, the logged "val_NDCG_@10/dataloader_idx_1" metric on y.
x_column, x_label = "epoch", "Epoch"
y_column, y_label = "val_NDCG_@10/dataloader_idx_1", "NDCG@10"
ax_instructions["set_xlabel"] = dict(xlabel=x_label)

##### SASRec

In [None]:
# Restrict the selection to SASRec runs; the model filter stays in subset_selection afterwards.
subset_selection["model.rec_model.name"] = "SASRec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)

# NOTE(review): merge_epoch_data presumably joins the per-epoch metrics onto the selected
# runs, and hotfix_table patches y_column in some way -- both live elsewhere, confirm there.
sas_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
sas_table = hotfix_table(sas_table, y_column) #HOTFIX
sas_table

In [None]:
# Same pipeline as for SASRec, now for GRU4Rec runs; the model filter stays set afterwards.
subset_selection["model.rec_model.name"] = "GRU4Rec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)

# NOTE(review): behaviour of merge_epoch_data / hotfix_table is not visible here -- confirm.
gru_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
gru_table = hotfix_table(gru_table, y_column) #HOTFIX
gru_table

In [None]:
# Six per-epoch figures for the current dataset, alternating gru_table / sas_table:
# (a)/(b) unrestricted x range, (c)/(d) epochs 0-10 ("start"), (e)/(f) epochs 100-600 ("end").
plot_name = "epochs_"+super_plot_name+"_GRU_SAS_"+subset_selection["data_params.name"]

#plot_instructions["subplots"] = {"nrows": 3, "ncols": 2, "figsize": (12,12)}
# Base file name; the loop below appends the figure index to it.
plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

# Default marker spacing (presumably forwarded to matplotlib's plot); overridden for the zooms.
plot_kwargs = {"markevery": 0.1}

ax_instructions.pop("set_xticks",None)
ax_instructions["legend"] = {"loc": "lower right"}
#ax_instructions["set_title"] = {"label": super_title}

label_convert_dict = {} #x:f"{x} positive item(s)" for x in [1,2,3,5,10,20,50,100]}

# Shallow copies are enough: only top-level keys are added/replaced/removed below.
repeated_ax_instructions = [ax_instructions.copy() for _ in range(6)]
#repeated_ax_instructions[0]["set_ylim"] = {"bottom":0.525}

# "start" zoom: hand-tuned limits, and the log x-scale entry is removed.
repeated_ax_instructions[2]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[2]["set_ylim"] = {"bottom":0.28,"top":0.65}
repeated_ax_instructions[2].pop("set_xscale",None)
repeated_ax_instructions[3]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[3]["set_ylim"] = {"bottom":0.14,"top":0.35}
repeated_ax_instructions[3]["legend"] = {"loc": "lower left"}
repeated_ax_instructions[3].pop("set_xscale",None)


# "end" zoom: hand-tuned limits, and the log x-scale entry is removed.
repeated_ax_instructions[4]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[4]["set_ylim"] = {"bottom":0.67,"top":0.695}
repeated_ax_instructions[4].pop("set_xscale",None)
repeated_ax_instructions[5]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[5]["set_ylim"] = {"bottom":0.6,"top":0.7}
repeated_ax_instructions[5].pop("set_xscale",None)


# Marker spacing per figure: 1 for the "start" zooms, 100 for the "end" zooms.
repeated_plot_kwargs = [plot_kwargs.copy() for _ in range(6)]
repeated_plot_kwargs[2]["markevery"] = repeated_plot_kwargs[3]["markevery"] = 1
repeated_plot_kwargs[4]["markevery"] = repeated_plot_kwargs[5]["markevery"] = 100

# Title parts, e.g. "(c) GRU - start".
label_a = ["(a)","(b)","(c)","(d)","(e)","(f)"]
label_b = ["GRU","SAS"]*3
label_c = [" - "+x if x !="" else x for x in ["","","start","start","end","end"]]
for i,(rep_ax_instr, rep_plot_kwargs) in enumerate(zip(repeated_ax_instructions,repeated_plot_kwargs)):
    # Deep copy so that rewriting the nested savefig name leaves plot_instructions untouched.
    rep_plot_instr = deepcopy(plot_instructions.copy())
    # Output file: base name with "_{i}" inserted before the ".pdf" suffix.
    rep_plot_instr["savefig"]["fname"] = plot_instructions["savefig"]["fname"].split(".pdf")[0]+"_"+str(i)+".pdf"
    rep_ax_instr["set_title"] = {"label": f"{label_a[i]} {label_b[i]}{label_c[i]}"}
    # Even indices plot gru_table, odd indices sas_table; one curve per value of main_x_column.
    create_plot(rep_plot_instr, data=([gru_table,sas_table]*3)[i], label_columns=[main_x_column], x_column=x_column, y_column=y_column,
            ax_instructions = rep_ax_instr, label_convert_dict = label_convert_dict, plot_type="plot", plot_kwargs=rep_plot_kwargs)

#### Foursquare TKY

In [None]:
# Switch the dataset filter to Foursquare TKY and clear the per-model filter set by the epoch cells.
subset_selection["data_params.name"] = "foursquare-tky"
subset_selection.pop("model.rec_model.name",None)

In [None]:
# Summary table for the current selection.
# NOTE(review): save_table=True presumably persists the table under exp_name "april_30";
# every dataset/section uses the same exp_name -- check generate_table for overwrites.
table = generate_table(subset_data(complete_data, subset_selection, subset_keep, merge_columns), rows_labels, column_labels, exp_name = "april_30", save_table=True)
table

In [None]:
# Axis/figure settings for the summary plot of this dataset (NDCG@10 on the y axis).
y_column = "NDCG_@10"
y_label = y_column.replace("_", "")  # "NDCG@10"

# NOTE(review): each key is presumably an Axes method that create_plot calls with the
# given kwargs, nested entries chaining a call on the returned object -- confirm there.
ax_instructions = {"set_xscale": {"value": "log"},
                   "set_ylabel": {"ylabel": y_label},
                   # Hide the minor x ticks. A dict literal keeps only the last value of a
                   # repeated key, so both tick settings must live in ONE "get_xaxis" entry.
                   "get_xaxis": {"__kwargs__":{},
                                 "set_tick_params": {"which":"minor", "size":0, "width":0}},
                   "legend": {},
                   "grid":{}}

plot_instructions = {"subplots": {"nrows": 1, "ncols": 1, "figsize": (6,4)},
                    "tight_layout":{},
                    "savefig": {"fname": "plot.pdf"},  # placeholder, overwritten by the plotting cells
                    "show":{}}

In [None]:
# Summary plot for the current dataset: one curve per model over the swept parameter.
x_column, x_label = main_x_column, main_x_label
plot_name = f'{super_plot_name}_{subset_selection["data_params.name"]}'

# One tick per x value that occurs in the table, labelled with that value.
ax_instructions.update(
    set_xticks=dict(
        ticks=table[x_column].unique().astype(int),
        labels=table[x_column].unique().astype(int),
    ),
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="center right"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)

In [None]:
# Same table, now plotted against training cost.
x_column, x_label = "train_flops", "Training flops"
plot_name = f'{super_plot_name}_flops_{subset_selection["data_params.name"]}'

# The explicit ticks belonged to the previous x column; remove them and relabel.
ax_instructions.pop("set_xticks", None)
ax_instructions.update(
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="lower left"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)


In [None]:
# Per-epoch view: epochs on the x axis, the logged "val_NDCG_@10/dataloader_idx_1" metric on y.
x_column, x_label = "epoch", "Epoch"
y_column, y_label = "val_NDCG_@10/dataloader_idx_1", "NDCG@10"
ax_instructions["set_xlabel"] = dict(xlabel=x_label)

##### SASRec

In [None]:
# Restrict the selection to SASRec runs; the model filter stays in subset_selection afterwards.
subset_selection["model.rec_model.name"] = "SASRec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)

# NOTE(review): merge_epoch_data presumably joins the per-epoch metrics onto the selected
# runs, and hotfix_table patches y_column in some way -- both live elsewhere, confirm there.
sas_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
sas_table = hotfix_table(sas_table, y_column) #HOTFIX
sas_table

In [None]:
# Same pipeline as for SASRec, now for GRU4Rec runs; the model filter stays set afterwards.
subset_selection["model.rec_model.name"] = "GRU4Rec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)

# NOTE(review): behaviour of merge_epoch_data / hotfix_table is not visible here -- confirm.
gru_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
gru_table = hotfix_table(gru_table, y_column) #HOTFIX
gru_table

In [None]:
# Six per-epoch figures for the current dataset, alternating gru_table / sas_table:
# (a)/(b) unrestricted x range, (c)/(d) epochs 0-10 ("start"), (e)/(f) epochs 100-600 ("end").
plot_name = "epochs_"+super_plot_name+"_GRU_SAS_"+subset_selection["data_params.name"]

#plot_instructions["subplots"] = {"nrows": 3, "ncols": 2, "figsize": (12,12)}
# Base file name; the loop below appends the figure index to it.
plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

# Default marker spacing (presumably forwarded to matplotlib's plot); overridden for the zooms.
plot_kwargs = {"markevery": 0.1}

ax_instructions.pop("set_xticks",None)
ax_instructions["legend"] = {"loc": "lower right"}
#ax_instructions["set_title"] = {"label": super_title}

label_convert_dict = {} #x:f"{x} positive item(s)" for x in [1,2,3,5,10,20,50,100]}

# Shallow copies are enough: only top-level keys are added/replaced/removed below.
repeated_ax_instructions = [ax_instructions.copy() for _ in range(6)]
repeated_ax_instructions[0]["set_ylim"] = {"bottom":0.25}
repeated_ax_instructions[0]["legend"] = {"loc": "upper left"}
repeated_ax_instructions[1]["legend"] = {"loc": "upper left"}


# "start" zoom: hand-tuned limits, and the log x-scale entry is removed.
repeated_ax_instructions[2]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[2]["set_ylim"] = {"bottom":0.16,"top":0.43}
repeated_ax_instructions[2].pop("set_xscale",None)
repeated_ax_instructions[3]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[3]["set_ylim"] = {"bottom":0.05,"top":0.26}
repeated_ax_instructions[3].pop("set_xscale",None)


# "end" zoom: hand-tuned limits, and the log x-scale entry is removed.
repeated_ax_instructions[4]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[4]["set_ylim"] = {"bottom":0.5,"top":0.77}
repeated_ax_instructions[4].pop("set_xscale",None)
repeated_ax_instructions[5]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[5]["set_ylim"] = {"bottom":0.55,"top":0.82}
repeated_ax_instructions[5].pop("set_xscale",None)


# Marker spacing per figure: 1 for the "start" zooms, 100 for the "end" zooms.
repeated_plot_kwargs = [plot_kwargs.copy() for _ in range(6)]
repeated_plot_kwargs[2]["markevery"] = repeated_plot_kwargs[3]["markevery"] = 1
repeated_plot_kwargs[4]["markevery"] = repeated_plot_kwargs[5]["markevery"] = 100

# Title parts, e.g. "(c) GRU - start".
label_a = ["(a)","(b)","(c)","(d)","(e)","(f)"]
label_b = ["GRU","SAS"]*3
label_c = [" - "+x if x !="" else x for x in ["","","start","start","end","end"]]
for i,(rep_ax_instr, rep_plot_kwargs) in enumerate(zip(repeated_ax_instructions,repeated_plot_kwargs)):
    # Deep copy so that rewriting the nested savefig name leaves plot_instructions untouched.
    rep_plot_instr = deepcopy(plot_instructions.copy())
    # Output file: base name with "_{i}" inserted before the ".pdf" suffix.
    rep_plot_instr["savefig"]["fname"] = plot_instructions["savefig"]["fname"].split(".pdf")[0]+"_"+str(i)+".pdf"
    rep_ax_instr["set_title"] = {"label": f"{label_a[i]} {label_b[i]}{label_c[i]}"}
    # Even indices plot gru_table, odd indices sas_table; one curve per value of main_x_column.
    create_plot(rep_plot_instr, data=([gru_table,sas_table]*3)[i], label_columns=[main_x_column], x_column=x_column, y_column=y_column,
            ax_instructions = rep_ax_instr, label_convert_dict = label_convert_dict, plot_type="plot", plot_kwargs=rep_plot_kwargs)

#### ML-1M

In [None]:
# Switch the dataset filter to ML-1M and clear the per-model filter set by the epoch cells.
subset_selection["data_params.name"] = "ml-1m"
subset_selection.pop("model.rec_model.name",None)

In [None]:
# Summary table for the current selection.
# NOTE(review): save_table=True presumably persists the table under exp_name "april_30";
# every dataset/section uses the same exp_name -- check generate_table for overwrites.
table = generate_table(subset_data(complete_data, subset_selection, subset_keep, merge_columns), rows_labels, column_labels, exp_name = "april_30", save_table=True)
table

In [None]:
# Axis/figure settings for the summary plot: swept parameter on x, NDCG@10 on y.
x_column = main_x_column
x_label = main_x_label
y_column = "NDCG_@10"
y_label = y_column.replace("_", "")  # "NDCG@10"

# NOTE(review): each key is presumably an Axes method that create_plot calls with the
# given kwargs, nested entries chaining a call on the returned object -- confirm there.
ax_instructions = {"set_xscale": {"value": "log"},
                   "set_ylabel": {"ylabel": y_label},
                   # Hide the minor x ticks. A dict literal keeps only the last value of a
                   # repeated key, so both tick settings must live in ONE "get_xaxis" entry.
                   "get_xaxis": {"__kwargs__":{},
                                 "set_tick_params": {"which":"minor", "size":0, "width":0}},
                   "legend": {},
                   "grid":{}}

plot_instructions = {"subplots": {"nrows": 1, "ncols": 1, "figsize": (6,4)},
                    "tight_layout":{},
                    "savefig": {"fname": "plot.pdf"},  # placeholder, overwritten by the plotting cells
                    "show":{}}

In [None]:
# Summary plot for the current dataset: one curve per model over the swept parameter.
plot_name = f'{super_plot_name}_{subset_selection["data_params.name"]}'

# One tick per x value that occurs in the table, labelled with that value.
ax_instructions.update(
    set_xticks=dict(
        ticks=table[x_column].unique().astype(int),
        labels=table[x_column].unique().astype(int),
    ),
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="upper right"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)

In [None]:
# Same table, now plotted against training cost.
x_column, x_label = "train_flops", "Training flops"
plot_name = f'{super_plot_name}_flops_{subset_selection["data_params.name"]}'

# The explicit ticks belonged to the previous x column; remove them and relabel.
ax_instructions.pop("set_xticks", None)
ax_instructions.update(
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="lower left"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)


In [None]:
# Per-epoch view: epochs on the x axis, the logged "val_NDCG_@10/dataloader_idx_1" metric on y.
x_column, x_label = "epoch", "Epoch"
y_column, y_label = "val_NDCG_@10/dataloader_idx_1", "NDCG@10"
ax_instructions["set_xlabel"] = dict(xlabel=x_label)

##### SASRec

In [None]:
# Restrict the selection to SASRec runs; the model filter stays in subset_selection afterwards.
subset_selection["model.rec_model.name"] = "SASRec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)

# NOTE(review): merge_epoch_data presumably joins the per-epoch metrics onto the selected
# runs, and hotfix_table patches y_column in some way -- both live elsewhere, confirm there.
sas_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
sas_table = hotfix_table(sas_table, y_column) #HOTFIX
sas_table

In [None]:
# Same pipeline as for SASRec, now for GRU4Rec runs; the model filter stays set afterwards.
subset_selection["model.rec_model.name"] = "GRU4Rec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)

# NOTE(review): behaviour of merge_epoch_data / hotfix_table is not visible here -- confirm.
gru_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
gru_table = hotfix_table(gru_table, y_column) #HOTFIX
gru_table

In [None]:
# Six per-epoch figures for the current dataset, alternating gru_table / sas_table:
# (a)/(b) unrestricted x range, (c)/(d) epochs 0-10 ("start"), (e)/(f) epochs 100-600 ("end").
plot_name = "epochs_"+super_plot_name+"_GRU_SAS_"+subset_selection["data_params.name"]

#plot_instructions["subplots"] = {"nrows": 3, "ncols": 2, "figsize": (12,12)}
# Base file name; the loop below appends the figure index to it.
plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

# Default marker spacing (presumably forwarded to matplotlib's plot); overridden for the zooms.
plot_kwargs = {"markevery": 0.1}

ax_instructions.pop("set_xticks",None)
ax_instructions["legend"] = {"loc": "upper left"}
#ax_instructions["set_title"] = {"label": super_title}

label_convert_dict = {} #x:f"{x} positive item(s)" for x in [1,2,3,5,10,20,50,100]}

# Shallow copies are enough: only top-level keys are added/replaced/removed below.
repeated_ax_instructions = [ax_instructions.copy() for _ in range(6)]
# "start" zoom: hand-tuned limits, and the log x-scale entry is removed.
repeated_ax_instructions[2]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[2]["set_ylim"] = {"bottom":0.225,"top":0.4}
repeated_ax_instructions[2].pop("set_xscale",None)
repeated_ax_instructions[3]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[3]["set_ylim"] = {"bottom":0.03,"top":0.25}
repeated_ax_instructions[3].pop("set_xscale",None)


# "end" zoom: hand-tuned limits, and the log x-scale entry is removed.
repeated_ax_instructions[4]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[4]["set_ylim"] = {"bottom":0.54,"top":0.605}
repeated_ax_instructions[4].pop("set_xscale",None)
repeated_ax_instructions[5]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[5]["set_ylim"] = {"bottom":0.4,"top":0.62}
repeated_ax_instructions[5].pop("set_xscale",None)


# Marker spacing per figure: 1 for the "start" zooms, 100 for the "end" zooms.
repeated_plot_kwargs = [plot_kwargs.copy() for _ in range(6)]
repeated_plot_kwargs[2]["markevery"] = repeated_plot_kwargs[3]["markevery"] = 1
repeated_plot_kwargs[4]["markevery"] = repeated_plot_kwargs[5]["markevery"] = 100

# Title parts, e.g. "(c) GRU - start".
label_a = ["(a)","(b)","(c)","(d)","(e)","(f)"]
label_b = ["GRU","SAS"]*3
label_c = [" - "+x if x !="" else x for x in ["","","start","start","end","end"]]
for i,(rep_ax_instr, rep_plot_kwargs) in enumerate(zip(repeated_ax_instructions,repeated_plot_kwargs)):
    # Deep copy so that rewriting the nested savefig name leaves plot_instructions untouched.
    rep_plot_instr = deepcopy(plot_instructions.copy())
    # Output file: base name with "_{i}" inserted before the ".pdf" suffix.
    rep_plot_instr["savefig"]["fname"] = plot_instructions["savefig"]["fname"].split(".pdf")[0]+"_"+str(i)+".pdf"
    rep_ax_instr["set_title"] = {"label": f"{label_a[i]} {label_b[i]}{label_c[i]}"}
    # Even indices plot gru_table, odd indices sas_table; one curve per value of main_x_column.
    create_plot(rep_plot_instr, data=([gru_table,sas_table]*3)[i], label_columns=[main_x_column], x_column=x_column, y_column=y_column,
            ax_instructions = rep_ax_instr, label_convert_dict = label_convert_dict, plot_type="plot", plot_kwargs=rep_plot_kwargs)

### Num positives/negatives

In [None]:
# Section-wide settings for the joint positives/negatives study:
# the swept column with its axis label, then the file-name stem and title.
main_x_column, main_x_label = (
    "num_pos_neg",
    "Number of positive/negative items",
)
super_plot_name, super_title = (
    "pos_neg_NDCG@10",
    "Performance with varying number of positive/negative items",
)

In [None]:
# No fixed filters in this section.
subset_selection = dict()

# Source columns for the combined "num_pos_neg" column (lookforward first, then negatives).
# NOTE(review): subset_data presumably joins them with "_" -- confirm against its definition.
merge_columns = dict(num_pos_neg=[
    "data_params.collator_params.simultaneous_lookforward",
    "data_params.collator_params.num_negatives.train",
])

# Rows are identified by the shared labels plus this section's swept column.
rows_label_to_add = [main_x_column]
rows_labels = main_rows_labels + rows_label_to_add

In [None]:
def subset_to_equal_pos_neg(table):
    """Keep the rows whose positive and negative item counts are equal.

    The ``"num_pos_neg"`` column must hold strings of the form ``"P_N"``
    (two parts separated by one underscore). Rows whose two parts differ
    are dropped; the parts are compared as strings, so ``"01_1"`` does not
    count as equal. In the remaining rows the column is replaced by ``P``
    converted to ``int``, and the result is sorted by that value.

    Args:
        table: DataFrame with a ``"num_pos_neg"`` string column.

    Returns:
        A new DataFrame; ``table`` itself is left unmodified.

    Raises:
        ValueError: if a value does not split into exactly two parts on
            ``"_"``, or if the first part of a kept value is not an integer.
    """
    def has_equal_counts(value):
        positives, negatives = value.split("_")
        return positives == negatives

    def positives_as_int(value):
        positives, _ = value.split("_")
        return int(positives)

    # astype(bool) keeps the mask boolean even when the table is empty.
    keep = table["num_pos_neg"].apply(has_equal_counts).astype(bool)
    # Work on an explicit copy: assigning a column on a .loc slice is chained
    # assignment (SettingWithCopyWarning) and may or may not reach the caller's frame.
    selected = table.loc[keep].copy()
    selected["num_pos_neg"] = selected["num_pos_neg"].apply(positives_as_int)
    # Sort numerically by the number of positive items.
    return selected.sort_values("num_pos_neg")

#### Amazon-beauty

In [None]:
# Select the Amazon Beauty dataset and make sure no per-model filter is active.
subset_selection["data_params.name"] = "amazon_beauty"
subset_selection.pop("model.rec_model.name",None)

In [None]:
# Summary table for the current selection.
# NOTE(review): save_table=True presumably persists the table under exp_name "april_30"
# before the equal-count filter below is applied -- check generate_table.
table = generate_table(subset_data(complete_data, subset_selection, subset_keep, merge_columns), rows_labels, column_labels, exp_name = "april_30", save_table=True)
# Keep only runs with as many positives as negatives; "num_pos_neg" becomes that integer count.
table = subset_to_equal_pos_neg(table)
table

In [None]:
# Axis/figure settings for the summary plot of this dataset (NDCG@10 on the y axis).
y_column = "NDCG_@10"
y_label = y_column.replace("_", "")  # "NDCG@10"

# NOTE(review): each key is presumably an Axes method that create_plot calls with the
# given kwargs, nested entries chaining a call on the returned object -- confirm there.
ax_instructions = {"set_xscale": {"value": "log"},
                   "set_ylabel": {"ylabel": y_label},
                   # Hide the minor x ticks. A dict literal keeps only the last value of a
                   # repeated key, so both tick settings must live in ONE "get_xaxis" entry.
                   "get_xaxis": {"__kwargs__":{},
                                 "set_tick_params": {"which":"minor", "size":0, "width":0}},
                   "legend": {},
                   "grid":{}}

plot_instructions = {"subplots": {"nrows": 1, "ncols": 1, "figsize": (6,4)},
                    "tight_layout":{},
                    "savefig": {"fname": "plot.pdf"},  # placeholder, overwritten by the plotting cells
                    "show":{}}

In [None]:
# Summary plot for the current dataset: one curve per model over the swept parameter.
x_column, x_label = main_x_column, main_x_label
plot_name = f'{super_plot_name}_{subset_selection["data_params.name"]}'

# One tick per x value that occurs in the table, labelled with that value.
ax_instructions.update(
    set_xticks=dict(
        ticks=table[x_column].unique().astype(int),
        labels=table[x_column].unique().astype(int),
    ),
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="upper right"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)

In [None]:
# Same table, now plotted against training cost.
x_column, x_label = "train_flops", "Training flops"
plot_name = f'{super_plot_name}_flops_{subset_selection["data_params.name"]}'

# The explicit ticks belonged to the previous x column; remove them and relabel.
ax_instructions.pop("set_xticks", None)
ax_instructions.update(
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="lower left"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)


In [None]:
# Per-epoch view: epochs on the x axis, the logged "val_NDCG_@10/dataloader_idx_1" metric on y.
x_column, x_label = "epoch", "Epoch"
y_column, y_label = "val_NDCG_@10/dataloader_idx_1", "NDCG@10"
ax_instructions["set_xlabel"] = dict(xlabel=x_label)

##### SASRec

In [None]:
# Restrict the selection to SASRec runs; the model filter stays in subset_selection afterwards.
subset_selection["model.rec_model.name"] = "SASRec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)
# Keep only runs with as many positives as negatives; "num_pos_neg" becomes that integer count.
epoch_table = subset_to_equal_pos_neg(epoch_table)

# NOTE(review): merge_epoch_data presumably joins the per-epoch metrics onto the selected
# runs, and hotfix_table patches y_column in some way -- both live elsewhere, confirm there.
sas_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
sas_table = hotfix_table(sas_table, y_column) #HOTFIX
sas_table

In [None]:
# Same pipeline as for SASRec, now for GRU4Rec runs; the model filter stays set afterwards.
subset_selection["model.rec_model.name"] = "GRU4Rec"
epoch_table = subset_data(complete_data, subset_selection, subset_keep, merge_columns)
# Keep only runs with as many positives as negatives; "num_pos_neg" becomes that integer count.
epoch_table = subset_to_equal_pos_neg(epoch_table)

# NOTE(review): behaviour of merge_epoch_data / hotfix_table is not visible here -- confirm.
gru_table = merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels)
gru_table = hotfix_table(gru_table, y_column) #HOTFIX
gru_table

In [None]:
# Six per-epoch figures for the current dataset, alternating gru_table / sas_table:
# (a)/(b) unrestricted x range, (c)/(d) epochs 0-10 ("start"), (e)/(f) epochs 100-600 ("end").
plot_name = "epochs_"+super_plot_name+"_GRU_SAS_"+subset_selection["data_params.name"]

#plot_instructions["subplots"] = {"nrows": 3, "ncols": 2, "figsize": (12,12)}
# Base file name; the loop below appends the figure index to it.
plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

# Default marker spacing (presumably forwarded to matplotlib's plot); overridden for the zooms.
plot_kwargs = {"markevery": 0.1}

ax_instructions.pop("set_xticks",None)
ax_instructions["legend"] = {"loc": "lower right"}
#ax_instructions["set_title"] = {"label": super_title}

label_convert_dict = {} #x:f"{x} positive item(s)" for x in [1,2,3,5,10,20,50,100]}

# Shallow copies are enough: only top-level keys are added/replaced/removed below.
repeated_ax_instructions = [ax_instructions.copy() for _ in range(6)]
#repeated_ax_instructions[0]["set_ylim"] = {"bottom":0.525}
repeated_ax_instructions[1]["legend"] = {"loc": "upper left"}

# "start" zoom: hand-tuned limits, and the log x-scale entry is removed.
repeated_ax_instructions[2]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[2]["set_ylim"] = {"bottom":0.28,"top":0.65}
repeated_ax_instructions[2].pop("set_xscale",None)
repeated_ax_instructions[3]["set_xlim"] = {"left":0,"right":10}
repeated_ax_instructions[3]["set_ylim"] = {"bottom":0.17,"top":0.35}
repeated_ax_instructions[3]["legend"] = {"loc": "lower left"}
repeated_ax_instructions[3].pop("set_xscale",None)


# "end" zoom: hand-tuned limits, the log x-scale entry is removed, legend moved to the lower left.
repeated_ax_instructions[4]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[4]["set_ylim"] = {"bottom":0.55,"top":0.7}
repeated_ax_instructions[4].pop("set_xscale",None)
repeated_ax_instructions[4]["legend"] = {"loc": "lower left"}
repeated_ax_instructions[5]["set_xlim"] = {"left":100,"right":600}
repeated_ax_instructions[5]["set_ylim"] = {"bottom":0.35,"top":0.7}
repeated_ax_instructions[5].pop("set_xscale",None)
repeated_ax_instructions[5]["legend"] = {"loc": "lower left"}


# Marker spacing per figure: 1 for the "start" zooms, 100 for the "end" zooms.
repeated_plot_kwargs = [plot_kwargs.copy() for _ in range(6)]
repeated_plot_kwargs[2]["markevery"] = repeated_plot_kwargs[3]["markevery"] = 1
repeated_plot_kwargs[4]["markevery"] = repeated_plot_kwargs[5]["markevery"] = 100

# Title parts, e.g. "(c) GRU - start".
label_a = ["(a)","(b)","(c)","(d)","(e)","(f)"]
label_b = ["GRU","SAS"]*3
label_c = [" - "+x if x !="" else x for x in ["","","start","start","end","end"]]
for i,(rep_ax_instr, rep_plot_kwargs) in enumerate(zip(repeated_ax_instructions,repeated_plot_kwargs)):
    # Deep copy so that rewriting the nested savefig name leaves plot_instructions untouched.
    rep_plot_instr = deepcopy(plot_instructions.copy())
    # Output file: base name with "_{i}" inserted before the ".pdf" suffix.
    rep_plot_instr["savefig"]["fname"] = plot_instructions["savefig"]["fname"].split(".pdf")[0]+"_"+str(i)+".pdf"
    rep_ax_instr["set_title"] = {"label": f"{label_a[i]} {label_b[i]}{label_c[i]}"}
    # Even indices plot gru_table, odd indices sas_table; one curve per value of main_x_column.
    create_plot(rep_plot_instr, data=([gru_table,sas_table]*3)[i], label_columns=[main_x_column], x_column=x_column, y_column=y_column,
            ax_instructions = rep_ax_instr, label_convert_dict = label_convert_dict, plot_type="plot", plot_kwargs=rep_plot_kwargs)

#### Foursquare TKY

In [None]:
# Switch the dataset filter to Foursquare TKY and clear the per-model filter set by the epoch cells.
subset_selection["data_params.name"] = "foursquare-tky"
subset_selection.pop("model.rec_model.name",None)

In [None]:
# Summary table for the current selection.
# NOTE(review): save_table=True presumably persists the table under exp_name "april_30"
# before the equal-count filter below is applied -- check generate_table.
table = generate_table(subset_data(complete_data, subset_selection, subset_keep, merge_columns), rows_labels, column_labels, exp_name = "april_30", save_table=True)
# Keep only runs with as many positives as negatives; "num_pos_neg" becomes that integer count.
table = subset_to_equal_pos_neg(table)
table

In [None]:
# Axis/figure settings for the summary plot of this dataset (NDCG@10 on the y axis).
y_column = "NDCG_@10"
y_label = y_column.replace("_", "")  # "NDCG@10"

# NOTE(review): each key is presumably an Axes method that create_plot calls with the
# given kwargs, nested entries chaining a call on the returned object -- confirm there.
ax_instructions = {"set_xscale": {"value": "log"},
                   "set_ylabel": {"ylabel": y_label},
                   # Hide the minor x ticks. A dict literal keeps only the last value of a
                   # repeated key, so both tick settings must live in ONE "get_xaxis" entry.
                   "get_xaxis": {"__kwargs__":{},
                                 "set_tick_params": {"which":"minor", "size":0, "width":0}},
                   "legend": {},
                   "grid":{}}

plot_instructions = {"subplots": {"nrows": 1, "ncols": 1, "figsize": (6,4)},
                    "tight_layout":{},
                    "savefig": {"fname": "plot.pdf"},  # placeholder, overwritten by the plotting cells
                    "show":{}}

In [None]:
# Summary plot for the current dataset: one curve per model over the swept parameter.
x_column, x_label = main_x_column, main_x_label
plot_name = f'{super_plot_name}_{subset_selection["data_params.name"]}'

# One tick per x value that occurs in the table, labelled with that value.
ax_instructions.update(
    set_xticks=dict(
        ticks=table[x_column].unique().astype(int),
        labels=table[x_column].unique().astype(int),
    ),
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="upper right"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)

In [None]:
# Same table, now plotted against training cost.
x_column, x_label = "train_flops", "Training flops"
plot_name = f'{super_plot_name}_flops_{subset_selection["data_params.name"]}'

# The explicit ticks belonged to the previous x column; remove them and relabel.
ax_instructions.pop("set_xticks", None)
ax_instructions.update(
    set_xlabel=dict(xlabel=x_label),
    legend=dict(loc="lower left"),
)

plot_instructions["savefig"] = dict(fname=f'../out/img/{plot_name}.pdf')

label_convert_dict = dict()

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)


In [None]:
# Switch to per-epoch curves: "epoch" on x, the per-epoch validation metric
# column on y. Only the x label is updated in ax_instructions here; its
# "set_ylabel" entry keeps whatever value it already had.
x_column, x_label = "epoch", "Epoch"
y_column, y_label = "val_NDCG_@10/dataloader_idx_1", "NDCG@10"
ax_instructions.update(set_xlabel={"xlabel": x_label})

##### SASRec

In [None]:
# Per-epoch metrics for the SASRec runs of the currently selected dataset.
subset_selection["model.rec_model.name"] = "SASRec"
epoch_table = subset_to_equal_pos_neg(
    subset_data(complete_data, subset_selection, subset_keep, merge_columns)
)

# Attach the per-epoch metrics to the selected runs, then apply the hotfix
# for the y column.
sas_table = hotfix_table(  # HOTFIX
    merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels),
    y_column,
)
sas_table

In [None]:
# Per-epoch metrics for the GRU4Rec runs of the currently selected dataset.
subset_selection["model.rec_model.name"] = "GRU4Rec"
epoch_table = subset_to_equal_pos_neg(
    subset_data(complete_data, subset_selection, subset_keep, merge_columns)
)

# Attach the per-epoch metrics to the selected runs, then apply the hotfix
# for the y column.
gru_table = hotfix_table(  # HOTFIX
    merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels),
    y_column,
)
gru_table

In [None]:
# Per-epoch curves for GRU4Rec and SASRec, written to six separate PDFs:
# panels (a)/(b) use the base axis settings, (c)/(d) are titled "start" and
# (e)/(f) "end", each with its own x/y window.
plot_name = f'epochs_{super_plot_name}_GRU_SAS_{subset_selection["data_params.name"]}'

#plot_instructions["subplots"] = {"nrows": 3, "ncols": 2, "figsize": (12,12)}
plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

plot_kwargs = {"markevery": 0.1}

ax_instructions.pop("set_xticks", None)
ax_instructions["legend"] = {"loc": "lower right"}
#ax_instructions["set_title"] = {"label": super_title}

label_convert_dict = {}  # x:f"{x} positive item(s)" for x in [1,2,3,5,10,20,50,100]}

# One shallow copy of the axis settings per panel. Panels only add, replace
# or remove top-level keys, so the shared nested dicts are never modified here.
repeated_ax_instructions = [dict(ax_instructions) for _ in range(6)]
repeated_ax_instructions[0]["set_ylim"] = {"bottom": 0.3}

# (panel index, (x left, x right), (y bottom, y top)) for the zoomed panels.
zoom_windows = [
    (2, (0, 10), (0.2, 0.45)),
    (3, (0, 10), (0.15, 0.33)),
    (4, (100, 600), (0.61, 0.77)),
    (5, (100, 600), (0.48, 0.8)),
]
for panel, x_range, y_range in zoom_windows:
    zoomed = repeated_ax_instructions[panel]
    zoomed["set_xlim"] = {"left": x_range[0], "right": x_range[1]}
    zoomed["set_ylim"] = {"bottom": y_range[0], "top": y_range[1]}
    # Zoomed panels do not carry over the "set_xscale" entry.
    zoomed.pop("set_xscale", None)
repeated_ax_instructions[3]["legend"] = {"loc": "upper left"}

# Marker spacing per panel: the default for (a)/(b), 1 for (c)/(d) and
# 100 for (e)/(f).
repeated_plot_kwargs = [dict(plot_kwargs) for _ in range(6)]
for panel, every in ((2, 1), (3, 1), (4, 100), (5, 100)):
    repeated_plot_kwargs[panel]["markevery"] = every

label_a = ["(a)", "(b)", "(c)", "(d)", "(e)", "(f)"]
label_b = ["GRU", "SAS"] * 3
label_c = ["", "", " - start", " - start", " - end", " - end"]

# Loop invariants: output path without the ".pdf" suffix, and the table drawn
# in each panel (GRU and SAS alternate).
base_fname = plot_instructions["savefig"]["fname"].split(".pdf")[0]
panel_tables = [gru_table, sas_table] * 3
for i, (rep_ax_instr, rep_plot_kwargs) in enumerate(zip(repeated_ax_instructions, repeated_plot_kwargs)):
    # Deep copy so the per-panel file name does not leak into plot_instructions.
    rep_plot_instr = deepcopy(plot_instructions)
    rep_plot_instr["savefig"]["fname"] = f"{base_fname}_{i}.pdf"
    rep_ax_instr["set_title"] = {"label": f"{label_a[i]} {label_b[i]}{label_c[i]}"}
    create_plot(
        rep_plot_instr,
        data=panel_tables[i],
        label_columns=[main_x_column],
        x_column=x_column,
        y_column=y_column,
        ax_instructions=rep_ax_instr,
        label_convert_dict=label_convert_dict,
        plot_type="plot",
        plot_kwargs=rep_plot_kwargs,
    )

#### ML-1M

In [None]:
# Clear any model filter first (a no-op when the key is absent), then point
# the selection at the ML-1M dataset.
subset_selection.pop("model.rec_model.name", None)
subset_selection["data_params.name"] = "ml-1m"

In [None]:
# Build the result table for the current selection and pass it through
# subset_to_equal_pos_neg before displaying it.
# NOTE(review): the Foursquare-TKY section also saves with
# exp_name="april_30" -- confirm that generate_table does not overwrite
# that file here.
table = subset_to_equal_pos_neg(
    generate_table(
        subset_data(complete_data, subset_selection, subset_keep, merge_columns),
        rows_labels,
        column_labels,
        exp_name="april_30",
        save_table=True,
    )
)
table

In [None]:
# Reset the x axis to the main x variable and its label.
x_column, x_label = main_x_column, main_x_label
# Metric plotted on the y axis; the axis label is the column name with the
# underscores removed ("NDCG_@10" -> "NDCG@10").
y_column = "NDCG_@10"
y_label = y_column.replace("_", "")

# Axes-level settings handed to create_plot.
# Presumably each key names an Axes method and each value holds its keyword
# arguments -- confirm against create_plot.
# Both minor-tick parameters live in ONE "get_xaxis" entry: a dict literal
# keeps only the last value of a repeated key, so listing "get_xaxis" twice
# silently discards the first set of tick parameters.
ax_instructions = {"set_xscale": {"value": "log"},
                   "set_ylabel": {"ylabel": y_label},
                   "get_xaxis": {"__kwargs__": {},
                                 "set_tick_params": {"which": "minor", "size": 0, "width": 0}},
                   "legend": {},
                   "grid": {}}

# Figure-level settings handed to create_plot. "plot.pdf" is only a
# placeholder: the cells that follow overwrite "savefig" before plotting.
plot_instructions = {"subplots": {"nrows": 1, "ncols": 1, "figsize": (6, 4)},
                     "tight_layout": {},
                     "savefig": {"fname": "plot.pdf"},
                     "show": {}}

In [None]:
# Plot the metric against the main x variable, with label_columns set to the
# recommender model name.
plot_name = f'{super_plot_name}_{subset_selection["data_params.name"]}'

# Put an explicit tick (labelled with its integer value) at every distinct
# x value present in the table.
tick_values = table[x_column].unique().astype(int)
ax_instructions.update(
    set_xticks={"ticks": tick_values, "labels": tick_values},
    set_xlabel={"xlabel": x_label},
    legend={"loc": "upper right"},
)

plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

label_convert_dict = {}

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)

In [None]:
# Same metric as the previous plot, now against the "train_flops" column.
x_column, x_label = "train_flops", "Training flops"
plot_name = f'{super_plot_name}_flops_{subset_selection["data_params.name"]}'

# The explicit tick list belongs to the previous x variable; remove it if set.
ax_instructions.pop("set_xticks", None)
ax_instructions.update(
    set_xlabel={"xlabel": x_label},
    legend={"loc": "lower left"},
)
#ax_instructions["set_title"] = {"label": super_title}

plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

label_convert_dict = {}

create_plot(
    plot_instructions,
    data=table,
    label_columns=["model.rec_model.name"],
    x_column=x_column,
    y_column=y_column,
    ax_instructions=ax_instructions,
    label_convert_dict=label_convert_dict,
)


In [None]:
# Switch to per-epoch curves: "epoch" on x, the per-epoch validation metric
# column on y. Only the x label is updated in ax_instructions here; its
# "set_ylabel" entry keeps whatever value it already had.
x_column, x_label = "epoch", "Epoch"
y_column, y_label = "val_NDCG_@10/dataloader_idx_1", "NDCG@10"
ax_instructions.update(set_xlabel={"xlabel": x_label})

##### SASRec

In [None]:
# Per-epoch metrics for the SASRec runs of the currently selected dataset.
subset_selection["model.rec_model.name"] = "SASRec"
epoch_table = subset_to_equal_pos_neg(
    subset_data(complete_data, subset_selection, subset_keep, merge_columns)
)

# Attach the per-epoch metrics to the selected runs, then apply the hotfix
# for the y column.
sas_table = hotfix_table(  # HOTFIX
    merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels),
    y_column,
)
sas_table

In [None]:
# Per-epoch metrics for the GRU4Rec runs of the currently selected dataset.
subset_selection["model.rec_model.name"] = "GRU4Rec"
epoch_table = subset_to_equal_pos_neg(
    subset_data(complete_data, subset_selection, subset_keep, merge_columns)
)

# Attach the per-epoch metrics to the selected runs, then apply the hotfix
# for the y column.
gru_table = hotfix_table(  # HOTFIX
    merge_epoch_data(metrics_per_epoch, epoch_table, rows_labels),
    y_column,
)
gru_table

In [None]:
# Per-epoch curves for GRU4Rec and SASRec, written to six separate PDFs:
# panels (a)/(b) use the base axis settings, (c)/(d) are titled "start" and
# (e)/(f) "end", each with its own x/y window.
plot_name = f'epochs_{super_plot_name}_GRU_SAS_{subset_selection["data_params.name"]}'

#plot_instructions["subplots"] = {"nrows": 3, "ncols": 2, "figsize": (12,12)}
plot_instructions["savefig"] = {"fname": f'../out/img/{plot_name}.pdf'}

plot_kwargs = {"markevery": 0.1}

ax_instructions.pop("set_xticks", None)
ax_instructions["legend"] = {"loc": "upper left"}
#ax_instructions["set_title"] = {"label": super_title}

label_convert_dict = {}  # x:f"{x} positive item(s)" for x in [1,2,3,5,10,20,50,100]}

# One shallow copy of the axis settings per panel. Panels only add, replace
# or remove top-level keys, so the shared nested dicts are never modified here.
repeated_ax_instructions = [dict(ax_instructions) for _ in range(6)]

# (panel index, (x left, x right), (y bottom, y top)) for the zoomed panels.
zoom_windows = [
    (2, (0, 10), (0.2, 0.42)),
    (3, (0, 10), (0.05, 0.25)),
    (4, (100, 600), (0.2, 0.57)),
    (5, (100, 600), (0.05, 0.6)),
]
for panel, x_range, y_range in zoom_windows:
    zoomed = repeated_ax_instructions[panel]
    zoomed["set_xlim"] = {"left": x_range[0], "right": x_range[1]}
    zoomed["set_ylim"] = {"bottom": y_range[0], "top": y_range[1]}
    # Zoomed panels do not carry over the "set_xscale" entry.
    zoomed.pop("set_xscale", None)
# The two "end" panels place their legend in the lower right instead.
for panel in (4, 5):
    repeated_ax_instructions[panel]["legend"] = {"loc": "lower right"}

# Marker spacing per panel: the default for (a)/(b), 1 for (c)/(d) and
# 100 for (e)/(f).
repeated_plot_kwargs = [dict(plot_kwargs) for _ in range(6)]
for panel, every in ((2, 1), (3, 1), (4, 100), (5, 100)):
    repeated_plot_kwargs[panel]["markevery"] = every

label_a = ["(a)", "(b)", "(c)", "(d)", "(e)", "(f)"]
label_b = ["GRU", "SAS"] * 3
label_c = ["", "", " - start", " - start", " - end", " - end"]

# Loop invariants: output path without the ".pdf" suffix, and the table drawn
# in each panel (GRU and SAS alternate).
base_fname = plot_instructions["savefig"]["fname"].split(".pdf")[0]
panel_tables = [gru_table, sas_table] * 3
for i, (rep_ax_instr, rep_plot_kwargs) in enumerate(zip(repeated_ax_instructions, repeated_plot_kwargs)):
    # Deep copy so the per-panel file name does not leak into plot_instructions.
    rep_plot_instr = deepcopy(plot_instructions)
    rep_plot_instr["savefig"]["fname"] = f"{base_fname}_{i}.pdf"
    rep_ax_instr["set_title"] = {"label": f"{label_a[i]} {label_b[i]}{label_c[i]}"}
    create_plot(
        rep_plot_instr,
        data=panel_tables[i],
        label_columns=[main_x_column],
        x_column=x_column,
        y_column=y_column,
        ax_instructions=rep_ax_instr,
        label_convert_dict=label_convert_dict,
        plot_type="plot",
        plot_kwargs=rep_plot_kwargs,
    )