In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
import torch, torch_geometric
import wandb
import utils
from IPython.display import clear_output
import json
import math

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

  from .autonotebook import tqdm as notebook_tqdm


In [106]:
def get_model_type(x):
    """Map a model name to its family label based on the name's prefix.

    Returns "1-WL+NN" for names starting with "1WL+NN", "GNN" for names
    starting with "GAT", "GIN" or "GCN", and ``pd.NA`` for anything else
    (including non-string input).
    """
    if not isinstance(x, str):
        return pd.NA
    if x.startswith("1WL+NN"):
        return "1-WL+NN"
    # str.startswith accepts a tuple of alternatives.
    if x.startswith(("GAT", "GIN", "GCN")):
        return "GNN"
    return pd.NA

def get_pool_type(x):
    """Map a model name to its pooling label based on the name's suffix.

    Returns one of "Embedding:Sum", "Embedding:Max", "Embedding:Mean", "Sum",
    "Max", "Mean" or "Set2Set"; ``pd.NA`` when the input is not a string or
    none of the known suffixes match.
    """
    if not isinstance(x, str):
        return pd.NA

    # Order matters: every "Embedding-*" suffix also ends with the plain
    # pooling name, so the longer suffixes have to be tried first.
    suffix_to_label = (
        ("Embedding-Sum", "Embedding:Sum"),
        ("Embedding-Max", "Embedding:Max"),
        ("Embedding-Mean", "Embedding:Mean"),
        ("Sum", "Sum"),
        ("Max", "Max"),
        ("Mean", "Mean"),
        ("Set2Set", "Set2Set"),
    )
    for suffix, label in suffix_to_label:
        if x.endswith(suffix):
            return label
    return pd.NA

# Client for the Weights & Biases public API.
api = wandb.Api()

# The project path has the form "<entity>/<project-name>".
runs = api.runs("eric-bill/BachelorThesisExperiments")

# Build one flat record per finished run. Entries are merged left to right,
# so on a key clash the later source wins (config over summary, then the
# explicit 'config' / 'Tags' / 'name' / 'run_id' entries over both).
data = []
for run in runs:
    if run.state != "finished":
        continue
    data.append({
        **run.summary._json_dict,
        **run.config,
        'config': str(run._attrs['config']),
        # Only the first tag is kept; runs without tags get a missing value.
        'Tags': run.tags[0] if len(run.tags) > 0 else pd.NA,
        'name': run.name,
        'run_id': run.id,
    })

runs_df = pd.DataFrame(data)
# Derive the model family and pooling labels from the 'model' column.
runs_df['model_type'] = runs_df['model'].apply(get_model_type)
runs_df['pool_type'] = runs_df['model'].apply(get_pool_type)
# Persist a snapshot of the collected runs next to the notebook.
runs_df.to_csv("project.csv")

In [183]:
# Debug peek: entry names of the raw config stored on a run object.
# `run` is not defined in this cell; in the visible code it is the loop
# variable left bound by the run-fetching loop.
[*run._attrs['rawconfig']]

['_wandb',
 'lr',
 'act',
 'k_wl',
 'norm',
 'seed',
 'model',
 'device',
 'k_fold',
 'dataset',
 'dropout',
 'batch_size',
 'max_epochs',
 'num_layers',
 'embedding_dim',
 'wl_convergence',
 'hidden_channels']

In [178]:
# Debug peek: names of the attributes stored on a run object.
# `run` is not defined in this cell; in the visible code it is the loop
# variable left bound by the run-fetching loop.
[*run._attrs]

['id',
 'tags',
 'name',
 'displayName',
 'sweepName',
 'state',
 'config',
 'group',
 'jobType',
 'commit',
 'readOnly',
 'createdAt',
 'heartbeatAt',
 'description',
 'notes',
 'systemMetrics',
 'summaryMetrics',
 'historyLineCount',
 'user',
 'historyKeys',
 'rawconfig']

In [174]:
# Debug peek: show the 'historyKeys' attribute of a run object.
# `run` is not defined in this cell; in the visible code it is the loop
# variable left bound by the run-fetching loop.
run._attrs['historyKeys']

{'sets': [],
 'keys': {'system/cpu.1.cpu_percent': {'typeCounts': [{'type': 'number',
     'count': 12}]},
  'test_accuracy_std': {'typeCounts': [{'type': 'number', 'count': 1}],
   'monotonic': True,
   'previousValue': 4.396318435668945},
  'val_accuracy_std': {'typeCounts': [{'type': 'number', 'count': 1}],
   'monotonic': True,
   'previousValue': 3.678579807281494},
  'system/cpu.42.cpu_percent': {'typeCounts': [{'type': 'number',
     'count': 12}]},
  'system/cpu.44.cpu_percent': {'typeCounts': [{'type': 'number',
     'count': 12}]},
  'system/cpu.7.cpu_percent': {'typeCounts': [{'type': 'number',
     'count': 12}]},
  'system/cpu.16.cpu_percent': {'typeCounts': [{'type': 'number',
     'count': 12}]},
  'system/gpu.process.0.temp': {'typeCounts': [{'type': 'number',
     'count': 12}]},
  'train_accuracy': {'typeCounts': [{'type': 'number', 'count': 1}],
   'monotonic': True,
   'previousValue': 74.59176635742188},
  'system/cpu.43.cpu_percent': {'typeCounts': [{'type': 'numb

In [132]:
def get_acc_and_std(dataset, group_col):
    """Pick the row with the highest test accuracy in every group.

    Parameters
    ----------
    dataset : pd.DataFrame
        Must contain the columns 'test_accuracy', 'test_accuracy_std' and
        'run_id' in addition to the grouping column(s).
    group_col : str or sequence of str
        Column name(s) to group by. A single name may be passed as a plain
        string.

    Returns
    -------
    pd.DataFrame
        One row per group with the grouping column(s), 'result' (formatted
        as "<accuracy> ± <std>" with two decimals) and 'run_id' of the best
        row. Groups without any non-missing 'test_accuracy' are skipped; an
        empty frame is returned when no group qualifies.
    """
    # Accept a bare column name as well as a list of names; indexing a plain
    # string by position would otherwise yield single characters.
    keys = [group_col] if isinstance(group_col, str) else list(group_col)
    records = []

    for group_key, group_df in dataset.groupby(keys):
        # Drop missing accuracies up front instead of relying on idxmax()
        # returning NaN for an all-NaN group (deprecated pandas behaviour
        # that raises in newer versions).
        scores = group_df['test_accuracy'].dropna()
        if scores.empty:
            continue

        best_idx = scores.idxmax()
        acc = group_df.loc[best_idx, 'test_accuracy']
        std = group_df.loc[best_idx, 'test_accuracy_std']
        result = f"{acc:.2f} \u00B1 {std:.2f}"

        # Depending on the pandas version, grouping by a one-element list
        # yields either a scalar or a 1-tuple as the group key.
        if not isinstance(group_key, tuple):
            group_key = (group_key,)

        records.append(dict(zip(keys, group_key)) |
                       {'result': result, 'run_id': group_df.loc[best_idx, 'run_id']})

    return pd.DataFrame(records)

In [151]:
# Best run for every (model type, pooling, dataset) combination.
latex_table = get_acc_and_std(runs_df, group_col=['model_type', 'pool_type', 'dataset'])
# Reshape to one row per (model type, pooling) and one column per dataset.
# Note that the pivoted values are the run ids, not the formatted 'result'
# strings. get_acc_and_std yields at most one row per combination, so
# 'first' simply picks that row's value; it replaces the identity lambda,
# which is not a real aggregation and only worked because pandas unwraps
# length-1 results.
latex_table = latex_table.pivot_table(index=['model_type', 'pool_type'],
                                      columns='dataset', values=['run_id'], aggfunc='first')
latex_table

Unnamed: 0_level_0,Unnamed: 1_level_0,run_id,run_id,run_id,run_id
Unnamed: 0_level_1,dataset,ENZYMES,IMDB-BINARY,NCI1,PROTEINS
model_type,pool_type,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1-WL+NN,Embedding:Max,6950m9q6,mwbkokfq,fw4dl94v,loy2l5ey
1-WL+NN,Embedding:Mean,j4708nh9,37rb1q4m,41i9x3vz,sbxjcjvk
1-WL+NN,Embedding:Sum,fa4e4536,7x2kimsa,yhuvjjrt,usr9vhs2
1-WL+NN,Sum,gxafl3qe,,,
GNN,Max,yhnbpgak,hcc8z9uj,11dzui0u,ijhtz1l5
GNN,Mean,a2syk120,r06n1885,xlya5a4n,s1bpuxrk
GNN,Sum,ca9hfi7v,g4au42cp,51wo6rf8,47647jja


In [None]:
# Write the pivoted table to a LaTeX file next to the notebook.
latex_table.to_latex(
    buf="table.tex",
    escape=True,
    multirow=True,
    multicolumn=True,
    multicolumn_format='c',
    bold_rows=False,
)