In [1]:
import os

# Locations for the exported run data and the generated result tables.
DATA_DIR = 'data/'
RESULT_DIR = 'results/original_vs_updated/'

# Make sure both directories exist; exist_ok=True makes this a no-op when
# they are already present, so the cell can be re-run safely.
for directory in (DATA_DIR, RESULT_DIR):
    os.makedirs(directory, exist_ok=True)

In [2]:
import pandas as pd 
import wandb
from dotenv import load_dotenv
# Load variables from a .env file; override=True lets values from the file
# replace variables that are already set in the environment.
load_dotenv(override=True)
api = wandb.Api()


# Project is specified by <entity/project-name>
entity = os.getenv("WANDB_ENTITY", "dl2-2024")
project = os.getenv("WANDB_PROJECT", "dl-2024")
runs = api.runs(f"{entity}/{project}")

summary_list, config_list, name_list = [], [], []
for run in runs:
    # .summary contains the output keys/values for metrics like accuracy.
    #  We call ._json_dict to omit large files.
    #  Copy the dict before adding the tags: writing into ._json_dict
    #  directly would modify the run object's own summary state.
    summary = dict(run.summary._json_dict)
    summary['tags'] = run.tags
    summary_list.append(summary)

    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config_list.append(
        {k: v for k, v in run.config.items()
         if not k.startswith('_')})

    # .name is the human-readable name of the run.
    name_list.append(run.name)

# One row per run; the "summary" and "config" cells each hold a whole dict,
# which is written to the CSV in its string form.
runs_df = pd.DataFrame({
    "summary": summary_list,
    "config": config_list,
    "name": name_list
    })

runs_df.to_csv(DATA_DIR + "project.csv")

In [3]:
import ast

import pandas as pd
from tabulate import tabulate

# Read the CSV file
runs_df = pd.read_csv(DATA_DIR + "project.csv")

# The summary and config columns hold dict literals stored as text.
# Parse them with ast.literal_eval, which only accepts Python literals,
# instead of eval, which would execute any expression found in the file.
summary_df = pd.json_normalize(runs_df['summary'].apply(ast.literal_eval))
config_df = pd.json_normalize(runs_df['config'].apply(ast.literal_eval))

# Combine the flattened summary and config data
results_df = pd.concat([runs_df[['name']], config_df, summary_df], axis=1)

# Keep only the runs whose tag list is exactly ['equivariant features'].
code_full_val = results_df[results_df['tags'].apply(lambda x: x == ['equivariant features'])]

# Look up one metric value for a dataset / model / transformation / method combination
def get_metric(df, dataset_name, model_name, transformation, method, metric):
    """Return `metric` for the first row of `df` matching all four selectors.

    Rows are matched on the columns 'dataset_name', 'model_name',
    'data_transformations' and 'method'. The value found in the `metric`
    column of the first matching row is rounded to two decimals.

    Returns None when no row matches.
    """
    selectors = {
        'dataset_name': dataset_name,
        'model_name': model_name,
        'data_transformations': transformation,
        'method': method,
    }
    # Build one boolean mask per selector, then AND them together.
    conditions = [df[column] == wanted for column, wanted in selectors.items()]
    mask = conditions[0]
    for condition in conditions[1:]:
        mask = mask & condition

    values = df.loc[mask, metric]
    if values.empty:
        return None
    return round(values.iloc[0], 2)

datasets = ['CIFAR100']  # Add your datasets here
metrics = ["val_top1_acc", "final_top1_acc"]
transformations = ['rot90']
methods = ['equitune', 'attention']
architectures = ['RN50']  # Add more architectures if needed

# Create a summary table for each dataset, with one row per
# (method, architecture, transformation) combination.
for dataset in datasets:
    summary_data_full_val = {
        "Method": [],
        "Architecture-Transformation": [],
        "Prefinetune Top1 Acc": [],
    }

    # Populate the summary data. Every configured architecture and
    # transformation is visited, so extending the lists above adds rows.
    for method in methods:
        for architecture in architectures:
            for transformation in transformations:
                combination = f"CLIP w {architecture} - {transformation}"

                summary_data_full_val["Method"].append(method)
                summary_data_full_val["Architecture-Transformation"].append(combination)
                # The reported accuracy is the first entry of `metrics`.
                summary_data_full_val["Prefinetune Top1 Acc"].append(
                    get_metric(code_full_val, dataset, architecture, transformation, method, metrics[0]))

    summary_df_full_val = pd.DataFrame(summary_data_full_val)
    markdown_table_full_val = tabulate(summary_df_full_val, headers='keys', tablefmt='pipe')
    # Save the markdown table to a file
    with open(f"{RESULT_DIR}{dataset}_equitune_vs_attention.md", "w") as file:
        file.write(markdown_table_full_val)
    print(markdown_table_full_val)

|    | Method    | Architecture-Transformation   |   Prefinetune Top1 Acc |
|---:|:----------|:------------------------------|-----------------------:|
|  0 | equitune  | CLIP w RN50 - rot90           |                  40.95 |
|  1 | attention | CLIP w RN50 - rot90           |                  40.65 |
