In [None]:
import numpy as np
import pandas as pd
from sklearn import metrics
from typing import Tuple, Dict
import plotly.express as px
import plotly.graph_objects as go


In [None]:
# Names of the columns that formalize_pair_type derives from the raw 'type' column.
DATASET_LABEL = "dataset"  # all '_'-separated fields before the last three
ORIENTATION_LABEL = "orientation"  # third-from-last field
SAME_LABEL = "same"  # boolean flag derived from the second-to-last field
SAMPLE_IDX_LABEL = "sample_idx"  # last field (batch sample idx)

In [None]:
def formalize_pair_type(pairs_list: pd.DataFrame) -> pd.DataFrame:
    """
    Break the 'type' column of a pairs distance list into separate columns.

    Each 'type' value is read as '_'-separated fields of the form
    "dataset_name_orientation_same_idx", where dataset_name may itself contain
    underscores. The columns added are:
        ['dataset'] = dataset_name (every field before the last three)
        ['orientation'] = orientation (third-from-last field)
        ['same'] = bool, True when the second-to-last field is NOT 'same'
        ['sample_idx'] = idx (last field, kept as a string)

    NOTE(review): despite its name, the 'same' column is True for pairs that
    are not tagged 'same'. get_aucs uses this column as the ROC label with the
    'fc7' value as the score; presumably that value is a distance, which would
    make non-matching pairs the positive class - confirm before flipping it.

    :param pairs_list: frame holding a string 'type' column.
    :return: the same frame object, modified in place with the new columns.
    """
    # Split once and reuse the pieces instead of re-splitting for every column.
    parts = pairs_list['type'].str.split('_')
    pairs_list[DATASET_LABEL] = parts.str[:-3].str.join('_')
    pairs_list[ORIENTATION_LABEL] = parts.str[-3]
    pairs_list[SAME_LABEL] = parts.str[-2] != 'same'
    pairs_list[SAMPLE_IDX_LABEL] = parts.str[-1]  # the batch sample idx
    return pairs_list

In [None]:
def get_aucs(df: pd.DataFrame, score_col: str = 'fc7') -> Tuple[pd.Series, pd.DataFrame]:
    """
    Given the distance lists, calculate the AUCs.

    One ROC AUC is computed per (dataset, orientation, sample_idx) group, using
    the 'same' column as the label and `score_col` as the score. The AUCs are
    then averaged over sample_idx for every (dataset, orientation) condition.

    :param df: pairs list holding the 'dataset', 'orientation', 'sample_idx'
        and 'same' columns, plus the `score_col` column.
    :param score_col: name of the column used as the ROC score
        (defaults to 'fc7').
    :return: (series of the AUCs indexed by (dataset, orientation, sample_idx),
        frame with the 'means' and 'std' of the AUCs per (dataset, orientation))
    """
    condition_keys = [DATASET_LABEL, ORIENTATION_LABEL]

    def roc_auc(group: pd.DataFrame) -> float:
        return metrics.roc_auc_score(group[SAME_LABEL], group[score_col])

    # Hand apply() only the columns the scorer reads: letting it operate on the
    # grouping columns is deprecated in pandas >= 2.2, and the result is the same.
    aucs = (
        df.groupby(condition_keys + [SAMPLE_IDX_LABEL])[[SAME_LABEL, score_col]]
        .apply(roc_auc)
    )
    per_condition = aucs.groupby(condition_keys)
    summary = pd.DataFrame({'means': per_condition.mean(), 'std': per_condition.std()})
    return aucs, summary

In [None]:
def get_finetune_curve(df_paths: Dict[str, str]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Iterates the distance lists and returns the AUCs.

    :param df_paths: maps a model name to the path of its distance-list CSV.
    :return: (frame with one column of AUCs per model,
        per-model summaries concatenated row-wise, each tagged with a 'model' column)
    :raises ValueError: if df_paths is empty.
    """
    if not df_paths:
        # pd.concat would raise ValueError on an empty list anyway; fail early
        # with a message that names the actual problem.
        raise ValueError('df_paths must contain at least one model')

    models_aucs_series = {}
    summaries = []

    for model, path in df_paths.items():
        # The loaded frame is not retained: only its AUC results are needed.
        pairs = formalize_pair_type(pd.read_csv(path))
        curr_aucs, curr_summary = get_aucs(pairs)
        models_aucs_series[model] = curr_aucs
        curr_summary['model'] = model
        summaries.append(curr_summary)

    models_aucs = pd.DataFrame(models_aucs_series)
    total_summary = pd.concat(summaries)
    return models_aucs, total_summary

In [None]:
# Figure collecting the sociable weavers finetuning curves.
fig = go.Figure()

# Distance-list CSV per model name.
sociable_weavers_faces_finetune = {
    'conv1': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_faces_finetuning_measurements/376340381f3646fcaf8499c81d64201b/artifacts/dists.csv',
    'conv2': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_faces_finetuning_measurements/0a18813df60c43e09765b54cf800ff2d/artifacts/dists.csv',
    'conv3': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_faces_finetuning_measurements/8c2d243410fe432ebe13114830f240d1/artifacts/dists.csv',
    'conv4': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_faces_finetuning_measurements/0fbdf3c231b641b98d4edd47ebe38b4e/artifacts/dists.csv',
    'conv5': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_faces_finetuning_measurements/fb3a76d3ba164abea3f07e0792d3d038/artifacts/dists.csv',
    'fc6': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_faces_finetuning_measurements/64dffc3f7fbd477aa7f611d29529e239/artifacts/dists.csv',
    'fc7': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_faces_finetuning_measurements/c5406940d5ea441fa5960226c233f67a/artifacts/dists.csv',
    'pretrained': '/home/hdd_storage/mlflow/artifact_store/pretraining_measurements/74153253d5b44b6db53b6746b3279eb9/artifacts/dists.csv',
}
soc_weav_faces_aucs, soc_weav_faces_total_summary = get_finetune_curve(sociable_weavers_faces_finetune)

# Summary rows for the upright sociable weavers condition, looked up once.
soc_weav_faces_upright = soc_weav_faces_total_summary.loc['sociable_weavers', 'upright']
fig.add_trace(go.Scatter(
    x=soc_weav_faces_upright['model'],
    y=soc_weav_faces_upright['means'],
    name='Sociable weavers, faces finetuning',
    line={'color': 'gray'},
    error_y={
        'type': 'data',  # value of error bar given in data coordinates
        'array': soc_weav_faces_upright['std'],
        'visible': True,
    },
))


# Distance-list CSV per model name.
sociable_weavers_objects_finetune = {
    'conv1': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_objects_finetuning_measurements/d6406d9b73ef44379fabcab0bbcf733b/artifacts/dists.csv',
    'conv2': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_objects_finetuning_measurements/f61f99ce1a524a17913b1de217eae7e2/artifacts/dists.csv',
    'conv3': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_objects_finetuning_measurements/5451cb42360b4330bb318d49fc9f62e4/artifacts/dists.csv',
    'conv4': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_objects_finetuning_measurements/47ffc4c68e8744a9ba2c684ee7d6a414/artifacts/dists.csv',
    'conv5': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_objects_finetuning_measurements/8ad9503f98d344ada51bbd6be3e6b0e7/artifacts/dists.csv',
    'fc6': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_objects_finetuning_measurements/33006110c14a46a3813e71495060e504/artifacts/dists.csv',
    'fc7': '/home/hdd_storage/mlflow/artifact_store/sociable_weavers_objects_finetuning_measurements/b35eee70e48d4895b69f3a5572ee72c3/artifacts/dists.csv',
    'pretrained': '/home/hdd_storage/mlflow/artifact_store/pretraining_measurements/922fe622e13a4e97b5aaada805cf708a/artifacts/dists.csv',
}
soc_weav_objects_aucs, soc_weav_objects_total_summary = get_finetune_curve(sociable_weavers_objects_finetune)

# Summary rows for the upright sociable weavers condition, looked up once.
soc_weav_objects_upright = soc_weav_objects_total_summary.loc['sociable_weavers', 'upright']
fig.add_trace(go.Scatter(
    x=soc_weav_objects_upright['model'],
    y=soc_weav_objects_upright['means'],
    name='Sociable weavers, objects finetuning',
    line={'color': '#ff7f0e'},
    error_y={
        'type': 'data',  # value of error bar given in data coordinates
        'array': soc_weav_objects_upright['std'],
        'visible': True,
    },
))

# Finish the figure: legend, fixed AUC axis range, then save and display.
fig.update_layout(showlegend=True)
fig.update_yaxes(range=[0.5, 1])
fig.write_html('/home/ssd_storage/experiments/Expertise/experiment1/sociable_weavers_finetune_curve.html')
fig.show()

In [None]:
# Figure collecting the species finetuning curves.
fig = go.Figure()

# Distance-list CSV per model name.
species_faces_finetune = {
    'conv1': '/home/hdd_storage/mlflow/artifact_store/species_faces_finetuning_measurements/487468fcc70345fd8c74e1a5e83de5e9/artifacts/dists.csv',
    'conv2': '/home/hdd_storage/mlflow/artifact_store/species_faces_finetuning_measurements/7fcda0b2938c460880c576f1167c4a82/artifacts/dists.csv',
    'conv3': '/home/hdd_storage/mlflow/artifact_store/species_faces_finetuning_measurements/58c4a259975b4a258a9b0e246c7a1aee/artifacts/dists.csv',
    'conv4': '/home/hdd_storage/mlflow/artifact_store/species_faces_finetuning_measurements/19dd0c1f89d54f3f8b24d15658595fd5/artifacts/dists.csv',
    'conv5': '/home/hdd_storage/mlflow/artifact_store/species_faces_finetuning_measurements/292cc3b9beee4ea09ca44718fb51d5aa/artifacts/dists.csv',
    'fc6': '/home/hdd_storage/mlflow/artifact_store/species_faces_finetuning_measurements/d159a6d91c3043cd9acebf79ce7be63f/artifacts/dists.csv',
    'fc7': '/home/hdd_storage/mlflow/artifact_store/species_faces_finetuning_measurements/e515f72f97984e9086dab43d42554931/artifacts/dists.csv',
    'pretrained': '/home/hdd_storage/mlflow/artifact_store/pretraining_measurements/74153253d5b44b6db53b6746b3279eb9/artifacts/dists.csv',
}
species_faces_aucs, species_faces_total_summary = get_finetune_curve(species_faces_finetune)

# Summary rows for the upright species condition. x, y and the error bars all
# read from this one lookup, so they cannot drift onto another run's summary
# (the error bars previously came from soc_weav_faces_total_summary).
species_faces_upright = species_faces_total_summary.loc['species', 'upright']
fig.add_trace(go.Scatter(
    x=species_faces_upright['model'],
    y=species_faces_upright['means'],
    name='Species, faces finetuning',
    line=dict(color='gray'),
    error_y=dict(
            type='data', # value of error bar given in data coordinates
            array=species_faces_upright['std'],
            visible=True)
))


# Distance-list CSV per model name.
species_objects_finetune = {
    'conv1': '/home/hdd_storage/mlflow/artifact_store/species_objects_finetuning_measurements/114427021539414183da0842087c5705/artifacts/dists.csv',
    'conv2': '/home/hdd_storage/mlflow/artifact_store/species_objects_finetuning_measurements/77d97872b26a427fa0af28bbd9944ede/artifacts/dists.csv',
    'conv3': '/home/hdd_storage/mlflow/artifact_store/species_objects_finetuning_measurements/2244db729add488f9db5d8d6bd0d94c3/artifacts/dists.csv',
    'conv4': '/home/hdd_storage/mlflow/artifact_store/species_objects_finetuning_measurements/e309480547554ef99cffbb559d6b6a32/artifacts/dists.csv',
    'conv5': '/home/hdd_storage/mlflow/artifact_store/species_objects_finetuning_measurements/15895d513da04213b3f36f9a79caceec/artifacts/dists.csv',
    'fc6': '/home/hdd_storage/mlflow/artifact_store/species_objects_finetuning_measurements/04295fe41594443d9a5f847584366a81/artifacts/dists.csv',
    'fc7': '/home/hdd_storage/mlflow/artifact_store/species_objects_finetuning_measurements/e975778fba534804bec8ec1be7091e1a/artifacts/dists.csv',
    'pretrained': '/home/hdd_storage/mlflow/artifact_store/pretraining_measurements/922fe622e13a4e97b5aaada805cf708a/artifacts/dists.csv',
}
species_objects_aucs, species_objects_total_summary = get_finetune_curve(species_objects_finetune)

# Summary rows for the upright species condition, looked up once.
species_objects_upright = species_objects_total_summary.loc['species', 'upright']
fig.add_trace(go.Scatter(
    x=species_objects_upright['model'],
    y=species_objects_upright['means'],
    name='Species, objects finetuning',
    line=dict(color='#ff7f0e'),
    error_y=dict(
            type='data', # value of error bar given in data coordinates
            # One standard deviation, the same scale as the faces trace on this
            # figure (this trace previously plotted 2 * std).
            array=species_objects_upright['std'],
            visible=True)
))


# Finish the figure: fixed AUC axis range, legend, then save and display.
fig.update_yaxes(range=[0.5, 1])
fig.update_layout(showlegend=True)

fig.write_html('/home/ssd_storage/experiments/Expertise/experiment1/species_finetune_curve.html')
fig.show()

In [None]:
# Save the per-condition summaries (means / std of the AUCs per model).
for summary_frame, summary_path in (
    (soc_weav_faces_total_summary, '/home/ssd_storage/experiments/Expertise/experiment1/soc_weav_faces_summary.csv'),
    (soc_weav_objects_total_summary, '/home/ssd_storage/experiments/Expertise/experiment1/soc_weav_inanimates_summary.csv'),
    (species_faces_total_summary, '/home/ssd_storage/experiments/Expertise/experiment1/species_faces_summary.csv'),
    (species_objects_total_summary, '/home/ssd_storage/experiments/Expertise/experiment1/species_inanimates_summary.csv'),
):
    summary_frame.to_csv(summary_path)

# Save the raw AUC tables (one column per model).
for aucs_frame, aucs_path in (
    (soc_weav_faces_aucs, '/home/ssd_storage/experiments/Expertise/experiment1/soc_weav_faces_aucs.csv'),
    (soc_weav_objects_aucs, '/home/ssd_storage/experiments/Expertise/experiment1/soc_weav_inanimates_aucs.csv'),
    (species_faces_aucs, '/home/ssd_storage/experiments/Expertise/experiment1/species_faces_aucs.csv'),
    (species_objects_aucs, '/home/ssd_storage/experiments/Expertise/experiment1/species_inanimates_aucs.csv'),
):
    aucs_frame.to_csv(aucs_path)

# Tag every AUC table in place with its evaluation domain and pretraining set.
# NOTE(review): the tagging mutates the frames after they were saved above, so
# re-running this cell without recomputing the frames writes the two tag
# columns into the *_aucs.csv files as well.
for aucs_frame, domain, pretraining in (
    (soc_weav_faces_aucs, 'soc_weav', 'faces'),
    (soc_weav_objects_aucs, 'soc_weav', 'inanimates'),
    (species_faces_aucs, 'species', 'faces'),
    (species_objects_aucs, 'species', 'inanimates'),
):
    aucs_frame['domain'] = domain
    aucs_frame['pretraining'] = pretraining


In [None]:
# Stack the four AUC tables row-wise and save them as a single CSV.
all_finetune_aucs = pd.concat([
    species_faces_aucs,
    species_objects_aucs,
    soc_weav_faces_aucs,
    soc_weav_objects_aucs,
])
all_finetune_aucs.to_csv('/home/ssd_storage/experiments/Expertise/experiment1/finetune_curve_aucs.csv')

In [None]:
# Save one stacked AUC table per domain (faces + objects pretraining together).
species_finetune_aucs = pd.concat([species_faces_aucs, species_objects_aucs])
species_finetune_aucs.to_csv('/home/ssd_storage/experiments/Expertise/experiment1/species_finetune_curve_aucs.csv')

soc_weav_finetune_aucs = pd.concat([soc_weav_faces_aucs, soc_weav_objects_aucs])
soc_weav_finetune_aucs.to_csv('/home/ssd_storage/experiments/Expertise/experiment1/soc_weav_finetune_curve_aucs.csv')