# Data Extraction

In [1]:
import pandas as pd

# Seeds (run identifiers) to load; while this list is empty the loading loop
# below reads nothing.
seeds = []

# One dictionary per exported dataset; the loading loop fills each with
# seed -> DataFrame entries.
stats, base_conditions, predicted_conditions = {}, {}, {}

def build_path(seed, dataset, data_path='../../data/xdpole'):
    """Return the CSV path of one dataset export for one seed.

    Args:
        seed: Run identifier; it becomes the ``{seed}_run.csv`` file name.
        dataset: Name of the sub-directory holding that kind of export.
        data_path: Root directory of the exports. Defaults to the location
            that was previously hard-coded, so existing calls are unaffected.

    Returns:
        The path as a string, in the form ``{data_path}/{dataset}/{seed}_run.csv``.
    """
    return f'{data_path}/{dataset}/{seed}_run.csv'

# For every seed, read its three CSV exports (in this fixed order) and store
# each resulting DataFrame in the matching dictionary under that seed.
for seed in seeds:
    for dataset, target in (
        ('curriculumstats', stats),
        ('baseconditions', base_conditions),
        ('predictedbaseconditions', predicted_conditions),
    ):
        target[seed] = pd.read_csv(build_path(seed, dataset))


# Data Transformation

In [2]:
import numpy as np
import warnings
# NOTE(review): this installs a blanket 'ignore' filter, so every warning raised
# after this point (including NumPy/pandas deprecation warnings) is hidden —
# consider narrowing it to a specific category or module.
warnings.filterwarnings('ignore')

def get_column_mean(column, data):
    """Average one column row-by-row across every run in ``data``.

    The per-run columns are aligned on their index, so row ``i`` of the result
    is the mean of row ``i`` over all runs. Rows that are missing (or NaN) in
    at least one run are dropped, which means runs of different lengths are
    averaged only over the rows they all share.

    Args:
        column: Name of the column to average.
        data: Mapping of run key (seed) -> DataFrame containing ``column``.

    Returns:
        A pandas Series of per-row means with incomplete rows removed; an
        empty float Series when ``data`` is empty.
    """
    runs = [df[column] for df in data.values()]
    if not runs:
        # Nothing loaded: return an empty result instead of averaging nothing.
        return pd.Series(dtype=float)

    # One column per run. `keys` gives each column a unique label even though
    # every input Series carries the same name.
    stacked = pd.concat(runs, axis=1, keys=list(data.keys()))

    # skipna=False makes a row NaN as soon as one run lacks a value for it, so
    # dropna() keeps only the rows every run contributed to.
    row_mean = stacked.mean(axis=1, skipna=False)
    return row_mean.dropna()

# Graphs

In [3]:
import plotly.graph_objects as go
import seaborn as sns

# NOTE(review): the colormap returned here is neither assigned nor passed on in
# this cell, so the call appears to have no effect on the plotly figures below —
# confirm whether setting a palette/colorway was intended instead.
sns.color_palette("mako", as_cmap=True)

def score_graph(df, threshold=0.7):
    """Show the score per generation on top of a filled threshold area.

    Args:
        df: Table exposing ``gen`` (x values) and ``score`` (y values).
        threshold: Constant level drawn for every generation and filled down
            to zero.

    The figure is displayed with ``show()``; nothing is returned. The title
    includes the module-level ``seeds`` list.
    """
    generations = df.gen
    traces = (
        # Flat line at `threshold`, rendered only as a filled area.
        go.Scatter(
            x=generations,
            y=[threshold] * len(generations),
            fill='tozeroy',
            name='Threshold',
            mode='none',
        ),
        go.Scatter(
            x=generations,
            y=df.score,
            mode='lines',
            name='Specialist Score Mean',
        ),
    )

    figure = go.Figure(
        layout=go.Layout(title=f'Specialist Score X Generation {seeds}')
    )
    for trace in traces:
        figure.add_trace(trace)

    figure.update_xaxes(title_text='Generation')
    figure.update_yaxes(title_text='Score')
    figure.show()

def cm_metrics_graph(df):
    """Show the four confusion-matrix series per generation as line traces.

    Args:
        df: Table exposing ``gen`` plus ``cm_true_positive``,
            ``cm_false_positive``, ``cm_true_negative`` and
            ``cm_false_negative``.

    The figure is displayed with ``show()``; nothing is returned. The title
    includes the module-level ``seeds`` list.
    """
    # (attribute on df, legend label), in the order the traces are added.
    series = (
        ('cm_true_positive', 'True Positive'),
        ('cm_false_positive', 'False Positive'),
        ('cm_true_negative', 'True Negative'),
        ('cm_false_negative', 'False Negative'),
    )

    figure = go.Figure(
        layout=go.Layout(title=f'Confusion Matrix Metrics X Generation {seeds}')
    )
    for attribute, label in series:
        figure.add_trace(
            go.Scatter(
                x=df.gen,
                y=getattr(df, attribute),
                mode='lines',
                name=label,
            ),
        )

    figure.update_xaxes(title_text='Generation')
    figure.update_yaxes(title_text='Number of Labels')
    figure.show()

def cm_analysis_graph(df, threshold=0.7):
    """Show score, good labels and bad labels per generation over a threshold area.

    Args:
        df: Table exposing ``gen``, ``score``, ``good_labels`` and
            ``bad_labels``.
        threshold: Constant level drawn as a grey line for every generation
            and filled down to zero.

    The figure is displayed with ``show()``; nothing is returned. The title
    includes the module-level ``seeds`` list.
    """
    figure = go.Figure(
        layout=go.Layout(title=f'Confusion Matrix Analysis {seeds}')
    )

    # Threshold band first, so the data lines are added after it.
    figure.add_trace(
        go.Scatter(
            x=df.gen,
            y=[threshold] * len(df.gen),
            fill='tozeroy',
            name='Threshold',
            mode='lines',
            line_color='grey',
        ),
    )

    # (attribute on df, legend label), in the order the traces are added.
    for attribute, label in (
        ('score', 'Score'),
        ('good_labels', 'Good Labels'),
        ('bad_labels', 'Bad Labels'),
    ):
        figure.add_trace(
            go.Scatter(
                x=df.gen,
                y=getattr(df, attribute),
                mode='lines',
                name=label,
            ),
        )

    figure.update_xaxes(title_text='Generation')
    figure.update_yaxes(title_text='Number of Labels')
    figure.show()

In [4]:
# Average the generation index and the specialist score over all loaded runs.
gen = get_column_mean('gen', stats)
score = get_column_mean('specialist_score', stats)
df_score = pd.DataFrame(dict(score=score, gen=gen))

# Average the four confusion-matrix columns the same way.
tp, fp, tn, fn = [
    get_column_mean(name, stats)
    for name in (
        'cm_true_positive',
        'cm_false_positive',
        'cm_true_negative',
        'cm_false_negative',
    )
]
# Keep one generation value per entry of `tp` (this rebinds `gen` to a list).
gen = [gen[idx] for idx in range(len(tp))]

df_cm = pd.DataFrame(dict(
    gen=gen,
    cm_true_positive=tp,
    cm_false_positive=fp,
    cm_true_negative=tn,
    cm_false_negative=fn,
))

score_graph(df_score)
cm_metrics_graph(df_cm)