# Performance over time

This notebook builds a slider chart in which you select the training trigger (the year up to which the model was trained) and inspect the model's accuracy on every data year. It requires one accuracy matrix per model, each stored as a pickle file.

See `demo_out/demo_yearbook.html` for an example output.

In [1]:
import os
import pickle
import plotly.graph_objects as go
from lxml import html

In [2]:
# Directory that holds the pickled accuracy matrices; its files are listed and loaded below.
DATA_FOLDER = "/your/folder/containing/pickles/of/accuracy/matrices" 
# Path of the HTML file the chart is written to; style_html() rewrites this file in place.
OUTPUT_FILE_NAME = "demo_yearbook.html"

In [3]:
# Base colours used for axes, slider, text and the page/plot background.
MODYN_BLUE = "#44546c"
MODYN_WHITE = "#f0f0f0"

# One colour per experiment (indexed by position in the sorted file list).
# The 13-entry palette is repeated three times so up to 39 experiments can be drawn.
colors = [
    "#fb6a4a",
    "#71c1c9",
    "#A50104",
    "#6f9074",
    "#745474",
    "#445488",
    "#ffc05e",
    "#ccacca",
    "#ccccaa",
    "#aaccaa",
    "#acacac",
    "#abcdef",
    "#fedcab",
] * 3

# Companion palette used for the "past" part of each curve.
# NOTE(review): only the first seven entries differ from `colors`; from the
# eighth entry on both palettes are identical, so past and future segments
# of those experiments are drawn in the same colour.
lighter_colors = [
    "#f79079",
    "#96dae0",
    "#d93b3d",
    "#8db593",
    "#ad80ad",
    "#6277bd",
    "#ffd085",
    "#ccacca",
    "#ccccaa",
    "#aaccaa",
    "#acacac",
    "#abcdef",
    "#fedcab",
] * 3

# Opening <span> tags prepended to chart titles / axis titles to style them.
format_string_title = (
    "<span style='font-family: Helvetica; color: #44546c; "
    "font-weight: 800; font-size: 45'>"
)
format_string_title_red = (
    "<span style='font-family:  Helvetica; color: #fb6a4a; "
    "font-weight: 800; font-size: 45'>"
)
format_string = (
    "<span style='font-family: Helvetica; color: #44546c; "
    "font-weight:  300; font-size: 20'>"
)

In [4]:
def load_file(name):
    """Unpickle and return the object stored in the file `name` inside DATA_FOLDER.

    Args:
        name: file name relative to DATA_FOLDER.

    Returns:
        Whatever object the pickle contains.
    """
    pickle_path = os.path.join(DATA_FOLDER, name)
    with open(pickle_path, "rb") as handle:
        # NOTE: unpickling can execute arbitrary code; only point DATA_FOLDER
        # at files you produced yourself.
        loaded = pickle.load(handle)
    return loaded

In [5]:
def get_trace(data, year, color_index, name):
    """Build the two line traces (past and future) of one experiment for one slider year.

    The curve is split at `year`: the part up to and including `year` is drawn
    with the lighter palette and hidden from the legend, the part from `year`
    onwards uses the regular palette and carries the legend entry. Both parts
    share the point at `year`, so the line is continuous.

    Args:
        data: indexable by year offset (0 for 1930); `data[offset]` must be
            sliceable and holds the y values plotted against 1930..2013.
            # presumably one row of accuracies per training year - confirm
        year: split year; must lie in 1930..2013, otherwise ValueError is raised.
        color_index: position in `colors` / `lighter_colors`.
        name: trace name (shown in the legend for the future part).

    Returns:
        A list `[past_trace, future_trace]` of go.Scatter objects.
    """
    years = list(range(1930, 2014))
    split = years.index(year)
    row = data[split]
    past_end = split + 1  # the past segment includes the split year itself

    past_segment = go.Scatter(
        x=years[:past_end],
        y=row[:past_end],
        mode='lines',
        line={"color": lighter_colors[color_index], "width": 5.5},
        name=name,
        showlegend=False,
    )
    future_segment = go.Scatter(
        x=years[split:],
        y=row[split:],
        mode='lines',
        line={"color": colors[color_index], "width": 5.5},
        name=name,
    )
    return [past_segment, future_segment]

def get_vertical_line(year):
    """Return a dotted vertical marker at `year` spanning y = 0..1, hidden from the legend."""
    marker_style = {"color": MODYN_BLUE, "dash": "dot", "width": 4}
    return go.Scatter(
        x=[year] * 2,
        y=[0, 1],
        mode='lines',
        line=marker_style,
        showlegend=False,
    )

In [6]:
def get_slider_chart(data, experiments):
    """Build the "model performance over time" figure with a year slider.

    For every slider year (1930..2012) one group of traces is added: two
    traces per experiment (see get_trace) plus a dotted vertical line at that
    year. Only the group belonging to the selected slider step is visible.

    Args:
        data: sequence of accuracy matrices, one per experiment, each in the
            layout expected by get_trace.
        experiments: legend names; `experiments[i]` labels `data[i]`.

    Returns:
        The assembled plotly go.Figure.
    """
    first_year, last_year = 1930, 2013  # slider steps cover first_year .. last_year - 1

    layout = go.Layout(
        xaxis=dict(title=f'{format_string}Time'),
        yaxis=dict(title=f'{format_string}Accuracy'),
        title=dict(
            text=f"{format_string_title}Model performance over time",
            xanchor='center',
            x=0.5,
            font=dict(size=35, family='helvetica'),
        ),
        plot_bgcolor=MODYN_WHITE,
        paper_bgcolor=MODYN_WHITE,
    )
    fig = go.Figure(layout=layout)

    # Add traces, one group for each slider step
    for year in range(first_year, last_year):
        traces = []
        # enumerate() gives each experiment its own position. The previous
        # data.index(d) lookup compared matrices by value, so two experiments
        # with equal matrices shared a colour/name, and NumPy matrices raised
        # "truth value of an array is ambiguous".
        for index, matrix in enumerate(data):
            traces += get_trace(matrix, year, color_index=index, name=experiments[index])
        traces.append(get_vertical_line(year))
        for trace in traces:
            fig.add_trace(trace)

    # Every year contributes the same number of traces (2 per experiment + 1 line)
    number_of_traces = len(traces)
    total_traces = len(fig.data)

    # Hide all the traces except the first group
    for position in range(total_traces):
        fig.data[position].visible = position < number_of_traces

    # Create one slider step per trace group
    steps = []
    for i in range(total_traces // number_of_traces):
        visible = [False] * total_traces
        group_start = number_of_traces * i
        visible[group_start:group_start + number_of_traces] = [True] * number_of_traces
        step_year = first_year + i
        steps.append(dict(
            method="update",
            args=[
                {"visible": visible},
                {"title": f"{format_string_title}Trained up to: </b>{format_string_title_red}" + str(step_year)},
            ],
            # Label every step here instead of patching a hard-coded count of
            # 83 steps afterwards, so labels always match the number of steps.
            label=str(step_year),
        ))

    # Add slider
    sliders = [dict(
        active=0,
        currentvalue={"prefix": "Model trained up to: "},
        pad={"t": 50},
        steps=steps,
        bgcolor=MODYN_BLUE,
        tickcolor=MODYN_BLUE,
        font=dict(size=8, color=MODYN_BLUE, family="helvetica"),
    )]
    fig.update_layout(sliders=sliders)

    # Style axes (identical apart from the value range)
    axis_style = dict(
        showline=True,
        linewidth=2,
        linecolor=MODYN_BLUE,
        title_font=dict(size=18, color=MODYN_BLUE, family="helvetica"),
        tickangle=0,
        tickfont=dict(family='helvetica', color=MODYN_BLUE, size=14),
        showgrid=True,
        gridwidth=0.5,
        gridcolor='LightGray',
    )
    fig.update_yaxes(range=[0, 1], **axis_style)
    fig.update_xaxes(range=[first_year, last_year], **axis_style)

    # Style legend
    fig.update_layout(legend=dict(
        yanchor="bottom",
        y=1.00,
        xanchor="right",
        itemwidth=40,
        x=0.99,
        orientation="h",
        font=dict(size=10, color=MODYN_BLUE, family="helvetica"),
    ))
    return fig

In [7]:
def style_html():
    """Restyle the exported chart file OUTPUT_FILE_NAME in place.

    The file is parsed, its <body> is given the MODYN_BLUE background colour
    and is moved into a fresh <html> root that starts with a <style> element
    importing the Hanken Grotesk web font. Anything outside <body> in the
    original file (for example its <head>) is not carried over.
    """
    # Trick to render Hanken Grotesk

    # plotly writes the HTML file as UTF-8, so read it back with the same
    # encoding instead of relying on the platform default.
    with open(OUTPUT_FILE_NAME, "r", encoding="utf-8") as f:
        html_file = f.read()
    tree = html.fromstring(html_file)

    root = html.Element("html")
    root.append(html.fromstring("""<style>
  @import url('https://fonts.googleapis.com/css2?family=Hanken+Grotesk:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;0,800;0,900;1,100;1,200;1,300;1,400;1,500;1,600;1,700;1,800;1,900&display=swap');
</style>"""))

    body = tree.xpath('//body')[0]
    body.set("style", f"background-color: {MODYN_BLUE};")
    # Appending re-parents the <body> element from the parsed tree to the new root
    root.append(body)

    # Write the merged HTML to a file (tostring returns bytes, hence 'wb')
    with open(OUTPUT_FILE_NAME, 'wb') as f:
        f.write(html.tostring(root))

In [8]:
# Collect the pickles to plot. Hidden entries (e.g. macOS ".DS_Store",
# ".ipynb_checkpoints") and sub-directories are skipped rather than deleted
# from the user's folder, so they can neither be unpickled by mistake nor lost.
files = sorted(  # always sort to keep colors consistent
    entry
    for entry in os.listdir(DATA_FOLDER)
    if not entry.startswith(".") and os.path.isfile(os.path.join(DATA_FOLDER, entry))
)
# Legend name: underscores become spaces and everything from ".pkl" on is dropped
experiments = [file.replace("_", " ").split(".pkl")[0] for file in files]

data = [load_file(file) for file in files]
fig = get_slider_chart(data, experiments)
# plotly.js is referenced from the CDN instead of being embedded in the file
fig.write_html(OUTPUT_FILE_NAME, include_plotlyjs="cdn")
style_html()

all_data.pkl
labelBalanced_25pc_1ep.pkl
labelBalanced_50pc_1ep.pkl
labelBalanced_75pc_1ep.pkl
randomNoRep_25pc_1ep.pkl
randomNoRep_25pc_2ep.pkl
randomNoRep_50pc_1ep.pkl
randomNoRep_50pc_2ep.pkl
randomNoRep_75pc_1ep.pkl
random_25pc_1ep .pkl
random_25pc_2ep.pkl
random_25pc_4ep.pkl
random_50pc_1ep.pkl
random_50pc_2ep.pkl
random_75pc_1ep.pkl
