In [None]:
from pathlib import Path

import pandas as pd
import plotly.express as px

In [None]:
# Read the hyperparameter / metric table that the rest of the notebook analyses.
params_csv = 'models/params.csv'
df = pd.read_csv(params_csv)

In [None]:
# Explicit dtype for every column of the parameter table.
# 'type', 'depth' and 'warm_start' are treated as categories; the remaining
# hyperparameters are integers and the four metric columns are floats.
column_dtypes = {
    # hyperparameters
    'type': 'category',
    'estimators': 'int64',
    'depth': 'category',
    'leaf': 'int64',
    'random_state': 'int64',
    'warm_start': 'category',
    'n_jobs': 'int64',
    # evaluation metrics
    'average_precision': 'float64',
    'recall': 'float64',
    'precision': 'float64',
    'F1': 'float64',
}
df = df.astype(column_dtypes)

In [None]:
# Round the four metric columns to 4 decimals, then preview the best rows by F1.
metric_decimals = dict.fromkeys(['average_precision', 'recall', 'precision', 'F1'], 4)
df = df.round(metric_decimals)
df.sort_values(by='F1', ascending=False).head()

In [None]:
# Continuous colour scale used to colour every figure by its F1 value.
color_map = px.colors.sequential.Agsunset
# True: export each figure as an SVG under plots/; False: render it inline instead.
do_save = True

if do_save:
    # write_image does not create missing parent directories, so make sure
    # the output folder exists before the first export.
    Path("plots").mkdir(parents=True, exist_ok=True)

# One parallel-categories figure per model type, built from the rows of `df`
# whose 'type' equals that model type, ordered by descending F1.
for m_type in ['wrist', 'sleep', 'both']:
    fig = px.parallel_categories(
        df[df.type == m_type].sort_values('F1', ascending=False),
        dimensions=['type', 'estimators', 'depth', 'leaf', 'random_state', 'F1'],
        color="F1",
        color_continuous_scale=color_map,
        height=300,
        width=1000,
    )
    fig.update_layout(
        title=f"Model hyperparameter for {m_type}",
        font=dict(
            size=14,
            color="#000000"
        ),
        margin=dict(
            l=20,
            r=50,
            b=20,
            t=50,
            pad=4
        )
    )

    if do_save:
        fig.write_image(f"plots/parallel_categories_{m_type}_params.svg")
    else:
        fig.show()

In [None]:
# Show the 50 best rows ranked by F1, best first.
df.sort_values(by='F1', ascending=False).head(n=50)


PowerShell used to extract the hyperparameters and metrics from the evaluation logs into `params.csv`:
```powershell

# Builds params.csv from the evaluation logs: one CSV row for every
# "Start evaluating model randomforestclassifier-..." line found in $files.
$files = @("r1_m.txt", "r1_m2.txt", "r1_mboth.txt")

# Named groups pull the hyperparameters out of the model name in the log line.
# random_state is matched literally as 42, so other seeds are not picked up.
$pattern = '^Start evaluating model randomforestclassifier-(?<type>\w+)-n_estimators__(?<est>\d+)-max_depth__(?<depth>\w+)-min_samples_leaf__(?<leaf>\d+)-random_state__42-.*$'

# NOTE(review): the notebook reads 'models/params.csv' while this writes
# 'params.csv' to the current directory - confirm the file is moved/copied.
Get-Content $files |
    Select-String -Pattern $pattern -Context 0,5 |
    ForEach-Object {
        $groups = $_.Matches[0].Groups
        # Lines following the matched line; index 0 is skipped and the metric
        # columns take lines 1-4 verbatim (presumably bare numbers - TODO confirm).
        $after = $_.Context.PostContext
        [pscustomobject]@{
            type              = $groups['type'].Value
            estimators        = $groups['est'].Value
            depth             = $groups['depth'].Value
            leaf              = $groups['leaf'].Value
            # The next three values are constants, not parsed from the log.
            random_state      = 42
            warm_start        = $true
            n_jobs            = 10
            average_precision = $after[1]
            recall            = $after[2]
            precision         = $after[3]
            F1                = $after[4]
        }
    } |
    ConvertTo-Csv -NoTypeInformation |
    # Explicit UTF-8: Windows PowerShell 5.1 defaults Out-File to UTF-16,
    # which pandas.read_csv cannot decode with its default encoding.
    Out-File params.csv -Encoding utf8
```

In [None]:
import plotly.express as px

# Hand-entered F1 scores for the batched vs. non-batched comparison.
# x holds the number of estimators as strings, y the F1 score and
# z the variant ("whole" or "batched").
x = ["200", "200", "300", "300"]
y = [0.6558, 0.6594, 0.6569, 0.6639]
z = ["whole", "batched", "whole", "batched"]

# Dict insertion order fixes the column order: type, value, z.
df = pd.DataFrame({'type': x, 'value': y, 'z': z})
df


In [None]:
# Grouped bar chart of F1 per number of estimators, one bar colour per variant (z).
# px.histogram aggregates y within each x/colour bin (sum by default when y is
# given); bar height equals the raw F1 value only while every (type, z) pair
# occurs exactly once in df.
fig = px.histogram(
    df,
    x="type",
    y="value",
    color="z",
    barmode='group',
    height=400,
    width=600,
    text_auto=True,
    title="F1 score for batched vs non-batched ",
)

fig.update_layout(
    xaxis_tickfont_size=14,
    yaxis_tickfont_size=14,
    yaxis=dict(
        title='F1 score',
    ),
    xaxis=dict(
        title='Number of estimators',
    ),
)

fig.show()
# write_image does not create missing parent directories, so make sure the
# output folder exists before exporting.
Path("plots").mkdir(parents=True, exist_ok=True)
fig.write_image("plots/batched_vs_non_batched.svg")