In [None]:
from pathlib import Path
from energyclustering.webapp.resultparser import ResultParser, ResultComparison
from energyclustering.visualization.cluster_visualization import all_day_plot
import pandas as pd
import altair as alt
# Lift Altair's default cap on the number of rows a chart's data may hold,
# so the larger frames built further down can be passed to alt.Chart directly.
alt.data_transformers.disable_max_rows()
# Optional static PNG rendering; left disabled here.
# alt.renderers.enable('png')

In [None]:
# One ResultParser per distance variant. Every parser is built with the same
# first argument and differs only in the second (variant) argument.
wasser_result, wasser1H_result, wasser4H_result, matching_result = (
    ResultParser('result_20210628_koen', variant)
    for variant in (
        'wasserstein_None',
        'wasserstein_1H',
        'wasserstein_4H',
        'random_profiles_no_data_issues_v1',
    )
)

In [None]:
# Per-query frame (with distances) as exposed by the matching-based result.
match_dist_df = matching_result.queries_with_distances
# The trailing ';' suppresses the cell's rich output.
match_dist_df;

In [None]:
# Per-query frame (with distances) from the 'wasserstein_1H' result
# (note: the 1H variant, not the 'wasserstein_None' one).
wasser_dist_df = wasser1H_result.queries_with_distances
# The trailing ';' suppresses the cell's rich output.
wasser_dist_df;

In [None]:
# Put both distances for each query side by side. `rename` already returns a
# new frame, so `match_dist_df` itself is not modified. The wasserstein
# column is aligned on the index by `assign`.
matching_renamed = match_dist_df.rename(columns = {'distance': 'match_distance'})
dist_df = matching_renamed.assign(wasser_distance = wasser_dist_df['distance'])

## Only constraints Koen answered

In [None]:
# Scatter of matching distance against wasserstein distance, one point per
# row of dist_df; hovering a point shows its i1 / i2 fields.
(
    alt.Chart(dist_df)
    .mark_circle()
    .encode(
        x = alt.X('match_distance:Q'),
        y = alt.Y('wasser_distance:Q'),
        tooltip = ['i1', 'i2'],
    )
)

In [None]:
# Similarity-metric histograms of both results, placed side by side.
# The chart titled 'wasserstein' is built from the 1H wasserstein result.
(
    matching_result.similarity_metric_histogram_chart().properties(height = 100, title = 'matching')
    | wasser1H_result.similarity_metric_histogram_chart().properties(height = 100, title = 'wasserstein')
)

In [None]:
# 'ML' constraint pairs (first argument 2, sorted 'desc') for both results,
# placed side by side: matching on the left, wasserstein 1H on the right.
(
    matching_result.plot_constraint_pairs(2, constraints = 'ML', sort = 'desc')
    | wasser1H_result.plot_constraint_pairs(2, constraints = 'ML', sort = 'desc')
)

## All constraints

In [None]:
def distance_matrix_to_list(dist_df):
    """Flatten a distance matrix into a long, single-column frame.

    Both the index and the columns of ``dist_df`` must be two-level
    MultiIndexes. On each axis the two levels are joined into a single
    ``'<level0>,<level1>'`` string label. Level 0 is concatenated as-is, so
    it must already hold strings; level 1 is cast to ``str`` first.

    The input frame is left untouched: the relabelling happens on a copy, so
    callers can keep indexing the original matrix by its MultiIndex.

    Parameters
    ----------
    dist_df : pandas.DataFrame
        Distance matrix with two-level MultiIndex on both axes.

    Returns
    -------
    pandas.DataFrame
        One column named ``'distance'``, indexed by
        (flattened row label, flattened column label).
    """
    def _flatten(labels):
        # 'level0,level1' for every entry of a two-level MultiIndex.
        return labels.get_level_values(0) + ',' + labels.get_level_values(1).astype('str')

    # Work on a copy: assigning .index/.columns on the argument would mutate
    # the caller's matrix in place.
    flat_df = dist_df.copy()
    flat_df.index = _flatten(dist_df.index)
    flat_df.columns = _flatten(dist_df.columns)
    return flat_df.stack().to_frame('distance')

In [None]:
# Full pairwise distance matrices of both methods. These two names held the
# per-query frames earlier in the notebook and are rebound here.
match_dist_df = matching_result.distance_matrix
wasser_dist_df = wasser_result.distance_matrix

# Long form: one row per (row label, column label) with a 'distance' column.
match_dist = distance_matrix_to_list(match_dist_df)
wasser_dist = distance_matrix_to_list(wasser_dist_df)

all_distances = (
    match_dist
    .rename(columns = {'distance': 'match_dist'})
    .assign(wasser_dist = wasser_dist['distance'])
    .rename_axis(['i1', 'i2'], axis = 0)
    # Drops rows whose (match_dist, wasser_dist) values repeat an earlier row.
    .drop_duplicates()
)
all_distances

**TODO:** for some reason there are some very large distances here that should not be there; these need to be filtered out.


In [None]:
def highlight_profile(profile_id):
    """Scatter all pairwise distances, colouring the pairs that involve one profile.

    Reads the notebook-level ``all_distances`` frame. A row is flagged when
    either of its two index labels starts with ``profile_id``; the flag is
    stored in a ``color`` column and used as the colour encoding.

    Side effect: switches the global Altair renderer to ``'png'``.

    Returns the Altair chart (500 x 500, match_dist on x, wasser_dist on y).
    """
    alt.renderers.enable('png')

    def involves_profile(frame):
        # True where the first or the second index label matches the prefix.
        first_matches = frame.index.get_level_values(0).str.startswith(profile_id)
        second_matches = frame.index.get_level_values(1).str.startswith(profile_id)
        return first_matches | second_matches

    flagged = all_distances.assign(color = involves_profile)
    scatter = (
        alt.Chart(flagged.reset_index(), width = 500, height = 500)
        .mark_circle()
        .encode(
            x = 'match_dist:Q',
            y = 'wasser_dist:Q',
            color = 'color:N',
            tooltip = ['i1', 'i2'],
        )
    )
    return scatter

In [None]:
def show_profile(profile_id):
    """Return the ``all_day_plot`` of ``profile_id`` built from ``matching_result.data_df``.

    Side effect: switches the global Altair renderer to ``'png'`` first.
    """
    alt.renderers.enable('png')
    day_plot = all_day_plot(profile_id, matching_result.data_df)
    return day_plot

In [None]:
def show_profile_pair(profile1, profile2):
    """Show two profiles side by side, titled with both of their distances.

    The wasserstein distance is read from ``wasser_result.distance_matrix``
    and the matching distance from ``matching_result.distance_matrix``. In
    both cases the value is the top-left entry of the
    ``.loc[profile1, profile2]`` selection.

    Returns the horizontally concatenated chart of both ``show_profile`` plots.
    """
    def pair_distance(result):
        # Top-left value of the (profile1, profile2) sub-block of the matrix.
        return result.distance_matrix.loc[profile1, profile2].iloc[0, 0]

    wasser_distance = pair_distance(wasser_result)
    match_distance = pair_distance(matching_result)

    left_plot = show_profile(profile1).properties(title = profile1)
    right_plot = show_profile(profile2).properties(title = profile2)
    side_by_side = left_plot | right_plot
    return side_by_side.properties(title = f'wasser={wasser_distance}, matching={match_distance}')

In [None]:
# Back to the default renderer so the chart below stays interactive.
alt.renderers.enable('default')

# Scatter of every remaining pair: matching distance on x, wasserstein
# distance on y. The title reports the correlation between the two columns.
chart = (
    alt.Chart(
        all_distances.reset_index(),
        width = 500,
        height = 500,
        title = f'correlation= {all_distances.match_dist.corr(all_distances.wasser_dist)}',
    )
    .mark_circle()
    .encode(
        x = 'match_dist:Q',
        y = 'wasser_dist:Q',
        tooltip = ['i1', 'i2'],
    )
)
chart.interactive()

### region1: bottom left profile 1138

In [None]:
# Scatter of all pairs, with the pairs whose labels start with 'smartmeter_1138' coloured.
highlight_profile('smartmeter_1138')

In [None]:
# Both profiles side by side; the title shows their wasserstein and matching distances.
show_profile_pair('smartmeter_1138', 'smartmeter_1552')

In [None]:
# Same side-by-side view for a second pair (neither profile is smartmeter_1138).
show_profile_pair('smartmeter_2187', 'smartmeter_1525')