# Analyse the differences between the variations we have so far

In [None]:
from pathlib import Path
from energyclustering.webapp.resultparser import ResultParser, ResultComparison
import pandas as pd
import altair as alt
# Turn off Altair's max-rows check on chart data and select the 'png' renderer.
alt.data_transformers.disable_max_rows()
alt.renderers.enable('png')

In [None]:
# Load IPython's autoreload extension and switch it to mode 2
# (per the IPython docs: reload all modules before executing code).
%load_ext autoreload
%autoreload 2

In [None]:
# Sub-directory names of the variations whose presence is verified below.
SIMPLE = 'random_profiles_v1'
SIMPLE_DATA = 'random_profiles_no_data_issues_v1'
HIST = 'histogram_bins_20'
CORR = 'develop_random_profiles_correction_v1'
SEASON = 'develop_random_profiles_seasonality_v1'

# NOTE(review): EUCL is not part of the existence check below -- confirm that is intended.
EUCL = 'baseline_euclidean'
# FULL_SEASON = 'random_profiles_seasonality_v1'  # disabled, and therefore not checked either

# Root directory that must contain one entry per variation listed above.
directory = Path('/cw/dtaiproj/ml/2020-FLAIR-VITO/profile-clustering/distance_matrices/')
assert directory.exists(), f'distance matrix directory not found: {directory}'

# A single check that reports every missing variation at once, instead of one
# message-less assert per variation followed by a redundant all(...) check.
missing = [
    name
    for name in (SIMPLE, SIMPLE_DATA, CORR, SEASON, HIST)
    if not (directory / name).exists()
]
assert not missing, f'missing variations under {directory}: {missing}'

# Constraint-based validation

In [None]:
# Every parser below shares the same first argument; only the second one differs.
RESULT_NAME = 'result_20210628_koen'

# Variations referenced through the constants defined earlier in the notebook.
result = ResultParser(RESULT_NAME, SIMPLE_DATA)
euc_result = ResultParser(RESULT_NAME, EUCL)

# Wasserstein variants: no aggregation suffix, '1H' and '4H'.
wasser_result = ResultParser(RESULT_NAME, 'wasserstein_None')
wasser1H_result = ResultParser(RESULT_NAME, 'wasserstein_1H')
wasser4H_result = ResultParser(RESULT_NAME, 'wasserstein_4H')

# 'baselines_own_metric_*' variants.
dtw_one = ResultParser(RESULT_NAME, 'baselines_own_metric_DTW_one_to_one')
euc_matching = ResultParser(RESULT_NAME, 'baselines_own_metric_euclidean_minimal_cost')
euc_one = ResultParser(RESULT_NAME, 'baselines_own_metric_euclidean_one_to_one')

In [None]:
# Similarity-metric histogram chart of `result`, with the chart height set to 100.
result.similarity_metric_histogram_chart().properties(height=100)


### Histogram distance

In [None]:
# Combine the histogram charts of the three Wasserstein variants with `|`,
# each one titled with its aggregation setting and given a height of 100.
(
    wasser_result.similarity_metric_histogram_chart().properties(title='no agg', height=100)
    | wasser1H_result.similarity_metric_histogram_chart().properties(title='1H', height=100)
    | wasser4H_result.similarity_metric_histogram_chart().properties(title='4H', height=100)
)

### Euclidean distance

In [None]:
# Similarity-metric histogram chart of `euc_result`, with the chart height set to 100.
euc_result.similarity_metric_histogram_chart().properties(height=100)

## Rank correlation

In [None]:
# Empty table with a single 'correlation' column; the cells below add one row per measure.
result_df = pd.DataFrame(columns=['correlation'])

### Own metric: DTW and matching

In [None]:
# Rank correlation reported by `result`, recorded as the 'matching_measure' row.
corr = result.rank_correlation_between_distances_and_queries()
result_df.loc['matching_measure'] = corr

### Own metric: DTW without matching

In [None]:
# Rank correlation reported by `dtw_one`; the value is only shown as cell output.
# NOTE(review): unlike the measures written to result_df, this one is not recorded -- confirm that is intended.
dtw_one.rank_correlation_between_distances_and_queries()

### Own metric: euclidean with matching

In [None]:
# Rank correlation reported by `euc_matching`; the value is only shown as cell output.
# NOTE(review): unlike the measures written to result_df, this one is not recorded -- confirm that is intended.
euc_matching.rank_correlation_between_distances_and_queries()

### Own metric: euclidean without matching

In [None]:
# Rank correlation reported by `euc_one`; the value is only shown as cell output.
# NOTE(review): unlike the measures written to result_df, this one is not recorded -- confirm that is intended.
euc_one.rank_correlation_between_distances_and_queries()

### Histogram distance

In [None]:
# Rank correlation reported by `wasser_result`, recorded as the 'wasserstein_measure' row.
corr = wasser_result.rank_correlation_between_distances_and_queries()
result_df.loc['wasserstein_measure'] = corr

In [None]:
# Rank correlation reported by `wasser1H_result`, recorded as the 'wasserstein_measure_1H' row.
corr = wasser1H_result.rank_correlation_between_distances_and_queries()
result_df.loc['wasserstein_measure_1H'] = corr

In [None]:
# Rank correlation reported by `wasser4H_result`, recorded as the 'wasserstein_measure_4H' row.
corr = wasser4H_result.rank_correlation_between_distances_and_queries()
result_df.loc['wasserstein_measure_4H'] = corr

### Plain euclidean

In [None]:
# Rank correlation reported by `euc_result`, recorded as the 'euclidean' row.
corr = euc_result.rank_correlation_between_distances_and_queries()
result_df.loc['euclidean'] = corr

In [None]:
# Show the table of correlations gathered in result_df.
result_df

In [None]:
# Bar chart with one bar per row of result_df. reset_index() moves the row labels
# into an 'index' column, which is renamed to 'measure' so the encodings can use it.
alt.Chart(
    result_df.reset_index().rename(columns={'index': 'measure'}),
    height=200,
    width=200,
    title='Correlation with expert',
).mark_bar().encode(
    x='measure:N',
    y='correlation:Q',
    # Colour each bar by measure, without a legend.
    color=alt.Color('measure:N', legend=None),
    tooltip='correlation',
)

## plot all pairs

In [None]:
# result.plot_constraint_pairs_w_distances()

## plot closest ML pairs 

In [None]:
# 'ML' constraint pairs in ascending sort order.
# NOTE(review): the first argument (3) is presumably the number of pairs to plot -- confirm against ResultParser.
result.plot_constraint_pairs(3, constraints='ML', sort='asc')

## plot farthest ML pairs 

In [None]:
# 'ML' constraint pairs in descending sort order.
# NOTE(review): the first argument (3) is presumably the number of pairs to plot -- confirm against ResultParser.
result.plot_constraint_pairs(3, constraints='ML', sort='desc')

## plot farthest CL pairs

In [None]:
# 'CL' constraint pairs in descending sort order.
# NOTE(review): the first argument (2) is presumably the number of pairs to plot -- confirm against ResultParser.
result.plot_constraint_pairs(2, constraints='CL', sort='desc')

## plot closest CL pairs 

In [None]:
# 'CL' constraint pairs in ascending sort order.
# NOTE(review): the first argument (7) is presumably the number of pairs to plot -- confirm against ResultParser.
result.plot_constraint_pairs(7, constraints='CL', sort='asc')