# Evaluation

- dataset:Motions (full dataset)
- clustering: dbscan
- frameworks: MOSCAT, EvolClustering, Baseline

The authors of EvolClustering suggest to set $w=0.1$ in their solution, so we did this. In case of MOSCAT, we utilized TOPSIS to select the weight, because it is a simple non-parametric method.
We defined the baseline as clustering without temporal context.

## Full Set of Solutions

In [1]:
import pandas as pd
import os
import math
import numpy as np

evaluation_df=pd.read_csv(os.getcwd()+'/stats/evaluation_result.csv')
pd.set_option("display.max_rows", None)
#evaluation_df.query('method=="moscat" | method=="baseline"')
evaluation_df.sort_values(by=['w'])
evaluation_df['av_sq']=evaluation_df['av_sq']/2
evaluation_df

Unnamed: 0,method,w,av_c_num,std_c_num,av_sq,std_sq,av_tq,std_tq,av_total_score,av_purity,std_purity
0,baseline,0.0,2.06,0.678977,0.349645,0.250803,0.3207,0.195248,0.509995,0.2875,0.05713
1,moscat,0.0,2.16,0.581187,0.389017,0.089903,0.3318,0.203923,0.554917,0.29,0.056211
2,moscat,0.1,2.16,0.581187,0.389017,0.089903,0.3318,0.203923,0.554917,0.29,0.056211
3,moscat,0.2,2.09,0.451596,0.386913,0.091775,0.3454,0.199936,0.559613,0.287375,0.053962
4,moscat,0.3,2.23,0.600589,0.371992,0.08606,0.3965,0.128192,0.570242,0.332,0.076465
5,moscat,0.4,2.98,1.333182,0.350299,0.077686,0.5552,0.105366,0.627899,0.429125,0.060408
6,moscat,0.5,3.11,1.262473,0.319808,0.085724,0.5915,0.111621,0.615558,0.47825,0.057286
7,moscat,0.6,2.84,1.022178,0.319179,0.086207,0.6369,0.133906,0.637629,0.477375,0.057039
8,moscat,0.7,2.82,0.967972,0.318739,0.086379,0.6393,0.137196,0.638389,0.477125,0.056883
9,moscat,0.8,2.82,0.967972,0.318726,0.086389,0.6407,0.138872,0.639076,0.477125,0.056883


## Solution Selection by TOPSIS

In [4]:
def calc_topsis(sq,tq):
    dist_to_max=math.sqrt((sq-2)**2+(tq-2)**2)
    dist_to_min=math.sqrt((sq-0)**2+(tq-0)**2)
    oq=dist_to_min/(dist_to_min+dist_to_max)
    return oq

moscat_results=evaluation_df.query('method=="moscat"').copy()
moscat_results['oq'] = moscat_results.apply(lambda row: calc_topsis(row['av_sq'],row['av_tq']),axis=1)
moscat_results
topsis_max_idx=moscat_results['oq'].idxmax()
topsis_moscat_result=moscat_results.loc[topsis_max_idx]
topsis_moscat_result=pd.DataFrame(topsis_moscat_result).transpose()[['method','w','av_purity','std_purity','av_c_num','std_c_num']]

evol_result = evaluation_df.query('method=="evol" & w==0.1').copy()[['method','w','av_purity','std_purity','av_c_num','std_c_num']]
evol_result
baseline=evaluation_df.query('method=="baseline"').copy()[['method','w','av_purity','std_purity','av_c_num','std_c_num']]
topsis_moscat_result

total_result=pd.concat([topsis_moscat_result,evol_result,baseline])
total_result

Unnamed: 0,method,w,av_purity,std_purity,av_c_num,std_c_num
10,moscat,0.9,0.477375,0.056873,2.82,0.967972
13,evol,0.1,0.284,0.048594,2.05,0.329524
0,baseline,0.0,0.2875,0.05713,2.06,0.678977
