# Evaluation

- dataset: Ering (full dataset)
- $k \in [2,10]$
- max_iter: 20
- frameworks: MOSCAT, EvolClustering, Baseline

The authors of EvolClustering suggest setting $w=0.1$ in their solution, so we adopted this value. For MOSCAT, we used TOPSIS to select the weight, because it is a simple non-parametric method.
We defined the baseline as clustering without temporal context.

## Full Set of Solutions

In [1]:
import pandas as pd
import os
import math

# Show every row when a frame is displayed (no truncation of the result table).
pd.set_option("display.max_rows", None)

# Read the evaluation results from the stats folder under the current working directory.
results_csv = os.getcwd()+'/stats/evaluation_result.csv'
evaluation_df = pd.read_csv(results_csv)
evaluation_df

Unnamed: 0,method,k,w,av_sq,std_sq,av_tq,std_tq,av_oq,std_oq,av_purity,std_purity
0,baseline,2,0.0,0.831766,0.027091,0.922405,0.103981,0.877085,0.048797,0.371125,0.045737
1,baseline,3,0.0,0.835737,0.027935,0.916013,0.104877,0.875875,0.048643,0.387875,0.048476
2,baseline,4,0.0,0.851346,0.025572,0.906935,0.099164,0.87914,0.046056,0.45,0.045885
3,baseline,5,0.0,0.852941,0.027251,0.905568,0.099768,0.879255,0.046391,0.451,0.045494
4,baseline,6,0.0,0.867982,0.024532,0.891034,0.097026,0.879508,0.046217,0.475875,0.035367
5,baseline,7,0.0,0.855904,0.029992,0.902037,0.099192,0.878971,0.046054,0.45925,0.048056
6,baseline,8,0.0,0.869411,0.026407,0.888139,0.097359,0.878775,0.046417,0.48475,0.041689
7,baseline,9,0.0,0.869777,0.026707,0.888386,0.096956,0.879082,0.046246,0.483875,0.040252
8,baseline,10,0.0,0.856128,0.03067,0.901867,0.09938,0.878997,0.046024,0.46075,0.046973
9,evol,2,0.0,0.831766,0.027091,0.922405,0.103981,0.877085,0.048797,0.371125,0.045737


## Solution Selection by TOPSIS

In [2]:
def calc_topsis(sq,tq):
    """Return the TOPSIS relative closeness of the point (sq, tq).

    The ideal point is (1, 1) and the anti-ideal point is (0, 0); both
    distances are Euclidean. The score is
    d_anti / (d_anti + d_ideal), so a larger value means the point lies
    closer to the ideal point.
    """
    def _distance_to(corner):
        # Euclidean distance from (sq, tq) to the point (corner, corner).
        return math.sqrt((sq-corner)**2+(tq-corner)**2)

    d_ideal = _distance_to(1)
    d_anti = _distance_to(0)
    return d_anti/(d_anti+d_ideal)

# Score every MOSCAT configuration with TOPSIS on its (av_sq, av_tq) pair.
moscat_results=evaluation_df.query('method=="moscat"').copy()
moscat_results['oq']=moscat_results.apply(lambda row: calc_topsis(row['av_sq'],row['av_tq']),axis=1)

# For each k, keep the MOSCAT row with the highest TOPSIS score.
topsis_max_idx=moscat_results.groupby('k')['oq'].idxmax()
topsis_moscat_result=moscat_results.loc[topsis_max_idx]

# EvolClustering is compared at the fixed weight w=0.1; baseline rows are used as-is.
evol_result=evaluation_df.query('method=="evol" & w==0.1').copy()
baseline=evaluation_df.query('method=="baseline"').copy()

# One row per k, with the selected weight and the purity mean/std of each method side by side.
total_result=pd.concat([topsis_moscat_result,evol_result,baseline], ignore_index=True)
total_result.pivot(index='k',columns='method',values=['w','av_purity','std_purity'])


Unnamed: 0_level_0,w,w,w,av_purity,av_purity,av_purity,std_purity,std_purity,std_purity
method,baseline,evol,moscat,baseline,evol,moscat,baseline,evol,moscat
k,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
2,0.0,0.1,0.8,0.371125,0.28,0.28,0.045737,0.01638,0.01638
3,0.0,0.1,0.9,0.387875,0.421,0.39725,0.048476,0.043876,0.037377
4,0.0,0.1,0.8,0.45,0.485125,0.4685,0.045885,0.038791,0.042435
5,0.0,0.1,0.8,0.451,0.4785,0.465,0.045494,0.03815,0.051462
6,0.0,0.1,0.8,0.475875,0.4965,0.50425,0.035367,0.029945,0.047014
7,0.0,0.1,0.8,0.45925,0.498875,0.547375,0.048056,0.026418,0.04705
8,0.0,0.1,0.7,0.48475,0.50275,0.506875,0.041689,0.026327,0.043059
9,0.0,0.1,0.7,0.483875,0.500125,0.5095,0.040252,0.023935,0.047776
10,0.0,0.1,0.7,0.46075,0.50075,0.509875,0.046973,0.02604,0.059387
