# Evaluation

- dataset: Ering (full dataset)
- clustering: DBSCAN
- frameworks: MOSCAT, EvolClustering, Baseline

The authors of EvolClustering suggest setting $w=0.1$ in their solution, so we adopted this value. For MOSCAT, we used TOPSIS to select the weight because it is a simple, non-parametric method.
We defined the baseline as clustering without temporal context.

## Full Set of Solutions

In [7]:
import pandas as pd
import os
import math


# Load the per-method / per-weight evaluation statistics from the working
# directory. os.path.join avoids hand-built path separators.
evaluation_df = pd.read_csv(os.path.join(os.getcwd(), 'stats', 'evaluation_result.csv'))

# Show every row when the frame is displayed instead of truncating it.
pd.set_option("display.max_rows", None)

# Rows are kept in CSV order. An earlier bare call to sort_values(by=['w'])
# returned a sorted copy that was never assigned, so it had no effect on the
# frame; it is dropped here. Assign the result of sort_values back to
# evaluation_df if ordering by weight is wanted.

# Halve the average 'sq' score in place.
# NOTE(review): presumably this rescales av_sq to the range of av_tq - confirm
# against the code that produced the CSV.
evaluation_df['av_sq'] = evaluation_df['av_sq'] / 2
evaluation_df

Unnamed: 0,method,w,av_c_num,std_c_num,av_sq,std_sq,av_tq,std_tq,av_total_score,av_purity,std_purity
0,baseline,0.0,2.076923,0.321505,0.360655,0.022743,0.551385,0.325389,0.636347,0.190564,0.057899
1,moscat,0.0,2.076923,0.321505,0.360655,0.022743,0.551385,0.325389,0.636347,0.190564,0.057899
2,moscat,0.1,2.046154,0.211451,0.36059,0.022978,0.564,0.318118,0.64259,0.186359,0.047209
3,moscat,0.2,2.061538,0.242186,0.359977,0.026013,0.558154,0.315237,0.639054,0.18641,0.047208
4,moscat,0.3,2.061538,0.242186,0.35942,0.025779,0.573538,0.291646,0.646189,0.186667,0.047148
5,moscat,0.4,2.061538,0.242186,0.354332,0.029051,0.635692,0.207327,0.672179,0.179231,0.004742
6,moscat,0.5,2.153846,0.363609,0.349164,0.034195,0.675077,0.195721,0.686702,0.180154,0.004803
7,moscat,0.6,2.153846,0.363609,0.348978,0.034521,0.681385,0.197269,0.689671,0.180154,0.004803
8,moscat,0.7,2.153846,0.363609,0.348954,0.034656,0.681846,0.197918,0.689877,0.180205,0.004855
9,moscat,0.8,2.153846,0.363609,0.348954,0.034656,0.681846,0.197918,0.689877,0.180205,0.004855


## Solution Selection by TOPSIS

In [8]:
def calc_topsis(sq, tq, *, best=2, worst=0):
    """Return the TOPSIS relative closeness of the point ``(sq, tq)``.

    The score is ``d_worst / (d_worst + d_best)``, where ``d_best`` is the
    Euclidean distance from ``(sq, tq)`` to the ideal point ``(best, best)``
    and ``d_worst`` is the distance to the anti-ideal point
    ``(worst, worst)``. It is 0 at the anti-ideal point and 1 at the ideal
    point; larger is better.

    Args:
        sq: First criterion value.
        tq: Second criterion value.
        best: Coordinate used for both axes of the ideal point. The default
            of 2 is what this notebook has always used.
            NOTE(review): the scores in the evaluation table are all below 1;
            confirm that 2 is the intended upper bound.
        worst: Coordinate used for both axes of the anti-ideal point.

    Returns:
        The relative closeness as a float in ``[0, 1]``.

    Raises:
        ValueError: If ``best`` equals ``worst``; the two reference points
            coincide and the score carries no information.
    """
    if best == worst:
        raise ValueError("best and worst reference values must differ")

    dist_to_max = math.sqrt((sq - best) ** 2 + (tq - best) ** 2)
    dist_to_min = math.sqrt((sq - worst) ** 2 + (tq - worst) ** 2)
    # The denominator is at least the distance between the two reference
    # points (triangle inequality), so it is non-zero once best != worst.
    return dist_to_min / (dist_to_min + dist_to_max)

# Columns shown in the final comparison table (defined once, used three times).
report_columns = ['method', 'w', 'av_purity', 'std_purity', 'av_c_num', 'std_c_num']

# Score every MOSCAT weight setting by its TOPSIS closeness, computed from the
# av_sq / av_tq pair of each row.
moscat_results = evaluation_df.query('method=="moscat"').copy()
moscat_results['oq'] = [
    calc_topsis(sq, tq)
    for sq, tq in zip(moscat_results['av_sq'], moscat_results['av_tq'])
]

# idxmax returns the index label of the first row holding the maximum, so
# tied scores resolve to the row that appears first.
topsis_max_idx = moscat_results['oq'].idxmax()
# Selecting with a one-element label list yields a one-row DataFrame directly
# and keeps the column dtypes, unlike building it from a transposed Series.
topsis_moscat_result = moscat_results.loc[[topsis_max_idx], report_columns]

# EvolClustering is reported at the fixed weight w=0.1; the baseline has a
# single configuration.
evol_result = evaluation_df.query('method=="evol" & w==0.1')[report_columns].copy()
baseline = evaluation_df.query('method=="baseline"')[report_columns].copy()

# Stack the three selections; original row labels are preserved.
total_result = pd.concat([topsis_moscat_result, evol_result, baseline])
total_result

Unnamed: 0,method,w,av_purity,std_purity,av_c_num,std_c_num
8,moscat,0.7,0.180205,0.004855,2.153846,0.363609
13,evol,0.1,0.177897,0.005156,2.0,0.0
0,baseline,0.0,0.190564,0.057899,2.076923,0.321505
