In [None]:
import logging

# Our imports
import emission.core.get_database as edb
import emission.analysis.modelling.tour_model.cluster_pipeline as pipeline
import emission.analysis.modelling.tour_model.similarity as similarity
import emission.analysis.modelling.tour_model.featurization as featurization
import emission.analysis.modelling.tour_model.representatives as representatives
import emission.storage.decorations.analysis_timeseries_queries as esda
import pandas as pd
from numpy import *
import confirmed_trips_eval_bins_clusters as evaluation
from sklearn import metrics
from pandas.testing import assert_frame_equal

In [None]:
# Query the profile collection for entries whose install_group is "participant".
# The second argument requests only user_id and suppresses _id
# (MongoDB-style projection — NOTE(review): confirm against edb).
participant_uuid_obj = list(
    edb.get_profile_db().find(
        {"install_group": "participant"},
        {"user_id": 1, "_id": 0},
    )
)
# Flatten the matched entries into a plain list of user ids.
all_users = [profile["user_id"] for profile in participant_uuid_obj]

In [None]:
# Radius passed as the second positional argument to the
# evaluation.v_measure_bins calls below.
# NOTE(review): the unit is not visible here — presumably meters; confirm
# against the evaluation module.
radius = 100

In [None]:
# Display settings for the tables rendered in this notebook.
# Full option names are used on purpose: pandas resolves abbreviated keys by
# pattern matching, which can become ambiguous if a similarly named option is
# added in a later release.
pd.set_option('display.max_colwidth', 100)  # show up to 100 characters per cell
pd.set_option('display.max_rows', None)     # None removes the row limit

## Bins above cutoff

### Original user input

In [None]:
# "Original user input" case for the bins above the cutoff: cutoff=True and
# no label-conversion flags. The three results are unpacked as the
# homogeneity, completeness and v-measure scores.
homo_score_ori, comp_score_ori, v_score_ori = evaluation.v_measure_bins(
    all_users,
    radius,
    cutoff=True,
)

In [None]:
# Mean v-measure over the non-NaN entries, rounded to 3 decimals.
# nanmean (in scope through this notebook's `from numpy import *`) skips NaN
# values numerically instead of detecting them by their string form.
mean_v_ori = round(nanmean(v_score_ori), 3)

### After changing language

In [None]:
# "After changing language" case for the bins above the cutoff: same call as
# the original-input case, with sp2en=True added.
homo_score_sp2en, comp_score_sp2en, v_score_sp2en = evaluation.v_measure_bins(
    all_users,
    radius,
    sp2en=True,
    cutoff=True,
)

In [None]:
# Mean v-measure over the non-NaN entries, rounded to 3 decimals.
# nanmean (in scope through this notebook's `from numpy import *`) skips NaN
# values numerically instead of detecting them by their string form.
mean_v_sp2en = round(nanmean(v_score_sp2en), 3)

### After converting purposes and replaced mode

In [None]:
# "After converting purposes and replaced mode" case for the bins above the
# cutoff: same call as the original-input case, with cvt_pur_mo=True added.
homo_score_cvt, comp_score_cvt, v_score_cvt = evaluation.v_measure_bins(
    all_users,
    radius,
    cvt_pur_mo=True,
    cutoff=True,
)

In [None]:
# Mean v-measure over the non-NaN entries, rounded to 3 decimals.
# nanmean (in scope through this notebook's `from numpy import *`) skips NaN
# values numerically instead of detecting them by their string form.
mean_v_cvt = round(nanmean(v_score_cvt), 3)

### Summary DataFrame (bins above cutoff)

In [None]:
# Summary table for the bins above the cutoff: one row per label-processing
# variant. The first three columns hold the complete score list of each
# variant in a single cell; the last column holds the rounded mean v-measure.
cutoff_df = pd.DataFrame(
    {
        'homogeneity_score': [homo_score_ori, homo_score_sp2en, homo_score_cvt],
        'completeness_score': [comp_score_ori, comp_score_sp2en, comp_score_cvt],
        'v_measure_score': [v_score_ori, v_score_sp2en, v_score_cvt],
        'mean v_measure_score': [mean_v_ori, mean_v_sp2en, mean_v_cvt],
    },
    index=[
        'original user input',
        'after translation',
        'after converting purposes and replaced mode',
    ],
)
# Bare last expression so the notebook renders the table.
cutoff_df

#### homogeneity_score

In [None]:
# Homogeneity scores side by side: one column per label-processing variant,
# one row per score (presumably one score per entry of all_users — confirm
# against evaluation.v_measure_bins).
# Row labels user1..userN are generated from the number of scores rather than
# hard-coded to 13, so the cell does not raise a length-mismatch error when
# the participant query returns a different number of users.
homo_df = pd.DataFrame(
    data={
        'original user input': homo_score_ori,
        'after translation': homo_score_sp2en,
        'after converting purposes and replaced mode': homo_score_cvt,
    },
    index=['user{}'.format(n) for n in range(1, len(homo_score_ori) + 1)],
)

In [None]:
# Bar chart of the homogeneity scores with y ticks every 0.1 from 0 to 1.
homo_df.plot(
    kind='bar',
    title='homogeneity_score',
    yticks=[tick / 10 for tick in range(11)],
)

#### completeness_score

In [None]:
# Completeness scores side by side: one column per label-processing variant,
# one row per score (presumably one score per entry of all_users — confirm
# against evaluation.v_measure_bins).
# Row labels user1..userN are generated from the number of scores rather than
# hard-coded to 13, so the cell does not raise a length-mismatch error when
# the participant query returns a different number of users.
comp_df = pd.DataFrame(
    data={
        'original user input': comp_score_ori,
        'after translation': comp_score_sp2en,
        'after converting purposes and replaced mode': comp_score_cvt,
    },
    index=['user{}'.format(n) for n in range(1, len(comp_score_ori) + 1)],
)

In [None]:
# Bar chart of the completeness scores with y ticks every 0.1 from 0 to 1.
comp_df.plot(
    kind='bar',
    title='completeness_score',
    yticks=[tick / 10 for tick in range(11)],
)

#### v_measure_score

In [None]:
# V-measure scores side by side: one column per label-processing variant,
# one row per score (presumably one score per entry of all_users — confirm
# against evaluation.v_measure_bins).
# Row labels user1..userN are generated from the number of scores rather than
# hard-coded to 13, so the cell does not raise a length-mismatch error when
# the participant query returns a different number of users.
v_df = pd.DataFrame(
    data={
        'original user input': v_score_ori,
        'after translation': v_score_sp2en,
        'after converting purposes and replaced mode': v_score_cvt,
    },
    index=['user{}'.format(n) for n in range(1, len(v_score_ori) + 1)],
)

In [None]:
# Bar chart of the v-measure scores with y ticks every 0.1 from 0 to 1.
v_df.plot(
    kind='bar',
    title='v_measure_score',
    yticks=[tick / 10 for tick in range(11)],
)

## All bins

### Original user input

In [None]:
# "Original user input" case for all bins: no cutoff argument and no
# label-conversion flags are passed. The three results are unpacked as the
# homogeneity, completeness and v-measure scores.
ab_homo_score_ori, ab_comp_score_ori, ab_v_score_ori = evaluation.v_measure_bins(
    all_users,
    radius,
)

In [None]:
# Mean v-measure over the non-NaN entries, rounded to 3 decimals.
# nanmean (in scope through this notebook's `from numpy import *`) skips NaN
# values numerically instead of detecting them by their string form.
ab_mean_v_ori = round(nanmean(ab_v_score_ori), 3)

### After changing language

In [None]:
# "After changing language" case for all bins: sp2en=True, no cutoff argument.
ab_homo_score_sp2en, ab_comp_score_sp2en, ab_v_score_sp2en = evaluation.v_measure_bins(
    all_users,
    radius,
    sp2en=True,
)

In [None]:
# Mean v-measure over the non-NaN entries, rounded to 3 decimals.
# nanmean (in scope through this notebook's `from numpy import *`) skips NaN
# values numerically instead of detecting them by their string form.
ab_mean_v_sp2en = round(nanmean(ab_v_score_sp2en), 3)

### After converting purposes and replaced mode

In [None]:
# "After converting purposes and replaced mode" case for all bins:
# cvt_pur_mo=True, no cutoff argument.
ab_homo_score_cvt, ab_comp_score_cvt, ab_v_score_cvt = evaluation.v_measure_bins(
    all_users,
    radius,
    cvt_pur_mo=True,
)

In [None]:
# Mean v-measure over the non-NaN entries, rounded to 3 decimals.
# nanmean (in scope through this notebook's `from numpy import *`) skips NaN
# values numerically instead of detecting them by their string form.
ab_mean_v_cvt = round(nanmean(ab_v_score_cvt), 3)

### Summary DataFrame (all bins)

In [None]:
# Summary table for all bins: one row per label-processing variant. The first
# three columns hold the complete score list of each variant in a single
# cell; the last column holds the rounded mean v-measure.
all_df = pd.DataFrame(
    {
        'homogeneity_score': [ab_homo_score_ori, ab_homo_score_sp2en, ab_homo_score_cvt],
        'completeness_score': [ab_comp_score_ori, ab_comp_score_sp2en, ab_comp_score_cvt],
        'v_measure_score': [ab_v_score_ori, ab_v_score_sp2en, ab_v_score_cvt],
        'mean v_measure_score': [ab_mean_v_ori, ab_mean_v_sp2en, ab_mean_v_cvt],
    },
    index=[
        'original user input',
        'after translation',
        'after converting purposes and replaced mode',
    ],
)
# Bare last expression so the notebook renders the table.
all_df

#### homogeneity_score

In [None]:
# All-bins homogeneity scores side by side: one column per label-processing
# variant, one row per score (presumably one score per entry of all_users —
# confirm against evaluation.v_measure_bins).
# Row labels user1..userN are generated from the number of scores rather than
# hard-coded to 13, so the cell does not raise a length-mismatch error when
# the participant query returns a different number of users.
ab_homo_df = pd.DataFrame(
    data={
        'original user input': ab_homo_score_ori,
        'after translation': ab_homo_score_sp2en,
        'after converting purposes and replaced mode': ab_homo_score_cvt,
    },
    index=['user{}'.format(n) for n in range(1, len(ab_homo_score_ori) + 1)],
)

In [None]:
# Bar chart of the all-bins homogeneity scores with y ticks every 0.1 from 0 to 1.
ab_homo_df.plot(
    kind='bar',
    title='homogeneity_score',
    yticks=[tick / 10 for tick in range(11)],
)

#### completeness_score

In [None]:
# All-bins completeness scores side by side: one column per label-processing
# variant, one row per score (presumably one score per entry of all_users —
# confirm against evaluation.v_measure_bins).
# Row labels user1..userN are generated from the number of scores rather than
# hard-coded to 13, so the cell does not raise a length-mismatch error when
# the participant query returns a different number of users.
ab_comp_df = pd.DataFrame(
    data={
        'original user input': ab_comp_score_ori,
        'after translation': ab_comp_score_sp2en,
        'after converting purposes and replaced mode': ab_comp_score_cvt,
    },
    index=['user{}'.format(n) for n in range(1, len(ab_comp_score_ori) + 1)],
)

In [None]:
# Bar chart of the all-bins completeness scores with y ticks every 0.1 from 0 to 1.
ab_comp_df.plot(
    kind='bar',
    title='completeness_score',
    yticks=[tick / 10 for tick in range(11)],
)

#### v_measure_score

In [None]:
# All-bins v-measure scores side by side: one column per label-processing
# variant, one row per score (presumably one score per entry of all_users —
# confirm against evaluation.v_measure_bins).
# Row labels user1..userN are generated from the number of scores rather than
# hard-coded to 13, so the cell does not raise a length-mismatch error when
# the participant query returns a different number of users.
ab_v_df = pd.DataFrame(
    data={
        'original user input': ab_v_score_ori,
        'after translation': ab_v_score_sp2en,
        'after converting purposes and replaced mode': ab_v_score_cvt,
    },
    index=['user{}'.format(n) for n in range(1, len(ab_v_score_ori) + 1)],
)

In [None]:
# Bar chart of the all-bins v-measure scores with y ticks every 0.1 from 0 to 1.
ab_v_df.plot(
    kind='bar',
    title='v_measure_score',
    yticks=[tick / 10 for tick in range(11)],
)