This notebook reports the homogeneity, completeness, and v-measure scores for all users after the first round of clustering. The scores are computed from three versions of the user inputs: the original user inputs, the user inputs after converting language, and the user inputs after converting purposes and replaced mode. Each set of scores is shown first for valid bins above the cutoff and then for all valid bins.

In [None]:
import logging

# Our imports
import emission.core.get_database as edb
import emission.analysis.modelling.tour_model.cluster_pipeline as pipeline
import emission.analysis.modelling.tour_model.similarity as similarity
import emission.analysis.modelling.tour_model.featurization as featurization
import emission.analysis.modelling.tour_model.representatives as representatives
import emission.storage.decorations.analysis_timeseries_queries as esda
import pandas as pd
from numpy import *
import confirmed_trips_eval_bins_clusters as evaluation
from sklearn import metrics
from pandas.testing import assert_frame_equal
import numpy as np

In [None]:
# Query the profile database for entries whose install_group is "participant".
# The second argument to find() requests user_id and excludes _id
# (presumably a field projection -- confirm against the database API).
participant_uuid_obj = list(
    edb.get_profile_db().find(
        {"install_group": "participant"},
        {"user_id": 1, "_id": 0},
    )
)
# Flatten the returned documents into a plain list of user ids.
all_users = [profile["user_id"] for profile in participant_uuid_obj]

In [None]:
# Radius passed to evaluation.get_user_ls and evaluation.v_measure_bins below.
# NOTE(review): the unit is not stated in this notebook -- presumably meters; confirm.
radius = 100

In [None]:
# get_user_ls returns two values; only the first is kept. user_ls is used as
# the row index of the per-user score tables further down.
user_ls, _ = evaluation.get_user_ls(all_users, radius)

In [None]:
# Display settings for the tables in this notebook: allow up to 100 characters
# per column cell and remove the limit on the number of rows shown.
# Both options are addressed by their full "display.*" names; abbreviated
# names are resolved by pattern matching and can become ambiguous if pandas
# adds similarly named options.
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_rows', None)

## Bins above cutoff

### Original user input

In [None]:
# Scores from the original user inputs, restricted to bins above the cutoff
# (cutoff=True). The three results are tabulated against user_ls below.
(homo_score_ori,
 comp_score_ori,
 v_score_ori) = evaluation.v_measure_bins(all_users, radius, cutoff=True)

### After changing language

In [None]:
# Scores with sp2en=True (the "after changing language" variant), restricted
# to bins above the cutoff (cutoff=True).
(homo_score_sp2en,
 comp_score_sp2en,
 v_score_sp2en) = evaluation.v_measure_bins(
    all_users, radius, sp2en=True, cutoff=True
)

### After converting purposes and replaced mode

In [None]:
# Scores with cvt_pur_mo=True (the "after converting purposes and replaced
# mode" variant), restricted to bins above the cutoff (cutoff=True).
(homo_score_cvt,
 comp_score_cvt,
 v_score_cvt) = evaluation.v_measure_bins(
    all_users, radius, cvt_pur_mo=True, cutoff=True
)

### DataFrame

In [None]:
# Summary table for bins above the cutoff: one row per input variant and one
# column per metric. Each cell holds the score object returned for that
# variant as-is.
cutoff_df = pd.DataFrame(
    index=[
        'original user input',
        'after translation',
        'after converting purposes and replaced mode',
    ],
    data={
        'homogeneity score': [homo_score_ori, homo_score_sp2en, homo_score_cvt],
        'completeness score': [comp_score_ori, comp_score_sp2en, comp_score_cvt],
        'v-measure score': [v_score_ori, v_score_sp2en, v_score_cvt],
    },
)
cutoff_df

#### homogeneity score

In [None]:
# Homogeneity scores for bins above the cutoff: rows are labelled by user_ls,
# one column per input variant. Rows containing a missing value are dropped.
homo_df = pd.DataFrame(
    data={
        'original user input': homo_score_ori,
        'after translation': homo_score_sp2en,
        'after converting purposes and replaced mode': homo_score_cvt,
    },
    index=user_ls,
).dropna()
homo_df

In [None]:
# Bar chart of homo_df with y-axis ticks every 0.1 from 0 to 1.
homo_df.plot.bar(
    title='homogeneity score',
    yticks=(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
)

#### completeness score

In [None]:
# Completeness scores for bins above the cutoff: rows are labelled by user_ls,
# one column per input variant. Rows containing a missing value are dropped.
comp_df = pd.DataFrame(
    data={
        'original user input': comp_score_ori,
        'after translation': comp_score_sp2en,
        'after converting purposes and replaced mode': comp_score_cvt,
    },
    index=user_ls,
).dropna()

In [None]:
# Bar chart of comp_df with y-axis ticks every 0.1 from 0 to 1.
comp_df.plot.bar(
    title='completeness score',
    yticks=(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
)

#### v-measure score

In [None]:
# V-measure scores for bins above the cutoff: rows are labelled by user_ls,
# one column per input variant. Rows containing a missing value are dropped.
v_df = pd.DataFrame(
    data={
        'original user input': v_score_ori,
        'after translation': v_score_sp2en,
        'after converting purposes and replaced mode': v_score_cvt,
    },
    index=user_ls,
).dropna()
v_df

In [None]:
# Bar chart of v_df with y-axis ticks every 0.1 from 0 to 1.
v_df.plot.bar(
    title='v-measure score',
    yticks=(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
)

## All bins

### Original user input

In [None]:
# Scores from the original user inputs on all bins (no cutoff argument is
# passed, so v_measure_bins uses its default for it).
(ab_homo_score_ori,
 ab_comp_score_ori,
 ab_v_score_ori) = evaluation.v_measure_bins(all_users, radius)

### After changing language

In [None]:
# Scores with sp2en=True (the "after changing language" variant) on all bins.
(ab_homo_score_sp2en,
 ab_comp_score_sp2en,
 ab_v_score_sp2en) = evaluation.v_measure_bins(all_users, radius, sp2en=True)

### After converting purposes and replaced mode

In [None]:
# Scores with cvt_pur_mo=True (the "after converting purposes and replaced
# mode" variant) on all bins.
(ab_homo_score_cvt,
 ab_comp_score_cvt,
 ab_v_score_cvt) = evaluation.v_measure_bins(
    all_users, radius, cvt_pur_mo=True
)

### DataFrame

In [None]:
# Summary table for all bins: one row per input variant and one column per
# metric. Each cell holds the score object returned for that variant as-is.
all_df = pd.DataFrame(
    index=[
        'original user input',
        'after translation',
        'after converting purposes and replaced mode',
    ],
    data={
        'homogeneity score': [ab_homo_score_ori, ab_homo_score_sp2en, ab_homo_score_cvt],
        'completeness score': [ab_comp_score_ori, ab_comp_score_sp2en, ab_comp_score_cvt],
        'v-measure score': [ab_v_score_ori, ab_v_score_sp2en, ab_v_score_cvt],
    },
)
all_df

#### homogeneity score

In [None]:
# Homogeneity scores on all bins: rows are labelled by user_ls, one column per
# input variant. Rows containing a missing value are dropped.
ab_homo_df = pd.DataFrame(
    data={
        'original user input': ab_homo_score_ori,
        'after translation': ab_homo_score_sp2en,
        'after converting purposes and replaced mode': ab_homo_score_cvt,
    },
    index=user_ls,
).dropna()

In [None]:
# Bar chart of ab_homo_df with y-axis ticks every 0.1 from 0 to 1.
ab_homo_df.plot.bar(
    title='homogeneity score',
    yticks=(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
)

#### completeness score

In [None]:
# Completeness scores on all bins: rows are labelled by user_ls, one column
# per input variant. Rows containing a missing value are dropped.
ab_comp_df = pd.DataFrame(
    data={
        'original user input': ab_comp_score_ori,
        'after translation': ab_comp_score_sp2en,
        'after converting purposes and replaced mode': ab_comp_score_cvt,
    },
    index=user_ls,
).dropna()

In [None]:
# Bar chart of ab_comp_df with y-axis ticks every 0.1 from 0 to 1.
ab_comp_df.plot.bar(
    title='completeness score',
    yticks=(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
)

#### v-measure score

In [None]:
# V-measure scores on all bins: rows are labelled by user_ls, one column per
# input variant. Rows containing a missing value are dropped.
ab_v_df = pd.DataFrame(
    data={
        'original user input': ab_v_score_ori,
        'after translation': ab_v_score_sp2en,
        'after converting purposes and replaced mode': ab_v_score_cvt,
    },
    index=user_ls,
).dropna()

In [None]:
# Bar chart of ab_v_df with y-axis ticks every 0.1 from 0 to 1.
ab_v_df.plot.bar(
    title='v-measure score',
    yticks=(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
)