In [None]:
import emission.core.get_database as edb
import emission.analysis.modelling.tour_model.similarity as similarity
import pandas as pd
import numpy as np
import get_request_percentage as grp
import get_scores as gs
import label_processing as lp
import get_users as gu
import data_preprocessing as preprocess
import get_tuning_score as tuning
import evaluation_pipeline as ep
import matplotlib.pyplot as plt
import get_plot as plot
import emission.core.common as ecc

In [None]:
# Fetch the profile documents whose install_group is "participant".
# The second dict is handed to find() as its second positional argument;
# assuming pymongo-style find(filter, projection) semantics it keeps user_id
# and drops _id -- confirm against emission.core.get_database.
participant_uuid_obj = list(
    edb.get_profile_db().find(
        {"install_group": "participant"},
        {"user_id": 1, "_id": 0},
    )
)

# Flatten the returned documents into a plain list of user ids.
all_users = [profile["user_id"] for profile in participant_uuid_obj]

In [None]:
# Radius parameter shared by the evaluation below: the same value is passed to
# gu.get_user_ls, preprocess.filter_data and ep.tuning_test.
# NOTE(review): the unit is not visible in this notebook (presumably metres) --
# confirm against the helper modules.
radius = 100

In [None]:
# Full user list and the subset considered valid, as reported by get_users.
user_ls, valid_users = gu.get_user_ls(all_users, radius)

# Per-user accumulators, one list per (metric, round, phase) combination.
# Each pair below is two distinct empty lists.
# Note: the evaluation loop visible in this notebook only appends to the
# *_test lists; the *_tune lists are created here but left empty by it.

# Request percentage (requested trips / total trips), first and second round.
all_percentage_first_tune, all_percentage_first_test = [], []
all_percentage_second_tune, all_percentage_second_test = [], []

# Homogeneity score, first and second round.
all_homogeneity_score_first_tune, all_homogeneity_score_first_test = [], []
all_homogeneity_score_second_tune, all_homogeneity_score_second_test = [], []

# Evaluate every participant: run a tuning pass on one subset, a test pass on
# the other, and keep the test-pass scores for the summary/plots.
for a, user in enumerate(all_users):
    trips = preprocess.read_data(user)
    filter_trips = preprocess.filter_data(trips, radius)
    print('user', a + 1, 'filter_trips len', len(filter_trips))

    # Users rejected by gu.valid_user contribute nothing to the score lists.
    if not gu.valid_user(filter_trips, trips):
        continue

    tune_idx, test_idx = preprocess.split_data(filter_trips)

    # The index sets are crossed on purpose: tune_data is selected with
    # test_idx and test_data with tune_idx. Per the original author's note this
    # gives testing the bigger share of the data and tuning the smaller one.
    # (The same note describes split_data as a 5-split KFold -- confirm in
    # data_preprocessing.)
    tune_data = preprocess.get_subdata(filter_trips, test_idx)
    test_data = preprocess.get_subdata(filter_trips, tune_idx)

    # Collectors for the tuning parameters; they are filled by the tuning pass
    # and then handed unchanged to the test pass.
    coll_tune_score, coll_tradeoffs = [], []

    # --- tuning pass ---
    (
        pct_collect_first,
        homo_collect_first,
        pct_collect_second,
        homo_collect_second,
    ) = ep.init_score()
    (
        pct_collect_first,
        homo_collect_first,
        pct_collect_second,
        homo_collect_second,
        coll_tradeoffs,
        coll_tune_score,
    ) = ep.tuning_test(
        tune_data,
        radius,
        pct_collect_first,
        homo_collect_first,
        pct_collect_second,
        homo_collect_second,
        coll_tradeoffs,
        coll_tune_score,
        tune=True,
    )

    # --- test pass ---
    # The four score collectors returned by the tuning pass are overwritten
    # with a new ep.init_score() result before testing; only coll_tradeoffs
    # and coll_tune_score carry over.
    (
        pct_collect_first,
        homo_collect_first,
        pct_collect_second,
        homo_collect_second,
    ) = ep.init_score()
    (
        pct_collect_first,
        homo_collect_first,
        pct_collect_second,
        homo_collect_second,
        coll_tradeoffs,
        coll_tune_score,
    ) = ep.tuning_test(
        test_data,
        radius,
        pct_collect_first,
        homo_collect_first,
        pct_collect_second,
        homo_collect_second,
        coll_tradeoffs,
        coll_tune_score,
        test=True,
    )

    print('colle_tune_score ', coll_tune_score)
    print('coll_tradeoffs',coll_tradeoffs)

    # Record this user's test-pass results: first round ...
    all_percentage_first_test.append(pct_collect_first)
    all_homogeneity_score_first_test.append(homo_collect_first)

    # ... and second round.
    all_percentage_second_test.append(pct_collect_second)
    all_homogeneity_score_second_test.append(homo_collect_second)

# Dump the accumulated test-phase results (one entry per user that passed the
# validity check in the loop above) for both rounds.
print('all_percentage_first_test', all_percentage_first_test)
print('all_homogeneity_score_first_test', all_homogeneity_score_first_test)
print('all_percentage_second_test', all_percentage_second_test)
print('all_homogeneity_score_second_test', all_homogeneity_score_second_test)

# plot evaluation scatter for the first round
# plt.figure() opens a new figure before each call so the two rounds are kept
# apart -- this assumes plot.get_scatter draws on the current pyplot figure
# (TODO confirm in get_plot).
plt.figure()
# valid_users is passed alongside the two score lists; presumably it labels
# the points and must have one entry per appended score -- verify in get_plot.
plot.get_scatter(all_percentage_first_test, all_homogeneity_score_first_test, valid_users)

# plot evaluation scatter for the second round
plt.figure()
plot.get_scatter(all_percentage_second_test, all_homogeneity_score_second_test, valid_users)