In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy
import emission.core.get_database as edb
import logging
import folium
import math
import branca.colormap as cm
import emission.storage.timeseries.abstract_timeseries as esta
import emission.analysis.modelling.tour_model.cluster_pipeline as eamtc
import emission.analysis.modelling.tour_model.similarity as similarity
import emission.analysis.modelling.tour_model.cluster_pipeline as pipeline
import emission.analysis.modelling.tour_model.featurization as featurization
import viz_bin_cluster as viz

In [None]:
# Lower the root logger's threshold to DEBUG so the logging.debug() calls
# in the cells below pass the logger-level filter.
logger = logging.getLogger()
logger.setLevel(level=logging.DEBUG)

In [None]:
# copied from mode_purpose_share.ipynb
# For now the users are the profiles whose install_group is "participant".
# The projection keeps only user_id (and drops _id) from each profile document.
participant_uuid_obj = list(
    edb.get_profile_db().find(
        {"install_group": "participant"},
        {"user_id": 1, "_id": 0},
    )
)
all_users = [profile["user_id"] for profile in participant_uuid_obj]

In [None]:
# Report the cohort size; the value is passed as a logging argument so the
# message is formatted by the logging module itself.
logging.debug('There are %s users, we will plot the graphs for one of them', len(all_users))

# Index 1 picks the second entry of all_users as the user to plot.
user = all_users[1]

In [None]:
# we should experiment with different values here
# radius is handed to pipeline.remove_noise() in the cells below; its unit is
# not visible in this notebook (presumably metres — TODO confirm).
radius = 300

In [None]:
# Load the trips of the selected user through the cluster pipeline.
trips = pipeline.read_data(uuid=user)

## Visualizing trips from bins above cutoff

In [None]:
# remove_noise yields two values, unpacked as bin_trips and bins
# (presumably the trips in above-cutoff bins and the bins themselves —
# confirm against cluster_pipeline).
bin_trips, bins = pipeline.remove_noise(trips, radius)

In [None]:
# Log the bins; the value is supplied as a logging argument rather than
# being interpolated into the message beforehand.
logging.debug('The list of bins is %s', bins)

In [None]:
# Pass the bins and the full trip list to the viz_bin_cluster helper; as the
# last expression of the cell, its return value becomes the cell output.
viz.bins_map(bins,trips)

## Visualizing clusters above cutoff

Note: run the "Visualizing trips from bins above cutoff" section first, because the cells below reuse its `bin_trips` and `bins`.

In [None]:
# copied from the cluster pipeline code so we can work with the featurization code directly
# min clusters set to 0 instead of len(bins), to compare with the bins
feat = featurization.featurization(bin_trips)
# Named min_clusters / max_clusters so this cell does not rebind the
# builtins min() and max() in the notebook's namespace.
min_clusters = 0
# Upper bound on the cluster count: 1.5 times the number of bins, rounded up.
max_clusters = int(math.ceil(1.5 * len(bins)))
feat.cluster(min_clusters=min_clusters, max_clusters=max_clusters)

In [None]:
# Log how many clusters the featurization object reports after clustering.
logging.debug('number of clusters: %d', feat.clusters)

In [None]:
# Pass the labels, points and cluster count stored on feat to the
# viz_bin_cluster helper; its return value becomes the cell output.
viz.clusters_map(feat.labels,feat.points,feat.clusters)

## Visualizing the difference between bins and clusters above cutoff 

In [None]:
# We use user11 as an example
# (user11 is all_users[10]: the userN labels used in this notebook are 1-based).
user = all_users[10]
# Reload the trips so the following cells work on this user's data.
trips = pipeline.read_data(uuid=user)

### Trips from filtered bins

In [None]:
# Filter the current trips with the shared radius, then log the resulting bins.
# The bins are supplied as a logging argument instead of pre-formatted text.
bin_trips, bins = pipeline.remove_noise(trips, radius)
logging.debug('The list of bins is %s', bins)

In [None]:
# Which bin is shown must be chosen in viz_bin_cluster.py before running this cell.
viz.specific_bin_map(bins,trips)

### Trips in clusters

In [None]:
# Build the featurization object from the filtered trips and cluster it.
feat = featurization.featurization(bin_trips)
# Named min_clusters / max_clusters so this cell does not rebind the
# builtins min() and max() in the notebook's namespace.
min_clusters = 0
# Upper bound on the cluster count: 1.5 times the number of bins, rounded up.
max_clusters = int(math.ceil(1.5 * len(bins)))
feat.cluster(min_clusters=min_clusters, max_clusters=max_clusters)
logging.debug('number of clusters: %d', feat.clusters)

In [None]:
# Which cluster is shown must be chosen in viz_bin_cluster.py before running this cell.
viz.specific_cluster_map(feat.labels,feat.points,feat.clusters)

## Data collection

In [None]:
import pandas as pd

In [None]:
# user12 (index 11 in all_users) is not typical, so the loops below skip it

In [None]:
# Collect len(bins) for every user except index 11 (user12, skipped as atypical).
above_cutoff_bins = []
for i in range(len(all_users)):
    if i != 11:
        user = all_users[i]
        trips = pipeline.read_data(uuid=user)
        bin_trips, bins = pipeline.remove_noise(trips, radius)
        above_cutoff_bins.append(len(bins))
# Last expression: show the collected counts as the cell output.
above_cutoff_bins

In [None]:
# Collect the cluster count for every user except index 11 (user12, skipped
# as atypical), using the same clustering bounds as the single-user cells.
above_cutoff_clusters = []
for i in range(len(all_users)):
    if i == 11:
        continue
    user = all_users[i]
    trips = pipeline.read_data(uuid=user)
    bin_trips, bins = pipeline.remove_noise(trips, radius)
    feat = featurization.featurization(bin_trips)
    # Named min_clusters / max_clusters so the loop does not rebind the
    # builtins min() and max() in the notebook's namespace.
    min_clusters = 0
    # Upper bound on the cluster count: 1.5 times the number of bins, rounded up.
    max_clusters = int(math.ceil(1.5 * len(bins)))
    feat.cluster(min_clusters=min_clusters, max_clusters=max_clusters)
    above_cutoff_clusters.append(feat.clusters)
# Last expression: show the collected counts as the cell output.
above_cutoff_clusters

In [None]:
# Row labels are 1-based ("user1" is all_users[0]). Index 11 (user12) is left
# out with the same rule the data-collection loops use, so the number of labels
# matches the number of collected counts for any cohort size.
user_index = ['user%d' % (i + 1) for i in range(len(all_users)) if i != 11]

In [None]:
# Column name -> shallow copy of the per-user counts collected above.
cutoff_data = {
    'above cutoff bins': list(above_cutoff_bins),
    'above cutoff clusters': list(above_cutoff_clusters),
}

In [None]:
# One row per user label, one column per entry of cutoff_data.
df_cutoff = pd.DataFrame(cutoff_data, index=user_index)
# Last expression: render the frame as the cell output.
df_cutoff

In [None]:
# Bar chart of df_cutoff: one group of bars per row (user), one bar per column.
ax = df_cutoff.plot(kind='bar')
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_xlabel('user')
ax.set_ylabel('count above cutoff')
# The trailing semicolon keeps the notebook from echoing the return value.
ax.set_title('Above-cutoff bins vs. clusters per user');