### imports & load data

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from uuid import UUID

from sklearn import tree
import matplotlib.pyplot as plt

import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.decorations.trip_queries as esdtq
import emission.core.get_database as edb
import models

import logging
# logging.basicConfig()
# logger = logging.getLogger()
# logger.setLevel(logging.DEBUG)


In [None]:
# To reproduce the outputs described in this notebook, fill in the unique
# tokens (and the UUID string) of the users named in the trailing comments.
email0 = "replace this"  # shankari
email1 = "replace this"  # tom

# Look up each user's uuid from the first matching entry in the uuid db.
user0_matches = list(edb.get_uuid_db().find({"user_email": email0}))
user0 = user0_matches[0]['uuid']
user1_matches = list(edb.get_uuid_db().find({"user_email": email1}))
user1 = user1_matches[0]['uuid']
user2 = UUID('replace this')  # hannah

# Per-user dataframes, keyed by user uuid:
#   confirmed_trip_df_map        - all "analysis/confirmed_trip" entries
#   labeled_trip_df_map          - output of filter_labeled_trips on the above
#   expanded_labeled_trip_df_map - expand_userinputs applied to the labeled trips
#   expanded_all_trip_df_map     - expand_userinputs applied to all confirmed trips
confirmed_trip_df_map, labeled_trip_df_map = {}, {}
expanded_labeled_trip_df_map, expanded_all_trip_df_map = {}, {}

all_users = esta.TimeSeries.get_uuid_list()
for u in all_users:
    ts = esta.TimeSeries.get_time_series(u)
    ct_df = ts.get_data_df("analysis/confirmed_trip")
    confirmed_trip_df_map[u] = ct_df

    labeled_df = esdtq.filter_labeled_trips(ct_df)
    labeled_trip_df_map[u] = labeled_df

    expanded_labeled_trip_df_map[u] = esdtq.expand_userinputs(labeled_df)
    expanded_all_trip_df_map[u] = esdtq.expand_userinputs(ct_df)

### visualize a decision tree in the random forest from ForestClassifier

Let's look at user2's results

In [None]:
# Training data: user2's labeled trips after esdtq.expand_userinputs.
train_df = expanded_labeled_trip_df_map[user2]
# Forest-based classifier from the local `models` module, constructed with
# loc_feature='cluster' and fit on that single user's trips.
cluster_forest_model = models.ForestClassifier(loc_feature='cluster')
cluster_forest_model.fit(train_df)

View one decision tree (the first estimator) from the mode classifier:

In [None]:
# Switch matplotlib to its 'default' style for the figures drawn below.
plt.style.use('default')

In [None]:
# Draw one tree (the first estimator) of the ForestClassifier's mode predictor
# on a single large, high-resolution axes.
fig, axs = plt.subplots(1, 1, dpi=1000, figsize=(20, 10))

# Column labels passed to plot_tree: base features followed by the one-hot
# columns of the cluster and purpose encoders.
mode_feature_names = (
    cluster_forest_model.base_features +
    cluster_forest_model.cluster_enc.onehot_encoding_cols +
    cluster_forest_model.purpose_enc.onehot_encoding_cols
)
# plot_tree expects class names in ascending order of the fitted classes.
# Series.unique() returns labels in order of first appearance, which can attach
# the wrong label to a node, so take the names from the fitted forest's classes_.
mode_class_names = [
    str(label) for label in cluster_forest_model.mode_predictor.classes_
]

for decision_tree in cluster_forest_model.mode_predictor.estimators_[0:1]:
    tree.plot_tree(
        decision_tree,
        feature_names=mode_feature_names,
        class_names=mode_class_names,
        filled=True,
        ax=axs,
        # fontsize=5,  # uncomment to force a fixed font size
    )


View one decision tree (the third estimator) from the purpose classifier:

In [None]:
# Draw one tree (the third estimator) of the ForestClassifier's purpose
# predictor on a single large, high-resolution axes.
fig, axs = plt.subplots(1, 1, dpi=1000, figsize=(20, 10))

# Column labels passed to plot_tree: base features followed by the one-hot
# columns of the cluster encoder.
purpose_feature_names = (
    cluster_forest_model.base_features +
    cluster_forest_model.cluster_enc.onehot_encoding_cols
)
# plot_tree expects class names in ascending order of the fitted classes.
# Series.unique() returns labels in order of first appearance, which can attach
# the wrong label to a node, so take the names from the fitted forest's classes_.
purpose_class_names = [
    str(label) for label in cluster_forest_model.purpose_predictor.classes_
]

for decision_tree in cluster_forest_model.purpose_predictor.estimators_[2:3]:
    tree.plot_tree(
        decision_tree,
        feature_names=purpose_feature_names,
        class_names=purpose_class_names,
        filled=True,
        ax=axs,
        # fontsize=5,  # uncomment to force a fixed font size
    )


View one decision tree (the third estimator) from the replaced-mode classifier:

In [None]:
# Draw one tree (the third estimator) of the ForestClassifier's replaced-mode
# predictor on a single large, high-resolution axes.
fig, axs = plt.subplots(1, 1, dpi=1000, figsize=(20, 10))

# Column labels passed to plot_tree: base features followed by the one-hot
# columns of the cluster, purpose and mode encoders.
replaced_feature_names = (
    cluster_forest_model.base_features +
    cluster_forest_model.cluster_enc.onehot_encoding_cols +
    cluster_forest_model.purpose_enc.onehot_encoding_cols +
    cluster_forest_model.mode_enc.onehot_encoding_cols
)
# plot_tree expects class names in ascending order of the fitted classes.
# Series.unique() returns labels in order of first appearance, which can attach
# the wrong label to a node, so take the names from the fitted forest's classes_.
replaced_class_names = [
    str(label) for label in cluster_forest_model.replaced_predictor.classes_
]

for decision_tree in cluster_forest_model.replaced_predictor.estimators_[2:3]:
    tree.plot_tree(
        decision_tree,
        feature_names=replaced_feature_names,
        class_names=replaced_class_names,
        filled=True,
        ax=axs,
        # fontsize=5,  # uncomment to force a fixed font size
    )


### visualize a decision tree in the random forest from BasicForestPredictor

In [None]:
# Build a BasicForestPredictor from the local `models` module with its default
# arguments and fit it on the same train_df used for the ForestClassifier.
basic_forest_model = models.BasicForestPredictor()
basic_forest_model.fit(train_df)

View one decision tree (the first estimator) from the mode classifier:

In [None]:
# Draw one tree (the first estimator) of the BasicForestPredictor's mode
# predictor on a single large, high-resolution axes.
fig, axs = plt.subplots(1, 1, dpi=1000, figsize=(20, 10))

# Column labels passed to plot_tree: base features followed by the one-hot
# columns of the purpose encoder.
mode_feature_names = (
    basic_forest_model.base_features +
    basic_forest_model.purpose_enc.onehot_encoding_cols
)
# plot_tree expects class names in ascending order of the fitted classes.
# Series.unique() returns labels in order of first appearance, which can attach
# the wrong label to a node, so take the names from the fitted forest's classes_.
mode_class_names = [
    str(label) for label in basic_forest_model.mode_predictor.classes_
]

for decision_tree in basic_forest_model.mode_predictor.estimators_[0:1]:
    tree.plot_tree(
        decision_tree,
        feature_names=mode_feature_names,
        class_names=mode_class_names,
        filled=True,
        ax=axs,
        # fontsize=5,  # uncomment to force a fixed font size
    )


View one decision tree (the third estimator) from the purpose classifier:

In [None]:
# Draw one tree (the third estimator) of the BasicForestPredictor's purpose
# predictor on a single large, high-resolution axes.
fig, axs = plt.subplots(1, 1, dpi=1000, figsize=(20, 10))

# plot_tree expects class names in ascending order of the fitted classes.
# Series.unique() returns labels in order of first appearance, which can attach
# the wrong label to a node, so take the names from the fitted forest's classes_.
purpose_class_names = [
    str(label) for label in basic_forest_model.purpose_predictor.classes_
]

for decision_tree in basic_forest_model.purpose_predictor.estimators_[2:3]:
    tree.plot_tree(
        decision_tree,
        # This predictor is labelled with the base features only.
        feature_names=basic_forest_model.base_features,
        class_names=purpose_class_names,
        filled=True,
        ax=axs,
        # fontsize=5,  # uncomment to force a fixed font size
    )


View one decision tree (the third estimator) from the replaced-mode classifier:

In [None]:
# Draw one tree (the third estimator) of the BasicForestPredictor's
# replaced-mode predictor on a single large, high-resolution axes.
fig, axs = plt.subplots(1, 1, dpi=1000, figsize=(20, 10))

# Column labels passed to plot_tree: base features followed by the one-hot
# columns of the purpose and mode encoders.
replaced_feature_names = (
    basic_forest_model.base_features +
    basic_forest_model.purpose_enc.onehot_encoding_cols +
    basic_forest_model.mode_enc.onehot_encoding_cols
)
# plot_tree expects class names in ascending order of the fitted classes.
# Series.unique() returns labels in order of first appearance, which can attach
# the wrong label to a node, so take the names from the fitted forest's classes_.
replaced_class_names = [
    str(label) for label in basic_forest_model.replaced_predictor.classes_
]

for decision_tree in basic_forest_model.replaced_predictor.estimators_[2:3]:
    tree.plot_tree(
        decision_tree,
        feature_names=replaced_feature_names,
        class_names=replaced_class_names,
        filled=True,
        ax=axs,
        # fontsize=5,  # uncomment to force a fixed font size
    )
