## Dashboard - a look into the data
This notebook takes a closer look at the data. We use Dash to build interactive graphs that let the user explore different aspects of the dataset.

In [1]:
import pandas as pd
from tinydb import TinyDB, Query
from jupyter_plotly_dash import JupyterDash
from plotly import graph_objects as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px
import numpy as np

In [2]:
## We brought all the necessary data transformations from the Analysis script

# A forward slash works on every platform; the previous 'data\hogwarts.json' only
# resolved on Windows and depended on '\h' not being a recognised escape sequence.
db = TinyDB('data/hogwarts.json')

# Close the database even if reading one of the tables fails.
try:
    cleaned_users = db.table('cleaned_users')
    cleaned_actions = db.table('cleaned_actions')

    users_df = pd.DataFrame(cleaned_users.all())
    actions_df = pd.DataFrame(cleaned_actions.all())
finally:
    db.close()


combined_users_actions = pd.merge(
    users_df, actions_df, on="user_id", how="outer", validate="1:m", indicator=True)


## Due to a bug there is a user without any actions. We drop the user, and then the _merge column.
## The explicit copy gives us an independent frame, so the column assignments below
## never write into a view of the merge result.
combined_users_actions = (
    combined_users_actions[combined_users_actions['_merge'] == 'both']
    .drop(columns='_merge')
    .copy()
)


## We get the old action_timestamp and add unique integers - the index -> new action_timestamp with no duplicates.
## The index labels are kept from the merge (no reset), zero-padded to six digits.
index_suffix = combined_users_actions.index.to_series().astype(str).str.zfill(6)
combined_users_actions['action_timestamp'] = \
    combined_users_actions['action_timestamp'] + '.' + index_suffix


## Like before we cast the string representation of date into datetime for the advanced capabilities of the type.
## Parsing stays element-wise, as in the original, so each value is interpreted on its own.
combined_users_actions['account_ts_to_dt'] = \
    combined_users_actions['account_creation_timestamp'].apply(pd.to_datetime)
combined_users_actions['action_ts_to_dt'] = \
    combined_users_actions['action_timestamp'].apply(pd.to_datetime)


## We'll start working with the time difference between registration and action - we start with each action separately.
## The datetime columns computed above are reused instead of parsing both timestamp columns a second time;
## the result is the elapsed time in hours, rounded to a whole number.
time_since_registration = \
    combined_users_actions['action_ts_to_dt'] - combined_users_actions['account_ts_to_dt']
combined_users_actions['time_diff_in_hour'] = \
    (time_since_registration.dt.total_seconds() / (60 * 60)).round(0)

df = combined_users_actions

In [4]:



app = JupyterDash('Hogwarts dashboard2')

available_indicators = list(df.house.unique())

# `time_diff_in_hour` is expressed in hours, so every label derived from it uses this
# suffix (the slider marks and legend entries used to say "min").
TIME_UNIT = 'h'

# Upper end of the slider; computed once and reused for the marks.
max_time_diff = df.time_diff_in_hour.max()


app.layout = html.Div(children=[
    html.H1(children='Actions as a percentage of total actions'),

    html.Div([dcc.Markdown('''
        
        We present the count for all actions, as a percentage.
        
        We use two filters:
        - 1. The four houses students belong to
        - 2. The time range when actions take place: how long ago since registration?
   ''' )]),

    dcc.Dropdown(
        id='dropdown',
        options=[{'label': i, 'value': i} for i in available_indicators],
        value='Gryffindor'
    ),

    dcc.Graph(id='hogwarts-graph'),

    dcc.RangeSlider(
        id='time-range-slider',
        min=df.time_diff_in_hour.min(),
        max=max_time_diff,
        step=0.25,
        value=[140, 1060],
        # One mark every 120 hours, starting at 0.
        marks={i: str(i) + TIME_UNIT for i in range(0, int(max_time_diff), 120)},
        allowCross=False
    )
])


def _action_shares(actions):
    """Count rows per action and add each action's percentage of the total.

    Returns a frame indexed by `action` with the non-null counts of `user_id` and
    `action_timestamp`, plus a `percent` column computed from the `user_id` counts.
    """
    counts = actions.groupby('action')[['user_id', 'action_timestamp']].count()
    counts['percent'] = (counts['user_id'] / counts['user_id'].sum()) * 100
    return counts


@app.callback(
    Output('hogwarts-graph', 'figure'),
    [Input('dropdown', 'value'),
     Input('time-range-slider', 'value')]
)
def update_figure(selected_house, time_range):
    """Rebuild the bar chart for the selected house and slider range.

    Two groups of actions are compared for the chosen house: those whose
    `time_diff_in_hour` is strictly below the slider's left handle and those
    strictly above its right handle. Each group is shown as the percentage of
    its own total, per action.

    Parameters
    ----------
    selected_house : value of the 'dropdown' component.
    time_range : two-element sequence from 'time-range-slider' (left, right).

    Returns
    -------
    plotly.graph_objects.Figure with one bar trace per group.
    """
    left_limit, right_limit = time_range[0], time_range[1]

    # Filter by house once; both groups are slices of the same subset.
    house_actions = df[df['house'] == selected_house]
    hours = house_actions['time_diff_in_hour']

    less_than = _action_shares(house_actions[hours < left_limit])
    more_than = _action_shares(house_actions[hours > right_limit])

    fig = go.Figure(
        data=[
            go.Bar(
                name="< {} {}".format(left_limit, TIME_UNIT),
                x=less_than.index,
                y=less_than['percent'],
                offsetgroup=0,
            ),
            go.Bar(
                name="> {} {}".format(right_limit, TIME_UNIT),
                x=more_than.index,
                y=more_than['percent'],
                offsetgroup=1,
            ),
        ],
        layout=go.Layout(
            title="Actions as percentage of total actions, for two groups. House is {}. Num of actions A:{}, B:{} ".format(
                selected_house,
                less_than.user_id.sum(),
                more_than.user_id.sum()
            ),
            yaxis_title="Actions in percentage"
        )
    )
    fig.update_xaxes(
        tickangle=45,
        title_text="Actions")

    return fig


app