# Objectiv modeling demo

## Getting started
The open model hub package can be installed with `pip install objectiv-modelhub` (this installs Bach as well).  

In [None]:
# import the required packages for this notebook
from modelhub import ModelHub
from bach import display_sql_as_markdown

In [None]:
# Create the model hub. The '%Y-%m-%d' format makes day-level (daily)
# aggregation the default time aggregation.
modelhub = ModelHub(
    time_aggregation='%Y-%m-%d',
)

In [None]:
# Load the Objectiv data into a Bach DataFrame: read table 'data_clean' from
# the local 'objectiv' Postgres database, with start_date set to 2022-05-01.
df = modelhub.get_objectiv_dataframe(
    db_url='postgresql://@localhost:5432/objectiv',
    start_date='2022-05-01',
    table_name='data_clean',
)

## Have a look at the data

In [None]:
# Preview the data, sorted by session and hit number (both descending).
(
    df
    .sort_values(['session_id', 'session_hit_number'], ascending=False)
    .head()
)

In [None]:
# Extract some data into columns that we need later:
# - 'application' is taken from the global contexts;
# - 'root_location' is the 'id' of the RootLocationContext in the location stack.
df['application'] = df.global_contexts.gc.application
df['root_location'] = df.location_stack.ls.get_from_context_with_type_series(
    type='RootLocationContext',
    key='id',
)

## A first, super simple model: unique users

In [None]:
# Run the model hub's unique_users aggregate on the data, then show the
# first 10 rows with the index sorted in descending order.
daily_users = modelhub.aggregate.unique_users(df)
(
    daily_users
    .sort_index(ascending=False)
    .head(10)
)

## A little more advanced: retention modeling

In [None]:
# Build the retention matrix per monthly period, with values as percentages.
# NOTE(review): display=True presumably also renders the matrix as a chart;
# confirm against the model hub documentation.
retention_matrix = modelhub.aggregate.retention_matrix(
    df,
    time_period='monthly',
    percentage=True,
    display=True,
)
retention_matrix.head()

## What are the top used product features?

In [None]:
# Run the model hub's top_product_features aggregate and show its first 10 rows.
top_product_features = modelhub.aggregate.top_product_features(
    df,
)
top_product_features.head(10)

## How much time do users spend per main product section?

In [None]:
# Model hub: session duration (monthly average) per application and root location.
# The month grouping comes from time_agg with the '%Y-%m' format.
month = modelhub.time_agg(df, '%Y-%m')
duration_root_month = modelhub.aggregate.session_duration(
    df,
    groupby=['application', 'root_location', month],
).sort_index()
duration_root_month.head(20)

## Defining a conversion

In [None]:
# Define the conversion for this example: every event in the 'objectiv-docs'
# application (i.e. a user reading the documentation) counts as a conversion.
# All events start out as non-conversions; matching rows are then flagged.
is_docs_event = df['application'] == 'objectiv-docs'
df['is_conversion_event'] = False
df.loc[is_docs_event, 'is_conversion_event'] = True

## Running the Funnel Discovery model

In [None]:
# Maximum number of steps to use for the navigation paths.
max_steps = 4

# Get the FunnelDiscovery model from the open model hub.
funnel = modelhub.get_funnel_discovery()

In [None]:
# Navigation paths per user with steps=max_steps. Per the keyword arguments,
# only converted paths are kept and a conversion step column is added.
df_steps_till_conversion = funnel.get_navigation_paths(
    df,
    steps=max_steps,
    by='user_id',
    add_conversion_step_column=True,
    only_converted_paths=True,
)

In [None]:
# Boolean condition selecting the paths whose first conversion step number is 4.
first_conversion_step = df_steps_till_conversion['_first_conversion_step_number']
condition_convert_on_step_4 = first_conversion_step == 4

In [None]:
# Plot a Sankey diagram of the paths that convert on step 4, with n_top_examples=5.
funnel.plot_sankey_diagram(
    df_steps_till_conversion[condition_convert_on_step_4],
    n_top_examples=5,
)