# Objectiv modeling demo

## Getting started
The open model hub package can be installed with `pip install objectiv-modelhub` (this installs Bach as well).  

In [None]:
import os

# Set the OBJECTIV_VERSION_CHECK_DISABLE flag in the process environment before
# the model hub is imported (presumably the Objectiv packages read it at import
# time to skip their version check — confirm).
os.environ.update({"OBJECTIV_VERSION_CHECK_DISABLE": "true"})

In [None]:
# load the open model hub and create the instance used by the rest of the notebook
from modelhub import ModelHub

# '%Y-%m-%d' sets the default time aggregation to daily
modelhub = ModelHub(
    time_aggregation='%Y-%m-%d',
)

In [None]:
# connect to the SQL database; the settings are collected first so that the
# connection URL, the date window and the source table are easy to spot
source_settings = {
    'db_url': 'postgresql://@localhost:5432/objectiv',
    'start_date': '2022-07-01',
    'end_date': '2022-08-17',
    'identity_resolution': None,
    'table_name': 'data_clean',
}
df = modelhub.get_objectiv_dataframe(**source_settings)

## Have a look at the data

In [None]:
# preview the data, ordered by session and hit number in descending order
sort_columns = ['session_id', 'session_hit_number']
df.sort_values(sort_columns, ascending=False).head()

In [None]:
# extract some data into columns that we need later
df['application'] = df.global_contexts.gc.application
df['root_location'] = df.location_stack.ls.get_from_context_with_type_series(
    type='RootLocationContext',
    key='id',
)

# new column name -> key to read from the MarketingContext in the global contexts
marketing_columns = {
    'utm_source': 'source',
    'utm_medium': 'medium',
    'utm_campaign': 'campaign',
}
for column_name, context_key in marketing_columns.items():
    df[column_name] = df.global_contexts.gc.get_from_context_with_type_series(
        type='MarketingContext',
        key=context_key,
    )

## A first, super simple model: unique users

In [None]:
# no groupby is passed, so presumably the model hub's default (daily) time
# aggregation is used — confirm against the model hub docs
daily_users = modelhub.aggregate.unique_users(df)

# show the result as a frame, in descending index order
daily_users_frame = daily_users.to_frame()
daily_users_frame.sort_index(ascending=False).head()

## A little more advanced: retention modeling

In [None]:
# weekly retention, expressed as percentages; display=True is passed so the
# model hub also renders the matrix itself
retention_matrix = modelhub.aggregate.retention_matrix(
    df,
    time_period='weekly',
    percentage=True,
    display=True,
)
retention_matrix.head()

## What are the top used product features?

In [None]:
# run the model hub's top_product_features aggregation on the full dataset
top_product_features = modelhub.aggregate.top_product_features(df)
# show the first 10 rows of the result
top_product_features.head(10)

## How much time do users spend per main product section?

In [None]:
# model hub: session duration, monthly average per application and root location
monthly_bucket = modelhub.time_agg(df, '%Y-%m')
duration_root_month = modelhub.aggregate.session_duration(
    df,
    groupby=['application', 'root_location', monthly_bucket],
)
duration_root_month = duration_root_month.sort_index().to_frame()

# add the duration in seconds next to the interval value
# NOTE(review): total_seconds is accessed as an attribute, not called — Bach
# presumably exposes it as a property (unlike pandas); confirm
duration_root_month['session_seconds'] = duration_root_month['session_duration'].dt.total_seconds
duration_root_month.head()

## Defining a conversion

In [None]:
# slice every location stack so that it starts at this context
# NOTE(review): the column is called 'github_press' but the selector is the
# 'modeling' root location — confirm this is the intended conversion location
conversion_start_context = {'id': 'modeling', '_type': 'RootLocationContext'}
df['github_press'] = df.location_stack.json[conversion_start_context:]

# define which events to use as conversion events
modelhub.add_conversion_event(
    location_stack=df.github_press,
    name='github_press',
)

# flag the hits that match the conversion event registered above
df['is_conversion_event'] = modelhub.map.is_conversion_event(df, 'github_press')

## What is our daily conversion rate?

In [None]:
# unique users among the conversion events only
conversion_events = df[df.is_conversion_event]
conversions = modelhub.aggregate.unique_users(conversion_events)

# divide by the unique users over all events (daily_users from the earlier cell)
conversion_rate = conversions / daily_users

conversion_rate_frame = conversion_rate.to_frame()
conversion_rate_frame.sort_index(ascending=False).head()

## How fast do users convert?

In [None]:
# label sessions with a conversion
conversion_count = modelhub.map.conversions_counter(df, name='github_press')
df['converted_users'] = conversion_count >= 1

# label hits where at that point in time, there are 0 conversions in the session
conversions_so_far = modelhub.map.conversions_in_time(df, 'github_press')
df['zero_conversions_at_moment'] = conversions_so_far == 0

# keep only the hits that carry both labels
before_conversion_mask = df.converted_users & df.zero_conversions_at_moment
converted_users = df[before_conversion_mask]

# how much time do users spend before they convert? ('%Y-%W' groups per week)
weekly_bucket = modelhub.time_agg(converted_users, '%Y-%W')
time_before_conversion = modelhub.aggregate.session_duration(
    converted_users,
    groupby=weekly_bucket,
)
time_before_conversion = time_before_conversion.to_frame()
# total_seconds is accessed as an attribute, exactly as in the original code
time_before_conversion['session_seconds'] = time_before_conversion['session_duration'].dt.total_seconds

time_before_conversion.head(50)

## Discovering conversion funnels

In [None]:
# instantiate the FunnelDiscovery model from the open model hub and apply settings
funnel = modelhub.get_funnel_discovery()
max_steps = 4

# navigation paths per user, limited to max_steps steps and to paths that
# converted; the extra column holds the step number of the first conversion
df_steps_till_conversion = funnel.get_navigation_paths(
    df,
    steps=max_steps,
    by='user_id',
    add_conversion_step_column=True,
    only_converted_paths=True,
)

# keep the paths whose first conversion happens exactly on the last step
converts_on_last_step = df_steps_till_conversion['_first_conversion_step_number'] == max_steps
paths_converting_on_last_step = df_steps_till_conversion[converts_on_last_step]

funnel.plot_sankey_diagram(paths_converting_on_last_step, n_top_examples=3)

## Most used product features before conversion

In [None]:
# show the top product features prior to conversion
# (modelhub.aggregate is the same accessor as modelhub.agg, spelled as in most of this notebook)
top_features_before_conversion = modelhub.aggregate.top_product_features_before_conversion(
    df,
    name='github_press',
)

top_features_before_conversion.head()

## Which users are most likely to convert?

In [None]:
# only look at press events and count the root locations per user
press_events = df[df.event_type == 'PressEvent']
features = press_events.groupby('user_id').root_location.value_counts()

# unstack the series, to create a DataFrame with the number of clicks per root location as columns
features_unstacked = features.unstack(fill_value=0)

# target: did the user press anything in the 'tracking' root location?
y_column = 'tracking'
y = features_unstacked[y_column] > 0
# predictors: the click counts of all other root locations
X = features_unstacked.drop(columns=[y_column])

lr = modelhub.get_logistic_regression(fit_intercept=False)
lr.fit(X, y)

# attach the model output to the feature frame
features_unstacked['predicted_values'] = lr.predict_proba(X)
features_unstacked['predicted_labels'] = lr.predict(X)

# users with the highest predicted values first
prediction_columns = ['predicted_values', 'predicted_labels']
results = features_unstacked[prediction_columns]
results.sort_values('predicted_values', ascending=False).head(10)

## How many users do we have per marketing campaign?

In [None]:
# users by marketing campaign
# (modelhub.aggregate is the same accessor as modelhub.agg, spelled as in most of this notebook)
campaign_columns = ['utm_source', 'utm_medium', 'utm_campaign']
campaign_users = modelhub.aggregate.unique_users(df, groupby=campaign_columns)

# turn the grouping index into regular columns and drop rows without a utm_source
campaign_users = campaign_users.reset_index()
campaign_users = campaign_users.dropna(axis=0, how='any', subset='utm_source')

campaign_users.sort_values('utm_source', ascending=True).head()

## Get the SQL for any analysis

In [None]:
# just one analysis as an example, this works for anything you do with Objectiv
from bach import display_sql_as_markdown

# time_before_conversion is the frame built in the "How fast do users convert?" cell
display_sql_as_markdown(time_before_conversion)