# OCTOPUS METRIC

Consists of:
1. Monthly Revenue
2. Monthly Active Users
3. Monthly Order Count
4. Average Revenue
5. New Customer Ratio

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (20, 12)

from collections import Counter
from PIL import Image

from termcolor import colored
from IPython.display import display, HTML

import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyoff
import plotly.io as pio
pyoff.init_notebook_mode(connected=True)

In [2]:
# Read the data

%store -r df
%store -r df_completed

## MONTHLY REVENUE

In [3]:
# Assumption : Total Revenue Will be Approximated by Total Quantity, Total Amount and Total Weight

df_revenue = df_completed.groupby('month_year')[['total_amount','total_weight']].sum().reset_index()
df_revenue.head()

Unnamed: 0,month_year,total_amount,total_weight
0,2020-09,895.0,11603.53
1,2020-10,830.0,23551.6
2,2020-11,5385.0,81533.508
3,2020-12,6585.0,30487.397
4,2021-01,2232.0,16047.497


In [4]:
def monthly_revenue(data,var,title):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data['month_year'], y=data[var], mode='lines+markers', name=var))
    fig.update_yaxes(title_text=var)
    fig.update_layout(title=title)
    fig.show()

In [5]:
#monthly_revenue(df_revenue,'total_quantity', 'Monthly Revenue Based on Total Quantity')

In [6]:
monthly_revenue(df_revenue,'total_amount', 'Monthly Revenue Based on Total Amount')

In [7]:
monthly_revenue(df_revenue,'total_weight', 'Monthly Revenue Based on Total Weight')

From monthly revenue:
1. Total monthly revenue (probing by total amount) is quite fluctuative. There were sharp decline of total quantity by July 2021 and May 2022. But still, graph above depicts the upwards trend of total quantity in period of July of October 2022. The total amount declined in November (1/3 October.)
2. This clearly shows our total monthly revenue (probing by total amount and total weight) is exponentially growing on October 2022 onwards due to fest event that produced immense amount waste in October. However, although there is Brightspot event in early November, the total amount and weight still dropped. The cause might the aim of Brighstpot = gain new user.

In [8]:
# Monthly Revenue Growth

def growth(data):
    data['amount_growth'] = data['total_amount'].pct_change().mul(100)
    data['weight_growth'] = data['total_weight'].pct_change().mul(100)
    return data

df_growth = growth(df_revenue)
df_growth.head()

Unnamed: 0,month_year,total_amount,total_weight,amount_growth,weight_growth
0,2020-09,895.0,11603.53,,
1,2020-10,830.0,23551.6,-7.26257,102.969269
2,2020-11,5385.0,81533.508,548.795181,246.190951
3,2020-12,6585.0,30487.397,22.284123,-62.607525
4,2021-01,2232.0,16047.497,-66.104784,-47.363506


In [9]:
monthly_revenue(df_growth,'amount_growth', 'Monthly Revenue Growth Based on Total Amount (%)')

In [10]:
monthly_revenue(df_growth,'weight_growth', 'Monthly Revenue Growth Based on Total Amount (%)')

From monthly growth we get:
1. In October 2022, everything seems good, we saw 84% growth in total quantity. Moreover, the total amount and weight order also show 10 and 7.6 times growth respectively. (november is still incomplete).
2. Significant growth in October might due to Fest event.
3. The growth in November showed negative value.

## MONTHLY ACTIVE USERS

In [11]:
# Monthly active customer is the number of unique customers who made at least one purchase in a given month

monthly_active = df_completed.groupby('month_year')['user_id'].nunique().reset_index()

fig = px.bar(monthly_active, x='month_year', y='user_id', title='Monthly Active Customer')
fig.update_xaxes(title_text='Month')
fig.update_yaxes(title_text='Number of Active Customer')
fig.show()

* In October 2022, total active users increase from 877 to 2,495 users. 
* In other words there were 184% increase of total active customers on October, compare to September.
* This value declined in November.

## MONTHLY ORDER COUNT

In [12]:
monthly_order = df_completed.groupby('month_year')['order_id'].nunique().reset_index()

fig = px.bar(monthly_order, x='month_year', y='order_id', title='Monthly Order')
fig.update_xaxes(title_text='Month')
fig.update_yaxes(title_text='Number of Order')
fig.show()

* As expected, total order in October 2022 increase by 266% (compare to previous month.) 
* In November, there is 48% declined in total complete order created compare to October 2022.

## MONTHLY AVERAGE REVENUE

Monthly Average Revenue divided into:
1. Monthly Average Revenue per Order
2. Monthly Average Revenue per User

In [13]:
df_revenue = df_revenue.merge(monthly_active, on='month_year').merge(monthly_order, on='month_year')
df_revenue.head()

Unnamed: 0,month_year,total_amount,total_weight,amount_growth,weight_growth,user_id,order_id
0,2020-09,895.0,11603.53,,,5,7
1,2020-10,830.0,23551.6,-7.26257,102.969269,6,8
2,2020-11,5385.0,81533.508,548.795181,246.190951,19,33
3,2020-12,6585.0,30487.397,22.284123,-62.607525,22,37
4,2021-01,2232.0,16047.497,-66.104784,-47.363506,22,27


In [14]:
def avg_revenue(data):
    data['avg_amount_per_customer'] = data['total_amount']/data['user_id']
    data['avg_weight_per_customer'] = data['total_weight']/data['user_id']
    data['avg_amount_per_order'] = data['total_amount']/data['order_id']
    data['avg_weight_per_order'] = data['total_weight']/data['order_id']
    return data

df_revenue = avg_revenue(df_revenue)

In [15]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_revenue['month_year'], y=df_revenue['avg_amount_per_customer'], mode='lines+markers', name='Average Amount per Customer'))
fig.add_trace(go.Scatter(x=df_revenue['month_year'], y=df_revenue['avg_weight_per_customer'], mode='lines+markers', name='Average Weight per Customer'))
fig.update_layout(title='Average Quantity, Amount, and Weight per User')
fig.show()

In [16]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_revenue['month_year'], y=df_revenue['avg_amount_per_order'], mode='lines+markers', name='Average Amount per Order'))
fig.add_trace(go.Scatter(x=df_revenue['month_year'], y=df_revenue['avg_weight_per_order'], mode='lines+markers', name='Average Weight per Order'))
fig.update_layout(title='Average Quantity, Amount, and Weight per Order')
fig.show()

## MONTHLY NEW ACTIVE USER

In [17]:
# Assumption : new active user is those who make their first purchase in given month

df_min_order = df.groupby('user_id')['month_year'].min().reset_index()
df_min_order.columns = ['user_id','first_order_month']

# Merge to complete dataset

df_completed = df_completed.merge(df_min_order, on='user_id')

# Create new column to indicate whether the user is new or not

df_completed['user_type'] = np.where(df_completed['month_year'] > df_completed['first_order_month'],'Existing','New')
df_completed.head()

Unnamed: 0,order_id,user_id,timestamp,total_quantity,total_amount,total_weight,order_status,province,city,premium_status,day,day_name,month,year,month_year,first_order_month,user_type
0,f3e8a25c-c040-43af-94f4-0cd847d0b1e0,64836,2021-05-23 11:59:55.963158,10.0,25.0,2500.0,completed,Jawa Barat,Bandung,premium,23,Sunday,5,2021,2021-05,2021-05,New
1,86d49f24-8d60-48d5-835e-2904d666380b,64836,2021-06-06 11:13:15.346324,10.0,25.0,2500.0,completed,Jawa Barat,Bandung,premium,6,Sunday,6,2021,2021-06,2021-05,Existing
2,4e3e13ff-82f6-49f1-907a-33e24b752a34,64836,2021-06-15 13:22:06.002626,12.0,20.833333,3000.0,completed,Jawa Barat,Bandung,premium,15,Tuesday,6,2021,2021-06,2021-05,Existing
3,e2c7b716-2ad9-49ca-be86-1d96ff119a2c,64836,2021-06-01 12:30:47.031117,30.0,8.333333,7500.0,completed,Jawa Barat,Bandung,premium,1,Tuesday,6,2021,2021-06,2021-05,Existing
4,0f8838d9-29b0-4aa2-9b9f-e9419e944bbe,59156,2021-07-25 14:17:05.284055,103.0,44.0,2165.78,completed,Jawa Barat,Bandung,not premium,25,Sunday,7,2021,2021-07,2021-07,New


In [18]:
# Total Revenue Based on User Type

df_revenue_user = df_completed.groupby(['month_year','user_type'])[['total_quantity','total_amount','total_weight']].sum().reset_index()
df_revenue_user.head()

Unnamed: 0,month_year,user_type,total_quantity,total_amount,total_weight
0,2020-09,New,769.0,895.0,11603.53
1,2020-10,Existing,428.0,515.0,6476.423
2,2020-10,New,1127.0,315.0,17075.177
3,2020-11,Existing,3229.0,2185.0,49322.802
4,2020-11,New,2106.0,3200.0,32210.706


In [19]:
# Monthly Revenue Based on User Type

def monthly_revenue_user(data, var):
    fig = px.line(data, x='month_year', y=var, color='user_type', title='Monthly Revenue Based on User Type: '+var)
    fig.update_xaxes(title_text='Month')
    fig.update_yaxes(title_text=var)
    fig.show()

In [20]:
monthly_revenue_user(df_revenue_user,'total_amount')

In [21]:
monthly_revenue_user(df_revenue_user,'total_weight')

In [22]:
#  New User Ratio

df_new_user_ratio_n = df_completed[df_completed['user_type']=='New'].groupby('month_year')['user_id'].nunique().reset_index()
df_new_user_ratio_n.columns = ['month_year','new_user']
df_new_user_ratio_e = df_completed[df_completed['user_type']=='Existing'].groupby('month_year')['user_id'].nunique().reset_index()
df_new_user_ratio_e.columns = ['month_year','existing_user']
df_new_user_ratio = df_new_user_ratio_n.merge(df_new_user_ratio_e, on='month_year')
df_new_user_ratio['new_user_ratio'] = round(df_new_user_ratio['new_user']/(df_new_user_ratio['existing_user']+df_new_user_ratio['new_user'])*100,2)
df_new_user_ratio.head()

Unnamed: 0,month_year,new_user,existing_user,new_user_ratio
0,2020-10,3,3,50.0
1,2020-11,14,5,73.68
2,2020-12,15,7,68.18
3,2021-01,11,11,50.0
4,2021-02,175,20,89.74


In [23]:
# Print New User Ratio in 2022

df_new_user_ratio.tail(12)

Unnamed: 0,month_year,new_user,existing_user,new_user_ratio
15,2022-01,81,648,11.11
16,2022-02,69,604,10.25
17,2022-03,125,657,15.98
18,2022-04,104,690,13.1
19,2022-05,88,521,14.45
20,2022-06,179,392,31.35
21,2022-07,529,380,58.2
22,2022-08,574,616,48.24
23,2022-09,600,766,43.92
24,2022-10,531,954,35.76


In [24]:
fig = px.bar(df_new_user_ratio, x='month_year', y='new_user_ratio', title='New User Ratio')
fig.update_xaxes(title_text='Month')
fig.update_yaxes(title_text='New User Ratio')
fig.show()

In [25]:
%store df_completed
%store df_revenue
%store df_revenue_user
%store df_new_user_ratio
%store df

Stored 'df_completed' (DataFrame)
Stored 'df_revenue' (DataFrame)
Stored 'df_revenue_user' (DataFrame)
Stored 'df_new_user_ratio' (DataFrame)
Stored 'df' (DataFrame)
