In [None]:
# BadukPop Analytics Dashboard

# setup
import time, os, calendar, sys
from urllib.parse import quote

import envkey
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
from sqlalchemy.engine import create_engine

%matplotlib inline

# allow importing modules from ../.. (resolved relative to sys.path[0])
sys.path.insert(1, os.path.join(sys.path[0], '../..'))

# register pandas datetime converters with matplotlib and set the default figure size
register_matplotlib_converters()
sns.set(rc={'figure.figsize':(11, 4)})

# Database connection settings are read from the environment.
# NOTE(review): presumably populated by the `envkey` import — confirm.
ANALYTICS_MYSQL_ENDPOINT = os.getenv('ANALYTICS_MYSQL_ENDPOINT')
ANALYTICS_MYSQL_PASSWORD = os.getenv('ANALYTICS_MYSQL_PASSWORD')

if not (ANALYTICS_MYSQL_ENDPOINT and ANALYTICS_MYSQL_PASSWORD):
    raise Exception('ANALYTICS_MYSQL credentials not found')

# Percent-encode the password before embedding it in the URL: characters such
# as '@', '/', ':' or '+' would otherwise be parsed as URL structure and the
# connection would be attempted with the wrong credentials/host.
# NOTE(review): assumes the environment variable holds the raw (not already
# percent-encoded) password — confirm before deploying.
mysql_password_escaped = quote(ANALYTICS_MYSQL_PASSWORD, safe='')

engine = create_engine(
    f'mysql://analytics1:{mysql_password_escaped}@{ANALYTICS_MYSQL_ENDPOINT}/tr_upcache',
    connect_args={'connect_timeout': 10},
)

def process_daily_time_series(df, pivot_columns = None, pivot_values = None):
    """Turn a frame with a "ts" timestamp column into a daily time series.

    The "ts" column is converted with ``pd.DatetimeIndex``, floored to the day
    and used as the index of the result. The input frame is not modified.

    Args:
        df: DataFrame containing a "ts" column.
        pivot_columns: Selects the output shape.
            * ``None`` / empty: the remaining columns are returned as-is,
              re-sampled to a gap-free daily index with missing days set to 0.
            * ``str``: the frame is pivoted so that each distinct value of that
              column becomes an output column, then re-sampled the same way.
            * ``list`` / ``tuple`` of column names: values are summed per
              (day, *pivot_columns) group; the result has a MultiIndex and is
              NOT re-sampled, so days without rows are simply absent.
        pivot_values: Column (or columns) holding the values to pivot.

    Returns:
        A new DataFrame indexed by day (index level name "ts").

    Raises:
        TypeError: if ``pivot_columns`` is neither empty, a str, a list nor a
            tuple (previously this case silently returned ``None``).
    """
    # Build the result on a fresh frame so the caller's DataFrame keeps its
    # original index and its "ts" column.
    day_index = pd.DatetimeIndex(df['ts']).floor('D')
    daily = df.drop(columns='ts')
    daily.index = day_index

    if not pivot_columns:
        # plain scalar time series
        return daily.asfreq('D').fillna(0)

    if isinstance(pivot_columns, str):
        # single pivot/groupby dimension
        daily = daily.pivot(columns=pivot_columns, values=pivot_values)
        return daily.asfreq('D').fillna(0)

    if isinstance(pivot_columns, (list, tuple)):
        # several groupby dimensions: aggregate into a (day, dim1, dim2, ...) MultiIndex.
        # The string 'sum' is used instead of np.sum, which newer pandas
        # versions warn about when passed as an aggregation callable.
        return pd.pivot_table(
            daily,
            index=[daily.index] + list(pivot_columns),
            values=pivot_values,
            aggfunc='sum',
            fill_value=0,
        )

    raise TypeError(
        'pivot_columns must be None, a column name, or a list/tuple of column names, '
        f'got {type(pivot_columns).__name__}'
    )

def process_hourly_time_series(df):
    """Turn a frame with a "ts" timestamp column into a gap-free hourly series.

    The "ts" column is converted with ``pd.DatetimeIndex``, floored to the hour
    and used as the index; the result is re-sampled to an hourly frequency and
    hours without a row are filled with 0. The input frame is not modified.

    Args:
        df: DataFrame containing a "ts" column.

    Returns:
        A new DataFrame indexed by hour, without the "ts" column.
    """
    # Lower-case 'h' is the hourly alias; upper-case 'H' is deprecated in
    # recent pandas releases.
    hour_index = pd.DatetimeIndex(df['ts']).floor('h')
    # Work on a new frame so the caller's DataFrame keeps its "ts" column.
    hourly = df.drop(columns='ts')
    hourly.index = hour_index
    return hourly.asfreq('h').fillna(0)

def process_minutely_time_series(df, pivot_columns = None, pivot_values = None):
    """Sum values per minute (and optional extra grouping columns).

    The "ts" column is converted with ``pd.DatetimeIndex``, floored to the
    minute and used as the first grouping key. The result is NOT re-sampled:
    minutes without rows are absent. The input frame is not modified.

    Args:
        df: DataFrame containing a "ts" column.
        pivot_columns: Extra grouping columns: ``None`` (group by minute only),
            a single column name, or a list/tuple of column names.
        pivot_values: Column (or columns) whose values are summed.

    Returns:
        A new DataFrame indexed by (minute, *pivot_columns).
    """
    # The default of None used to crash on `list + None`; normalise every
    # accepted form to a plain list of column names first.
    if pivot_columns is None:
        group_columns = []
    elif isinstance(pivot_columns, str):
        group_columns = [pivot_columns]
    else:
        group_columns = list(pivot_columns)

    minute_index = pd.DatetimeIndex(df['ts']).floor('min')
    # Work on a new frame so the caller's DataFrame keeps its "ts" column.
    minutely = df.drop(columns='ts')
    minutely.index = minute_index

    # The string 'sum' is used instead of np.sum, which newer pandas versions
    # warn about when passed as an aggregation callable.
    return pd.pivot_table(
        minutely,
        index=[minutely.index] + group_columns,
        values=pivot_values,
        aggfunc='sum',
        fill_value=0,
    )

# Frame platforms and country tiers that the charts iterate over.
FRAME_PLATFORM_LIST = ('fb','kg','k2','bh')
COUNTRY_TIER_LIST=('1','2','3','4')

# Sample the clock once so the stored timestamp and the printed banner agree.
time_now = int(time.time())
# Plain %-formatting: the previous f-string prefix had no placeholders to fill.
print("Dashboard updated %s" % time.strftime('%a, %d %b %Y at %H:%M:%S UTC', time.gmtime(time_now)))

In [None]:
# DAU per day, frame platform and country tier over the last DAU_DAYS days
DAU_DAYS = 60

# One row per (day, frame_platform, country_tier); country_tier is cast to a
# string in SQL.
dau_query = f'''
select from_unixtime(day) as ts,
       frame_platform,
       CONVERT(country_tier, char) AS country_tier,
       sum(dau) as dau
from tr_sessions_daily_summary
where day >= UNIX_TIMESTAMP() - {DAU_DAYS}*86400
group by 1, 2, 3 order by 1 asc, 2 asc, 3 asc'''

dau = process_daily_time_series(
    pd.read_sql(dau_query, engine),
    pivot_columns=['frame_platform', 'country_tier'],
    pivot_values='dau',
)

In [None]:
# Show the aggregated DAU table using the notebook's rich DataFrame display
# (a bare last expression) instead of a plain-text print.
dau

In [None]:
# DAU plots: total, stacked by frame platform, and stacked by country tier
def _plot_stacked_daily_bars(axis, frame, categories, title, y_max, **legend_kwargs):
    """Draw one stacked bar series per entry of ``categories`` on ``axis``.

    Args:
        axis: matplotlib Axes to draw on.
        frame: DataFrame indexed by day with one column per category.
        categories: Column names to plot, bottom-to-top; also the legend labels.
        title: Axes title.
        y_max: Upper y-limit, shared between charts so they are comparable.
        **legend_kwargs: Forwarded to ``axis.legend``.
    """
    # A category with no rows in the window is absent from the pivoted frame.
    # Add it as an all-zero column so the lookup cannot raise KeyError and the
    # legend handles stay aligned with ``categories``. Columns not listed in
    # ``categories`` are not plotted.
    frame = frame.reindex(columns=list(categories), fill_value=0)

    # Running top of the stack, so each series sits on the ones before it.
    stack_top = np.zeros(len(frame))
    bar_containers = []
    for category in categories:
        heights = frame[category].to_numpy()
        bar_containers.append(axis.bar(frame.index, heights, bottom=stack_top))
        stack_top = stack_top + heights

    axis.legend(bar_containers, categories, **legend_kwargs)
    axis.set_title(title)
    axis.set_ylim([0, y_max])
    axis.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))


dau_by_none = dau.groupby(level=('ts',)).sum()
# 20% headroom above the tallest total bar
dau_y_max = max(dau_by_none['dau']) * 1.2

fig, (dau_axis, dau_by_platform_axis, dau_by_country_axis) = plt.subplots(1, 3, figsize = (15, 2*6))

fig.autofmt_xdate() # put the labels at 45deg since they tend to be too long

dau_axis.bar(dau_by_none.index, dau_by_none['dau'])
dau_axis.set_title(f'DAU (last {DAU_DAYS} days)')
dau_axis.set_ylim([0, dau_y_max]);
dau_axis.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'));

dau_by_platform = dau.groupby(level=('ts','frame_platform')).sum().unstack(fill_value=0).droplevel(0, axis=1)
_plot_stacked_daily_bars(
    dau_by_platform_axis,
    dau_by_platform,
    FRAME_PLATFORM_LIST,
    f'DAU by Frame Platform (last {DAU_DAYS} days)',
    dau_y_max,
    loc = 'upper left',
)

dau_by_country = dau.groupby(level=('ts','country_tier')).sum().unstack(fill_value=0).droplevel(0, axis=1)
_plot_stacked_daily_bars(
    dau_by_country_axis,
    dau_by_country,
    COUNTRY_TIER_LIST,
    f'DAU by Country Tier (last {DAU_DAYS} days)',
    dau_y_max,
    loc = 'upper left',
    bbox_to_anchor=(1, 1),
)