In [None]:
import pandas as pd
import json
import plotly as py
# Initialise Plotly's notebook mode so figures can be rendered offline
py.offline.init_notebook_mode()
import plotly.graph_objects as go
import ipywidgets
import matplotlib

In [None]:
# Load the raw Homebrew analytics export. JSON text is UTF-8 by specification,
# so the encoding is pinned instead of relying on the platform default.
with open('homebrew_analytics.json', encoding='utf-8') as f:
    data = json.load(f)

# Flatten nested records into dotted column names (e.g. 'installs.30d').
# `pd.json_normalize` is the public entry point; the older
# `pd.io.json.json_normalize` alias was deprecated in pandas 1.0 and dropped
# from later releases.
df = pd.json_normalize(data)
df.head(3)

In [None]:
# Ten packages with the highest 30-day install counts, largest first.
df_top10 = df.sort_values(by='installs.30d', ascending=False).head(10)

# Single bar series: one bar per package, with the install count shown as the
# bar's text label (positioned 'outside').
fig = go.Figure()
fig.add_trace(go.Bar(
    x=df_top10['name'],
    y=df_top10['installs.30d'],
    text=df_top10['installs.30d'],
    textposition='outside',
))
fig.update_layout(title='Installs for 30 days', yaxis={'title': 'Downloads'})
fig.show()

In [None]:
# Ten packages with the most on-request installs over the last 365 days.
df_top10 = df.sort_values(by='installs_on_request.365d', ascending=False).head(10)

# Each entry is (legend name, cumulative column, previous cumulative column).
# The bars are stacked, so every segment after the first is drawn as the
# difference from the previous window; the hover text still shows the
# cumulative count for that window.
stacked_segments = [
    ('30 days', 'installs_on_request.30d', None),
    ('90 days', 'installs_on_request.90d', 'installs_on_request.30d'),
    ('365 days', 'installs_on_request.365d', 'installs_on_request.90d'),
]

fig = go.Figure(data=[
    go.Bar(
        name=legend_name,
        x=df_top10['name'],
        y=(
            df_top10[column]
            if previous_column is None
            else df_top10[column] - df_top10[previous_column]
        ),
        hovertext=df_top10[column],
        hoverinfo='text',
    )
    for legend_name, column, previous_column in stacked_segments
])

fig.update_layout(
    title='Most Popular installs on request',
    barmode='stack',
    yaxis={'title': 'Number of installs'},
)
fig.show()


In [None]:
def make_pie_chart(labels, values, title=None):
    """Build and immediately display a pie chart.

    Args:
        labels: Slice labels, passed through to ``go.Pie(labels=...)``.
        values: Slice sizes, passed through to ``go.Pie(values=...)``.
        title: Optional figure title applied via ``update_layout``.

    Returns:
        None. The figure is shown with ``show()`` and not returned.
    """
    # Each slice is annotated with its label and its percentage of the total.
    pie_trace = go.Pie(labels=labels, values=values, textinfo='label+percent')
    chart = go.Figure(data=[pie_trace])
    chart.update_layout(title=title)
    chart.show()
    
    
# Twenty packages with the most on-request installs over the last 365 days,
# drawn as one pie slice per package.
df_top = df.sort_values(by='installs_on_request.365d', ascending=False).head(20)
make_pie_chart(
    labels=df_top['name'],
    values=df_top['installs_on_request.365d'],
    title='Top 20 Requested Installs on request for the Last 365 Days',
)

In [None]:
# Keep only packages whose description mentions 'video' (case-sensitive match).
# `na=False` treats a missing description as "no match"; without it the mask
# would contain NaN and boolean indexing would raise.
df_video_packages = df[df['desc'].str.contains('video', na=False)]

make_pie_chart(
    df_video_packages['name'],
    df_video_packages['installs.365d'],
    title='Video Package Installs for the Last 365 Days'
)

In [None]:
# Packages whose share of the 365-day video installs is below this fraction
# are merged into a single 'other' slice so the pie chart stays readable.
OTHER_SHARE_THRESHOLD = .01

# Count columns that are summed into the 'other' row.
COUNT_COLUMNS = [
    'installs_on_request.30d',
    'installs_on_request.90d',
    'installs_on_request.365d',
    'installs.30d',
    'installs.90d',
    'installs.365d',
]

total_video_package_downloads = df_video_packages['installs.365d'].sum()

# Vectorised share test. If the total is zero the shares come out as NaN/inf,
# which compare as "not below the threshold", so every package is kept and
# nothing raises.
is_minor = (
    df_video_packages['installs.365d'] / total_video_package_downloads
    < OTHER_SHARE_THRESHOLD
)

# Build the 'other' row by column name instead of by position, so it does not
# depend on the column order of the source frame.
other_row = {'name': 'other', 'desc': 'n/a'}
other_row.update(df_video_packages.loc[is_minor, COUNT_COLUMNS].sum().to_dict())

# Assemble the result in one concat: 'other' first, then the remaining
# packages in their original order. This avoids row-by-row growth
# (DataFrame.append no longer exists in pandas 2.x) and chained
# `.loc[0][col] += ...` writes, which can land on a temporary copy.
# `ignore_index=True` gives a fresh 0..n-1 index, so 'other' is row 0 and no
# package label can collide with it.
df_video_simplified = pd.concat(
    [
        pd.DataFrame([other_row], columns=df_video_packages.columns),
        df_video_packages[~is_minor],
    ],
    ignore_index=True,
)

make_pie_chart(df_video_simplified['name'],
               df_video_simplified['installs.365d'],
               title='Video Package Installs for the last 365 Days')
        