# Set Up

In [1]:
import pandas as pd
import numpy as np
import socket
from dns import reversename, resolver

import pygeoip
import pycountry_convert as pycountry

import plotly
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.io as pio

# Enable inline plotly rendering in the notebook. With connected=True the
# plotly.js bundle is loaded from the online CDN (per plotly's documentation).
init_notebook_mode(connected=True)

# DataFrame display: show every column, truncate cell text at 50 characters.
for _option, _value in (
    ('display.max_columns', None),
    ('display.max_colwidth', 50),
):
    pd.set_option(_option, _value)

# Loading Data

In [2]:
# Load the two JSON request logs into DataFrames.
#
# The recorded run of this cell failed with "ValueError: Expected object or
# value", which does not say which file was at fault. Verify that both files
# exist first so a missing/misplaced log is reported by name.
# NOTE(review): a file that exists but is empty or not valid JSON will still
# surface as a ValueError from pandas -- confirm the actual cause of the failure.
import os

DOWNLOADS_JSON = 'data/download_requests.json'
TOPICS_JSON = 'data/topic_requests.json'

for _json_path in (DOWNLOADS_JSON, TOPICS_JSON):
    if not os.path.isfile(_json_path):
        raise FileNotFoundError(
            'Request log not found: {!r} (working directory: {!r})'.format(
                _json_path, os.getcwd()))

downloads_req_df = pd.read_json(DOWNLOADS_JSON)
topics_req_df = pd.read_json(TOPICS_JSON)

ValueError: Expected object or value

In [None]:
# Preview the first five topic-request records.
topics_req_df.head(n=5)

In [None]:
# Preview the first five download-request records.
downloads_req_df.head(n=5)

# Analysis of CSO downloads

In [None]:
# Number of distinct download ids per 'version' value.
versions = downloads_req_df.groupby('version')[['id']].nunique()

# Donut chart (pie with a 40% hole): one slice per version.
pie = go.Pie(
    labels=versions.index,
    values=versions['id'],
    name='Version',
    hole=.4,
    textposition='inside',
    hoverinfo='label+value+name',
)
donut = go.Layout(title='CSO downloads version fragmentation')

fig = go.Figure(data=[pie], layout=donut)
iplot(fig)

In [None]:
# Number of distinct download ids per 'format' value.
formats = downloads_req_df.groupby('format')[['id']].nunique()

# Donut chart (pie with a 40% hole): one slice per format.
pie = go.Pie(
    labels=formats.index,
    values=formats['id'],
    name='Format',
    hole=.4,
    textposition='inside',
    hoverinfo='label+value+name',
)
donut = go.Layout(title='CSO downloads format fragmentation')

fig = go.Figure(data=[pie], layout=donut)
iplot(fig)

In [None]:
# Download requests per country: count of non-null ids in each group.
download_data = (
    downloads_req_df
    .groupby(['country'])['id']
    .count()
    .reset_index()
)

# Single choropleth trace, matched on country names and coloured by the
# per-country download count.
downloads_map = [{
    'type': 'choropleth',
    'locationmode': 'country names',
    'locations': download_data['country'],
    'z': download_data['id'],
    'text': download_data['id'],
    'autocolorscale': True,
    'reversescale': False,
    'marker': {
        'line': {
            'color': 'rgb(180,180,180)',
            'width': 0.5,
        },
    },
    'colorbar': {'title': '#downloads'},
}]

layout = {
    'title': 'Download distribution',
    'geo': {
        'showframe': False,
        'showcoastlines': True,
        'projection': {'type': 'equirectangular'},
    },
}

# The figure is a plain dict; validate=False is passed through to iplot as
# in the other map cell.
fig = {'data': downloads_map, 'layout': layout}
iplot(fig, validate=False)

In [None]:
# Ten countries with the highest download counts, largest first.
(
    download_data
    .sort_values(by='id', ascending=False)
    .head(n=10)
)

# Analysis of CSO topic requests

In [None]:
# Number of distinct topic-request ids per 'format' value.
formats = topics_req_df.groupby('format')[['id']].nunique()

# Donut chart (pie with a 40% hole): one slice per format.
pie = go.Pie(
    labels=formats.index,
    values=formats['id'],
    name='Format',
    hole=.4,
    textposition='inside',
    hoverinfo='label+value+name',
)
donut = go.Layout(title='CSO topic requests format fragmentation')

fig = go.Figure(data=[pie], layout=donut)
iplot(fig)

In [None]:
# Number of distinct topic-request ids per value of the 'exists' column
# (the bar-chart cells compare this column against 1 and 0).
hits = topics_req_df.groupby('exists')[['id']].nunique()

# Donut chart (pie with a 40% hole): one slice per 'exists' value.
pie = go.Pie(
    labels=hits.index,
    values=hits['id'],
    name='Hit',
    hole=.4,
    textposition='inside',
    hoverinfo='label+value+name',
)
donut = go.Layout(title='CSO topics requests hits')

fig = go.Figure(data=[pie], layout=donut)
iplot(fig)

In [None]:
# Requests per (lat, lon) pair: count of non-null ids at each coordinate.
requests_data = (
    topics_req_df
    .groupby(['lat', 'lon'])['id']
    .count()
    .reset_index()
)
# Distinct users per country.
users_data = (
    topics_req_df
    .groupby(['country'])['user']
    .nunique()
    .reset_index()
)

# Choropleth layer: countries coloured by their number of unique users.
user_map = {
    'type': 'choropleth',
    'locationmode': 'country names',
    'locations': users_data['country'],
    'z': users_data['user'],
    'text': users_data['user'],
    'autocolorscale': True,
    'reversescale': False,
    'marker': {
        'line': {
            'color': 'rgb(180,180,180)',
            'width': 0.5,
        },
    },
    'colorbar': {'title': '#unique users'},
}

# Bubble layer: one marker per coordinate; marker area is the request
# count divided by 10.
bubbles = {
    'type': 'scattergeo',
    'lon': requests_data['lon'],
    'lat': requests_data['lat'],
    'text': requests_data['id'],
    'marker': {
        'size': requests_data['id'] / 10,
        'line': {'width': 0.5, 'color': 'rgb(40,40,40)'},
        'sizemode': 'area',
    },
}

layout = {
    'title': 'Unique user and topic requests',
    'showlegend': False,
    'geo': {
        'showframe': False,
        'showcoastlines': True,
        'projection': {'type': 'equirectangular'},
    },
}

# Both layers share one geo figure; the choropleth is listed first.
fig = {'data': [user_map, bubbles], 'layout': layout}
iplot(fig, validate=False)

In [None]:
# Requests whose 'exists' value is 1, counted per topic, sorted by count
# (descending) and cut to the first 50 rows.
bars_data = (
    topics_req_df.loc[topics_req_df['exists'] == 1]
    .groupby('topic')[['id']]
    .count()
    .sort_values(by='id', ascending=False)
    .reset_index()
    .head(50)
)

trace0 = go.Bar(x=bars_data['topic'], y=bars_data['id'])

# Log-scaled y axis; small, slanted tick labels on the x axis.
layout = go.Layout(
    title='Top-50 topics (hits)',
    xaxis={
        'tickangle': -45,
        'automargin': True,
        'tickfont': {'size': 9},
    },
    yaxis={
        'title': '# requests',
        'type': 'log',
    },
)

fig = go.Figure(data=[trace0], layout=layout)
iplot(fig)

In [None]:
# Requests whose 'exists' value is 0, counted per topic, sorted by count
# (descending) and cut to the first 50 rows.
bars_data = (
    topics_req_df.loc[topics_req_df['exists'] == 0]
    .groupby('topic')[['id']]
    .count()
    .sort_values(by='id', ascending=False)
    .reset_index()
    .head(50)
)

trace0 = go.Bar(
    x=bars_data['topic'],
    y=bars_data['id'],
    marker={'color': '#cf0a5f'},
)

# Log-scaled y axis; small, slanted tick labels on the x axis.
layout = go.Layout(
    title='Top-50 requested but non existing topics (hits)',
    xaxis={
        'tickangle': -45,
        'automargin': True,
        'tickfont': {'size': 9},
    },
    yaxis={
        'title': '# requests',
        'type': 'log',
    },
)

fig = go.Figure(data=[trace0], layout=layout)
iplot(fig)