## Imports

In [1]:
import geopandas as gpd
import pandas as pd
import hvplot.pandas

## Load Datasets

In [2]:
# ISO country-code lookup (alpha-2 <-> alpha-3), fetched from a pinned public gist.
country_codes = pd.read_csv('https://gist.githubusercontent.com/tadast/8827699/raw/7255fdfbf292c592b75cf5f7a19c16ea59735f74/countries_codes_and_coordinates.csv')


def _strip_quotes(series):
    """Strip surrounding whitespace, then surrounding double quotes, from each string."""
    return series.str.strip().str.strip('"')


# Clean the two code columns of the raw table itself.
for raw_column in ('Alpha-2 code', 'Alpha-3 code'):
    country_codes[raw_column] = _strip_quotes(country_codes[raw_column])

# Two-column frame used to attach alpha-2 codes to the world geometries.
# The cleaning pass is applied a second time here, as in the original cell.
codes = pd.DataFrame({
    'iso_a2': _strip_quotes(country_codes['Alpha-2 code']),
    'iso_a3': _strip_quotes(country_codes['Alpha-3 code']),
})
codes.head()

Unnamed: 0,iso_a2,iso_a3
0,AF,AFG
1,AL,ALB
2,DZ,DZA
3,AS,ASM
4,AD,AND


In [3]:
# Country polygons from the Natural Earth low-resolution dataset bundled with geopandas.
# NOTE(review): `gpd.datasets` is deprecated in recent geopandas releases — confirm the
# pinned geopandas version still ships it.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Attach the alpha-2 code to each country; `iso_a3` is the only column shared with `codes`.
world = world.merge(codes, how='inner', on='iso_a3')
world.head()

Unnamed: 0,pop_est,continent,name,iso_a3,gdp_md_est,geometry,iso_a2
0,28400000.0,Asia,Afghanistan,AFG,22270.0,"POLYGON ((61.21081709172574 35.65007233330923,...",AF
1,12799293.0,Africa,Angola,AGO,110300.0,(POLYGON ((16.32652835456705 -5.87747039146621...,AO
2,3639453.0,Europe,Albania,ALB,21810.0,"POLYGON ((20.59024743010491 41.85540416113361,...",AL
3,4798491.0,Asia,United Arab Emirates,ARE,184300.0,"POLYGON ((51.57951867046327 24.24549713795111,...",AE
4,40913584.0,South America,Argentina,ARG,573900.0,(POLYGON ((-65.50000000000003 -55.199999999999...,AR


In [4]:
# Tidy download counts: one row per (library, month, country, system, python version).
# NOTE(review): read_csv's default NA parsing treats the literal string "NA" as missing,
# so an alpha-2 country code of "NA" would load as NaN — confirm how the file encodes it.
downloads = (
    pd.read_csv('data/libraries_tidy.csv')
    .assign(
        download_month=lambda frame: pd.to_datetime(frame['download_month']),
        downloads=lambda frame: frame['downloads'].astype('int'),
    )
    .sort_values('download_month')
    # Boolean flag marking rows whose python_version equals 3.
    .assign(py3=lambda frame: frame['python_version'] == 3)
)
downloads.head()

Unnamed: 0,library,download_month,country,system_name,python_version,downloads,py3
38363,bokeh,2017-01-01,IQ,Windows,2,0,False
23587,seaborn,2017-01-01,KZ,Linux,3,0,True
23576,seaborn,2017-01-01,SV,Windows,3,0,True
23563,seaborn,2017-01-01,AL,Windows,3,1,True
44506,seaborn,2017-01-01,SA,Windows,2,4,False


## Libraries over time

In [5]:
# Total downloads per (month, library); later cells add percentage columns to this frame.
lib_over_time = (
    downloads
    .groupby(['download_month', 'library'])[['downloads']]
    .agg('sum')
)
lib_over_time.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,downloads
download_month,library,Unnamed: 2_level_1
2017-01-01,bokeh,23576
2017-01-01,plotly,77620
2017-01-01,seaborn,79949
2017-02-01,bokeh,24446
2017-02-01,plotly,79135


In [6]:
# Fraction of Python 3
# Downloads per (month, library), split by whether the interpreter was Python 3.
version_downloads = (
    downloads
    .groupby(['download_month', 'library', 'py3'])[['downloads']]
    .sum()
)

# Put each (month, library) total next to its per-interpreter counts and keep
# only the Python 3 rows.
version_fractions = (
    version_downloads
    .groupby(level=['download_month', 'library'])
    .sum()
    .rename(columns={'downloads': 'total_downloads'})
    .merge(version_downloads.reset_index(level='py3'), on=['download_month', 'library'])
    .query('py3 == True')
)

# Column assignment aligns on the index; (month, library) pairs without
# Python 3 rows end up as NaN.
lib_over_time['python3_percentage'] = (
    version_fractions['downloads']
    .div(version_fractions['total_downloads'])
    .mul(100)
)

In [7]:
# Fraction of platforms
# Downloads per (month, library), split by the operating system reported.
system_downloads = (
    downloads
    .groupby(['download_month', 'library', 'system_name'])[['downloads']]
    .sum()
)

# Put each (month, library) total next to its per-system counts.
system_fractions = (
    system_downloads
    .groupby(level=['download_month', 'library'])
    .sum()
    .rename(columns={'downloads': 'total_downloads'})
    .merge(system_downloads.reset_index(level='system_name'), on=['download_month', 'library'])
)

# One percentage column per platform of interest, added to `lib_over_time`.
for name in ('Windows', 'Darwin', 'Linux'):
    fractions = system_fractions[system_fractions['system_name'] == name]
    lib_over_time[f'{name}_percentage'] = (
        fractions['downloads'].div(fractions['total_downloads']).mul(100)
    )

In [8]:
# Human-readable titles for the plottable columns of `lib_over_time`.
col_to_title = dict(
    downloads='Total Downloads',
    python3_percentage='Python 3 Percentage',
    Linux_percentage='Linux Percentage',
    Windows_percentage='Windows Percentage',
    Darwin_percentage='MacOS Percentage',
)

# Reverse lookup (title -> column), in the same order as above.
title_to_col = dict(zip(col_to_title.values(), col_to_title.keys()))

In [9]:
import panel as pn
from panel.interact import interact
from panel import widgets

In [10]:
@interact(column=widgets.Select(options=title_to_col))
def lib_over_time_plot(column):
    """Plot one column of `lib_over_time` against time, one line per library.

    `column` is a `lib_over_time` column name; the Select widget shows the
    titles from `title_to_col` and passes the matching column name.
    """
    curves = lib_over_time.hvplot(
        x='download_month',
        y=column,
        by='library',
        title=col_to_title[column],
    )
    return curves.options(legend_position='bottom')

In [11]:
#lib_over_time_plot

## World

All-time downloads per library

In [12]:
# All-time downloads per (country, library), joined onto the country geometries.
# Only the `downloads` column is summed: summing the whole frame would also try to
# aggregate the datetime `download_month` and string `system_name` columns, which
# raises TypeError on pandas >= 2.0 (and was wasted work before that).
world_downloads = world.merge(
    downloads.groupby(['country', 'library'], as_index=False)[['downloads']].sum(),
    left_on='iso_a2',
    right_on='country',
)

# Keep only what the map needs. Dropping (rather than selecting) preserves the
# merged column order, and rebinding avoids an in-place mutation.
world_downloads = world_downloads.drop(
    columns=[
        col for col in world_downloads.columns
        if col not in ['downloads', 'country', 'library', 'geometry']
    ]
)

In [13]:
@interact(library=widgets.Select(options=['plotly', 'bokeh', 'seaborn']))
def world_per_library_plot(library):
    """Plot the `world_downloads` rows for `library`, coloured by downloads on a log scale."""
    is_selected = world_downloads.library == library
    selected_rows = world_downloads[is_selected]
    return selected_rows.hvplot(c='downloads', logz=True)

## Platforms in a time window

In [14]:
# Date-range slider covering every month in the data, initially selecting the full span.
start, end = downloads['download_month'].min(), downloads['download_month'].max()
slider = pn.widgets.DateRangeSlider(start=start, end=end, value=(start, end))

In [15]:
@interact(time_range=slider)
def system_downloads_in_range_plot(time_range):
    """Bar chart of total downloads per platform within the selected time window.

    `time_range` is the `(start, end)` pair supplied by the date-range slider.
    Windows, Darwin and Linux each get a bar; every other system is pooled
    into a single 'Other' bar.
    """
    main_systems = ['Windows', 'Darwin', 'Linux']
    start_stamp, end_stamp = time_range

    # Totals per system over the window (all libraries combined).
    per_system = (
        system_downloads.reset_index('library')
        .loc[start_stamp:end_stamp]
        .groupby('system_name').downloads.sum()
        .reset_index()
    )

    # Classify by name rather than by rank: ranking the top three assumes they
    # are always Windows/Darwin/Linux, and when they are not, a named platform
    # is pooled into 'Other' while the platform that outranked it is dropped.
    per_system['system_name'] = per_system['system_name'].where(
        per_system['system_name'].isin(main_systems), 'Other'
    )

    return (per_system.groupby('system_name')
            .downloads.sum()
            # Fixed bar order; a category absent from the window shows as 0.
            .reindex(['Other'] + main_systems, fill_value=0)
            .hvplot.bar(title='System Downloads'))

In [16]:
#world_per_library_plot

## Logo

In [17]:
# PyPI logo shown in the dashboard's left column.
logo = pn.pane.HTML(
    '<img src="https://pypi.org/static/images/logo-large.72ad8bf1.svg"></img>',
    width=400,
    height=200,
)

## Description

In [18]:
# Markdown blurb shown under the logo in the dashboard's left column.
description_text = """
### Description
A dashboard for exploring [PyPI](https://pypi.org/)
download rates for the [plotly](https://github.com/plotly/plotly.py),
[bokeh](https://github.com/bokeh/bokeh),
and [seaborn](https://github.com/mwaskom/seaborn) data visualization libraries.


Constructed on top of [Panel](https://github.com/pyviz/panel) and
[hvplot](https://github.com/pyviz/hvplot), which are in turn built on
[Holoviews](https://github.com/ioam/holoviews) and [Bokeh](https://github.com/bokeh/bokeh).
"""
description = pn.pane.Markdown(
    description_text,
    height=200,
    width=200,
    sizing_mode='scale_width',
)

## Dashboard

In [19]:
# Left column: logo and description. Right column: the three interactive views.
info_column = pn.layout.Column(logo, description)
plot_column = pn.layout.Column(
    lib_over_time_plot,
    world_per_library_plot,
    system_downloads_in_range_plot,
)
dashboard = pn.layout.Row(info_column, plot_column)
dashboard.servable()