# Read and format data

In [1]:
%pylab inline
import pandas as pd
import numpy as np
import re

def wraplines(txt, width=64):
    """Re-flow ``txt`` into lines of at most ``width`` characters joined by ``<br>``.

    The text is split on whitespace, so runs of spaces, tabs and newlines
    collapse to single spaces. Words are never broken: a word longer than
    ``width`` is placed on a line of its own, and that line exceeds ``width``.

    Parameters
    ----------
    txt : str
        Text to wrap.
    width : int, default 64
        Maximum number of characters per line, counting the single spaces
        between words.

    Returns
    -------
    str
        The wrapped text with ``<br>`` between lines (never leading or
        trailing); an empty string when ``txt`` contains no words.
    """
    lines = []
    current = ""
    for word in txt.split():
        if not current:
            # The first word of the text is always accepted, even when it is
            # longer than width, so no empty line is emitted in front of it.
            current = word
        elif len(current) + 1 + len(word) <= width:
            # +1 accounts for the separating space.
            current += " " + word
        else:
            lines.append(current)
            current = word
    if current:
        lines.append(current)
    return "<br>".join(lines)

# Load the per-country figures. pandas' built-in missing-value markers are
# switched off (keep_default_na=False) and an explicit list is used instead,
# because the built-in list contains "NA" -- the country code for Namibia.
# Every remaining missing cell is then set to 0.
df = pd.read_excel(
    'InternetUsageByCountry.xlsx',
    sheet_name='Data',
    na_values=['1.#IND', '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null', ''],
    keep_default_na=False,
).fillna(0)

# Rows without a story (0 after the fill above) receive a placeholder text;
# each story is then given a caption and wrapped with <br> every 64 characters.
df.loc[:, 'Story'] = (
    "<i>A story from 2017:</i> "
    + df['Story'].replace(0, 'This is a country. And it\'s so much we can tell about it.')
).apply(wraplines, width=64)

# log(Population + 1), for log-scaled colouring
df['PopulationLog'] = (df['Population'] + 1).apply(np.log)

# Complementary ("not online") figures. The share is computed from the
# unclipped head count; afterwards both columns are clipped to their valid
# ranges and undefined shares (NaN) become 0.
df['NotInternetUsers'] = df['Population'] - df['InternetUsers']
df['NotInternetPenetration'] = (df['NotInternetUsers'] / df['Population']).clip(0, 1)
df['NotInternetUsers'] = df['NotInternetUsers'].clip(0, 1e12)
df.loc[:, 'NotInternetPenetration'] = df['NotInternetPenetration'].fillna(0)

# plotly can only use the ALPHA-3 country codes, so read the code table and
# attach it to the data (default inner join: rows without a match are dropped).
country_codes = pd.read_excel(
    'InternetUsageByCountry.xlsx',
    sheet_name='Coding',
    na_values=['1.#IND', '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'],
    keep_default_na=False,
)

# NOTE(review): the key below contains a stray double quote; presumably it
# mirrors the header in the 'Coding' sheet -- confirm against the workbook.
df = df.merge(
    country_codes,
    left_on='Symbol',
    right_on='ISO "ALPHA-2 Code',
)

Populating the interactive namespace from numpy and matplotlib


# Visualization

In [2]:
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from copy import deepcopy

init_notebook_mode(connected=True)

# Some frequently used parameters
my_font_style = 'Raleway'
background_color = 'rgb(52,51,50)'
font_color = 'rgb(230,222,216)'
ocean_color = 'rgba(250,250,255,1)'
scaling_color = 'rgba(255,50,22,1)'

# Base settings for a choropleth trace; each trace starts from a deep copy
# of this dict and then adds or overrides its own entries.
default_parameters = {
    'type': 'choropleth',
    'showlegend': True,
    'showscale': True,
    'locations': df['ISO ALPHA-3 Code'],
    'zauto': False,
    'zmin': 0,
    'reversescale': False,
    'colorbar': {
        'autotick': False,
        'thickness': 20,
        'len': .5,
        'titleside': 'top',
        'titlefont': {'family': my_font_style, 'size': 14},
        'tickmode': 'array',
        'tickfont': {'family': my_font_style, 'size': 12},
    },
}

# Trace 1: internet penetration per country.
data1 = [deepcopy(default_parameters)]
data1[0].update(
    z=df['InternetPenetration'],
    # The scale starts at the ocean colour because opacity was not supported
    # in the array format of a colorscale.
    colorscale=[[0, ocean_color], [1, scaling_color]],
    zmax=1,
    # Hover text: country name, penetration in percent, population in whole
    # millions (truncated), followed by the wrapped story.
    text=(
        '<b>' + df['Country']
        + '</b><br><i>Internet Penetration:</i> '
        + (df['InternetPenetration'] * 100).round(2).astype(str)
        + '%<br><i>Total Population:</i> '
        + (df['Population'] / 1e6).astype(int).astype(str)
        + ' millions<br>'
        + df['Story']
    ),
    # Border colour is black with the penetration value as its alpha channel.
    marker={
        'line': {
            'color': df['InternetPenetration'].apply('rgba(0,0,0,{})'.format),
            'width': 0.6,
        },
    },
)
# NOTE(review): titlefont and tickfont are replaced as whole dicts here, so the
# 'size' entries from default_parameters do not carry over -- confirm intended.
data1[0]['colorbar'].update(
    y=.5,
    title='Internet penetration',
    titlefont={'family': my_font_style, 'color': font_color},
    tickfont={'family': my_font_style, 'color': font_color},
    tickvals=[0.05, 0.95],
    ticktext=['<1%', '100%'],
)
data1[0]['hoverinfo'] = 'text'

layout = {
    'title': (
        'The invisible countries of no data<br>'
        'Source: <a href="http://www.internetworldstats.com/stats.htm">'
        'Internet world stats</a>'
    ),
    'titlefont': {
        'family': my_font_style,
        'size': 26,
        'color': font_color,
    },
    'paper_bgcolor': background_color,
    'geo': {
        'showframe': False,
        'showcoastlines': False,
        'showland': True,
        'landcolor': ocean_color,
        'showocean': True,
        'oceancolor': ocean_color,
        'bgcolor': background_color,
        'projection': {'type': 'mollweide'},
        'lonaxis': {'showgrid': True, 'gridcolor': 'grey', 'gridwidth': 0.05},
        'lataxis': {'showgrid': True, 'gridcolor': 'grey', 'gridwidth': 0.05},
    },
}

# validate=False hands the figure dict to plotly without schema validation.
fig = {'data': data1, 'layout': layout}
iplot(fig, validate=False)

Final formatting touches (e.g. fonts, responsive design) were done in Plotly online.