Installing plotly

In [1]:
import numpy as np
import pandas as pd
import matplotlib
#needed because of "Cannot get window extent w/o renderer"
matplotlib.use('qt4agg')
import scipy
import seaborn as sns
sns.set_style('whitegrid')

import altair
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams["patch.force_edgecolor"] = True
%store -r cleaned

import sys
!/opt/anaconda/bin/conda install --yes --prefix {sys.prefix} plotly

In [2]:
cleaned = cleaned.copy()
cleaned.head()
cleaned.to_csv("digital_divide.csv")

# First, a nice chloropleth map


In [3]:
map_df = cleaned[cleaned['date'] == 2016][['country', 'Code', 'internet_users']]
map_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,country,Code,internet_users
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,2016,Afghanistan,AFG,10.595726
Albania,2016,Albania,ALB,66.363445
Algeria,2016,Algeria,DZA,42.945527
Angola,2016,Angola,AGO,13.0
Antigua and Barbuda,2016,Antigua and Barbuda,ATG,73.0


In [4]:
original = cleaned.copy()
cleaned.dropna(inplace=True)
cleaned

Unnamed: 0_level_0,Unnamed: 1_level_0,country,date,Access Electricity,Cellular %,Fixed broadband %,GDP Growth (annual),GDP pcp PPP,Landline %,Secure Servers,Urbanisation,internet_users,Code,Region,cgroup
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Afghanistan,2016,Afghanistan,2016,84.137138,62.335417,2.539529,2.366712,1944.117005,0.329501,426.0,27.132,10.595726,AFG,South Asia,L
Afghanistan,2015,Afghanistan,2015,71.500000,58.420528,2.094764,1.310040,1926.357336,0.326056,74.0,26.703,8.260000,AFG,South Asia,L
Afghanistan,2014,Afghanistan,2014,89.500000,56.191333,0.457903,2.690522,1937.235365,0.311163,57.0,26.282,7.000000,AFG,South Asia,L
Afghanistan,2013,Afghanistan,2013,67.259552,52.966473,0.472714,3.900575,1913.160644,0.301809,46.0,25.871,5.900000,AFG,South Asia,L
Afghanistan,2012,Afghanistan,2012,69.100000,49.972753,0.488648,14.434741,1873.153946,0.293244,38.0,25.468,5.454545,AFG,South Asia,L
Afghanistan,2011,Afghanistan,2011,43.222019,46.444058,0.504712,6.113685,1660.739856,0.045472,16.0,25.074,5.000000,AFG,South Asia,L
Afghanistan,2010,Afghanistan,2010,42.700000,35.467766,0.520776,8.433290,1581.600836,0.057702,14.0,24.689,4.000000,AFG,South Asia,L
Afghanistan,2009,Afghanistan,2009,44.854885,37.494200,0.357088,21.020649,1482.098837,0.018319,0.0,24.313,3.550000,AFG,South Asia,L
Afghanistan,2003,Afghanistan,2003,11.751966,0.867120,0.000000,8.444163,922.829449,0.159117,0.0,22.237,0.087891,AFG,South Asia,L
Albania,2016,Albania,2016,100.000000,115.152265,910.277930,3.352159,11559.300844,8.496597,416.0,58.376,66.363445,ALB,Europe & Central Asia,UM


In [5]:
def configure_plotly_browser_state():
  import IPython
  display(IPython.core.display.HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
            },
          });
        </script>
        '''))

In [6]:
configure_plotly_browser_state()
data = [ 
    dict(
        type = 'choropleth', 
        locations = map_df['Code'], 
        z = map_df['internet_users'], 
        text = map_df['country'], 
        colorscale = "Viridis",
        autocolorscale = False, 
        reversescale = True, 
        marker = dict(
            line = dict(
                color = 'rgb(180, 180, 180)', 
                width = 0.5
            )), 
        colorbar=dict(
            autotick = False, 
            tickprefix = '%',
            title= "Internet Users"), 
        #colorscale='Viridis'
    )]
layout = dict(
    title = "2016 Internet Users in %, Source: World Bank",
    geo = dict(
    showframe=False, 
    showcoastlines = False,
    projection = dict(
        type= "Mercator"
    )
    )
)
fig = dict(data=data, layout = layout)
iplot(fig, validate=False, filename="test")

Now, I want to create an animated graph that goes through the different years

First, I'll use another example to get me familiar

In [7]:
import pandas as pd

init_notebook_mode(connected=True)

years = ['1996', '2001', '2006', '2011', '2016']
regions = []
for region in cleaned['Region']: 
    if region not in regions:
        regions.append(region)
# make figure
figure = {
    'data': [],
    'layout': {},
    'frames': []
}

# fill in most of layout
figure['layout']['xaxis'] = {'range': [-10, 20] , 'title': 'GDP Growth'}
figure['layout']['yaxis'] = {'range': [-10, 110], 
                             'title': 'Internet Users'}
figure['layout']['hovermode'] = 'closest'
figure['layout']['sliders'] = {
    'args': [
        'transition', {
            'duration': 400,
            'easing': 'cubic-in-out'
        }
    ],
    'initialValue': '1952',
    'plotlycommand': 'animate',
    'values': years,
    'visible': True
}
figure['layout']['updatemenus'] = [
    {
        'buttons': [
            {
                'args': [None, {'frame': {'duration': 500, 'redraw': False},
                         'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}],
                'label': 'Play',
                'method': 'animate'
            },
            {
                'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                'transition': {'duration': 0}}],
                'label': 'Pause',
                'method': 'animate'
            }
        ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }
]

sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'Year:',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

# make data
year = 1991
for region in regions:
    dataset_by_year = cleaned[cleaned['date'] == year]
    dataset_by_year_and_cont = dataset_by_year[dataset_by_year['Region'] == region]

    data_dict = {
        'x': list(dataset_by_year_and_cont['GDP Growth (annual)']),
        'y': list(dataset_by_year_and_cont['internet_users']),
        'mode': 'markers',
        'text': list(dataset_by_year_and_cont['country']),
        'marker': {
            'sizemode': 'area',
            'sizeref': 100,
            'size': list(dataset_by_year_and_cont['GDP pcp PPP'])
        },
        'name': region
    }
    figure['data'].append(data_dict)
    
# make frames
for year in years:
    frame = {'data': [], 'name': str(year)}
    for region in regions:
        dataset_by_year = cleaned[cleaned['date'] == int(year)]
        dataset_by_year_and_cont = dataset_by_year[dataset_by_year['Region'] == region]

        data_dict = {
            'x': list(dataset_by_year_and_cont['GDP Growth (annual)']),
            'y': list(dataset_by_year_and_cont['internet_users']),
            'mode': 'markers',
            'text': list(dataset_by_year_and_cont['country']),
            'marker': {
                'sizemode': 'area',
                'sizeref': 200,
                'size': list(dataset_by_year_and_cont['GDP pcp PPP'])
            },
            'name': region
        }
        frame['data'].append(data_dict)

    figure['frames'].append(frame)
    slider_step = {'args': [
        [year],
        {'frame': {'duration': 300, 'redraw': False},
         'mode': 'immediate',
       'transition': {'duration': 300}}
     ],
     'label': year,
     'method': 'animate'}
    sliders_dict['steps'].append(slider_step)

    
figure['layout']['sliders'] = [sliders_dict]

iplot(figure)

This code from plotly.com works, but my code below does not

In [8]:
from itertools import cycle
import plotly.tools as tls
lines = ["-","--","-."]
linecycler = cycle(lines)
fig = plt.figure()
ax = fig.add_subplot(111)
for i in range(10):
    x = range(i,i+10)
    ax.plot(range(10),x,next(linecycler))

ax.set_title('Cycling through line styles in matplotlib')

plotly_fig = tls.mpl_to_plotly( fig )
iplot(plotly_fig, filename='mpl-cycle-linestyles')



In [9]:
cleaned.head

<bound method NDFrame.head of                       country  date  Access Electricity  Cellular %  \
country     date                                                      
Afghanistan 2016  Afghanistan  2016           84.137138   62.335417   
            2015  Afghanistan  2015           71.500000   58.420528   
            2014  Afghanistan  2014           89.500000   56.191333   
            2013  Afghanistan  2013           67.259552   52.966473   
            2012  Afghanistan  2012           69.100000   49.972753   
            2011  Afghanistan  2011           43.222019   46.444058   
            2010  Afghanistan  2010           42.700000   35.467766   
            2009  Afghanistan  2009           44.854885   37.494200   
            2003  Afghanistan  2003           11.751966    0.867120   
Albania     2016      Albania  2016          100.000000  115.152265   
            2015      Albania  2015          100.000000  116.337513   
            2014      Albania  2014          10

In [10]:

grouped = cleaned.groupby(['date', 'Region'], as_index = False).mean()

fig, ax = plt.subplots()
regions = list(cleaned['Region'].unique())

style={x:y for x, y in zip(regions, ['s-', 'o-', '^-', ':', '--', '-', '-.'])}

for region in regions:
    grouped.loc[grouped['Region']==region].plot(x = 'date', y='internet_users', style=style, ax = ax, label = region)
ax.legend(loc='center left', bbox_to_anchor=(1.3, 0.5),
          fancybox=True, shadow=True)

plotly_fig = tls.mpl_to_plotly( fig )
iplot(plotly_fig, filename='mpl-cycle-linestyles')


'date' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version


Bummer! Plotly can currently only draw Line2D objects from matplotlib that are in 'data' coordinates!


Looks like the annotation(s) you are trying 
to draw lies/lay outside the given figure size.

Therefore, the resulting Plotly figure may not be 
large enough to view the full text. To adjust 
the size of the figure, use the 'width' and 
'height' keys in the Layout object. Alternatively,
use the Margin object to adjust the figure's margins.


I found a path object that I don't think is part of a bar chart. Ignoring.



In [11]:
f, ax = plt.subplots(figsize=(20,10))
ax = sns.swarmplot(x='date', y ='internet_users', data = cleaned)
plt.tight_layout()
plotly_fig = tls.mpl_to_plotly( f)
iplot(plotly_fig, filename='mpl-cycle-linestyles')

In [12]:
import plotly.plotly as py
import plotly.tools as tls

import matplotlib.pyplot as plt
import numpy as np

spread = np.random.rand(50) * 100
center = np.ones(25) * 50
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
data = np.concatenate((spread, center, flier_high, flier_low), 0)

mpl_fig = plt.figure()
ax = mpl_fig.add_subplot(111)

ax.boxplot(data)

ax.set_xlabel('Data Points')
ax.set_ylabel('Variance')

plotly_fig = tls.mpl_to_plotly( mpl_fig )
iplot(plotly_fig, filename='boxplot-basic')

ValueError: 
    Invalid value of type 'builtins.str' received for the 'color' property of scatter.marker
        Received value: 'none'

    The 'color' property is a color and may be specified as:
      - A hex string (e.g. '#ff0000')
      - An rgb/rgba string (e.g. 'rgb(255,0,0)')
      - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
      - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
      - A named CSS color:
            aliceblue, antiquewhite, aqua, aquamarine, azure,
            beige, bisque, black, blanchedalmond, blue,
            blueviolet, brown, burlywood, cadetblue,
            chartreuse, chocolate, coral, cornflowerblue,
            cornsilk, crimson, cyan, darkblue, darkcyan,
            darkgoldenrod, darkgray, darkgrey, darkgreen,
            darkkhaki, darkmagenta, darkolivegreen, darkorange,
            darkorchid, darkred, darksalmon, darkseagreen,
            darkslateblue, darkslategray, darkslategrey,
            darkturquoise, darkviolet, deeppink, deepskyblue,
            dimgray, dimgrey, dodgerblue, firebrick,
            floralwhite, forestgreen, fuchsia, gainsboro,
            ghostwhite, gold, goldenrod, gray, grey, green,
            greenyellow, honeydew, hotpink, indianred, indigo,
            ivory, khaki, lavender, lavenderblush, lawngreen,
            lemonchiffon, lightblue, lightcoral, lightcyan,
            lightgoldenrodyellow, lightgray, lightgrey,
            lightgreen, lightpink, lightsalmon, lightseagreen,
            lightskyblue, lightslategray, lightslategrey,
            lightsteelblue, lightyellow, lime, limegreen,
            linen, magenta, maroon, mediumaquamarine,
            mediumblue, mediumorchid, mediumpurple,
            mediumseagreen, mediumslateblue, mediumspringgreen,
            mediumturquoise, mediumvioletred, midnightblue,
            mintcream, mistyrose, moccasin, navajowhite, navy,
            oldlace, olive, olivedrab, orange, orangered,
            orchid, palegoldenrod, palegreen, paleturquoise,
            palevioletred, papayawhip, peachpuff, peru, pink,
            plum, powderblue, purple, red, rosybrown,
            royalblue, saddlebrown, salmon, sandybrown,
            seagreen, seashell, sienna, silver, skyblue,
            slateblue, slategray, slategrey, snow, springgreen,
            steelblue, tan, teal, thistle, tomato, turquoise,
            violet, wheat, white, whitesmoke, yellow,
            yellowgreen
      - A number that will be interpreted as a color
        according to scatter.marker.colorscale
      - A list or array of any of the above

In [None]:
#py.sign_in('DemoAccount', 'lr1c37zw81')

n = 50
x, y, z, s, ew = np.random.rand(5, n)
c, ec = np.random.rand(2, n, 4)
area_scale, width_scale = 500, 5

fig, ax = plt.subplots()
sc = ax.scatter(x, y, c=c,
                s=np.square(s)*area_scale,
                edgecolor=ec,
                linewidth=ew*width_scale)
ax.grid()
plotly_fig = tls.mpl_to_plotly(mpl_fig)

unique_url = iplot(plotly_fig)