In [35]:
import plotly.offline as py
import plotly.graph_objs as go
import numpy as np
import pandas as pd
import math
# Enable inline Plotly rendering in the notebook.
# `import plotly.offline as py` binds only the alias `py`, so the bare name
# `plotly` would raise NameError on a fresh kernel; call through `py` instead.
py.init_notebook_mode(connected=True)

# Scatter plots

In [4]:
# Sample the function on 250 evenly spaced points in [0.01, 1].
x0 = np.linspace(0.01,1,250)
y0 = np.sin(1/x0**69)

# go.Scatter creates a plotly object which behaves like a Python dictionary,
# with all elements clearly identified (plot type, x numpy array, y numpy array, line type, legend line name).
trace0 = go.Scatter(x=x0, y=y0, mode='lines+markers', name='sin(1/x^69)')

# In fact, we can rewrite this routine with dictionaries:
# trace0 = dict(type='scatter', x=x0, y=y0, mode='lines+markers', name='sin(1/x^69)')

# Then we create a list data of such objects and pass it to the plotting routine.
data = [trace0]

# Call through the `py` alias: the bare name `plotly` is never bound by
# `import plotly.offline as py`, so `plotly.offline.iplot` fails on a fresh kernel.
py.iplot(data)

## Exercise 1

Pass a list of two objects to the plotting routine with `data = [trace1,trace2]`. Let the second dataset `trace2` contain another mathematical function. The idea is to have multiple objects in the plot.

Notice:

* How we can hover over each data point, and its (x,y) will be shown
* the toolbar at the top
* Double-clicking on the plot will reset it

In [8]:
# Two further functions evaluated on the same x grid as the first plot.
y1 = np.cosh(x0)
y2 = np.tanh(x0)

# One trace per function; `name` is the legend label.
trace1 = go.Scatter(x=x0, y=y1, mode='lines+markers', name='cosh(x)')
trace2 = go.Scatter(x=x0, y=y2, mode='lines+markers', name='tanh(x)')

# Every trace in the list is drawn in the same figure.
data = [trace1, trace2]

# Call through the `py` alias; the bare name `plotly` is not bound by the imports.
py.iplot(data)

## Exercise 2
Add a bunch of dots to the plot with `dots = go.Scatter(x=[.2,.4,.6,.8], y=[2,1.5,2,1.2])`. What is the default scatter mode?

In [11]:
# Four hand-picked points; no `mode` is given, so plotly applies its default.
dots = go.Scatter(x=[.2,.4,.6,.8], y=[2,1.5,2,1.2], name='dots')
data = [trace1, trace2, dots]

# Call through the `py` alias; the bare name `plotly` is not bound by the imports.
py.iplot(data)

## Exercise 2.1

Change line colour and width by adding the dictionary `line=dict(color=('rgb(205,12,24)'),width=4)` to `dots`:

In [12]:
# Same four points, now with an explicit line colour and width.
dots = go.Scatter(x=[.2,.4,.6,.8], y=[2,1.5,2,1.2],
                  line=dict(color=('rgb(205,12,24)'), width=4),
                  name='dots')
data = [trace1, trace2, dots]

# Call through the `py` alias; the bare name `plotly` is not bound by the imports.
py.iplot(data)

## Exercise 3

Use `go.Scatter()` to produce a real scatter plot showing a Gaussian distribution in 2D with 1,000 random points.

In [14]:
num_points = 1000
# Draw both coordinates from the standard normal distribution so the point
# cloud is a 2D Gaussian; np.random.random samples uniformly on [0, 1) and
# would fill a square instead.
x_gauss = np.random.normal(size=num_points)
y_gauss = np.random.normal(size=num_points)

# mode='markers' draws the points only, without connecting lines.
trace_gauss = go.Scatter(x=x_gauss, y=y_gauss, mode='markers')

# Call through the `py` alias; the bare name `plotly` is not bound by the imports.
py.iplot([trace_gauss])

In [None]:
# Exploration aid: list the names exposed by plotly.graph_objs (imported as `go`).
dir(go)

# Bar plots

In [17]:
# A single bar trace: one bar per city, bar height taken from `y`.
data = [go.Bar(x=['Vancouver', 'Calgary', 'Toronto', 'Montreal', 'Halifax'],
               y=[2463431, 1392609, 5928040, 4098927, 403131])]

# Call through the `py` alias; the bare name `plotly` is not bound by the imports.
py.iplot(data)

In [19]:
# Let’s plot inner city population vs. greater metro area for each city:

cities = ['Vancouver', 'Calgary', 'Toronto', 'Montreal', 'Halifax']
proper = [631486, 1239220, 2731571, 1704694, 316701]
metro = [2463431, 1392609, 5928040, 4098927, 403131]

# Two bar traces sharing the same x categories.
bar1 = go.Bar(x=cities, y=proper, name='inner city')
bar2 = go.Bar(x=cities, y=metro, name='greater area')

# Call through the `py` alias; the bare name `plotly` is not bound by the imports.
py.iplot([bar1,bar2])

In [23]:
# Let’s now do a stacked plot, with outer city population on top of inner city population:

# Outer population = metro minus inner city, so the stacked total equals the metro figure.
outside = [m-p for p,m in zip(proper,metro)]   # need to subtract

bar1 = go.Bar(x=cities, y=proper, name='inner city')
bar2 = go.Bar(x=cities, y=outside, name='outer city')

# Minimal form: layout = go.Layout(barmode='stack')
layout = go.Layout(barmode='stack', title='Population', plot_bgcolor = 'rgb(153, 204, 255)')
fig = go.Figure(data=[bar1,bar2], layout=layout)  # a Figure bundles traces with a layout
# Call through the `py` alias; the bare name `plotly` is not bound by the imports.
py.iplot(fig)                                     # we get a stacked bar chart

In [None]:
# Exploration aid: print the built-in documentation for go.Layout.
help(go.Layout)

# Heatmaps

In [5]:
# Monthly temperature statistics at the South Pole, drawn as a heatmap.
# Each list has one value per month plus a trailing 'Year' entry.

months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Year']
recordHigh = [-14.4,-20.6,-26.7,-27.8,-25.1,-28.8,-33.9,-32.8,-29.3,-25.1,-18.9,-12.3,-12.3]
averageHigh = [-26.0,-37.9,-49.6,-53.0,-53.6,-54.5,-55.2,-54.9,-54.4,-48.4,-36.2,-26.3,-45.8]
dailyMean = [-28.4,-40.9,-53.7,-57.8,-58.0,-58.9,-59.8,-59.7,-59.1,-51.6,-38.2,-28.0,-49.5]
averageLow = [-29.6,-43.1,-56.8,-60.9,-61.5,-62.8,-63.4,-63.2,-61.7,-54.3,-40.1,-29.1,-52.2]
recordLow = [-41.1,-58.9,-71.1,-75.0,-78.3,-82.8,-80.6,-79.3,-79.4,-72.0,-55.0,-41.1,-82.8]

# One heatmap row per statistic, in the same order as the row labels.
row_labels = ['record high', 'aver.high', 'daily mean', 'aver.low', 'record low']
temperature_rows = [recordHigh, averageHigh, dailyMean, averageLow, recordLow]

trace = go.Heatmap(z=temperature_rows, x=months, y=row_labels)
data = [trace]
py.iplot(data)

# Contour Maps

## Exercise 4

Pretend that our heatmap is defined over a 2D domain and plot the same temperature data as a contour map. Remove the `Year` data (last column) and use `go.Contour` to plot the 2D contour map.

In [9]:
# Same temperature table as a contour map; the final 'Year' column is
# dropped from every row and from the month labels.
monthly_rows = [recordHigh, averageHigh, dailyMean, averageLow, recordLow]
trace = go.Contour(
    z=[row[:-1] for row in monthly_rows],
    x=months[:-1],
    y=['record high', 'aver.high', 'daily mean', 'aver.low', 'record low'],
    colorscale='Jet',
)
data = [trace]
py.iplot(data)

# Geographical Scatterplot

## Download data

In a terminal:
$ pip install wget

In a Jupyter notebook cell:
#import wget
#wget.download('http://bit.ly/paraviewzip') <br>
!unzip paraviewzip <br>
!mv data/*.csv . <br>
!mv data/*.nc . <br>

In [30]:
def normalize(x, lo=None, hi=None):
    '''Scale x into [0,1] on a logarithmic axis running from lo to hi.

    Parameters
    ----------
    x : number
        Value to scale; must be positive for the logarithm to be defined.
    lo, hi : number, optional
        Values mapped to 0 and 1 respectively. When omitted they fall back to
        the notebook-level globals `smallest` and `largest`, which must be
        defined before the call.

    Returns
    -------
    float
        log10(x/lo) / log10(hi/lo), i.e. 0 at x == lo and 1 at x == hi.
    '''
    # Resolve the bounds at call time so the one-argument form keeps working
    # with whatever `smallest` / `largest` currently hold.
    if lo is None:
        lo = smallest
    if hi is None:
        hi = largest
    return math.log10(x/lo)/math.log10(hi/lo)

In [31]:
# cities.csv holds name, pop, lat, lon for 254 Canadian cities and towns.
df = pd.read_csv('cities.csv')
df.head(5)

Unnamed: 0,name,pop,lat,lon
0,Selkirk,9819.5,50.150025,-96.883322
1,Berens River,522.5,52.366557,-97.033313
2,Pukatawagan,431.0,55.733276,-101.316617
3,Gimli,2316.0,50.633303,-96.999981
4,Island Lake,10.0,53.966588,-94.766578


In [72]:
# Mouse-over label: city name followed by its population expressed in millions.
pop_in_millions = (df['pop']/1e6).astype(str)
df['text'] = df['name'] + '<br>Population ' + pop_in_millions + ' million'

# Populations of the largest and smallest cities; normalize() reads these globals.
largest = df['pop'].max()
smallest = df['pop'].min()

# New column: log-normalized population, scaled to the 0-255 range and rounded.
df['logsize'] = (df['pop'].apply(normalize)*255).round()

In [55]:
# Marker styling: size driven by population, colour by the log-scaled population.
city_markers = dict(
    size = df['pop']/5000,
    color = df['logsize'],
    colorscale = 'Viridis',
    showscale = True,   # show the colourbar
    line = dict(width=0.5, color='rgb(40,40,40)'),
    sizemode = 'area',
)
cities = go.Scattergeo(lon = df['lon'], lat = df['lat'], text = df['text'],
                       marker = city_markers)

# Base-map settings shared by every trace drawn on this geo axis.
# NOTE(review): some cities in the data (e.g. Toronto at lat 43.70) lie just
# south of the 44-degree lower bound - confirm the latitude range is intended.
geo_settings = dict(
    scope = 'north america',
    resolution = 50,   # base layer resolution of km/mm
    lonaxis = dict(range=[-130,-55]),   # plot range, longitude
    lataxis = dict(range=[44,70]),      # plot range, latitude
    showland = True, landcolor = 'rgb(217,217,217)',
    showrivers = True, rivercolor = 'rgb(153,204,255)',
    showlakes = True, lakecolor = 'rgb(153,204,255)',
    subunitwidth = 1, subunitcolor = "rgb(255,255,255)",   # province border
    countrywidth = 2, countrycolor = "rgb(255,255,255)",   # country border
)
layout = go.Layout(title = 'City populations',
                   showlegend = False,   # do not show legend for first plot
                   geo = geo_settings)

fig = go.Figure(data=[cities], layout=layout)
py.iplot(fig)

## Exercise 5

Modify the code to display only the 10 largest cities.

In [67]:
# Sort data frame by population size and keep the ten largest.
df_largest = df.sort_values(by=['pop'],ascending=False)[:10]
df_largest

Unnamed: 0,name,pop,lat,lon,text,logsize
253,Toronto,4573710.5,43.69998,-79.420021,Toronto<br>Population 4.5737105 million,255.0
251,Montréal,3017278.0,45.499999,-73.583297,Montréal<br>Population 3.017278 million,247.0
252,Vancouver,1458415.0,49.273417,-123.121644,Vancouver<br>Population 1.458415 million,233.0
233,Calgary,1012661.0,51.082992,-114.079998,Calgary<br>Population 1.012661 million,226.0
242,Ottawa,978564.5,45.416697,-75.700015,Ottawa<br>Population 0.9785645 million,225.0
250,Edmonton,885195.5,53.550025,-113.499982,Edmonton<br>Population 0.8851955 million,223.0
157,Hamilton,620501.0,43.249982,-79.829996,Hamilton<br>Population 0.620501 million,216.0
229,Winnipeg,603688.0,49.882987,-97.165992,Winnipeg<br>Population 0.603688 million,215.0
245,Québec,576386.0,46.839969,-71.24561,Québec<br>Population 0.576386 million,214.0
36,Kitchener,413056.5,43.449995,-80.500007,Kitchener<br>Population 0.4130565 million,208.0


In [68]:
# Same map as for the full data set, restricted to the ten largest cities.
largest_markers = dict(
    size = df_largest['pop']/5000,
    color = df_largest['logsize'],
    colorscale = 'Viridis',
    showscale = True,   # show the colourbar
    line = dict(width=0.5, color='rgb(40,40,40)'),
    sizemode = 'area',
)
cities_largest = go.Scattergeo(lon = df_largest['lon'],
                               lat = df_largest['lat'],
                               text = df_largest['text'],
                               marker = largest_markers)

# NOTE(review): Toronto (43.70), Hamilton (43.25) and Kitchener (43.45) lie
# just south of the 44-degree lower latitude bound - confirm they still show.
geo_settings = dict(
    scope = 'north america',
    resolution = 50,   # base layer resolution of km/mm
    lonaxis = dict(range=[-130,-55]),   # plot range, longitude
    lataxis = dict(range=[44,70]),      # plot range, latitude
    showland = True, landcolor = 'rgb(217,217,217)',
    showrivers = True, rivercolor = 'rgb(153,204,255)',
    showlakes = True, lakecolor = 'rgb(153,204,255)',
    subunitwidth = 1, subunitcolor = "rgb(255,255,255)",   # province border
    countrywidth = 2, countrycolor = "rgb(255,255,255)",   # country border
)
layout = go.Layout(title = '10 Largest populations',
                   showlegend = False,   # do not show legend for first plot
                   geo = geo_settings)

fig_largest = go.Figure(data=[cities_largest], layout=layout)
py.iplot(fig_largest)

Recall how we combined several scatter plots in one figure before. You can combine several plots on top of a single map – let’s combine scattergeo + choropleth:

In [71]:
# --- Layer 1: Canadian cities as light-blue bubbles sized by population ---
df = pd.read_csv('cities.csv')
pop_in_millions = (df['pop']/1e6).astype(str)
df['text'] = df['name'] + '<br>Population ' + pop_in_millions + ' million'   # mouse-over label

bubble_style = dict(
    size = df['pop']/5000,
    color = "lightblue",
    line = dict(width=0.5, color='rgb(40,40,40)'),
    sizemode = 'area',
)
cities = go.Scattergeo(lon = df['lon'], lat = df['lat'], text = df['text'],
                       marker = bubble_style)

# --- Layer 2: countries shaded by GDP ---
gdp = pd.read_csv('gdp.csv')   # read name, gdp, code for 222 countries

# Custom colourbar stops as [position, colour] pairs from 0 to 1;
# reversescale=True below flips the direction in which they are applied.
gdp_colorscale = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]
border_style = dict(line = dict(color='rgb(180,180,180)', width = 0.5))
countries = go.Choropleth(
    locations = gdp['CODE'],
    z = gdp['GDP (BILLIONS)'],
    text = gdp['COUNTRY'],
    colorscale = gdp_colorscale,
    autocolorscale = False,
    reversescale = True,
    marker = border_style,
    zmin = 0,
    colorbar = dict(tickprefix = '$', title = 'GDP<br>Billions US$'),
)

# --- Combine both traces on a single map ---
layout = go.Layout(hovermode = "x", showlegend = False)  # do not show legend for first plot
fig = go.Figure(data=[cities, countries], layout=layout)
py.iplot(fig)