# Advanced plot

There are several libraries that produce amazing plots, such as:

<ul>
    <li><a href="https://plot.ly/">Plotly</a></li>
    <li><a href="https://bokeh.pydata.org/">Bokeh</a></li>
    <li><a href="http://python-visualization.github.io/folium/">Folium</a></li>
    <li><a href="https://matplotlib.org/">Matplotlib</a></li>
    <li><a href="https://seaborn.pydata.org/">Seaborn</a></li>
    <li><a href="https://ggplot2.tidyverse.org/">ggplot</a></li>
    <li><a href="http://www.pygal.org">Pygal</a></li>
    <li><a href="https://github.com/andrea-cuttone/geoplotlib/wiki/User-Guide">Geoplotlib</a></li>
    <li><a href="https://github.com/ResidentMario/missingno">missingno</a></li>
</ul>

Let's use the first one, Plotly, because it is one of the most widely used libraries in data science and can create complex charts with just a few lines of code.
<br>
<div class="alert alert-info">
<b>Let's start coding</b>
</div>

In [1]:
# Importing libraries
import pandas as pd
import plotly as py
import plotly.graph_objs as go
from __future__ import division

# Setting the configuration to use plotly in offline mode
# (per the plotly docs, connected=True loads plotly.js from a CDN instead of
# embedding the library in the notebook)
py.offline.init_notebook_mode(connected=True)

# Reading dataset
global_power = pd.read_csv('../datasets/global_power_plant_database.csv')

# Preview only the first rows rather than dumping the whole frame as cell output
global_power.head(10)

Unnamed: 0,country,country_long,name,gppd_idnr,capacity_mw,latitude,longitude,fuel1,fuel2,fuel3,...,owner,source,url,geolocation_source,year_of_capacity_data,generation_gwh_2013,generation_gwh_2014,generation_gwh_2015,generation_gwh_2016,estimated_generation_gwh
0,AFG,Afghanistan,Kajaki Hydroelectric Power Plant Afghanistan,GEODB0040538,33.000,32.3220,65.1190,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
1,AFG,Afghanistan,Mahipar Hydroelectric Power Plant Afghanistan,GEODB0040541,66.000,34.5560,69.4787,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
2,AFG,Afghanistan,Naghlu Dam Hydroelectric Power Plant Afghanistan,GEODB0040534,100.000,34.6410,69.7170,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
3,AFG,Afghanistan,Nangarhar (Darunta) Hydroelectric Power Plant ...,GEODB0040536,11.550,34.4847,70.3633,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
4,AFG,Afghanistan,Northwest Kabul Power Plant Afghanistan,GEODB0040540,42.000,34.5638,69.1134,Gas,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
5,AFG,Afghanistan,Pul-e-Khumri Hydroelectric Power Plant Afghani...,GEODB0040537,6.000,35.9416,68.7100,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
6,AFG,Afghanistan,Sarobi Dam Hydroelectric Power Plant Afghanistan,GEODB0040535,22.000,34.5865,69.7757,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
7,ALB,Albania,Bistrica 1,WRI1002169,27.000,39.9116,20.1047,Hydro,,,...,,Energy Charter Secretariat,http://www.energycharter.org/fileadmin/Documen...,GEODB,,,,,,89.132075
8,ALB,Albania,Fierza,WRI1002170,500.000,42.2514,20.0431,Hydro,,,...,,Energy Charter Secretariat,http://www.energycharter.org/fileadmin/Documen...,GEODB,,,,,,1650.593990
9,ALB,Albania,Koman,WRI1002171,600.000,42.1033,19.8224,Hydro,,,...,,Energy Charter Secretariat,http://www.energycharter.org/fileadmin/Documen...,GEODB,,,,,,1980.712788


## 1 - Polar Chart: Top 5 producers

In [46]:
# Filtering dataset by country and fuel
countries_fuel = global_power[['country', 'fuel1', 'country_long']]

# Getting the top 5 countries by number of rows with a non-null primary fuel
top_5 = (
    countries_fuel
    .groupby('country')
    .count()
    .sort_values(by=['fuel1'], ascending=False)
    .head(5)
    .index
    .tolist()
)

# Choosing energies
energies = ['Hydro', 'Wind', 'Oil', 'Gas', 'Solar']
data  = []

# One polar trace per country, each bound to its own subplot ("polar1", "polar2", ...)
for index, country in enumerate(top_5, start=1):
    plants_per_fuel = (
        countries_fuel
        .loc[countries_fuel['country'] == country]
        .groupby('fuel1')['country']
        .count()
        # reindex instead of .loc: a fuel with no plants in this country becomes 0
        # rather than raising KeyError for the missing label
        .reindex(energies, fill_value=0)
        .tolist()
    )
    data.append(
        go.Scatterpolar(
            r = plants_per_fuel,
            theta = energies,
            fill = 'toself',
            name = country,
            subplot = "polar" + str(index)
        )
    )

# (x-domain, y-domain, upper bound of the radial axis) for subplots polar1 ... polar5.
# The y-domains alternate between the lower and the upper half of the figure.
polar_specs = [
    ([0, .2],  [0, .5], 2000),
    ([.2, .4], [.5, 1], 1000),
    ([.4, .6], [0, .5], 1200),
    ([.6, .8], [.5, 1], 800),
    ([.8, 1],  [0, .5], 800),
]

# Build the "polarN" layout entries from the table above
polar_axes = {}
for position, (x_domain, y_domain, r_max) in enumerate(polar_specs, start=1):
    polar_axes['polar' + str(position)] = dict(
        domain = dict(x = x_domain, y = y_domain),
        radialaxis = dict(visible = True, range = [0, r_max])
    )

layout = go.Layout(
    title = 'No. of production places in major countries',
    **polar_axes
)

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig)

In [47]:
# Same per-country fuel counts as the previous chart, but every trace is drawn
# on one shared polar subplot so the countries can be compared directly.
data_ = []

for country in top_5:
    plants_per_fuel = (
        countries_fuel
        .loc[countries_fuel['country'] == country]
        .groupby('fuel1')['country']
        .count()
        # reindex instead of .loc: a fuel with no plants in this country becomes 0
        # rather than raising KeyError for the missing label
        .reindex(energies, fill_value=0)
        .tolist()
    )
    data_.append(
        go.Scatterpolar(
            r = plants_per_fuel,
            theta = energies,
            fill = 'toself',
            name = country,
            subplot = "polar"
        )
    )

layout_ = go.Layout(
    title = 'No. of production places in major countries',
    polar = dict(
        # The single subplot spans the whole figure
        domain = dict(
            x = [0, 1],
            y = [0, 1]
        ),
        radialaxis = dict(
            visible = True,
            range = [0, 2000]
        )
    ),
)

fig = go.Figure(data=data_, layout=layout_)
py.offline.iplot(fig)

## 2 - Map: Places of energy production

In [48]:
# Rows located in Brazil whose primary fuel is one of the selected energies,
# keeping only the fuel, the coordinates and the capacity
in_brazil = global_power['country'] == 'BRA'
has_selected_fuel = global_power['fuel1'].isin(energies)
countries_fuel_pos = global_power.loc[
    in_brazil & has_selected_fuel,
    ['fuel1', 'latitude', 'longitude', 'capacity_mw']
]
countries_fuel_pos

Unnamed: 0,fuel1,latitude,longitude,capacity_mw
1045,Hydro,-29.0648,-51.6749,100.71000
1046,Oil,-23.5927,-48.0901,3.84000
1047,Oil,-22.8152,-47.1781,6.40000
1048,Oil,-23.9277,-52.4869,2.19000
1049,Oil,-23.5218,-46.6691,3.87440
1050,Oil,-7.1110,-34.8536,1.35000
1051,Oil,-20.4633,-54.5918,2.16000
1053,Hydro,-13.0992,-54.8189,6.66000
1055,Oil,-16.0044,-49.7867,3.68800
1056,Hydro,-30.7944,-52.6272,4.86000


In [50]:
energies = ['Hydro', 'Wind', 'Oil', 'Gas', 'Solar']

# One flat colour per energy, in the same order as `energies`.
# Each colorscale maps both ends (0 and 1) to the same colour.
flat_rgb = [
    "rgb(0,191,255)",
    "rgb(173,255,47)",
    "rgb(105,105,105)",
    "rgb(255,69,0)",
    "rgb(255,215,0)",
]
colors = [[[0, rgb], [1, rgb]] for rgb in flat_rgb]

# One marker symbol per energy, same order as `energies`
markers = ['circle', 'square', 'diamond-tall-dot', 'hexagram', 'triangle-up']

# One scattergeo trace per energy, each with its own marker symbol and flat colour
data_map = []

# Shared upper bound so every trace is scaled against the same capacity range
max_capacity = countries_fuel_pos['capacity_mw'].max()

for energy, symbol, colorscale in zip(energies, markers, colors):
    plants = countries_fuel_pos.loc[countries_fuel_pos['fuel1'] == energy]
    data_map.append(
        dict(
            type = 'scattergeo',
            locationmode = 'country names',
            lon = plants['longitude'],
            lat = plants['latitude'],
            mode = 'markers',
            name = energy,
            marker = dict(
                size = 8,
                opacity = 1,
                symbol = symbol,
                colorscale = colorscale,
                cmin = 0,
                # Capacities of this energy's plants only, so the colour array
                # lines up one-to-one with the lon/lat arrays above
                color = plants['capacity_mw'],
                cmax = max_capacity,
                line = dict (
                    color = 'rgb(0,0,0)',
                    width = 1
                )
            )
        )
    )

# Geo axis: South-America scope on a Mercator projection, light-grey land and borders
geo_settings = {
    'scope': 'south america',
    'projection': {'type': 'mercator'},
    'showland': True,
    'landcolor': "rgb(250, 250, 250)",
    'subunitcolor': "rgb(217, 217, 217)",
    'countrycolor': "rgb(217, 217, 217)",
    'countrywidth': 1,
    'subunitwidth': 1,
}

# Square 800x800 figure
layout_map = {
    'title': 'Places of energy production<br>in Brazil',
    'width': 800,
    'height': 800,
    'geo': geo_settings,
}

fig = {'data': data_map, 'layout': layout_map}
py.offline.iplot(fig)

## 3 - Funnel Chart: Oil production in Brazil in relation to the world

In [51]:
# Reading a new dataset of countries
all_countries = pd.read_csv('../datasets/all_countries.csv')

# Preview only the first rows rather than dumping the whole frame as cell output
all_countries.head(10)

Unnamed: 0,name,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
0,Afghanistan,AF,AFG,4,ISO 3166-2:AF,Asia,Southern Asia,,142.0,34.0,
1,Åland Islands,AX,ALA,248,ISO 3166-2:AX,Europe,Northern Europe,,150.0,154.0,
2,Albania,AL,ALB,8,ISO 3166-2:AL,Europe,Southern Europe,,150.0,39.0,
3,Algeria,DZ,DZA,12,ISO 3166-2:DZ,Africa,Northern Africa,,2.0,15.0,
4,American Samoa,AS,ASM,16,ISO 3166-2:AS,Oceania,Polynesia,,9.0,61.0,
5,Andorra,AD,AND,20,ISO 3166-2:AD,Europe,Southern Europe,,150.0,39.0,
6,Angola,AO,AGO,24,ISO 3166-2:AO,Africa,Sub-Saharan Africa,Middle Africa,2.0,202.0,17.0
7,Anguilla,AI,AIA,660,ISO 3166-2:AI,Americas,Latin America and the Caribbean,Caribbean,19.0,419.0,29.0
8,Antarctica,AQ,ATA,10,ISO 3166-2:AQ,,,,,,
9,Antigua and Barbuda,AG,ATG,28,ISO 3166-2:AG,Americas,Latin America and the Caribbean,Caribbean,19.0,419.0,29.0


In [52]:
# Keep only the rows whose primary fuel is oil, as a single-column frame of country codes
is_oil = global_power['fuel1'] == 'Oil'
global_oil = global_power.loc[is_oil, ['country']]

# Number of places which produce oil in world
# (.size equals the row count here because the frame has exactly one column)
global_oil_size = global_oil.size
print("Global: ", global_oil_size)

Global:  2925


In [53]:
# Countries whose region is 'Americas', and their alpha-3 codes
in_americas = all_countries['region'] == 'Americas'
american_countries      = all_countries.loc[in_americas]
american_countries_code = american_countries['alpha-3'].unique().tolist()

# Oil rows whose country code belongs to one of those countries
is_american = global_oil['country'].isin(american_countries_code)
american_oil = global_oil.loc[is_american]

# Number of places which produce oil in america
american_oil_size = american_oil.size
print("America: ", american_oil_size)

America:  2385


In [54]:
# American countries whose intermediate region is 'South America', and their alpha-3 codes
in_south_america = american_countries['intermediate-region'] == 'South America'
south_american_countries      = american_countries.loc[in_south_america]
south_american_countries_code = south_american_countries['alpha-3'].unique().tolist()

# American oil rows whose country code belongs to one of those countries
is_south_american = american_oil['country'].isin(south_american_countries_code)
south_american_oil = american_oil.loc[is_south_american]

# Number of places which produce oil in south-america
south_american_oil_size = south_american_oil.size
print("South-America: ", south_american_oil_size)

South-America:  818


In [55]:
# South-American oil rows whose country code is Brazil's ('BRA')
is_brazil = south_american_oil['country'] == 'BRA'
brazil_oil = south_american_oil.loc[is_brazil]

# Number of places which produce oil in brazil
brazil_oil_size = brazil_oil.size
print("Brazil: ", brazil_oil_size)

Brazil:  627


<div class="alert alert-info">
<b>Creation of funnel Chart</b>
</div>

In [56]:
# Funnel stages from widest to narrowest scope; `values` and `phases` are parallel lists
values = [
    global_oil_size,
    american_oil_size,
    south_american_oil_size,
    brazil_oil_size,
]
phases  = [
    'World',
    'America',
    'S.America',
    'Brazil',
]

# Fill colour of each funnel section, in the same order as `phases`
colors = [
    'rgb(32,155,160)',
    'rgb(253,93,124)',
    'rgb(28,119,139)',
    'rgb(182,231,235)',
]

In [57]:
# Number of funnel sections and width given to the largest one
n_phase = len(phases)
plot_width = 400

# Height of one section and vertical gap between consecutive sections
section_h = 100
section_d = 10

# Scale factor: the largest value maps to the full plot width
largest_value = max(values)
unit_width = plot_width / largest_value

# Width of each section, truncated to an integer
phase_w = []
for value in values:
    phase_w.append(int(value * unit_width))

# Total plot height: all sections plus a gap above, between and below them
height = n_phase * section_h + (n_phase + 1) * section_d

In [58]:
# list containing all the plot shapes
shapes = []

# list containing the Y-axis location for each section's name and value text
label_y = []

# Vertical distance from the top edge of one section to the top edge of the next
section_step = section_h + section_d

for i in range(n_phase):
    # Derive this section's top edge from its index rather than decrementing
    # `height` in place, so re-running this cell always gives the same shapes
    top_y = height - i * section_step
    bottom_y = top_y - section_h

    # Half-widths of the top and bottom edges. The bottom edge matches the next
    # section's width; the last section has no successor, so it is a rectangle.
    top_half_w = phase_w[i] / 2
    bottom_half_w = phase_w[min(i + 1, n_phase - 1)] / 2

    # SVG code to draw polygons (symmetric around x = 0)
    path = 'M {0} {1} L {2} {3} L -{2} {3} L -{0} {1} Z'.format(
        top_half_w, top_y, bottom_half_w, bottom_y
    )

    shapes.append({
        'type': 'path',
        'path': path,
        'fillcolor': colors[i],
        'line': {
            'width': 1,
            'color': colors[i]
        }
    })

    # Y-axis location for this section's details (text): its vertical middle
    label_y.append(top_y - (section_h / 2))

In [59]:
def _side_text_trace(x_position, texts):
    """Return a text-only scatter trace with one label per funnel section.

    Every label is placed at the same `x_position` and at the section
    mid-heights stored in `label_y`.
    """
    return go.Scatter(
        x=[x_position] * n_phase,
        y=label_y,
        mode='text',
        text=texts,
        textfont=dict(
            color='rgb(200,200,200)',
            size=15
        )
    )


# For phase names (left of the funnel)
label_trace = _side_text_trace(-350, phases)

# For phase values (right of the funnel)
value_trace = _side_text_trace(350, values)

data = [label_trace, value_trace]
 
# Same dark colour for the paper and the plotting area
background = 'rgba(44,58,71,1)'

# Both axes hide their tick labels and zero line
hidden_axis = {'showticklabels': False, 'zeroline': False}

layout = go.Layout(
    title="<b>Funnel Chart</b>",
    titlefont={'size': 20, 'color': 'rgb(203,203,203)'},
    shapes=shapes,
    height=560,
    width=900,
    showlegend=False,
    paper_bgcolor=background,
    plot_bgcolor=background,
    xaxis=dict(hidden_axis),
    yaxis=dict(hidden_axis)
)

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig)

In [96]:
import squarify

# Top 12 countries by number of rows with a non-null primary fuel
countries_fuel_sorted = (
    countries_fuel
    .groupby('country')
    .count()
    .sort_values(by=['fuel1'], ascending=False)
    .head(12)
)

# Origin and size of the area passed to squarify
x = y = 0.
width = height = 500.

# Per-country counts and codes (parallel lists), plus a code -> full-name table
values      = countries_fuel_sorted['fuel1'].tolist()
values_names = countries_fuel_sorted.index.tolist()
is_top_country = countries_fuel['country'].isin(values_names)
values_fullnames = countries_fuel.loc[is_top_country, ['country', 'country_long']].drop_duplicates()

# Scale the counts to the area, then compute one rectangle per country
normed = squarify.normalize_sizes(values, width, height)
rects = squarify.squarify(normed, x, y, width, height)

# Choose colors from http://colorbrewer2.org/ under "Export"
color_brewer = ['rgb(141,211,199)','rgb(255,255,179)','rgb(190,186,218)',
                'rgb(251,128,114)','rgb(128,177,211)','rgb(253,180,98)',
                'rgb(179,222,105)','rgb(252,205,229)','rgb(217,217,217)',
                'rgb(188,128,189)','rgb(204,235,197)','rgb(255,237,111)']
shapes = []
annotations = []

# One filled rectangle and one centred "CODE - count" label per country
for index, r in enumerate(rects):
    left, bottom = r['x'], r['y']
    # The palette is reused from its start if there are more rectangles than colours
    fill = color_brewer[index % len(color_brewer)]
    shapes.append(
        dict(
            type = 'rect',
            x0 = left,
            y0 = bottom,
            x1 = left + r['dx'],
            y1 = bottom + r['dy'],
            line = dict( width = 2 ),
            fillcolor = fill
        )
    )
    annotations.append(
        dict(
            x = left + (r['dx']/2),
            y = bottom + (r['dy']/2),
            text = str(values_names[index]) + ' - ' + str(values[index]),
            showarrow = False
        )
    )

# For hover text: the full country name(s) for each code, as one plain string.
# Uses `values_names` (the list of codes defined for the treemap); the previous
# reference to `values_name` was undefined, and each entry was a list, not a string.
hover_names = [
    ', '.join(
        values_fullnames
        .loc[values_fullnames['country'] == code, 'country_long']
        .astype(str)
        .tolist()
    )
    for code in values_names
]

# NOTE(review): mode='text' also draws these names at the rectangle centres,
# the same position as the annotations — confirm the overlap is intended.
trace0 = go.Scatter(
    x = [ r['x']+(r['dx']/2) for r in rects ],
    y = [ r['y']+(r['dy']/2) for r in rects ],
    text = hover_names,
    mode = 'text',
)
        
# Both axes hide their grid, zero line and tick labels
hidden_axis = {'showgrid': False, 'zeroline': False, 'showticklabels': False}

layout = {
    'title': "Top 12 countries with more energy productions",
    'width': 900,
    'height': 600,
    'xaxis': dict(hidden_axis),
    'yaxis': dict(hidden_axis),
    'shapes': shapes,
    'annotations': annotations,
    'hovermode': 'closest',
}

figure = {'data': [trace0], 'layout': layout}
py.offline.iplot(figure)