# Advanced plot

There are several libraries that produce amazing plots, such as:

<ul>
    <li><a href="https://plot.ly/">Plotly</a></li>
    <li><a href="https://bokeh.pydata.org/">Bokeh</a></li>
    <li><a href="http://python-visualization.github.io/folium/">Folium</a></li>
    <li><a href="https://matplotlib.org/">Matplotlib</a></li>
    <li><a href="https://seaborn.pydata.org/">Seaborn</a></li>
    <li><a href="https://ggplot2.tidyverse.org/">ggplot</a></li>
    <li><a href="http://www.pygal.org">Pygal</a></li>
    <li><a href="https://github.com/andrea-cuttone/geoplotlib/wiki/User-Guide">Geoplotlib</a></li>
    <li><a href="https://github.com/ResidentMario/missingno">missingno</a></li>
</ul>

Let's use the first one, because it is one of the most widely used libraries in data science and can create complex charts with only a few lines of code.

<br>
<div class="alert alert-info">
<b>Let's start coding</b>
</div>

In [59]:
# Importing libraries
import pandas as pd
import plotly as py
import plotly.graph_objs as go

# Setting the configuration to use plotly in offline mode
py.offline.init_notebook_mode(connected=True)

# Reading dataset
global_power = pd.read_csv('../datasets/global_power_plant_database.csv')

# Preview only the first rows: displaying the whole frame bloats the notebook
# and hides the narrative, while a short head is enough to see the columns.
global_power.head(10)

Unnamed: 0,country,country_long,name,gppd_idnr,capacity_mw,latitude,longitude,fuel1,fuel2,fuel3,...,owner,source,url,geolocation_source,year_of_capacity_data,generation_gwh_2013,generation_gwh_2014,generation_gwh_2015,generation_gwh_2016,estimated_generation_gwh
0,AFG,Afghanistan,Kajaki Hydroelectric Power Plant Afghanistan,GEODB0040538,33.000,32.3220,65.1190,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
1,AFG,Afghanistan,Mahipar Hydroelectric Power Plant Afghanistan,GEODB0040541,66.000,34.5560,69.4787,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
2,AFG,Afghanistan,Naghlu Dam Hydroelectric Power Plant Afghanistan,GEODB0040534,100.000,34.6410,69.7170,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
3,AFG,Afghanistan,Nangarhar (Darunta) Hydroelectric Power Plant ...,GEODB0040536,11.550,34.4847,70.3633,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
4,AFG,Afghanistan,Northwest Kabul Power Plant Afghanistan,GEODB0040540,42.000,34.5638,69.1134,Gas,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
5,AFG,Afghanistan,Pul-e-Khumri Hydroelectric Power Plant Afghani...,GEODB0040537,6.000,35.9416,68.7100,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
6,AFG,Afghanistan,Sarobi Dam Hydroelectric Power Plant Afghanistan,GEODB0040535,22.000,34.5865,69.7757,Hydro,,,...,,GEODB,http://globalenergyobservatory.org,GEODB,2017.0,,,,,
7,ALB,Albania,Bistrica 1,WRI1002169,27.000,39.9116,20.1047,Hydro,,,...,,Energy Charter Secretariat,http://www.energycharter.org/fileadmin/Documen...,GEODB,,,,,,89.132075
8,ALB,Albania,Fierza,WRI1002170,500.000,42.2514,20.0431,Hydro,,,...,,Energy Charter Secretariat,http://www.energycharter.org/fileadmin/Documen...,GEODB,,,,,,1650.593990
9,ALB,Albania,Koman,WRI1002171,600.000,42.1033,19.8224,Hydro,,,...,,Energy Charter Secretariat,http://www.energycharter.org/fileadmin/Documen...,GEODB,,,,,,1980.712788


## Polar Chart: Top 5 producers

In [60]:
# Filtering dataset by country and fuel
countries_fuel = global_power[['country', 'fuel1']]

# Top 5 countries ranked by number of rows with a non-null primary fuel
# (i.e. number of listed plants, not the amount of energy produced).
top_5 = (
    countries_fuel
    .groupby('country')
    .count()
    .sort_values(by=['fuel1'], ascending=False)
    .head(5)
    .index
    .tolist()
)

# Choosing energies
energies = ['Hydro', 'Wind', 'Oil', 'Gas', 'Solar']

# One entry per polar subplot, in the same order as top_5:
# (x domain, y domain, upper bound of the radial axis).
# Subplots alternate between the bottom and top half of the figure.
polar_specs = [
    ([0, .2],  [0, .5], 2000),
    ([.2, .4], [.5, 1], 1000),
    ([.4, .6], [0, .5], 1200),
    ([.6, .8], [.5, 1], 800),
    ([.8, 1],  [0, .5], 800),
]

# One filled polar trace per country, each bound to its own subplot
data = []
for index, country in enumerate(top_5, start=1):
    plant_counts = (
        countries_fuel
        .loc[countries_fuel['country'] == country]
        .groupby('fuel1')
        .count()
    )
    # reindex (rather than .loc) so a fuel with no plants in this country
    # becomes 0 instead of raising KeyError on the missing label.
    plant_counts = plant_counts['country'].reindex(energies, fill_value=0).tolist()
    data.append(
        go.Scatterpolar(
            r = plant_counts,
            theta = energies,
            fill = 'toself',
            name = country,
            subplot = "polar" + str(index)
        )
    )

# Build the polar1..polar5 layout entries from the spec table
polar_axes = {
    "polar" + str(index): dict(
        domain = dict(x = x_domain, y = y_domain),
        radialaxis = dict(visible = True, range = [0, radial_max])
    )
    for index, (x_domain, y_domain, radial_max) in enumerate(polar_specs, start=1)
}

layout = go.Layout(
    title = 'No. of production places in major countries',
    **polar_axes
)

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig)

In [61]:
# Same per-country plant counts as the previous chart, but every trace is
# drawn on one shared polar subplot so the countries can be compared directly.
data_ = []
for country in top_5:
    plant_counts = (
        countries_fuel
        .loc[countries_fuel['country'] == country]
        .groupby('fuel1')
        .count()
    )
    # reindex (rather than .loc) so a fuel with no plants in this country
    # becomes 0 instead of raising KeyError on the missing label.
    plant_counts = plant_counts['country'].reindex(energies, fill_value=0).tolist()
    data_.append(
        go.Scatterpolar(
            r = plant_counts,
            theta = energies,
            fill = 'toself',
            name = country,
            subplot = "polar"
        )
    )

# A single polar subplot filling the whole figure
layout_ = go.Layout(
    title = 'No. of production places in major countries',
    polar = dict(
        domain = dict(
            x = [0, 1],
            y = [0, 1]
        ),
        radialaxis = dict(
            visible = True,
            range = [0, 2000]
        )
    ),
)

fig = go.Figure(data=data_, layout=layout_)
py.offline.iplot(fig)

## Places of energy production on map

In [119]:
# Limiting the dataset to USA plants whose primary fuel is one of the chosen
# energies, keeping only the columns needed for the map.
# NOTE(review): no capacity filter is applied here; add a condition such as
# `global_power['capacity_mw'] < 2000` to the mask if one is wanted.
is_usa = global_power['country'] == 'USA'
uses_chosen_fuel = global_power['fuel1'].isin(energies)

# Single .loc call selects rows and columns together (no chained indexing)
countries_fuel_pos = global_power.loc[
    is_usa & uses_chosen_fuel,
    ['fuel1', 'latitude', 'longitude', 'capacity_mw']
]

# Preview only the first rows instead of rendering the whole frame
countries_fuel_pos.head(10)

Unnamed: 0,fuel1,latitude,longitude,capacity_mw
20181,Solar,40.2003,-74.5761,1.9
20182,Solar,42.0761,-71.4227,2.0
20183,Solar,33.7943,-118.2414,1.3
20184,Solar,40.5358,-74.3913,3.8
20185,Gas,41.9084,-89.0466,4.2
20186,Solar,44.4777,-73.1534,1.5
20187,Solar,40.5161,-74.3400,1.9
20188,Oil,33.7583,-84.3869,2.4
20189,Solar,42.1091,-72.1712,2.0
20190,Solar,42.1093,-72.1705,3.0


In [120]:
# Colour scale for capacity: green at the low end, black at the high end
scl     = [[0,"rgb(15,155,15)"], [1,"rgb(0,0,0)"]]
# One marker symbol per entry of `energies`, paired by position
markers = ['circle', 'square', 'star', 'diamond', 'triangle-up']

# Shared upper bound so every trace maps capacity to colour on the same scale
capacity_max = countries_fuel_pos['capacity_mw'].max()

# One scattergeo trace per energy type
data_map = []
for index, (energy, symbol) in enumerate(zip(energies, markers)):
    c = countries_fuel_pos.loc[countries_fuel_pos['fuel1'] == energy]
    data_map.append(
        dict(
            type = 'scattergeo',
            locationmode = 'USA-states',
            lon = c['longitude'],
            lat = c['latitude'],
            # Label the trace so the legend shows the fuel
            name = energy,
            mode = 'markers',
            marker = dict(
                size = 8,
                opacity = 1,
                symbol = symbol,
                colorscale = scl,
                cmin = 0,
                # Colour each point by its own capacity: the values must come
                # from the same subset `c` as lon/lat so they line up 1:1.
                color = c['capacity_mw'],
                cmax = capacity_max,
                # All traces share cmin/cmax, so one colorbar is enough;
                # show it on the first trace only.
                showscale = (index == 0),
                line = dict (
                    color = 'rgb(0,0,0)',
                    width = 1
                ),
                colorbar=dict(
                    title="Capacity (MW)"
                )
            )
        )
    )

layout_map = dict(
        title = 'US power plants by primary fuel<br>(marker colour shows capacity in MW)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = "rgb(250, 250, 250)",
            subunitcolor = "rgb(217, 217, 217)",
            countrycolor = "rgb(217, 217, 217)",
            countrywidth = 1,
            subunitwidth = 1
        ),
    )

fig = dict( data=data_map, layout=layout_map )
py.offline.iplot( fig )