### <center> Plotting EIA Power Plant Data
<br>
###### This Jupyter Notebook takes in already processed data from the EIA and builds a time series choropleth plot using Plot.ly

In [9]:
# Get some libraries
import pandas as pd
import numpy as np
import requests

import plotly.plotly as py
import plotly.tools as tools
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

#import dash
#import dash_core_components as dcc
#import dash_html_components as html
#from dash.dependencies import Input, Output

init_notebook_mode(connected=True)

<b>Important</b><br><br>You must fill in a Mapbox access token to use this notebook. Plotly username/key not necessary

In [17]:
# Fill these Variables with your copy

# Mapbox Access Token
# Will need to create a free Mapbox account to use
my_mapbox_access_token = 'pk.eyJ1IjoiamJvbmlmaWVsZDMiLCJhIjoiY2o5bjJrdjRjNHl4NTMzcW0zanR4Z2Z6eCJ9.ooDfIgzxDtQTHq5GPHBErA'

# Plotly Username and API Key
# Will need to create a free Plot.ly account to use

username = 'jbonifield3'
plotly_api_key = '6WzCVz8xdvAVZbGfH0qI'


<b> Define some functions used to generate formatting and data traces/layout for the plot


In [10]:
### Define some functions used to generate formatting and data traces for the plot

###   Function "get_formatting"
###   args: Class (str) - whether powerplant is "Clean" or "Fossil", i.e. CO2 emitting

def get_formatting(df, min_point, max_point, min_netgen, max_netgen):
    
    # Get Colors for points
    if df['Class'] == 'Clean':
        vals = ['rgb(0, 128, 0)','rgb(144, 238, 144)']
    elif df['Class'] == 'Fossil':
        vals = ['rgb(255, 0, 0)','rgb(242, 177, 172)']
    else:
        raise Exception('Invalid Class {} found.'.format(Class))
        
    #Normalize netgen data to 0-1 range
    vals.append(min_point + ((df['netgen'] - min_netgen) / (max_netgen - min_netgen)) * (max_point - min_point))
    return vals

###   Function "generate_plot_data"
###   args: 
###     df (pandas dataframe) - data to plot, must have "Latitude", "Lonigitude", "year", & "Formatting" columns
###     start_year (int) - year to start plot. 
###     end_year (int) - year to end plot

def generate_plot_data(df, start_year, end_year):
    data = [go.Scattermapbox(
        lat=df['Latitude'][df['year'] == x],
        lon=df['Longitude'][df['year'] == x],
        mode='markers',
        marker=go.scattermapbox.Marker(
            size=[i[2] for i in df['Formatting'][df['year'] == x].tolist()],
            color=[i[0] for i in df['Formatting'][df['year'] == x].tolist()],
            opacity=0.7
        ),
        text = df[['plant name','netgen']][df['year'] == x]
        .apply(lambda j: '<br>Net Generation (MWh): '.join(j.astype(str)), axis=1),
        #text = df['plant name'][df['year'] == x] + '\n' + 'Net Generation: ' + str(df['netgen'][df['year'] == x]) + 'MWh',
        hoverinfo='text') for x in range(start_year, end_year+1)]
    data[end_year-start_year]['visible'] = True
    return data

###   Function "generate_plot_layout"
###   args: 
###     title (str),
###     start_year (int),
###     end_year (int),
###     mapbox_access_token (str),
###     center (list of lat/long),
###     style (mapbox parameter)
###   First, builds slider then builds layout including slider

def generate_plot_layout(title, start_year, end_year, mapbox_access_token, center = (38,-94) ,style = 'light'):
    
    # Build Slider
    steps = []
    for i in range(start_year,end_year + 1):
        step = dict(
            method = 'update',
            label = str(i),
            args = [
                {'visible': [False] * (end_year-start_year + 1)},
                {'title.text': title + ' (%d)' % i},
                {'frame': {'duration': 300, 'redraw': False},
                 'mode': 'immediate',
                 'transition': {'duration': 300}}
            ]
        )
        step['args'][0]['visible'][i - start_year] = True # Toggle i'th trace to "visible"
        steps.append(step)
        
    new_slider = [dict(
        active = start_year,
        currentvalue = {"prefix": "Year: "},
        activebgcolor = '#d9bec8',
        pad = {"t": 50},
        steps = steps)]
        
    # Build Layout 
    return go.Layout(
        title= {'text': title + ' (%d)' % start_year},
        autosize = True,
        hovermode = 'closest',
        showlegend = False,
        sliders = new_slider,
        mapbox = dict(
            accesstoken = mapbox_access_token,
            bearing = 0,
            center = dict(
                lat = center[0],
                lon = center[1]
            ),
        pitch = 0,
        zoom = 3,
        style = style))


<b>Next, we'll get the data from Google Drive and apply the formatting function to generate a list in the DF with attributes like: </b><br>
    * Color (clean vs polluting) 
    * Size (based on netgen)

In [14]:
### Get cleaned data from Google Drive into Pandas

url = 'https://docs.google.com/spreadsheets/d/1Pn3L_yltqpL92LQ-pxy8GwiXDaaxbsKELlM_Yrv3v_4/export?format=csv'
data = pd.read_csv(url)

# Round Netgen numbers
data.round({'netgen':0})
data.head(5)

Unnamed: 0,plant id,plant name,state,year,County,Latitude,Longitude,In Service Year,Class,PrimaryFuelType,netgen
0,2,Bankhead Dam,AL,1970,Tuscaloosa,33.458665,-87.35682,1963,Clean,Hydroelectric,147701.7
1,2,Bankhead Dam,AL,1971,Tuscaloosa,33.458665,-87.35682,1963,Clean,Hydroelectric,188612.0
2,2,Bankhead Dam,AL,1972,Tuscaloosa,33.458665,-87.35682,1963,Clean,Hydroelectric,158977.6
3,2,Bankhead Dam,AL,1973,Tuscaloosa,33.458665,-87.35682,1963,Clean,Hydroelectric,225054.0
4,2,Bankhead Dam,AL,1974,Tuscaloosa,33.458665,-87.35682,1963,Clean,Hydroelectric,195276.0


In [15]:
min_val = 4
max_val = 20
max_netgen = data['netgen'].max()
min_netgen = data['netgen'].min()
data['Formatting'] = data.apply(lambda x: get_formatting(x, min_val, max_val, min_netgen, max_netgen), axis=1)

<b>Build Plot Components and Plot</b>

In [16]:
### Set Parameters and Build Plot Components

start_year = 1970
end_year = 1975
title = 'US Power Plant Generation' 

plot_data = generate_plot_data(data, start_year, end_year)
plot_layout = generate_plot_layout(title, start_year, end_year, my_mapbox_access_token, style = 'dark')


NameError: name 'my_mapbox_access_token' is not defined

In [309]:
### Plot Data

fig = dict(data = plot_data, layout = plot_layout)
py.iplot(fig, filename='US Power Plants (1970-2000)')