In [34]:
# Import dependecies

import pandas as pd
import csv
import os
import requests
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing

import plotly
import plotly.plotly as py
plotly.tools.set_credentials_file(username='jherronplahn', api_key='removed for security')

In [135]:
# Read in income xlsx
incomefile = os.path.join('Resources', 'median_income_yearly.xlsx')
income_df = pd.read_excel(incomefile, encoding='UTF-8')
income_df.head()

Unnamed: 0,State,2017 Median Income,2016 Median Income,2015 Median Income,2014 Median Income,2013 Median Income,2012 Median Income,2011 Median Income,2010 Median Income,2009 Median Income,2008 Median Income,2007 Median Income,2006 Median Income,2005 Median Income,2004 Median Income,2003 Median Income,2002 Median Income,2001 Median Income,2000 Median Income
0,United States,61372.0,59039.0,56516.0,53657.0,53585.0,51017.0,50054.0,49276.0,49777.0,50303.0,50233.0,48201.0,46326.0,44334.0,43318.0,42409.0,42228.0,41990.0
1,Alabama,51113.0,47221.0,44509.0,42278.0,47320.0,43464.0,42590.0,40933.0,39980.0,44476.0,42212.0,37952.0,37150.0,36629.0,37255.0,37603.0,35160.0,35424.0
2,Alaska,72231.0,75723.0,75112.0,67629.0,72472.0,63648.0,57431.0,57848.0,61604.0,63989.0,62993.0,56418.0,55891.0,55063.0,51837.0,52774.0,57363.0,52847.0
3,Arizona,61125.0,57100.0,52248.0,49254.0,52611.0,47044.0,48621.0,46896.0,45739.0,46914.0,47215.0,46657.0,45245.0,43846.0,41166.0,39734.0,42704.0,39783.0
4,Arkansas,48829.0,45907.0,42798.0,44922.0,39376.0,39018.0,41302.0,38587.0,36538.0,39586.0,40795.0,37057.0,36658.0,34984.0,32002.0,32387.0,33339.0,29697.0


In [136]:
# reorganize to include only 2000, 2005, 2010, and 2014 data
reorg_income = income_df[["State","2000 Median Income","2005 Median Income","2010 Median Income","2014 Median Income"]]
reorg_income.head()

Unnamed: 0,State,2000 Median Income,2005 Median Income,2010 Median Income,2014 Median Income
0,United States,41990.0,46326.0,49276.0,53657.0
1,Alabama,35424.0,37150.0,40933.0,42278.0
2,Alaska,52847.0,55891.0,57848.0,67629.0
3,Arizona,39783.0,45245.0,46896.0,49254.0
4,Arkansas,29697.0,36658.0,38587.0,44922.0


In [137]:
# Read in state abbbreviations from online csv into dataframe
state_codes = pd.read_csv('https://raw.githubusercontent.com/jasonong/List-of-US-States/master/states.csv')
state_codes.head(5)

Unnamed: 0,State,Abbreviation
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA


In [138]:
# Merge dataframes
income_state_codes = pd.merge(reorg_income, state_codes, on='State')
income_state_codes.head()

Unnamed: 0,State,2000 Median Income,2005 Median Income,2010 Median Income,2014 Median Income,Abbreviation
0,Alabama,35424.0,37150.0,40933.0,42278.0,AL
1,Alaska,52847.0,55891.0,57848.0,67629.0,AK
2,Arizona,39783.0,45245.0,46896.0,49254.0,AZ
3,Arkansas,29697.0,36658.0,38587.0,44922.0,AR
4,California,46816.0,51755.0,54283.0,60487.0,CA


In [139]:
# Calculate percentage of US total and add to dataframe

income_state_codes['Percent 2000']= income_state_codes['2000 Median Income']/income_state_codes['2000 Median Income'].sum()
income_state_codes['Percent 2005']= income_state_codes['2005 Median Income']/income_state_codes['2005 Median Income'].sum()
income_state_codes['Percent 2010']= income_state_codes['2010 Median Income']/income_state_codes['2010 Median Income'].sum()
income_state_codes['Percent 2014']= income_state_codes['2014 Median Income']/income_state_codes['2014 Median Income'].sum()

income_state_codes.head()

Unnamed: 0,State,2000 Median Income,2005 Median Income,2010 Median Income,2014 Median Income,Abbreviation,Percent 2000,Percent 2005,Percent 2010,Percent 2014
0,Alabama,35424.0,37150.0,40933.0,42278.0,AL,0.01696,0.01607,0.016413,0.015384
1,Alaska,52847.0,55891.0,57848.0,67629.0,AK,0.025302,0.024177,0.023195,0.024609
2,Arizona,39783.0,45245.0,46896.0,49254.0,AZ,0.019047,0.019572,0.018803,0.017923
3,Arkansas,29697.0,36658.0,38587.0,44922.0,AR,0.014218,0.015858,0.015472,0.016346
4,California,46816.0,51755.0,54283.0,60487.0,CA,0.022414,0.022388,0.021765,0.02201


In [144]:
# Normalize percentage data between 0 and 1 for map comparisons 

# Create x, where x the 'scores' column's values as floats
x_2000 = income_state_codes[['Percent 2000']].values.astype(float)
x_2005 = income_state_codes[['Percent 2005']].values.astype(float)
x_2010 = income_state_codes[['Percent 2010']].values.astype(float)
x_2014 = income_state_codes[['Percent 2014']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
x_00_scaled = min_max_scaler.fit_transform(x_2000)
x_05_scaled = min_max_scaler.fit_transform(x_2005)
x_10_scaled = min_max_scaler.fit_transform(x_2010)
x_14_scaled = min_max_scaler.fit_transform(x_2014)

income_state_codes['Normalized 2000']= x_00_scaled
income_state_codes['Normalized 2005']= x_05_scaled
income_state_codes['Normalized 2010']= x_10_scaled
income_state_codes['Normalized 2014']= x_14_scaled

income_state_codes

Unnamed: 0,State,2000 Median Income,2005 Median Income,2010 Median Income,2014 Median Income,Abbreviation,Percent 2000,Percent 2005,Percent 2010,Percent 2014,Normalized 2000,Normalized 2005,Normalized 2010,Normalized 2014,text
0,Alabama,35424.0,37150.0,40933.0,42278.0,AL,0.0169600344716147,0.0160703409421242,0.0164125638932415,0.0153842962861614,0.239333,0.140196,0.097391,0.166248,AL<br>2010 Median Income 40933.0<br>Percent of...
1,Alaska,52847.0,55891.0,57848.0,67629.0,AK,0.0253016864758758,0.0241773196661175,0.0231948304814266,0.0246091246874689,0.932813,0.754796,0.691462,0.789981,AK<br>2010 Median Income 57848.0<br>Percent of...
2,Arizona,39783.0,45245.0,46896.0,49254.0,AZ,0.0190470034830694,0.0195720747221107,0.0188034983103475,0.0179227524783243,0.412832,0.405667,0.306817,0.337885,AZ<br>2010 Median Income 46896.0<br>Percent of...
3,Arkansas,29697.0,36658.0,38587.0,44922.0,AR,0.0142181047793457,0.0158575116623524,0.0154719078237244,0.0163464061158745,0.011384,0.124061,0.014997,0.231301,AR<br>2010 Median Income 38587.0<br>Percent of...
4,California,46816.0,51755.0,54283.0,60487.0,CA,0.022414209965648,0.0223881694605556,0.0217654021404937,0.0220102637177976,0.692764,0.619158,0.566256,0.61426,CA<br>2010 Median Income 54283.0<br>Percent of...
5,Colorado,48240.0,50449.0,60233.0,60940.0,CO,0.0230959819024022,0.0218232201935189,0.0241511240559357,0.0221751032612393,0.749443,0.576329,0.775226,0.625406,CO<br>2010 Median Income 60233.0<br>Percent of...
6,Connecticut,50172.0,56835.0,65998.0,70161.0,CT,0.0240209702323243,0.0245856750321839,0.0264626680630824,0.0255304794865739,0.826341,0.785754,0.977698,0.852278,CT<br>2010 Median Income 65998.0<br>Percent of...
7,Delaware,50365.0,51235.0,55214.0,57522.0,DE,0.024113373310831,0.0221632279453495,0.0221386974519688,0.020931347059288,0.834023,0.602105,0.598953,0.54131,DE<br>2010 Median Income 55214.0<br>Percent of...
8,Florida,38856.0,42990.0,44066.0,46140.0,FL,0.0186031814427807,0.0185966071898229,0.0176687767942633,0.0167896170737378,0.375935,0.331715,0.207425,0.261269,FL<br>2010 Median Income 44066.0<br>Percent of...
9,Georgia,41901.0,45926.0,44117.0,49555.0,GA,0.0200610434845057,0.0198666615910632,0.0176892258392528,0.0180322816230836,0.497134,0.428,0.209216,0.345291,GA<br>2010 Median Income 44117.0<br>Percent of...


In [141]:
# Choropleth US Map Plot for Year 2000 - MEDIAN HOUSEHOLD INCOME

df = income_state_codes

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['Abbreviation'] + '<br>' +\
    '2000 Median Income '+df['2000 Median Income']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2000']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['Abbreviation'],
        z = df['Normalized 2000'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2000 Median Household Income - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [142]:
# Choropleth US Map Plot for Year 2005 - MEDIAN HOUSEHOLD INCOME

df = income_state_codes

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['Abbreviation'] + '<br>' +\
    '2005 Median Income '+df['2005 Median Income']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2005']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['Abbreviation'],
        z = df['Normalized 2005'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2005 Median Household Income - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [143]:
# Choropleth US Map Plot for Year 2010 - MEDIAN HOUSEHOLD INCOME

df = income_state_codes

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['Abbreviation'] + '<br>' +\
    '2010 Median Income '+df['2010 Median Income']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2010']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['Abbreviation'],
        z = df['Normalized 2010'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2010 Median Household Income - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [145]:
# Choropleth US Map Plot for Year 2014 - MEDIAN HOUSEHOLD INCOME

df = income_state_codes

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['Abbreviation'] + '<br>' +\
    '2014 Median Income '+df['2014 Median Income']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2014']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['Abbreviation'],
        z = df['Normalized 2014'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2014 Median Household Income - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [45]:
# FROM NOTES: health respiratory data: Mortality rate for respiratory diseases from 1980-2014 (globalhealthdataexchange)
# SOURCE: http://ghdx.healthdata.org/record/united-states-chronic-respiratory-disease-mortality-rates-county-1980-2014

healthfile = os.path.join('Resources', 'us_resp_mort.xlsx')
health_df = pd.read_excel(healthfile, sheet_name='Summary', encoding='UTF-8')
health_df.head()

Unnamed: 0,State,Mortality Rate 2000,Mortality Rate 2005,Mortality Rate 2010,Mortality Rate 2014
0,Alabama,65.65,69.21,70.86,73.44
1,Alaska,60.23,56.9,52.77,51.36
2,Arizona,58.24,56.44,51.18,51.93
3,Arkansas,62.11,65.31,66.44,69.63
4,California,52.14,49.25,44.59,41.94


In [124]:
# Merge income dataframe with state codes

health_state_codes = pd.merge(health_df, state_codes, on='State')
health_state_codes.head()

Unnamed: 0,State,Mortality Rate 2000,Mortality Rate 2005,Mortality Rate 2010,Mortality Rate 2014,Abbreviation
0,Alabama,65.65,69.21,70.86,73.44,AL
1,Alaska,60.23,56.9,52.77,51.36,AK
2,Arizona,58.24,56.44,51.18,51.93,AZ
3,Arkansas,62.11,65.31,66.44,69.63,AR
4,California,52.14,49.25,44.59,41.94,CA


In [126]:
# Calculate percentage of US total and add to dataframe
health_state_codes['Percent 2000']= health_state_codes['Mortality Rate 2000']/health_state_codes['Mortality Rate 2000'].sum()
health_state_codes['Percent 2005']= health_state_codes['Mortality Rate 2005']/health_state_codes['Mortality Rate 2005'].sum()
health_state_codes['Percent 2010']= health_state_codes['Mortality Rate 2010']/health_state_codes['Mortality Rate 2010'].sum()
health_state_codes['Percent 2014']= health_state_codes['Mortality Rate 2014']/health_state_codes['Mortality Rate 2014'].sum()

health_state_codes.head()

Unnamed: 0,State,Mortality Rate 2000,Mortality Rate 2005,Mortality Rate 2010,Mortality Rate 2014,Abbreviation,Percent 2000,Percent 2005,Percent 2010,Percent 2014
0,Alabama,65.65,69.21,70.86,73.44,AL,0.02255,0.023674,0.024949,0.025671
1,Alaska,60.23,56.9,52.77,51.36,AK,0.020688,0.019463,0.018579,0.017953
2,Arizona,58.24,56.44,51.18,51.93,AZ,0.020004,0.019306,0.01802,0.018152
3,Arkansas,62.11,65.31,66.44,69.63,AR,0.021334,0.02234,0.023392,0.024339
4,California,52.14,49.25,44.59,41.94,CA,0.017909,0.016847,0.015699,0.01466


In [127]:
# Normalize percentage data between 0 and 1 for map scale comparisons 

# Create y, where y the 'scores' column's values as floats
y_2000 = health_state_codes[['Percent 2000']].values.astype(float)
y_2005 = health_state_codes[['Percent 2005']].values.astype(float)
y_2010 = health_state_codes[['Percent 2010']].values.astype(float)
y_2014 = health_state_codes[['Percent 2014']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
y_00_scaled = min_max_scaler.fit_transform(y_2000)
y_05_scaled = min_max_scaler.fit_transform(y_2005)
y_10_scaled = min_max_scaler.fit_transform(y_2010)
y_14_scaled = min_max_scaler.fit_transform(y_2014)

health_state_codes['Normalized 2000']= y_00_scaled
health_state_codes['Normalized 2005']= y_05_scaled
health_state_codes['Normalized 2010']= y_10_scaled
health_state_codes['Normalized 2014']= y_14_scaled

health_state_codes.head()

Unnamed: 0,State,Mortality Rate 2000,Mortality Rate 2005,Mortality Rate 2010,Mortality Rate 2014,Abbreviation,Percent 2000,Percent 2005,Percent 2010,Percent 2014,Normalized 2000,Normalized 2005,Normalized 2010,Normalized 2014
0,Alabama,65.65,69.21,70.86,73.44,AL,0.02255,0.023674,0.024949,0.025671,0.704506,0.769076,0.827071,0.849544
1,Alaska,60.23,56.9,52.77,51.36,AK,0.020688,0.019463,0.018579,0.017953,0.587088,0.521888,0.471178,0.438906
2,Arizona,58.24,56.44,51.18,51.93,AZ,0.020004,0.019306,0.01802,0.018152,0.543977,0.512651,0.439898,0.449507
3,Arkansas,62.11,65.31,66.44,69.63,AR,0.021334,0.02234,0.023392,0.024339,0.627816,0.690763,0.740114,0.778687
4,California,52.14,49.25,44.59,41.94,CA,0.017909,0.016847,0.015699,0.01466,0.411828,0.368273,0.31025,0.263716


In [128]:
# Choropleth US Map Plot for Year 2000 - MORTALITY RATE

df = health_state_codes

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['Abbreviation'] + '<br>' +\
    '2000 Mortality Rate (Per 100k People) '+df['Mortality Rate 2000']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2000']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['Abbreviation'],
        z = df['Normalized 2000'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2000 Chronic Respiratory Disease Mortality Rate - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [129]:
# Choropleth US Map Plot for Year 2005 - MORTALITY RATE

df = health_state_codes

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['Abbreviation'] + '<br>' +\
    '2005 Mortality Rate (Per 100k People) '+df['Mortality Rate 2005']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2005']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['Abbreviation'],
        z = df['Normalized 2005'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2005 Chronic Respiratory Disease Mortality Rate - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [130]:
# Choropleth US Map Plot for Year 2010 - MORTALITY RATE

df = health_state_codes

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['Abbreviation'] + '<br>' +\
    '2010 Mortality Rate (Per 100k People) '+df['Mortality Rate 2010']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2010']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['Abbreviation'],
        z = df['Normalized 2010'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2010 Chronic Respiratory Disease Mortality Rate - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [131]:
# Choropleth US Map Plot for Year 2014 - MORTALITY RATE

df = health_state_codes

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['Abbreviation'] + '<br>' +\
    '2014 Mortality Rate (Per 100k People) '+df['Mortality Rate 2014']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2014']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['Abbreviation'],
        z = df['Normalized 2014'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2014 Chronic Respiratory Disease Mortality Rate - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [89]:
#FROM NOTES: energy source generation:annual generation of energy by state (eia.gov)
# SOURCE: https://www.eia.gov/electricity/data/state/

eiafile = os.path.join('Resources', 'eia.xls')
eai_df = pd.read_excel(eiafile, encoding='UTF-8')
eai_df.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours)
0,1990,AK,Total Electric Power Industry,Total,5599506.0
1,1990,AK,Total Electric Power Industry,Coal,510573.0
2,1990,AK,Total Electric Power Industry,Hydroelectric Conventional,974521.0
3,1990,AK,Total Electric Power Industry,Natural Gas,3466261.0
4,1990,AK,Total Electric Power Industry,Petroleum,497116.0


In [90]:
#Energy generation stuff 
#isolate type of producer to Total
scotts_total_power = eai_df[eai_df["TYPE OF PRODUCER"]=="Total Electric Power Industry"]
scotts_total_power.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours)
0,1990,AK,Total Electric Power Industry,Total,5599506.0
1,1990,AK,Total Electric Power Industry,Coal,510573.0
2,1990,AK,Total Electric Power Industry,Hydroelectric Conventional,974521.0
3,1990,AK,Total Electric Power Industry,Natural Gas,3466261.0
4,1990,AK,Total Electric Power Industry,Petroleum,497116.0


In [91]:
# isolate to Energy Source = Coal
coal_gen = scotts_total_power.loc[scotts_total_power["ENERGY SOURCE"] == "Coal", :]
coal_gen.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours)
1,1990,AK,Total Electric Power Industry,Coal,510573.0
21,1990,AL,Total Electric Power Industry,Coal,53658115.0
50,1990,AR,Total Electric Power Industry,Coal,19207935.0
76,1990,AZ,Total Electric Power Industry,Coal,31915610.0
99,1990,CA,Total Electric Power Industry,Coal,2637677.0


In [92]:
coal_gen.dtypes

YEAR                            int64
STATE                          object
TYPE OF PRODUCER               object
ENERGY SOURCE                  object
GENERATION (Megawatthours)    float64
dtype: object

In [93]:
# Isolate to year 2000
year_2000 = coal_gen.loc[coal_gen["YEAR"] == 2000, :]
# year_2000

# drop US Total
coal_2000 = year_2000.drop(16792)
coal_2000.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours)
15434,2000,AK,Total Electric Power Industry,Coal,538673.0
15455,2000,AL,Total Electric Power Industry,Coal,77517714.0
15486,2000,AR,Total Electric Power Industry,Coal,24167699.0
15516,2000,AZ,Total Electric Power Industry,Coal,40993129.0
15543,2000,CA,Total Electric Power Industry,Coal,2363607.0


In [94]:
# Calculate percent of US total
coal_2000['Percent 2000']= coal_2000['GENERATION (Megawatthours)']/coal_2000['GENERATION (Megawatthours)'].sum()
coal_2000.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2000
15434,2000,AK,Total Electric Power Industry,Coal,538673.0,0.000274
15455,2000,AL,Total Electric Power Industry,Coal,77517714.0,0.039424
15486,2000,AR,Total Electric Power Industry,Coal,24167699.0,0.012291
15516,2000,AZ,Total Electric Power Industry,Coal,40993129.0,0.020848
15543,2000,CA,Total Electric Power Industry,Coal,2363607.0,0.001202


In [98]:
# Normalize data for year 2000 between 0 and 1 for map comparisons 

# Create c, where c the 'scores' column's values as floats
c_2000 = coal_2000[['Percent 2000']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
c_2000_scaled = min_max_scaler.fit_transform(c_2000)


coal_2000['Normalized Percent 2000']= c_2000_scaled

coal_2000

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2000,Normalized Percent 2000,text
15434,2000,AK,Total Electric Power Industry,Coal,538673.0,0.0002739575340449,0.003329,AK<br>GENERATION (Megawatthours) 538673.0<br>P...
15455,2000,AL,Total Electric Power Industry,Coal,77517714.0,0.039423846697792,0.550827,AL<br>GENERATION (Megawatthours) 77517714.0<br...
15486,2000,AR,Total Electric Power Industry,Coal,24167699.0,0.012291173349286,0.171386,AR<br>GENERATION (Megawatthours) 24167699.0<br...
15516,2000,AZ,Total Electric Power Industry,Coal,40993129.0,0.0208482261662,0.291053,AZ<br>GENERATION (Megawatthours) 40993129.0<br...
15543,2000,CA,Total Electric Power Industry,Coal,2363607.0,0.0012020798242557,0.016308,CA<br>GENERATION (Megawatthours) 2363607.0<br>...
15598,2000,CO,Total Electric Power Industry,Coal,35381219.0,0.0179941291075354,0.25114,CO<br>GENERATION (Megawatthours) 35381219.0<br...
15629,2000,CT,Total Electric Power Industry,Coal,3186096.0,0.0016203800884588,0.022158,CT<br>GENERATION (Megawatthours) 3186096.0<br>...
15667,2000,DE,Total Electric Power Industry,Coal,4112296.0,0.0020914255428113,0.028745,DE<br>GENERATION (Megawatthours) 4112296.0<br>...
15686,2000,FL,Total Electric Power Industry,Coal,72741829.0,0.0369949340226029,0.51686,FL<br>GENERATION (Megawatthours) 72741829.0<br...
15728,2000,GA,Total Electric Power Industry,Coal,80100450.0,0.0407373708314483,0.569197,GA<br>GENERATION (Megawatthours) 80100450.0<br...


In [99]:
# Choropleth US Map Plot for Year 2000 - COAL POWER GENERATION

df = coal_2000

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['STATE'] + '<br>' +\
    'GENERATION (Megawatthours) '+df['GENERATION (Megawatthours)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2000']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['STATE'],
        z = df['Normalized Percent 2000'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2000 Coal Power Generation - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [109]:
# Isolate to year 2005
year_2005 = coal_gen.loc[coal_gen["YEAR"] == 2005, :]
# year_2005

# drop US Total
coal_2005 = year_2005.drop(25640)
coal_2005.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours)
24156,2005,AK,Total Electric Power Industry,Coal,624318.0
24180,2005,AL,Total Electric Power Industry,Coal,78128457.0
24212,2005,AR,Total Electric Power Industry,Coal,23037144.0
24246,2005,AZ,Total Electric Power Industry,Coal,40143534.0
24279,2005,CA,Total Electric Power Industry,Coal,2135375.0


In [110]:
# Calculate percent of US total
coal_2005['Percent 2005']= coal_2005['GENERATION (Megawatthours)']/coal_2005['GENERATION (Megawatthours)'].sum()
coal_2005.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2005
24156,2005,AK,Total Electric Power Industry,Coal,624318.0,0.00031
24180,2005,AL,Total Electric Power Industry,Coal,78128457.0,0.038814
24212,2005,AR,Total Electric Power Industry,Coal,23037144.0,0.011445
24246,2005,AZ,Total Electric Power Industry,Coal,40143534.0,0.019943
24279,2005,CA,Total Electric Power Industry,Coal,2135375.0,0.001061


In [111]:
# Normalize data for year 2005 between 0 and 1 for map comparisons 

# Create c, where c the 'scores' column's values as floats
c_2005 = coal_2005[['Percent 2005']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
c_2005_scaled = min_max_scaler.fit_transform(c_2005)


coal_2005['Normalized Percent 2005']= c_2005_scaled

coal_2005.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2005,Normalized Percent 2005
24156,2005,AK,Total Electric Power Industry,Coal,624318.0,0.00031,0.003569
24180,2005,AL,Total Electric Power Industry,Coal,78128457.0,0.038814,0.526317
24212,2005,AR,Total Electric Power Industry,Coal,23037144.0,0.011445,0.154738
24246,2005,AZ,Total Electric Power Industry,Coal,40143534.0,0.019943,0.270117
24279,2005,CA,Total Electric Power Industry,Coal,2135375.0,0.001061,0.013761


In [112]:
# Choropleth US Map Plot for Year 2005 - COAL POWER GENERATION

df = coal_2005

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['STATE'] + '<br>' +\
    'GENERATION (Megawatthours) '+df['GENERATION (Megawatthours)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2005']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['STATE'],
        z = df['Normalized Percent 2005'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2005 Coal Power Generation - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [114]:
# Isolate to year 2010
year_2010 = coal_gen.loc[coal_gen["YEAR"] == 2010, :]
# year_2010

# drop US Total
coal_2010 = year_2010.drop(34962)
coal_2010.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours)
33357,2010,AK,Total Electric Power Industry,Coal,620443.0
33384,2010,AL,Total Electric Power Industry,Coal,63050411.0
33417,2010,AR,Total Electric Power Industry,Coal,28152445.0
33454,2010,AZ,Total Electric Power Industry,Coal,43643807.0
33491,2010,CA,Total Electric Power Industry,Coal,2100221.0


In [115]:
# Calculate percent of US total
coal_2010['Percent 2010']= coal_2010['GENERATION (Megawatthours)']/coal_2010['GENERATION (Megawatthours)'].sum()
coal_2010.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2010
33357,2010,AK,Total Electric Power Industry,Coal,620443.0,0.000336
33384,2010,AL,Total Electric Power Industry,Coal,63050411.0,0.034131
33417,2010,AR,Total Electric Power Industry,Coal,28152445.0,0.01524
33454,2010,AZ,Total Electric Power Industry,Coal,43643807.0,0.023626
33491,2010,CA,Total Electric Power Industry,Coal,2100221.0,0.001137


In [116]:
# Normalize data for year 2010 between 0 and 1 for map comparisons 

# Create c, where c the 'scores' column's values as floats
c_2010 = coal_2010[['Percent 2010']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
c_2010_scaled = min_max_scaler.fit_transform(c_2010)


coal_2010['Normalized Percent 2010']= c_2010_scaled

coal_2010.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2010,Normalized Percent 2010
33357,2010,AK,Total Electric Power Industry,Coal,620443.0,0.000336,0.003553
33384,2010,AL,Total Electric Power Industry,Coal,63050411.0,0.034131,0.419515
33417,2010,AR,Total Electric Power Industry,Coal,28152445.0,0.01524,0.186995
33454,2010,AZ,Total Electric Power Industry,Coal,43643807.0,0.023626,0.290212
33491,2010,CA,Total Electric Power Industry,Coal,2100221.0,0.001137,0.013413


In [117]:
# Choropleth US Map Plot for Year 2010 - COAL POWER GENERATION

df = coal_2010

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['STATE'] + '<br>' +\
    'GENERATION (Megawatthours) '+df['GENERATION (Megawatthours)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2010']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['STATE'],
        z = df['Normalized Percent 2010'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2010 Coal Power Generation - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [118]:
# Isolate to year 2014
year_2014 = coal_gen.loc[coal_gen["YEAR"] == 2014, :]
# year_2014

# drop US Total
coal_2014 = year_2014.drop(42963)
coal_2014.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours)
41246,2014,AK,Total Electric Power Industry,Coal,558292.18
41279,2014,AL,Total Electric Power Industry,Coal,47301626.25
41312,2014,AR,Total Electric Power Industry,Coal,33220754.79
41349,2014,AZ,Total Electric Power Industry,Coal,42665010.57
41384,2014,CA,Total Electric Power Industry,Coal,804759.57


In [119]:
# Calculate percent of US total
coal_2014['Percent 2014']= coal_2014['GENERATION (Megawatthours)']/coal_2014['GENERATION (Megawatthours)'].sum()
coal_2014.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2014
41246,2014,AK,Total Electric Power Industry,Coal,558292.18,0.000353
41279,2014,AL,Total Electric Power Industry,Coal,47301626.25,0.029905
41312,2014,AR,Total Electric Power Industry,Coal,33220754.79,0.021003
41349,2014,AZ,Total Electric Power Industry,Coal,42665010.57,0.026974
41384,2014,CA,Total Electric Power Industry,Coal,804759.57,0.000509


In [120]:
# Normalize data for year 2014 between 0 and 1 for map comparisons 

# Create c, where c the 'scores' column's values as floats
c_2014 = coal_2014[['Percent 2014']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
c_2014_scaled = min_max_scaler.fit_transform(c_2014)


coal_2014['Normalized Percent 2014']= c_2014_scaled

coal_2014.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2014,Normalized Percent 2014
41246,2014,AK,Total Electric Power Industry,Coal,558292.18,0.000353,0.003768
41279,2014,AL,Total Electric Power Industry,Coal,47301626.25,0.029905,0.319231
41312,2014,AR,Total Electric Power Industry,Coal,33220754.79,0.021003,0.224201
41349,2014,AZ,Total Electric Power Industry,Coal,42665010.57,0.026974,0.287939
41384,2014,CA,Total Electric Power Industry,Coal,804759.57,0.000509,0.005431


In [121]:
# Choropleth US Map Plot for Year 2014 - COAL POWER GENERATION

df = coal_2014

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['STATE'] + '<br>' +\
    'GENERATION (Megawatthours) '+df['GENERATION (Megawatthours)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2014']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['STATE'],
        z = df['Normalized Percent 2014'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2014 Coal Power Generation - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [147]:
# isolate to Energy Source = NATURAL GAS
ng_gen = scotts_total_power.loc[scotts_total_power["ENERGY SOURCE"] == "Natural Gas", :]
ng_gen.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours)
3,1990,AK,Total Electric Power Industry,Natural Gas,3466261.0
23,1990,AL,Total Electric Power Industry,Natural Gas,1020714.0
52,1990,AR,Total Electric Power Industry,Natural Gas,3578573.0
78,1990,AZ,Total Electric Power Industry,Natural Gas,2333900.0
102,1990,CA,Total Electric Power Industry,Natural Gas,74168308.0


In [152]:
# Isolate to year 2000
year_2000_ng = ng_gen.loc[ng_gen["YEAR"] == 2000, :]
# year_2000_ng

# drop US Total
ng_2000 = year_2000_ng.drop(16795)
# ng_2000

In [153]:
# Calculate percent of US total
ng_2000['Percent 2000']= ng_2000['GENERATION (Megawatthours)']/ng_2000['GENERATION (Megawatthours)'].sum()
ng_2000.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2000
15436,2000,AK,Total Electric Power Industry,Natural Gas,3975832.0,0.006615
15457,2000,AL,Total Electric Power Industry,Natural Gas,5028211.0,0.008366
15488,2000,AR,Total Electric Power Industry,Natural Gas,3862509.0,0.006426
15518,2000,AZ,Total Electric Power Industry,Natural Gas,8734799.0,0.014533
15546,2000,CA,Total Electric Power Industry,Natural Gas,103218973.0,0.171734


In [154]:
# Normalize data for year 2000 between 0 and 1 for map comparisons 

# Create n, where n the 'scores' column's values as floats
n_2000 = ng_2000[['Percent 2000']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
n_2000_scaled = min_max_scaler.fit_transform(n_2000)


ng_2000['Normalized Percent 2000']= n_2000_scaled

ng_2000.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2000,Normalized Percent 2000
15436,2000,AK,Total Electric Power Industry,Natural Gas,3975832.0,0.006615,0.021066
15457,2000,AL,Total Electric Power Industry,Natural Gas,5028211.0,0.008366,0.026642
15488,2000,AR,Total Electric Power Industry,Natural Gas,3862509.0,0.006426,0.020465
15518,2000,AZ,Total Electric Power Industry,Natural Gas,8734799.0,0.014533,0.046281
15546,2000,CA,Total Electric Power Industry,Natural Gas,103218973.0,0.171734,0.546904


In [155]:
# Choropleth US Map Plot for Year 2000 - NATURAL GAS POWER GENERATION

df = ng_2000

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['STATE'] + '<br>' +\
    'GENERATION (Megawatthours) '+df['GENERATION (Megawatthours)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2000']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['STATE'],
        z = df['Normalized Percent 2000'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2000 Natural Gas Power Generation - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [158]:
# Isolate to year 2005
year_2005_ng = ng_gen.loc[ng_gen["YEAR"] == 2005, :]
# year_2005_ng

# drop US Total
ng_2005 = year_2005_ng.drop(25643)
# ng_2005

In [159]:
# Calculate percent of US total
ng_2005['Percent 2005']= ng_2005['GENERATION (Megawatthours)']/ng_2005['GENERATION (Megawatthours)'].sum()
ng_2005.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2005
24158,2005,AK,Total Electric Power Industry,Natural Gas,3723626.0,0.004893
24182,2005,AL,Total Electric Power Industry,Natural Gas,13869451.0,0.018226
24214,2005,AR,Total Electric Power Industry,Natural Gas,6005660.0,0.007892
24248,2005,AZ,Total Electric Power Industry,Natural Gas,28893013.0,0.037969
24282,2005,CA,Total Electric Power Industry,Natural Gas,93353849.0,0.122679


In [160]:
# Normalize data for year 2005 between 0 and 1 for map comparisons 

# Create n, where n the 'scores' column's values as floats
n_2005 = ng_2005[['Percent 2005']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
n_2005_scaled = min_max_scaler.fit_transform(n_2005)


ng_2005['Normalized Percent 2005']= n_2005_scaled

ng_2005.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2005,Normalized Percent 2005
24158,2005,AK,Total Electric Power Industry,Natural Gas,3723626.0,0.004893,0.018827
24182,2005,AL,Total Electric Power Industry,Natural Gas,13869451.0,0.018226,0.070156
24214,2005,AR,Total Electric Power Industry,Natural Gas,6005660.0,0.007892,0.030372
24248,2005,AZ,Total Electric Power Industry,Natural Gas,28893013.0,0.037969,0.146162
24282,2005,CA,Total Electric Power Industry,Natural Gas,93353849.0,0.122679,0.472277


In [161]:
# Choropleth US Map Plot for Year 2005 - NATURAL GAS POWER GENERATION

df = ng_2005

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['STATE'] + '<br>' +\
    'GENERATION (Megawatthours) '+df['GENERATION (Megawatthours)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2005']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['STATE'],
        z = df['Normalized Percent 2005'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2005 Natural Gas Power Generation - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [164]:
# Isolate to year 2010
year_2010_ng = ng_gen.loc[ng_gen["YEAR"] == 2010, :]
# year_2010_ng

# drop US Total
ng_2010 = year_2010_ng.drop(34965)
# ng_2010

In [165]:
# Calculate percent of US total
ng_2010['Percent 2010']= ng_2010['GENERATION (Megawatthours)']/ng_2010['GENERATION (Megawatthours)'].sum()
ng_2010.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2010
33359,2010,AK,Total Electric Power Industry,Natural Gas,3750024.0,0.003797
33386,2010,AL,Total Electric Power Industry,Natural Gas,39235017.0,0.039724
33419,2010,AR,Total Electric Power Industry,Natural Gas,12469077.0,0.012624
33456,2010,AZ,Total Electric Power Industry,Natural Gas,29675942.0,0.030046
33494,2010,CA,Total Electric Power Industry,Natural Gas,107522313.0,0.108862


In [166]:
# Normalize data for year 2010 between 0 and 1 for map comparisons 

# Create n, where n the 'scores' column's values as floats
n_2010 = ng_2010[['Percent 2010']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
n_2010_scaled = min_max_scaler.fit_transform(n_2010)


ng_2010['Normalized Percent 2010']= n_2010_scaled

ng_2010.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2010,Normalized Percent 2010
33359,2010,AK,Total Electric Power Industry,Natural Gas,3750024.0,0.003797,0.020046
33386,2010,AL,Total Electric Power Industry,Natural Gas,39235017.0,0.039724,0.209929
33419,2010,AR,Total Electric Power Industry,Natural Gas,12469077.0,0.012624,0.066703
33456,2010,AZ,Total Electric Power Industry,Natural Gas,29675942.0,0.030046,0.158778
33494,2010,CA,Total Electric Power Industry,Natural Gas,107522313.0,0.108862,0.575339


In [167]:
# Choropleth US Map Plot for Year 2010 - NATURAL GAS POWER GENERATION

df = ng_2010

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['STATE'] + '<br>' +\
    'GENERATION (Megawatthours) '+df['GENERATION (Megawatthours)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2010']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['STATE'],
        z = df['Normalized Percent 2010'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2010 Natural Gas Power Generation - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [172]:
# Isolate to year 2014
year_2014_ng = ng_gen.loc[ng_gen["YEAR"] == 2014, :]
# year_2014_ng

# drop US Total
ng_2014 = year_2014_ng.drop(42967)
# ng_2014

In [173]:
# Calculate percent of US total
ng_2014['Percent 2014']= ng_2014['GENERATION (Megawatthours)']/ng_2014['GENERATION (Megawatthours)'].sum()
ng_2014.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2014
41248,2014,AK,Total Electric Power Industry,Natural Gas,3288022.0,0.002919
41281,2014,AL,Total Electric Power Industry,Natural Gas,48270070.0,0.042845
41315,2014,AR,Total Electric Power Industry,Natural Gas,9613708.0,0.008533
41352,2014,AZ,Total Electric Power Industry,Natural Gas,27241880.0,0.02418
41388,2014,CA,Total Electric Power Industry,Natural Gas,120426400.0,0.106893


In [174]:
# Normalize data for year 2014 between 0 and 1 for map comparisons 

# Create n, where n the 'scores' column's values as floats
n_2014 = ng_2014[['Percent 2014']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
n_2014_scaled = min_max_scaler.fit_transform(n_2014)


ng_2014['Normalized Percent 2014']= n_2014_scaled

ng_2014.head()

Unnamed: 0,YEAR,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours),Percent 2014,Normalized Percent 2014
41248,2014,AK,Total Electric Power Industry,Natural Gas,3288022.0,0.002919,0.016049
41281,2014,AL,Total Electric Power Industry,Natural Gas,48270070.0,0.042845,0.235775
41315,2014,AR,Total Electric Power Industry,Natural Gas,9613708.0,0.008533,0.046949
41352,2014,AZ,Total Electric Power Industry,Natural Gas,27241880.0,0.02418,0.133058
41388,2014,CA,Total Electric Power Industry,Natural Gas,120426400.0,0.106893,0.588241


In [175]:
# Choropleth US Map Plot for Year 2014 - NATURAL GAS POWER GENERATION

df = ng_2014

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['STATE'] + '<br>' +\
    'GENERATION (Megawatthours) '+df['GENERATION (Megawatthours)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2014']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['STATE'],
        z = df['Normalized Percent 2014'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2014 Natural Gas Power Generation - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

In [178]:
emissionsfile = os.path.join('Resources', 'targetyears_emissiondata.xlsx')
emissions_df = pd.read_excel(emissionsfile, encoding='UTF-8')
emissions_df.head()

Unnamed: 0,Year,State abbreviation,State annual NOx emissions (tons),State annual SO2 emissions (tons),State annual CO2 emissions (tons),State annual CH4 emissions (lbs),State annual N2O emissions (lbs)
0,2000,AK,14978.584,4151.465,3975039.0,,
1,2000,AL,189508.64,515083.663,89932310.0,,
2,2000,AR,53543.838,76510.438,32085810.0,,
3,2000,AZ,104220.977,72401.525,52273860.0,,
4,2000,CA,58741.984,17917.106,65955470.0,,


In [187]:
emissions_df.dtypes

Year                                   int64
State abbreviation                    object
State annual NOx emissions (tons)    float64
State annual SO2 emissions (tons)    float64
State annual CO2 emissions (tons)    float64
State annual CH4 emissions (lbs)     float64
State annual N2O emissions (lbs)     float64
dtype: object

In [191]:
# reorganize to include only CO2 data
reorg_em = emissions_df[["Year","State abbreviation","State annual CO2 emissions (tons)"]]
reorg_em.head()

Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons)
0,2000,AK,3975039.0
1,2000,AL,89932310.0
2,2000,AR,32085810.0
3,2000,AZ,52273860.0
4,2000,CA,65955470.0


In [192]:
# Isolate to year 2000
em_2000 = reorg_em.loc[reorg_em["Year"] == 2000, :]
em_2000.head()


Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons)
0,2000,AK,3975039.0
1,2000,AL,89932310.0
2,2000,AR,32085810.0
3,2000,AZ,52273860.0
4,2000,CA,65955470.0


In [193]:
# Calculate percent of US total
em_2000['Percent 2000']= em_2000['State annual CO2 emissions (tons)']/em_2000['State annual CO2 emissions (tons)'].sum()
em_2000.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons),Percent 2000
0,2000,AK,3975039.0,0.001498
1,2000,AL,89932310.0,0.0339
2,2000,AR,32085810.0,0.012095
3,2000,AZ,52273860.0,0.019704
4,2000,CA,65955470.0,0.024862


In [194]:
# Normalize data for year 2000 between 0 and 1 for map comparisons 

# Create e, where e the 'scores' column's values as floats
e_2000 = em_2000[['Percent 2000']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
e_2000_scaled = min_max_scaler.fit_transform(e_2000)


em_2000['Normalized Percent 2000']= e_2000_scaled

em_2000.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons),Percent 2000,Normalized Percent 2000
0,2000,AK,3975039.0,0.001498,0.013645
1,2000,AL,89932310.0,0.0339,0.322664
2,2000,AR,32085810.0,0.012095,0.114704
3,2000,AZ,52273860.0,0.019704,0.187281
4,2000,CA,65955470.0,0.024862,0.236466


In [197]:
# Choropleth US Map Plot for Year 2000 - CO2 EMISSIONS

df = em_2000

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['State abbreviation'] + '<br>' +\
    'State annual CO2 emissions (tons) '+df['State annual CO2 emissions (tons)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2000']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['State abbreviation'],
        z = df['Normalized Percent 2000'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2000 CO2 Emissions - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [198]:
# Isolate to year 2005
em_2005 = reorg_em.loc[reorg_em["Year"] == 2005, :]
em_2005.head()

Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons)
51,2005,AK,3583599.0
52,2005,AL,92043920.0
53,2005,AR,29375200.0
54,2005,AZ,58778190.0
55,2005,CA,53985900.0


In [199]:
# Calculate percent of US total
em_2005['Percent 2005']= em_2005['State annual CO2 emissions (tons)']/em_2005['State annual CO2 emissions (tons)'].sum()
em_2005.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons),Percent 2005
51,2005,AK,3583599.0,0.001329
52,2005,AL,92043920.0,0.034138
53,2005,AR,29375200.0,0.010895
54,2005,AZ,58778190.0,0.0218
55,2005,CA,53985900.0,0.020023


In [200]:
# Normalize data for year 2005 between 0 and 1 for map comparisons 

# Create e, where e the 'scores' column's values as floats
e_2005 = em_2005[['Percent 2005']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
e_2005_scaled = min_max_scaler.fit_transform(e_2005)


em_2005['Normalized Percent 2005']= e_2005_scaled

em_2005.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons),Percent 2005,Normalized Percent 2005
51,2005,AK,3583599.0,0.001329,0.013263
52,2005,AL,92043920.0,0.034138,0.341878
53,2005,AR,29375200.0,0.010895,0.109074
54,2005,AZ,58778190.0,0.0218,0.218302
55,2005,CA,53985900.0,0.020023,0.200499


In [201]:
# Choropleth US Map Plot for Year 2005 - CO2 EMISSIONS

df = em_2005

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['State abbreviation'] + '<br>' +\
    'State annual CO2 emissions (tons) '+df['State annual CO2 emissions (tons)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2005']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['State abbreviation'],
        z = df['Normalized Percent 2005'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2005 CO2 Emissions - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [202]:
# Isolate to year 2010
em_2010 = reorg_em.loc[reorg_em["Year"] == 2010, :]
em_2010.head()

Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons)
102,2010,AK,3668216.0
103,2010,AL,85715050.0
104,2010,AR,36164190.0
105,2010,AZ,61050570.0
106,2010,CA,52220430.0


In [203]:
# Calculate percent of US total
em_2010['Percent 2010']= em_2010['State annual CO2 emissions (tons)']/em_2010['State annual CO2 emissions (tons)'].sum()
em_2010.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons),Percent 2010
102,2010,AK,3668216.0,0.001443
103,2010,AL,85715050.0,0.033716
104,2010,AR,36164190.0,0.014225
105,2010,AZ,61050570.0,0.024014
106,2010,CA,52220430.0,0.020541


In [204]:
# Normalize data for year 2010 between 0 and 1 for map comparisons 

# Create e, where e the 'scores' column's values as floats
e_2010 = em_2010[['Percent 2010']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
e_2010_scaled = min_max_scaler.fit_transform(e_2010)


em_2010['Normalized Percent 2010']= e_2010_scaled

em_2010.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons),Percent 2010,Normalized Percent 2010
102,2010,AK,3668216.0,0.001443,0.013988
103,2010,AL,85715050.0,0.033716,0.327678
104,2010,AR,36164190.0,0.014225,0.13823
105,2010,AZ,61050570.0,0.024014,0.233378
106,2010,CA,52220430.0,0.020541,0.199618


In [205]:
# Choropleth US Map Plot for Year 2010 - CO2 EMISSIONS

df = em_2010

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['State abbreviation'] + '<br>' +\
    'State annual CO2 emissions (tons) '+df['State annual CO2 emissions (tons)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2010']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['State abbreviation'],
        z = df['Normalized Percent 2010'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2010 CO2 Emissions - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [206]:
# Isolate to year 2014
em_2014 = reorg_em.loc[reorg_em["Year"] == 2014, :]
em_2014.head()

Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons)
153,2014,AK,2407768.0
154,2014,AL,71737970.0
155,2014,AR,36121950.0
156,2014,AZ,55150620.0
157,2014,CA,49690680.0


In [207]:
# Calculate percent of US total
em_2014['Percent 2014']= em_2014['State annual CO2 emissions (tons)']/em_2014['State annual CO2 emissions (tons)'].sum()
em_2014.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons),Percent 2014
153,2014,AK,2407768.0,0.001151
154,2014,AL,71737970.0,0.034301
155,2014,AR,36121950.0,0.017272
156,2014,AZ,55150620.0,0.02637
157,2014,CA,49690680.0,0.023759


In [208]:
# Normalize data for year 2014 between 0 and 1 for map comparisons 

# Create e, where e the 'scores' column's values as floats
e_2014 = em_2014[['Percent 2014']].values.astype(float)

# Create a minimum and maximum processor object
min_max_scaler = preprocessing.MinMaxScaler()

# Create an object to transform the data to fit minmax processor
e_2014_scaled = min_max_scaler.fit_transform(e_2014)


em_2014['Normalized Percent 2014']= e_2014_scaled

em_2014.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,Year,State abbreviation,State annual CO2 emissions (tons),Percent 2014,Normalized Percent 2014
153,2014,AK,2407768.0,0.001151,0.010027
154,2014,AL,71737970.0,0.034301,0.303177
155,2014,AR,36121950.0,0.017272,0.152581
156,2014,AZ,55150620.0,0.02637,0.233041
157,2014,CA,49690680.0,0.023759,0.209954


In [209]:
# Choropleth US Map Plot for Year 2014 - CO2 EMISSIONS

df = em_2014

for col in df.columns:
    df[col] = df[col].astype(str)
    
df['text'] = df['State abbreviation'] + '<br>' +\
    'State annual CO2 emissions (tons) '+df['State annual CO2 emissions (tons)']+'<br>'+\
    'Percent of U.S. Total '+df['Percent 2014']

data = [ dict(
        type='choropleth',
        autocolorscale = True,
        zmin = 0,
        zmax = 1,
        locations = df['State abbreviation'],
        z = df['Normalized Percent 2014'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Data Normalized between 0 and 1")
        ) ]

layout = dict(
        title = '2014 CO2 Emissions - Percent of U.S. Total (Hover for actuals)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

