# 01) IMPORT RAW DATA

In [1]:
import pandas as pd

In [2]:
# define path to the source file (from http://bit.ly/F7XW4J1JDataset8)
# the path is relative, so the CSV is looked up in the directory the
# notebook is run from
strPH = 'Change in Mean pH Levels 82-12.csv'

In [3]:
# import data: read the pH CSV into a dataframe with read_csv's default
# settings (no explicit dtypes, so the percent-formatted columns are not
# parsed as numbers here)
dfPH = pd.read_csv(strPH)

In [4]:
# examine sample dataframe: show the first rows to sanity-check the import
dfPH.head()

Unnamed: 0,FIPS Code,1982 Mean pH Value,% Outside of Ideal (7.25),2012 Mean pH Value,% Outside of Ideal (7.25).1,% Difference,Change in % Points in Difference from Ideal
0,13065,4.082609,43.7%,3.927763,45.8%,-3.9%,2.1%
1,12003,5.422222,25.2%,4.393682,39.4%,-23.4%,14.2%
2,28073,5.71,21.2%,4.92,32.1%,-16.1%,10.9%
3,28035,6.5,10.3%,5.2,28.3%,-25.0%,17.9%
4,13179,7.49,-3.3%,5.527315,23.8%,-35.5%,27.1%


# 02) REFORMAT DATA

In [5]:
# ideal pH that both "distance from ideal" terms below are measured against
fltIdealPH = 7.25

# create numeric "Change in % Points in Difference from Ideal" column:
# strip the literal percent sign with vectorized string ops, then cast to
# float (a missing value stays NaN instead of raising on .replace)
dfPH['PctPHChgImprovmt'] = (
    dfPH['Change in % Points in Difference from Ideal']
    .str.replace('%', '', regex=False)
    .astype(float)
)

# create numeric "pH Improvement" column: how much closer (positive) or
# farther (negative) the 2012 mean is to the ideal pH than the 1982 mean
dfPH['PHImprovmt'] = (
    (fltIdealPH - dfPH['1982 Mean pH Value']).abs()
    - (fltIdealPH - dfPH['2012 Mean pH Value']).abs()
)

In [6]:
# preview the dataframe again to confirm the two derived columns
# (PctPHChgImprovmt, PHImprovmt) were added
dfPH.head()

Unnamed: 0,FIPS Code,1982 Mean pH Value,% Outside of Ideal (7.25),2012 Mean pH Value,% Outside of Ideal (7.25).1,% Difference,Change in % Points in Difference from Ideal,PctPHChgImprovmt,PHImprovmt
0,13065,4.082609,43.7%,3.927763,45.8%,-3.9%,2.1%,2.1,-0.154846
1,12003,5.422222,25.2%,4.393682,39.4%,-23.4%,14.2%,14.2,-1.02854
2,28073,5.71,21.2%,4.92,32.1%,-16.1%,10.9%,10.9,-0.79
3,28035,6.5,10.3%,5.2,28.3%,-25.0%,17.9%,17.9,-1.3
4,13179,7.49,-3.3%,5.527315,23.8%,-35.5%,27.1%,27.1,-1.482685


In [7]:
# check column dtypes: the two derived columns should be float64, while the
# original percent-formatted columns remain strings (object)
dfPH.dtypes

FIPS Code                                        int64
1982 Mean pH Value                             float64
%  Outside of Ideal (7.25)                      object
2012 Mean pH Value                             float64
%  Outside of Ideal (7.25).1                    object
% Difference                                    object
Change in % Points in Difference from Ideal     object
PctPHChgImprovmt                               float64
PHImprovmt                                     float64
dtype: object

In [8]:
# subset to Iowa: keep rows whose FIPS code lies in [19000, 20000)
dfIowa = dfPH.loc[
    (dfPH['FIPS Code'] >= 19000) & (dfPH['FIPS Code'] < 20000)
]
dfIowa

Unnamed: 0,FIPS Code,1982 Mean pH Value,% Outside of Ideal (7.25),2012 Mean pH Value,% Outside of Ideal (7.25).1,% Difference,Change in % Points in Difference from Ideal,PctPHChgImprovmt,PHImprovmt
407,19189,7.325000,-1.0%,7.400000,-2.1%,1.0%,-1.0%,-1.0,-0.075000
581,19061,7.700000,-6.2%,7.594017,-4.7%,-1.4%,1.5%,1.5,0.105983
586,19101,7.450000,-2.8%,7.600000,-4.8%,2.0%,-2.1%,-2.1,-0.150000
602,19095,7.483333,-3.2%,7.621429,-5.1%,1.8%,-1.9%,-1.9,-0.138095
688,19021,7.183333,0.9%,7.711111,-6.4%,6.8%,-7.3%,-7.3,-0.394444
709,19035,7.350000,-1.4%,7.741667,-6.8%,5.1%,-5.4%,-5.4,-0.391667
733,19019,7.550000,-4.1%,7.766667,-7.1%,2.8%,-3.0%,-3.0,-0.216667
759,19195,7.350000,-1.4%,7.800000,-7.6%,5.8%,-6.2%,-6.2,-0.450000
760,19031,7.400000,-2.1%,7.800000,-7.6%,5.1%,-5.5%,-5.5,-0.400000
797,19167,7.250000,0.0%,7.825000,-7.9%,7.3%,-7.9%,-7.9,-0.575000


# 03) MAP WATER QUALITY BY COUNTY

In [9]:
# import plotly and set credentials
# un / pkey come from a local PlotlyConfig module, so the username and API
# key themselves are not written into this notebook
from PlotlyConfig import un, pkey 
import plotly
# NOTE(review): plotly.plotly is the online-plotting module of plotly < 4;
# later releases moved it into the separate chart_studio package -- confirm
# the plotly version this notebook is pinned to
import plotly.plotly as py
import plotly.figure_factory as ff
import numpy as np
# hand the username and API key to plotly's credentials-file helper
plotly.tools.set_credentials_file(username=un, api_key=pkey)

In [10]:
def MakeCtyFig(strVal, strTitle='Water Quality', strLegend='', fRound=False, fBin=True, strFIPS='FIPS Code', df=dfPH):
    """Build a county-level choropleth figure from one column of a dataframe.

    Parameters
    ----------
    strVal : str
        Name of the column in ``df`` whose values color each county.
    strTitle : str
        Figure title.
    strLegend : str
        Legend title.
    fRound : bool
        If True, round every value to 2 decimal places before plotting.
    fBin : bool
        If True, color counties with the 17-color palette below, using bin
        endpoints spread evenly between a low and a high percentile of the
        values. If False, call ``ff.create_choropleth`` with only the FIPS
        codes, values, title and legend title.
    strFIPS : str
        Name of the column in ``df`` holding the county FIPS codes.
    df : pandas.DataFrame
        Source data. The default is the object ``dfPH`` referred to when this
        definition was executed; rebinding ``dfPH`` afterwards does not
        change it, so pass ``df=`` explicitly to plot another frame.

    Returns
    -------
    The figure object returned by ``ff.create_choropleth``.
    """
    # create lists of parameter values
    lstFIPS = df[strFIPS].tolist()
    lstVal = df[strVal].tolist()
    if fRound:
        lstVal = [round(val, 2) for val in lstVal]

    # unbinned figure: no palette or percentile work is needed on this path
    if not fBin:
        return ff.create_choropleth(
            fips=lstFIPS, values=lstVal,
            title=strTitle,
            legend_title=strLegend
        )

    lstColor = ['#FF0040', '#FF0000', '#FF2800', '#FF5000', '#FF7800',
                '#FFa000', '#FFc800', '#FFf000', '#b0ff00', '#17ff00',
                '#00ff83', '#00e4ff', '#00a4ff', '#0064ff', '#0022ff',
                '#0100ff', '#0500ff']  # 17-color red-to-blue ramp
    intColor = len(lstColor)

    # percentage trimmed from each tail before spacing the endpoints;
    # 1.0 forces true division so this is not truncated to 0 under Python 2
    fltTrimPct = 1.0 / (intColor + 1) * 100

    # intColor - 1 evenly spaced endpoints delimit intColor bins, one per color
    lstBin = list(np.linspace(np.percentile(lstVal, fltTrimPct),
                              np.percentile(lstVal, 100 - fltTrimPct),
                              intColor - 1))

    return ff.create_choropleth(
        fips=lstFIPS, values=lstVal,
        binning_endpoints=lstBin,
        colorscale=lstColor,
        show_state_data=False,
        show_hover=True, centroid_marker={'opacity': 0},
        asp=2.9, title=strTitle,
        legend_title=strLegend
    )

In [11]:
# map net pH improvement (rounded to 2 decimals) for every county and send
# the figure to plotly under the name 'choropleth_full_usa'
py.iplot(
    MakeCtyFig(
        strVal='PHImprovmt',
        strTitle='Progress toward Ideal pH (7.25), 1982-2012',
        strLegend='Net pH Improvement',
        fRound=True
    ),
    filename='choropleth_full_usa'
)


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Consider using IPython.display.IFrame instead

