In [16]:
# Plotting stack: plotly graph objects (go) build the figure and iplot renders it in the notebook.
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Set up plotly's offline notebook mode before any figure is displayed.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of embedding it — confirm against the plotly docs.
init_notebook_mode(connected=True)
# Data-handling libraries used by the loading and cleaning cells.
import pandas as pd
import numpy as np

In [17]:
# Load the 2012 election dataset from the file '2012_Election_Data' in the working directory.
df = pd.read_csv('2012_Election_Data')
# Show the first rows for a quick look at the columns and the raw value formats.
df.head()

Unnamed: 0,Year,ICPSR State Code,Alphanumeric State Code,State,VEP Total Ballots Counted,VEP Highest Office,VAP Highest Office,Total Ballots Counted,Highest Office,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv
0,2012,41,1,Alabama,,58.6%,56.0%,,2074338,3539217,3707440.0,2.6%,32232,57993,8616,71584,AL
1,2012,81,2,Alaska,58.9%,58.7%,55.3%,301694.0,300495,511792,543763.0,3.8%,5633,7173,1882,11317,AK
2,2012,61,3,Arizona,53.0%,52.6%,46.5%,2323579.0,2306559,4387900,4959270.0,9.9%,35188,72452,7460,81048,AZ
3,2012,42,4,Arkansas,51.1%,50.7%,47.7%,1078548.0,1069468,2109847,2242740.0,3.5%,14471,30122,23372,53808,AR
4,2012,71,5,California,55.7%,55.1%,45.1%,13202158.0,13038547,23681837,28913129.0,17.4%,119455,0,89287,208742,CA


In [18]:
# df.info() reveals the dtype of each column. This matters because the column used as the
# 'z' value in the choropleth data must be a float or int, not an object.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 17 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Year                              51 non-null     int64  
 1   ICPSR State Code                  51 non-null     int64  
 2   Alphanumeric State Code           51 non-null     int64  
 3   State                             51 non-null     object 
 4   VEP Total Ballots Counted         41 non-null     object 
 5   VEP Highest Office                51 non-null     object 
 6   VAP Highest Office                51 non-null     object 
 7   Total Ballots Counted             41 non-null     object 
 8   Highest Office                    51 non-null     object 
 9   Voting-Eligible Population (VEP)  51 non-null     object 
 10  Voting-Age Population (VAP)       51 non-null     float64
 11  % Non-citizen                     51 non-null     object 
 12  Prison    

In [31]:
# Inspect '% Non-citizen' before cleaning. Although the column holds numbers, the raw values
# carry a trailing '%' (e.g. '2.6%'), so pandas loads it with dtype object; it is converted
# to float in the next cell.
# NOTE: the output saved below was captured on a later run (execution count 31, after the
# conversion cell at count 20), which is why it already reports float64.
df['% Non-citizen'].head()

0     2.6
1     3.8
2     9.9
3     3.5
4    17.4
Name: % Non-citizen, dtype: float64

In [20]:
# Convert '% Non-citizen' from strings such as '2.6%' to floats so it can be used as the
# choropleth 'z' value.
# The previous version sliced off the last character of every value, which raised a
# TypeError when this cell was re-run on the already-converted float column and would
# silently drop a digit from any value lacking a trailing '%'. Stripping only a trailing
# '%' from the string form makes the cell safe to re-run.
pct_text = df['% Non-citizen'].astype(str).str.strip().str.rstrip('%')
# A bare '%' (now an empty string) carries no number, so it becomes NaN as before.
df['% Non-citizen'] = pct_text.mask(pct_text == '', np.nan).astype(float)

In [32]:
# Confirm the conversion: the dtype reported at the bottom of the output is now float64
# and the '%' symbols are gone.
df['% Non-citizen'].head()

0     2.6
1     3.8
2     9.9
3     3.5
4    17.4
Name: % Non-citizen, dtype: float64

In [22]:
# 'State Abv' is used for the map locations, so it is worth a look as well. The value at
# index 8 is the full name 'District of Columbia' rather than an abbreviation like the others.
# Per the original author this does not prevent the code from running, but for consistency's
# sake it is best to abbreviate it too.
df['State Abv']

0                       AL
1                       AK
2                       AZ
3                       AR
4                       CA
5                       CO
6                       CT
7                       DE
8     District of Columbia
9                       FL
10                      GA
11                      HI
12                      ID
13                      IL
14                      IN
15                      IA
16                      KS
17                      KY
18                      LA
19                      ME
20                      MD
21                      MA
22                      MI
23                      MN
24                      MS
25                      MO
26                      MT
27                      NE
28                      NV
29                      NH
30                      NJ
31                      NM
32                      NY
33                      NC
34                      ND
35                      OH
36                      OK
3

In [23]:
# Pull out the District of Columbia row to inspect its current 'State Abv' value.
df.loc[df['State'].eq('District of Columbia')]

Unnamed: 0,Year,ICPSR State Code,Alphanumeric State Code,State,VEP Total Ballots Counted,VEP Highest Office,VAP Highest Office,Total Ballots Counted,Highest Office,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv
8,2012,55,9,District of Columbia,61.6%,61.5%,55.5%,294254,293764,477582,528848.0,9.7,0,0,0,0,District of Columbia


In [24]:
# Replace the one spelled-out entry so every 'State Abv' value is an abbreviation.
# The result is assigned back to the column instead of calling replace(..., inplace=True)
# on df['State Abv']: that form is chained assignment through an inplace method, which
# warns on pandas 2.x and leaves df unchanged when Copy-on-Write is enabled.
df['State Abv'] = df['State Abv'].replace(to_replace='District of Columbia', value='DC')
# Re-display the District of Columbia row to check the change; look at the 'State Abv' column.
df[df['State']=='District of Columbia']

Unnamed: 0,Year,ICPSR State Code,Alphanumeric State Code,State,VEP Total Ballots Counted,VEP Highest Office,VAP Highest Office,Total Ballots Counted,Highest Office,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv
8,2012,55,9,District of Columbia,61.6%,61.5%,55.5%,294254,293764,477582,528848.0,9.7,0,0,0,0,DC


In [25]:
# Choropleth trace definition: locations come from the state abbreviations, and both the
# colour value (z) and the hover text come from the '% Non-citizen' column.
data = {
    'type': 'choropleth',
    'locations': df['State Abv'],
    'locationmode': 'USA-states',
    'colorscale': 'portland',
    'text': df['% Non-citizen'],
    'z': df['% Non-citizen'],
    # White, 2-wide outlines between the regions.
    'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 2}},
    'colorbar': {'title': '% of Non-Citizens'},
}

In [26]:
# Figure layout: the title plus geo settings that restrict the map to the USA and draw
# lakes in a blue fill.
layout = {
    'title': 'Percentage of Non-Citizens per state during the 2012 Elections',
    'geo': {
        'scope': 'usa',
        'showlakes': True,
        'lakecolor': 'rgb(85,173,240)',
    },
}

In [29]:
# Build the figure from the choropleth trace dict (wrapped in a list) and the layout dict
# defined in the preceding cells.
choromap = go.Figure(data = [data],layout = layout)

In [30]:
# Render the choropleth inline in the notebook.
# To view the interactive geo-plot from GitHub, go to http://nbviewer.jupyter.org/ and paste
# https://github.com/gladys4949/geo/blob/master/temp-plot.html into the search bar.
iplot(choromap)