# Capstone Project - The Battle of Neighborhoods
### Week 1
### Applied Data Science Capstone by IBM

### Introduction: Business Problem

In the 2020 United States presidential election, Joe Biden narrowly won the state of Georgia. This project aims to gain insight into voters' decisions. By analyzing how income, ethnicity, and industry are distributed across counties, we can infer how different types of voters tend to vote. Political parties can then adjust their policies to attract different target voters.


### Data

* GEOID (Source:https://www2.census.gov/acs2010_5yr/summaryfile/UserTools/Geography/GA.xls)

* Votes by county in Georgia - 2020 United States presidential election results by county in Georgia (Source:https://en.wikipedia.org/wiki/2020_United_States_presidential_election_in_Georgia)

* Income by county in Georgia - Covered employment and wages in the United States and all counties in Georgia, fourth quarter 2019
(Source:https://www.bls.gov/regions/southeast/news-release/countyemploymentandwages_georgia.htm)

* Ethnicity and race distribution by county - County Population by Race 2016
(Source:https://census.georgia.gov/census-data/population-estimates)

* US Cities - City name, County, State, Latitude, Longitude
(Source:https://simplemaps.com/data/us-cities)

* Foursquare API

#### Import Libraries

In [None]:
import pandas as pd
import numpy as np # library to handle data in a vectorized manner
import requests
from geopy.geocoders import Nominatim
import plotly.graph_objects as go
import plotly.express as px

print('Libraries imported.')

In [None]:
!pip install dash==1.19.0
!pip install jupyter_dash
!pip install --upgrade plotly 

### Data Exploration
#### GEOID

In [None]:
# Convert the Google Drive share link into a direct-download URL and load the
# Georgia geography file from it.
GA_FIPS = 'https://drive.google.com/file/d/1Vr3b2RfCJQ3FUC_Y2YZbaJchWezmPPWI/view?usp=sharing'
file_id = GA_FIPS.split('/')[-2]  # second-to-last path segment of the share link
dwn_url = 'https://drive.google.com/uc?id=' + file_id
GA_FIPS = pd.read_csv(dwn_url)

# Keep rows 12..170 and renumber them from 0.
# NOTE(review): presumably these are the county-level records - confirm against the file.
GA_FIPS_new = GA_FIPS.iloc[12:171].reset_index(drop=True)
GA_FIPS_new.head()

In [None]:
# Build the county lookup table (county id + county name).
# .copy() makes GEOID an independent frame, so the column assignments below
# do not write into a slice of GA_FIPS_new (avoids SettingWithCopyWarning).
GEOID = GA_FIPS_new[['GEOID', 'GEOGRAPHY NAME']].copy()

# Everything from character 7 onward of the GEOID string is used as the county id.
GEOID['County GEOID'] = GEOID['GEOID'].str[7:]

# Remove the "County, Georgia" suffix, then strip the whitespace it leaves
# behind so the county names carry no trailing space.
GEOID['County'] = (
    GEOID['GEOGRAPHY NAME']
    .str.replace("County, Georgia", "", regex=False)
    .str.strip()
)

GEO_new = GEOID.drop(['GEOID', 'GEOGRAPHY NAME'], axis=1)

In [None]:
# Preview the county lookup table.
GEO_new.head()

#### 2020 United States presidential election result by county in Georgia state

In [None]:
# Read the HTML tables from the Wikipedia results page; `match` restricts the
# result to tables whose text contains "Total votes cast".
table = pd.read_html('https://en.wikipedia.org/wiki/2020_United_States_presidential_election_in_Georgia', match='Total votes cast')

In [None]:
# Use the first matching table as the results table.
GA_result = table[0]

In [None]:
# Replace the scraped header with short, single-level column names.
# Assigning the list directly swaps out whatever header read_html produced;
# pandas raises ValueError (leaving the header untouched) if the number of
# names does not match the number of columns.
columns = ["County", "Joe Biden Democratic #","Joe Biden Democratic %","Donald Trump Republican #","Donald Trump Republican %",
           "Jo Jorgensen Libertarian #", "Jo Jorgensen Libertarian %", "Howie Hawkins Green #", 
           "Brian Carroll American Solidarity", "Margin #", "Margin %", "Total Votes Cast"]
GA_result.columns = columns

In [None]:
# Strip every square bracket and ASCII digit from the county names in a single
# pass (presumably footnote markers such as "[12]" left over from the scrape).
# regex=True is passed explicitly because the default of `regex` differs
# between pandas versions.
GA_result['County'] = GA_result['County'].str.replace(r"[\[\]0-9]", "", regex=True)

In [None]:
# Preview the first rows of the cleaned results table.
GA_result.head()

In [None]:
# Inspect the last rows of the results table.
GA_result.tail()

In [None]:
# (rows, columns) of the results table.
GA_result.shape

In [None]:
# Keep only the raw vote counts of the two main candidates.
JB_and_DT_N = GA_result[['Joe Biden Democratic #','Donald Trump Republican #']]

In [None]:
# Discard the final row of the vote-count table
# (presumably the statewide totals row - verify with GA_result.tail()).
JB_and_DT_N_dropped_last_1 = JB_and_DT_N.iloc[:-1]
JB_and_DT_N_dropped_last_1.tail()

In [None]:
# Put the county lookup columns and the vote counts side by side.
# axis=1 with join='inner' aligns rows on their index labels, not on county name.
# NOTE(review): this assumes both tables list the counties in the same order - confirm.
JB_and_DT_N_with_GEOID = pd.concat([GEO_new, JB_and_DT_N_dropped_last_1], axis=1, join='inner')

In [None]:
# Label each county with the candidate who received more votes.
# The comparison is strict, so a tie is labelled 'Donald Trump Republican'.
biden_votes = JB_and_DT_N_with_GEOID['Joe Biden Democratic #']
trump_votes = JB_and_DT_N_with_GEOID['Donald Trump Republican #']
JB_and_DT_N_with_GEOID['winner'] = np.where(
    biden_votes > trump_votes,
    'Joe Biden Democratic',
    'Donald Trump Republican',
)

In [None]:
# Preview the combined table, including the new 'winner' column.
JB_and_DT_N_with_GEOID.head()

In [None]:
# Count the rows (counties) per winner; count() reports the number of
# non-null values in every remaining column.
Winner_county_count = JB_and_DT_N_with_GEOID.groupby('winner').count()
Winner_county_count

In [None]:
import plotly.express as px
from urllib.request import urlopen
import json

# Download the county-boundary GeoJSON from the plotly datasets repository.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

geojson = counties 

# Choropleth of the winner per county: 'County GEOID' selects the GeoJSON
# feature to fill, Trump counties are drawn in red (#EF553B) and Biden
# counties in blue (#636EFA). The map is centred on the given lat/lon.
fig = px.choropleth_mapbox(JB_and_DT_N_with_GEOID, geojson=geojson, color="winner",
                           locations=JB_and_DT_N_with_GEOID['County GEOID'],
                           center = {"lat": 33.247875, "lon":-83.441162 },
                           color_discrete_map={'Donald Trump Republican':'#EF553B',
                                               'Joe Biden Democratic':'#636EFA'},
                           mapbox_style="open-street-map",
                           zoom=5.5)
# Remove the margins around the map.
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
import json
import os
import urllib.request
from urllib.request import urlopen

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# The Mapbox token can be supplied through the MAPBOX_ACCESS_TOKEN environment
# variable; the notebook's existing token is the fallback so it still runs
# without any configuration.
mapbox_token = os.environ.get(
    "MAPBOX_ACCESS_TOKEN",
    "pk.eyJ1IjoiY2hyaXN0aWVsYXkiLCJhIjoiY2tud2RtZjhoMDEzMDJxcGZyZWV2anByMiJ9.O8ZkzD6zGvDToQAr3rEsug",
)

# Two side-by-side maps: Biden vote counts (left) and Trump vote counts (right).
fig = make_subplots(
    rows=1, cols=2, subplot_titles=('Joe Biden Democratic #', 'Donald Trump Republican #'),
    specs=[[{"type": "mapbox"}, {"type": "mapbox"}]]
)

# County-boundary GeoJSON from the plotly datasets repository.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

df1 = JB_and_DT_N_with_GEOID

# Both traces reference the same colour axis, so the two maps share one scale.
fig.add_trace(go.Choroplethmapbox(geojson=counties,
                                  locations=df1['County GEOID'],
                                  z=df1['Joe Biden Democratic #'],
                                  colorbar=dict(thickness=20, x=0.46),
                                  coloraxis="coloraxis1",
                                  marker=dict(opacity=0.75)), row=1, col=1)
fig.add_trace(go.Choroplethmapbox(geojson=counties,
                                  locations=df1['County GEOID'],
                                  z=df1['Donald Trump Republican #'],
                                  coloraxis="coloraxis1",
                                  colorbar=dict(thickness=20, x=1.02),
                                  marker=dict(opacity=0.75, line_width=0.5)), row=1, col=2)

# Apply token, bearing and centre to every mapbox subplot.
fig.update_mapboxes(
        bearing=0,
        accesstoken=mapbox_token,
        center = {"lat": 33.247875, "lon":-83.441162 })

fig.update_layout(coloraxis=dict(colorscale='plasma'), showlegend=False)

fig.update_layout(margin=dict(l=0, r=0, t=50, b=10))

# Last expression of the cell: its return value (the figure) is what the
# notebook displays.
fig.update_layout(mapbox1=dict(zoom=5.5, style='light'),
                  mapbox2=dict(zoom=5.5, style='light'))

In [None]:
# Transpose the results so each original column becomes a row and each
# original row becomes a column.
GA_result_t = GA_result.transpose()
GA_result_t

In [None]:
# Rows of the transposed table holding the raw vote counts per candidate.
vote_count_rows = [
    "Joe Biden Democratic #",
    "Donald Trump Republican #",
    "Jo Jorgensen Libertarian #",
    "Howie Hawkins Green #",
]
GA_result_t_selected = GA_result_t.loc[vote_count_rows]

# Take the column at position 159 of the transposed table.
# NOTE(review): presumably the statewide totals (original row 159) - confirm.
total_result_pie = GA_result_t_selected.iloc[:, 159]
total_result_pie.head()

In [None]:
# Turn the selected totals into a two-column frame for the pie chart.
# reset_index() moves the row labels (the candidate columns selected earlier)
# out of the index into a regular column; both columns are then named
# explicitly, which makes a separate rename step unnecessary.
total_result_pie = pd.DataFrame(total_result_pie).reset_index()
total_result_pie.columns = ['Candidates', 'Total Votes']

In [None]:
# Display the pie-chart input table.
total_result_pie

In [None]:
# Pie chart of 'Total Votes' with one slice per entry in 'Candidates'.
fig = px.pie(total_result_pie, values='Total Votes', names='Candidates', color='Candidates')
             
fig.show()

#### Employment & Average weekly wage by county

In [None]:
# Google Drive share link for the employment / wage CSV.
Income ='https://drive.google.com/file/d/1OVJpSjTXfLbnZ7fl-yTLHHyBW_wbkk8A/view?usp=sharing'
# The file id is the second-to-last path segment of the share link.
Income_id =Income.split('/')[-2]
# Direct-download URL built from the file id.
Income_url ='https://drive.google.com/uc?id=' + Income_id
# From here on `Income` is the loaded DataFrame, no longer the link string.
Income = pd.read_csv(Income_url)

In [None]:
# Preview the raw employment / wage table.
Income.head()

In [None]:
# Skip the first two rows and renumber the remaining rows from 0.
# NOTE(review): presumably the skipped rows are national/state aggregates - confirm.
Income_new = Income.iloc[2:].reset_index(drop=True)
Income_new.head()

In [None]:
# Put the county lookup columns and the employment / wage columns side by side.
# axis=1 with join='inner' aligns rows on their index labels, not on county name.
# NOTE(review): this assumes both tables list the counties in the same order - confirm.
Income_with_GEOID = pd.concat([GEO_new, Income_new], axis=1, join='inner')
Income_with_GEOID.head()

In [None]:
# Convert the two comma-formatted columns to integers.
for numeric_col in ('Employment December 2019', 'Average weekly wage'):
    # Drop thousands separators so the values can be parsed as numbers.
    without_commas = Income_with_GEOID[numeric_col].replace({',': ''}, regex=True)
    # Parse as float first, then cast to int.
    Income_with_GEOID[numeric_col] = without_commas.astype(float).astype(int)

In [None]:
# Check the column dtypes after the numeric conversion.
Income_with_GEOID.dtypes

In [None]:
import json
import os
import urllib.request
# Imported here so the cell does not depend on an earlier cell having
# brought `urlopen` into scope.
from urllib.request import urlopen

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# The Mapbox token can be supplied through the MAPBOX_ACCESS_TOKEN environment
# variable; the notebook's existing token is the fallback so it still runs
# without any configuration.
mapbox_token = os.environ.get(
    "MAPBOX_ACCESS_TOKEN",
    "pk.eyJ1IjoiY2hyaXN0aWVsYXkiLCJhIjoiY2tud2RtZjhoMDEzMDJxcGZyZWV2anByMiJ9.O8ZkzD6zGvDToQAr3rEsug",
)

# Two side-by-side maps: employment (left) and average weekly wage (right).
fig = make_subplots(
    rows=1, cols=2, subplot_titles=('Employment December 2019', 'Average Weekly Wage'),
    specs=[[{"type": "mapbox"}, {"type": "mapbox"}]]
)

# County-boundary GeoJSON from the plotly datasets repository.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

df2 = Income_with_GEOID

# Each trace has its own colour scale and colour bar.
fig.add_trace(go.Choroplethmapbox(geojson=counties,
                                  locations=df2['County GEOID'],
                                  z=df2['Employment December 2019'],
                                  colorscale='Viridis',
                                  colorbar=dict(thickness=20, x=0.46),
                                  marker=dict(opacity=0.75)), row=1, col=1)
fig.add_trace(go.Choroplethmapbox(geojson=counties,
                                  locations=df2['County GEOID'],
                                  z=df2['Average weekly wage'],
                                  colorscale='matter_r',
                                  colorbar=dict(thickness=20, x=1.02),
                                  marker=dict(opacity=0.75, line_width=0.5)), row=1, col=2)

# Apply token, bearing and centre to every mapbox subplot.
fig.update_mapboxes(
        bearing=0,
        accesstoken=mapbox_token,
        center = {"lat": 33.247875, "lon":-83.441162 })

fig.update_layout(margin=dict(l=0, r=0, t=50, b=10))

# Last expression of the cell: its return value (the figure) is what the
# notebook displays.
fig.update_layout(mapbox1=dict(zoom=5.5, style='light'),
                  mapbox2=dict(zoom=5.5, style='light'))

#### US CITIES

In [None]:
# Google Drive share link for the US cities CSV.
uscities = 'https://drive.google.com/file/d/1DJz5ZJJvcnvKPBWwuZaJhzHjEqd2p79N/view?usp=sharing'
# The file id is the second-to-last path segment of the share link.
uscities_id = uscities.split('/')[-2]
# Direct-download URL built from the file id.
uscities_url ='https://drive.google.com/uc?id=' + uscities_id
# From here on `uscities` is the loaded DataFrame, no longer the link string.
uscities = pd.read_csv(uscities_url)
uscities.head()

In [None]:
# Keep only the rows whose state_id is 'GA'.
GA = uscities[uscities['state_id'] == 'GA']
GA.head()

In [None]:
# Order cities by county name, then county FIPS (both ascending), then by
# population from largest to smallest; renumber the rows from 0.
sorted_GA = (
    GA.sort_values(
        ['county_name', 'county_fips', 'population'],
        ascending=[True, True, False],
    )
    .reset_index(drop=True)
)
sorted_GA.head(20)

In [None]:
# Keep only the city, county and coordinate columns.
sorted_GA_new = sorted_GA[['city', 'county_fips', 'county_name', 'lat', 'lng']]
sorted_GA_new.head(10)

In [None]:
# Check the column dtypes of the trimmed city table.
sorted_GA_new.dtypes

In [None]:
import os

import plotly.express as px
import numpy as np
import plotly.graph_objects as go

# The Mapbox token can be supplied through the MAPBOX_ACCESS_TOKEN environment
# variable; the notebook's existing token is the fallback so it still runs
# without any configuration.
mapbox_token = os.environ.get(
    "MAPBOX_ACCESS_TOKEN",
    "pk.eyJ1IjoiY2hyaXN0aWVsYXkiLCJhIjoiY2tud2RtZjhoMDEzMDJxcGZyZWV2anByMiJ9.O8ZkzD6zGvDToQAr3rEsug",
)
px.set_mapbox_access_token(mapbox_token)

# One marker per city in sorted_GA; marker size and colour both encode population.
fig = px.scatter_mapbox(sorted_GA,lat = "lat",lon = "lng",
                        size = "population",color = 'population',
                        color_continuous_scale='blackbody', size_max=20, zoom=5.5)
fig.show()