## Race / ethnicity for California's 50th Congressional District. 

##### This notebook creates six maps that display race/ethnicity for California's 50th Congressional District using Plotly. The racial categories mapped are:
-Prodominant Race

-Latinx

-White alone

-African-American alone

-Asian alone

-other alone. 

The other race category is an aggregate sum of Native Americans, Pacific Islanders, some other race, and two or more races. Their percentages were too small to represent individually. 

Source: Table: B03002: U.S. Census Bureau, 2013-2017 American Community Survey 5-Year Estimates)

##### Source: Table: B03002: U.S. Census Bureau, 2013-2017 American Community Survey 5-Year Estimates)

###### helpful links used to produce the maps:
https://plotly.com/~empet/14692/mapbox-choropleth-that-works-with-plotly/#/

https://towardsdatascience.com/interactive-choropleth-maps-with-plotly-46c34fba0e48

https://www.districtdatalabs.com/altair-choropleth-viz

https://github.com/bd317/geodata/blob/master/API-Duesseldorf-Github%20(1).ipynb


#### Configuration And Needed Libraries

In [1]:
import pandas as pd
import numpy as np
import urllib.request, json 
import requests
from ast import literal_eval
import json # for working with JSON data

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))
%matplotlib notebook
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import geojson # ditto for GeoJSON data - an extension of JSON with support for geographic data
import geopandas as gpd


In [2]:
# read in csv to dataframe
data2 = pd.read_csv('G:\My Drive\Data\Race Eth ACS 2017\Data_Exports\Selected_5_race_cat_mapping.csv', delimiter = ',')

# rename columns
data2.rename(columns={'Not_Latino_e_pct.1': 'Not_Latino_e_pct'}, inplace=True)


data2.rename(columns={ 'Not_Latino': 'Estimate Not Latino', 'White':'Estimate White',
                         'Black': 'Estimate Black', 'Asian': 'Estimate Asian', 'Latino':'Estimate Latino',
                         'Other_agg':'Estimate Other Race'}, inplace=True)
 
data2.rename(columns={ 'Not_Latino_e_pct':'Not Latino', 'White_e_pct':'White', 'Black_e_pct':'Black',
                         'Asian_e_pct':'Asian', 'Latino_e_pct':'Latino','Other_agg_e_pct':'Other_Race'}, inplace=True)
data2.head(2)

Unnamed: 0,GEOID,Total,Estimate Not Latino,Estimate White,Estimate Black,Estimate Asian,Estimate Latino,Estimate Other Race,Not Latino,White,Black,Asian,Latino,Other_Race
0,6065043216,6359,4567,3356,417,600,1792,194,71.819468,52.775594,6.557635,9.435446,28.180532,3.050794
1,6065043218,5415,3918,3561,74,211,1497,72,72.354571,65.761773,1.366574,3.896584,27.645429,1.32964


In [3]:
# read in lookup table
lu = pd.read_csv('G:\My Drive\Data\Race Eth ACS 2017\Data_Exports\lookup_ct_county_name.csv' , delimiter = ',')

# Joining Census Tract label and County label
data = pd.merge(data2, lu, on='GEOID', how='inner')
data.head(2)

Unnamed: 0,GEOID,Total,Estimate Not Latino,Estimate White,Estimate Black,Estimate Asian,Estimate Latino,Estimate Other Race,Not Latino,White,Black,Asian,Latino,Other_Race,Census Tract,County
0,6065043216,6359,4567,3356,417,600,1792,194,71.819468,52.775594,6.557635,9.435446,28.180532,3.050794,CT 432.16,Riverside
1,6065043218,5415,3918,3561,74,211,1497,72,72.354571,65.761773,1.366574,3.896584,27.645429,1.32964,CT 432.18,Riverside


In [4]:
# read in data with geometry
gmt = gpd.read_file(r'G:\My Drive\Data\Race Eth ACS 2017\Geo_Files\race_ethn_w_geo.json')

# create geo dataframe 
gdf = gmt[['GEOID','geometry']]

# make the string a number
gdf['GEOID'] = pd.to_numeric(gdf['GEOID'])
gdf.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,GEOID,geometry
0,6065051200,"MULTIPOLYGON (((-117.14876 33.47290, -117.1493..."
1,6065049600,"POLYGON ((-117.16311 33.52238, -117.16311 33.5..."


In [5]:
# export the geopandas data frame TO GEOJSON
gdf.to_file('G:\My Drive\Data\Race Eth ACS 2017\Geo_Files\CA_congress.geojson', driver='GeoJSON')

In [6]:
# read in the GEOJSON
with open ('G:\My Drive\Data\Race Eth ACS 2017\Geo_Files\CA_congress.geojson') as geodf:
    geo_data  = json.load(geodf)

In [7]:
#geo_data

In [8]:
# produce a list of colums with quotes // get column names
cols = list(data.columns.values)
cols

['GEOID',
 'Total',
 'Estimate Not Latino',
 'Estimate White',
 'Estimate Black',
 'Estimate Asian',
 'Estimate Latino',
 'Estimate Other Race',
 'Not Latino',
 'White',
 'Black',
 'Asian',
 'Latino',
 'Other_Race',
 'Census Tract',
 'County']

In [9]:
# creating a list that represents column names
race =['Latino','White', 'Black', 'Asian','Other_Race']

# returns the largest value over a given dictionary of columns.
data['predominant_race'] = data[race].idxmax(axis=1)    # why does this return a string
data['predominant_race_pct'] = data[race].max(axis=1)  # does this return a percentage. 
data.head(3)

Unnamed: 0,GEOID,Total,Estimate Not Latino,Estimate White,Estimate Black,Estimate Asian,Estimate Latino,Estimate Other Race,Not Latino,White,Black,Asian,Latino,Other_Race,Census Tract,County,predominant_race,predominant_race_pct
0,6065043216,6359,4567,3356,417,600,1792,194,71.819468,52.775594,6.557635,9.435446,28.180532,3.050794,CT 432.16,Riverside,White,52.775594
1,6065043218,5415,3918,3561,74,211,1497,72,72.354571,65.761773,1.366574,3.896584,27.645429,1.32964,CT 432.18,Riverside,White,65.761773
2,6065043220,4480,2739,2144,164,312,1741,119,61.138393,47.857143,3.660714,6.964286,38.861607,2.65625,CT 432.20,Riverside,White,47.857143


## Plotly Visualization

In [10]:
geo_data

{'type': 'FeatureCollection',
 'crs': {'type': 'name',
  'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}},
 'features': [{'type': 'Feature',
   'properties': {'GEOID': 6065051200},
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[-117.14876, 33.472902],
       [-117.149329, 33.472795],
       [-117.148789, 33.472897],
       [-117.14876, 33.472902]]],
     [[[-117.149263, 33.497393],
       [-117.147781, 33.495825],
       [-117.146988, 33.495011],
       [-117.146738, 33.49476],
       [-117.14654, 33.494534],
       [-117.146372, 33.494329],
       [-117.146109, 33.493988],
       [-117.14597, 33.493778],
       [-117.145779, 33.493462],
       [-117.145316, 33.492604],
       [-117.145226, 33.492393],
       [-117.144813, 33.491384],
       [-117.144428, 33.490354],
       [-117.142914, 33.486147],
       [-117.142424, 33.484718],
       [-117.142235, 33.484222],
       [-117.141672, 33.482743],
       [-117.141484, 33.48225],
       [-117.141378, 33.481951],
 

In [11]:
sources=[{"type": "FeatureCollection", 'features': [feat]} for feat in geo_data['features']]

In [12]:
#sources

In [13]:
df = data
# sort by columns
df.sort_values(by=['GEOID'], inplace=True)
df.head(2)

Unnamed: 0,GEOID,Total,Estimate Not Latino,Estimate White,Estimate Black,Estimate Asian,Estimate Latino,Estimate Other Race,Not Latino,White,Black,Asian,Latino,Other_Race,Census Tract,County,predominant_race,predominant_race_pct
0,6065043216,6359,4567,3356,417,600,1792,194,71.819468,52.775594,6.557635,9.435446,28.180532,3.050794,CT 432.16,Riverside,White,52.775594
1,6065043218,5415,3918,3561,74,211,1497,72,72.354571,65.761773,1.366574,3.896584,27.645429,1.32964,CT 432.18,Riverside,White,65.761773


In [14]:
ids = df['GEOID'].tolist()

In [15]:
df = df.set_index('GEOID', drop=True)
df.head(2)

Unnamed: 0_level_0,Total,Estimate Not Latino,Estimate White,Estimate Black,Estimate Asian,Estimate Latino,Estimate Other Race,Not Latino,White,Black,Asian,Latino,Other_Race,Census Tract,County,predominant_race,predominant_race_pct
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
6065043216,6359,4567,3356,417,600,1792,194,71.819468,52.775594,6.557635,9.435446,28.180532,3.050794,CT 432.16,Riverside,White,52.775594
6065043218,5415,3918,3561,74,211,1497,72,72.354571,65.761773,1.366574,3.896584,27.645429,1.32964,CT 432.18,Riverside,White,65.761773


In [16]:
ids=[geo_data['features'][k]['properties']['GEOID'] for k in range(len(geo_data['features']))]      

In [17]:
type(ids)

list

In [18]:
df.dtypes

Total                     int64
Estimate Not Latino       int64
Estimate White            int64
Estimate Black            int64
Estimate Asian            int64
Estimate Latino           int64
Estimate Other Race       int64
Not Latino              float64
White                   float64
Black                   float64
Asian                   float64
Latino                  float64
Other_Race              float64
Census Tract             object
County                   object
predominant_race         object
predominant_race_pct    float64
dtype: object

In [19]:
# creating 3 dictionaries that will break the data by each groups respective min and max values
rate_list = ['predominant_race_pct', 'Latino', 'White','Black', 'Asian', 'Other_Race'] # original line
mins_list = ['zmin1','zmin2','zmin3','zmin4','zmin5','zmin6']
maxs_list = ['zmax1','zmax2','zmax3','zmax4','zmax5','zmax6']


dct = {}
for i in rate_list:
    dct['%s' % i] = None
    
dct_min = {}
for i in mins_list:
    dct_min['%s' % i] = None

dct_max = {}
for i in maxs_list:
    dct_max['%s' % i] = None

In [20]:
race_list = ['predominant_race_pct', 'Latino', 'White', 'Black', 'Asian', 'Other_Race']

for n in range(0,len(rate_list)):
    dct[rate_list[n]] = [df.loc[GEOID, race_list[n]] for GEOID in ids]
    dct_min[mins_list[n]] = min(dct[rate_list[n]])
    dct_max[maxs_list[n]] = max(dct[rate_list[n]])

In [21]:
def get_color_for_val(val, vmin, vmax, pl_colorscale):
    if vmin >= vmax:
        raise ValueError('vmin should be < vmax')
        
    plotly_scale, plotly_colors = list(map(float, np.array(pl_colorscale)[:,0])), np.array(pl_colorscale)[:,1]  
    colors_01=np.array(list(map(literal_eval,[color[3:] for color in plotly_colors] )))/255.
    
    v= (val - vmin) / float((vmax - vmin)) 

    idx = 0
   
    while(v > plotly_scale[idx+1]): 
        idx+=1
    left_scale_val = plotly_scale[idx]
    right_scale_val = plotly_scale[idx+ 1]
    vv = (v - left_scale_val) / (right_scale_val - left_scale_val)
    
    val_color01 = colors_01[idx]+vv*(colors_01[idx + 1]-colors_01[idx])
    val_color_0255 = list(map(np.uint8, 255*val_color01+0.5))
    return 'rgb'+str(tuple(val_color_0255))

In [22]:
#multicolored
#predominant_race
predominant_colorscale= [[0.0, 'rgb(255, 255, 204)'],
                [0.10, 'rgb(161, 218, 180)'],
                [0.20, 'rgb(65, 182, 196)'], 
                [0.30, 'rgb(44, 127, 184)'],
                [0.40, 'rgb(8, 104, 172)'],
                [1.0, 'rgb(37, 52, 148)']] 

#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------
#dark green
Latino_colorscale= [[0.0, 'rgb(237,248,233)'],
                [0.10, 'rgb(199,233,192)'],
                [0.20, 'rgb(161,217,155)'], 
                [0.30, 'rgb(116,196,118)'],
                [0.40, 'rgb(49,163,84)'],
                [1.0, 'rgb(0,109,44)']] 


#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------
#Red
Black_colorscale= [[0.0, 'rgb(254, 229, 217)'],
                  [0.10, 'rgb(252,187,161)'],
                  [0.20, 'rgb(252,146,114)'], 
                  [0.30, 'rgb(251,106,74)'],
                  [0.40, 'rgb(222,45,38)'],
                  [1.0, 'rgb(165,15,21)']]
                             
#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------
#yellow
White_colorscale=[[0.0, 'rgb(255, 255, 87)'],
                    [0.10, 'rgb(230,230,78)'],
                    [0.20, 'rgb(204,204,69)'], 
                    [0.30, 'rgb(179,179,61)'],
                    [0.40, 'rgb(153,153,52)'],
                    [1.0, 'rgb(128,128,43)']] 

#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------
#blue
Asian_colorscale= [[0.0, 'rgb(153, 255, 204)'],
                [0.10, 'rgb(102, 255, 178)'],
                [0.20, 'rgb(51, 255, 153)'], 
                [0.30, 'rgb(0, 255, 128)'],
                [0.40, 'rgb(0, 204, 102)'],
                [1.0, 'rgb(0, 153, 76)']] 

#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------

Other_colorscale= [[0.0, 'rgb(153, 255, 255)'],
                [0.10, 'rgb(102, 255, 255)'],
                [0.20, 'rgb(51, 255, 255)'], 
                [0.30, 'rgb(0, 255, 255)'],
                [0.40, 'rgb(0, 204, 204)'],
                [1.0, 'rgb(0, 153, 153)']] 


In [23]:
facecolor_list = ['predominant_facecolor', 'facecolor_Latino', 'facecolor_White', 'facecolor_Black', 'facecolor_Asian',
                  'facecolor_Other_Race']
    
scale_list = [predominant_colorscale, Latino_colorscale, White_colorscale, Black_colorscale, Asian_colorscale,
                  Other_colorscale]

dct_facecolor = {}
for i in facecolor_list:
    dct_facecolor['%s' % i] = None


In [24]:
for n in range(0,len(facecolor_list)):
    dct_facecolor[facecolor_list[n]] = [get_color_for_val(r, dct_min[mins_list[n]], dct_max[maxs_list[n]], 
                                        scale_list[n]) for r in dct[rate_list[n]]] 

In [25]:
df.head(2)

Unnamed: 0_level_0,Total,Estimate Not Latino,Estimate White,Estimate Black,Estimate Asian,Estimate Latino,Estimate Other Race,Not Latino,White,Black,Asian,Latino,Other_Race,Census Tract,County,predominant_race,predominant_race_pct
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
6065043216,6359,4567,3356,417,600,1792,194,71.819468,52.775594,6.557635,9.435446,28.180532,3.050794,CT 432.16,Riverside,White,52.775594
6065043218,5415,3918,3561,74,211,1497,72,72.354571,65.761773,1.366574,3.896584,27.645429,1.32964,CT 432.18,Riverside,White,65.761773


In [26]:
# counties = census_tract
census_tract = data['Census Tract']

In [27]:
# note, looking back to the pre-indexed dataframe// "data" is my pre-indexed dataframe // Creating a list
predominant_race_c = data['predominant_race']
predominant_race_c

0      White
1      White
2      White
3      White
4      White
       ...  
154    White
155    White
156    White
157    White
158    White
Name: predominant_race, Length: 159, dtype: object

In [28]:
# race_list = ['predominant_race_pct', 'Latino', 'White', 'Black', 'Asian', 'Other_Race']
text_pr=    [c+'<br>Predominant Race: '+ w + ' with ' + '{:0.2f}'.format(r)+'%' for c, r, w in zip(census_tract, dct[rate_list[0]], predominant_race_c)]
text_latino=[c+'<br>Latino: ' + '{:0.2f}'.format(r)+'%' for c, r in zip(census_tract , dct[rate_list[1]])]
text_white= [c+'<br>White:  ' + '{:0.2f}'.format(r)+'%' for c, r in zip(census_tract , dct[rate_list[2]])]
text_black= [c+'<br>Black: ' + '{:0.2f}'.format(r)+'%' for c, r in zip(census_tract , dct[rate_list[3]])]
text_asian= [c+'<br>Asian: ' + '{:0.2f}'.format(r)+'%' for c, r in zip(census_tract , dct[rate_list[4]])]
text_other= [c+'<br>Other Race: ' + '{:0.2f}'.format(r)+'%' for c, r in zip(census_tract, dct[rate_list[5]])]


In [29]:
text_list = [text_pr, text_latino,text_white,text_black,text_asian,text_other]

In [None]:
import plotly.graph_objects as go
import plotly
from plotly.offline import iplot, init_notebook_mode
import plotly.offline as off
import plotly.express as px


data = []

mapbox_access_token = 'pk.eyJ1Ijoic2V2ZXJpbm4iLCJhIjoiY2s5NTZ5czJpMGtwaTNrcDNvOHB1ODF4aCJ9.cXb2gWNCGOzUBAnWtAjfUA'

lons=[]
lats=[]

for k in range(len(geo_data['features'])):
    county_coords=np.array(geo_data['features'][k]['geometry']['coordinates'][0])
    m, M =county_coords[:,0].min(), county_coords[:,0].max()
    lons.append(0.5*(m+M))
    m, M =county_coords[:,1].min(), county_coords[:,1].max()
    lats.append(0.5*(m+M))
    
data = [dict(type='scattermapbox',
             lat=lats, 
             lon=lons,
             mode='markers',
             text=text_pr,
             marker=dict(size=1, color='white'),
             showlegend=False,
             hoverinfo='text'
            )] 

for a in range(1,len(text_list)):
    data.append(dict(type='scattermapbox',
                 lat=lats, 
                 lon=lons,
                 mode='markers',
                 text=text_list[a],
                 marker=dict(size=1, color='white'),
                 showlegend=False,
                 hoverinfo='text'
                ))
    
fig = go.Figure(go.Scattermapbox(
        lat=['33.0738775'],
        lon=['-116.6427041'],
        mode='markers', 
    ))

layers_list = ['layers1','layers2','layers3','layers4','layers5','layers6']

dct_layers = {}
for i in layers_list:
    dct_layers['%s' % i] = None
    
for l in range(0,len(layers_list)):
    dct_layers[layers_list[l]]=[dict(sourcetype = 'geojson',
             source =sources[k],
             #below="water", 
             type = 'fill',   
             color = dct_facecolor[facecolor_list[l]][k],
             opacity=0.5
            ) for k in range(len(sources))]



lay_list = [dct_layers[layers_list[0]],dct_layers[layers_list[1]],dct_layers[layers_list[2]],dct_layers[layers_list[3]],dct_layers[layers_list[4]],dct_layers[layers_list[5]]]

layout = dict(title='CA 50th Congressional District by Race/Ethnicity.',
              font=dict(family='Arial Black'),
              autosize=False,
              width=800,
              height=500,
              hovermode='closest',
              mapbox=dict(accesstoken=mapbox_access_token,
                          layers=dct_layers[layers_list[0]],
                          bearing=0,
                          center=dict(
                          lat=33.0738775, 
                          lon=-116.6427041),
                          pitch=0,
                          zoom=7
                    ) 
              )


updatemenus = list([dict(buttons=list()), 
                    dict(direction='down',
                         showactive=True)])

# race_list = ['predominant_race_pct', 'Latino', 'White', 'Black', 'Asian', 'Other_Race']
race_list = ['Predominant Race','Latino','White','Black','Asian','Other Race']
vis_list = [[True,False,False,False,False,False],[False,True,False,False,False,False],[False,False,True,False,False,False],
           [False,False,False,True,False,False],[False,False,False,False,True,False],[False,False,False,False,False,True]]




for s in range(0,len(race_list)):
    updatemenus[0]['buttons'].append(dict(args=[{'visible': vis_list[s]},
                                               {'mapbox.layers': lay_list[s]}],
                                          label=race_list[s],
                                          method='update'))

layout['updatemenus'] = updatemenus

fig = dict(data=data, layout=layout)

off.iplot({'data': data,'layout': layout}, validate=False)