In [None]:
#Install cenpy
# NOTE(review): the `import cenpy` line in the setup cell is commented out, so this
# install looks unused in the visible notebook — confirm before removing it.
!pip install cenpy

In [None]:
#Setup
import altair as alt
#import cenpy as cen
import pandas as pd
import geopandas as gpd
import numpy as np

from google.colab import files

In [None]:
#Load data for race in each county
#The raw table is named county_raw (not `all`) so the Python builtin all() is not shadowed
county_raw = pd.read_csv("county.csv")

#Keep the ID, name, total population (P1_001N) and Asian population (P1_006N) columns,
#remove the first row (which has alternative column names), and convert the two
#counts to float for calculating percentages later.
#astype returns a new DataFrame, so columns added to df in later cells are not
#written onto a slice of the raw table.
df = (
    county_raw[['GEO_ID','NAME','P1_001N','P1_006N']]
    .iloc[1:]
    .astype({'P1_001N': float, 'P1_006N': float})
)

In [None]:
#Create new variables in county
#Make percent variable: Asian population as a percentage of total population
df['PCT'] = (df['P1_006N']/df['P1_001N'])*100

#Separate NAME variable (formatted as "COUNTY, STATE") into county and state variables.
#Split once and pick the pieces with the vectorized .str accessor instead of a
#Python-level apply(lambda); a name without a comma then yields NaN for STATE
#rather than raising IndexError.
name_parts = df['NAME'].str.split(',')
df['COUNTY'] = name_parts.str[0]

#Make state variable and strip the space left after the comma
df['STATE'] = name_parts.str[1].str.strip()

In [None]:
#Load data for race in each state
all_state = pd.read_csv("state.csv")

#Build the state table in one chain so every step returns a new DataFrame
#(no column assignment onto a slice of all_state):
# - keep the ID, name, total population (P1_001N) and Asian population (P1_006N) columns
# - remove the first row, which has alternative column names
# - convert the two counts to float
# - STATE: copy of NAME with extra spaces stripped, named to match the County dataframe's merge key
# - PCT_STATE: Asian population as a percentage of the state's total population (kept as float)
state = (
    all_state[['GEO_ID','NAME','P1_001N','P1_006N']]
    .iloc[1:]
    .astype({'P1_001N': float, 'P1_006N': float})
    .assign(
        STATE=lambda d: d['NAME'].str.strip(),
        PCT_STATE=lambda d: (d['P1_006N']/d['P1_001N'])*100,
    )
)

#Creating dataset with only the state name and percent, which will later be merged into the County dataframe so we can compare a county's Asian population with the state's
state_pct = state[['STATE','PCT_STATE']]
state_pct

Unnamed: 0,STATE,PCT_STATE
1,Alabama,1.525791
2,Alaska,6.003892
3,Arizona,3.599663
4,Arkansas,1.721354
5,California,15.392566
6,Colorado,3.460978
7,Connecticut,4.78252
8,Delaware,4.313257
9,District of Columbia,4.864802
10,Florida,2.988562


In [None]:
#Before merging, count how many county rows have a STATE value that also appears in the state table
state_names = state_pct['STATE']
df['STATE'].isin(state_names).value_counts()

True    3221
Name: STATE, dtype: int64

In [None]:
#Attach each state's percentage (PCT_STATE) to every county row, matching on STATE
df_merge = df.merge(state_pct, on='STATE', how="outer")
df_merge

Unnamed: 0,GEO_ID,NAME,P1_001N,P1_006N,PCT,COUNTY,STATE,PCT_STATE
0,0500000US01001,"Autauga County, Alabama",58805.0,881.0,1.498172,Autauga County,Alabama,1.525791
1,0500000US01003,"Baldwin County, Alabama",231767.0,2067.0,0.891844,Baldwin County,Alabama,1.525791
2,0500000US01005,"Barbour County, Alabama",25223.0,117.0,0.463862,Barbour County,Alabama,1.525791
3,0500000US01007,"Bibb County, Alabama",22293.0,32.0,0.143543,Bibb County,Alabama,1.525791
4,0500000US01009,"Blount County, Alabama",59134.0,178.0,0.301011,Blount County,Alabama,1.525791
...,...,...,...,...,...,...,...,...
3216,0500000US72145,"Vega Baja Municipio, Puerto Rico",54414.0,28.0,0.051457,Vega Baja Municipio,Puerto Rico,0.121764
3217,0500000US72147,"Vieques Municipio, Puerto Rico",8249.0,7.0,0.084859,Vieques Municipio,Puerto Rico,0.121764
3218,0500000US72149,"Villalba Municipio, Puerto Rico",22093.0,10.0,0.045263,Villalba Municipio,Puerto Rico,0.121764
3219,0500000US72151,"Yabucoa Municipio, Puerto Rico",30426.0,8.0,0.026293,Yabucoa Municipio,Puerto Rico,0.121764


In [None]:
#Counties whose Asian population share (PCT) is above the national average (~7.2%)
# NOTE(review): 7.2 is hard-coded here — confirm it is the right national figure for the P1_006N measure.
aboveavg_natl = df_merge.query('PCT > 7.2')
aboveavg_natl

Unnamed: 0,GEO_ID,NAME,P1_001N,P1_006N,PCT,COUNTY,STATE,PCT_STATE
67,0500000US02013,"Aleutians East Borough, Alaska",3420.0,771.0,22.543860,Aleutians East Borough,Alaska,6.003892
68,0500000US02016,"Aleutians West Census Area, Alaska",5232.0,1513.0,28.918196,Aleutians West Census Area,Alaska,6.003892
69,0500000US02020,"Anchorage Municipality, Alaska",291247.0,27646.0,9.492287,Anchorage Municipality,Alaska,6.003892
72,0500000US02063,"Chugach Census Area, Alaska",7102.0,529.0,7.448606,Chugach Census Area,Alaska,6.003892
81,0500000US02130,"Ketchikan Gateway Borough, Alaska",13948.0,1129.0,8.094350,Ketchikan Gateway Borough,Alaska,6.003892
...,...,...,...,...,...,...,...,...
2936,0500000US51685,"Manassas Park city, Virginia",17219.0,1835.0,10.656833,Manassas Park city,Virginia,7.130205
2950,0500000US51810,"Virginia Beach city, Virginia",459470.0,34305.0,7.466211,Virginia Beach city,Virginia,7.130205
2970,0500000US53033,"King County, Washington",2269675.0,452475.0,19.935674,King County,Washington,9.481757
2984,0500000US53061,"Snohomish County, Washington",827957.0,101464.0,12.254743,Snohomish County,Washington,9.481757


In [None]:
#Calculating the difference between each county's % of Asian population and the national % (7.2)
#aboveavg_natl is a filtered slice of df_merge; assigning a column onto it directly
#triggers pandas' SettingWithCopyWarning. assign() returns a new DataFrame with the
#extra column instead, so the name is rebound to an independent frame.
aboveavg_natl = aboveavg_natl.assign(PCT_DIFF=aboveavg_natl['PCT'] - 7.2)

#Show the 25 counties furthest above the national figure
aboveavg_natl.sort_values('PCT_DIFF', ascending=False).head(25)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,GEO_ID,NAME,P1_001N,P1_006N,PCT,COUNTY,STATE,PCT_STATE,PCT_DIFF
548,0500000US15003,"Honolulu County, Hawaii",1016508.0,436853.0,42.975855,Honolulu County,Hawaii,37.237188,35.775855
229,0500000US06085,"Santa Clara County, California",1936259.0,759030.0,39.200851,Santa Clara County,California,15.392566,32.000851
224,0500000US06075,"San Francisco County, California",873965.0,296505.0,33.926416,San Francisco County,California,15.392566,26.726416
187,0500000US06001,"Alameda County, California",1682353.0,545261.0,32.410618,Alameda County,California,15.392566,25.210618
227,0500000US06081,"San Mateo County, California",764442.0,230242.0,30.118963,San Mateo County,California,15.392566,22.918963
68,0500000US02016,"Aleutians West Census Area, Alaska",5232.0,1513.0,28.918196,Aleutians West Census Area,Alaska,6.003892,21.718196
550,0500000US15007,"Kauai County, Hawaii",73298.0,21102.0,28.789326,Kauai County,Hawaii,37.237188,21.589326
1869,0500000US36081,"Queens County, New York",2405464.0,660631.0,27.463766,Queens County,New York,9.569344,20.263766
551,0500000US15009,"Maui County, Hawaii",164754.0,44328.0,26.905568,Maui County,Hawaii,37.237188,19.705568
1786,0500000US34023,"Middlesex County, New Jersey",863162.0,228813.0,26.508697,Middlesex County,New Jersey,10.228126,19.308697


In [None]:
#Counties whose Asian population share (PCT) is above their own state's share (PCT_STATE)
is_above_state = df_merge['PCT'] > df_merge['PCT_STATE']
aboveavg_state = df_merge[is_above_state]
aboveavg_state

Unnamed: 0,GEO_ID,NAME,P1_001N,P1_006N,PCT,COUNTY,STATE,PCT_STATE
15,0500000US01031,"Coffee County, Alabama",53465.0,909.0,1.700178,Coffee County,Alabama,1.525791
36,0500000US01073,"Jefferson County, Alabama",674721.0,13109.0,1.942877,Jefferson County,Alabama,1.525791
40,0500000US01081,"Lee County, Alabama",174241.0,8572.0,4.919623,Lee County,Alabama,1.525791
41,0500000US01083,"Limestone County, Alabama",103570.0,1869.0,1.804577,Limestone County,Alabama,1.525791
44,0500000US01089,"Madison County, Alabama",388153.0,10292.0,2.651532,Madison County,Alabama,1.525791
...,...,...,...,...,...,...,...,...
3190,0500000US72093,"Maricao Municipio, Puerto Rico",4755.0,9.0,0.189274,Maricao Municipio,Puerto Rico,0.121764
3192,0500000US72097,"Mayagüez Municipio, Puerto Rico",73077.0,102.0,0.139579,Mayagüez Municipio,Puerto Rico,0.121764
3207,0500000US72127,"San Juan Municipio, Puerto Rico",342259.0,1060.0,0.309707,San Juan Municipio,Puerto Rico,0.121764
3212,0500000US72137,"Toa Baja Municipio, Puerto Rico",75293.0,99.0,0.131486,Toa Baja Municipio,Puerto Rico,0.121764


In [None]:
#Calculating the difference between each county's % of Asian population and the state's
#aboveavg_state is a filtered slice of df_merge; assigning a column onto it directly
#triggers pandas' SettingWithCopyWarning. assign() returns a new DataFrame with the
#extra column instead, so the name is rebound to an independent frame.
aboveavg_state = aboveavg_state.assign(
    PCT_DIFF=aboveavg_state['PCT'] - aboveavg_state['PCT_STATE']
)

#Show the 25 counties furthest above their state's figure
aboveavg_state.sort_values('PCT_DIFF', ascending=False).head(25)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,GEO_ID,NAME,P1_001N,P1_006N,PCT,COUNTY,STATE,PCT_STATE,PCT_DIFF
229,0500000US06085,"Santa Clara County, California",1936259.0,759030.0,39.200851,Santa Clara County,California,15.392566,23.808285
68,0500000US02016,"Aleutians West Census Area, Alaska",5232.0,1513.0,28.918196,Aleutians West Census Area,Alaska,6.003892,22.914304
224,0500000US06075,"San Francisco County, California",873965.0,296505.0,33.926416,San Francisco County,California,15.392566,18.53385
1869,0500000US36081,"Queens County, New York",2405464.0,660631.0,27.463766,Queens County,New York,9.569344,17.894422
187,0500000US06001,"Alameda County, California",1682353.0,545261.0,32.410618,Alameda County,California,15.392566,17.018052
2602,0500000US48157,"Fort Bend County, Texas",822779.0,182537.0,22.185423,Fort Bend County,Texas,5.439878,16.745545
67,0500000US02013,"Aleutians East Borough, Alaska",3420.0,771.0,22.54386,Aleutians East Borough,Alaska,6.003892,16.539968
1786,0500000US34023,"Middlesex County, New Jersey",863162.0,228813.0,26.508697,Middlesex County,New Jersey,10.228126,16.280571
82,0500000US02150,"Kodiak Island Borough, Alaska",13101.0,2803.0,21.395313,Kodiak Island Borough,Alaska,6.003892,15.391422
227,0500000US06081,"San Mateo County, California",764442.0,230242.0,30.118963,San Mateo County,California,15.392566,14.726397


In [None]:
#Exporting files
#df_merge.to_csv('df_merge.csv') 
#files.download('df_merge.csv')

In [None]:
#aboveavg_natl.to_csv('aboveavg_natl.csv') 
#files.download('aboveavg_natl.csv')

In [None]:
#aboveavg_state.to_csv('aboveavg_state.csv') 
#files.download('aboveavg_state.csv')

Comparing 2010 and 2020

In [None]:
#Load 2010 data (one row per Label, one column per county)
county_10_raw = pd.read_csv("county_2010.csv")
county_10_raw['Label'] = county_10_raw['Label'].str.strip()

#Keep only the rows whose label contains 'Total:' or 'Asian alone'
is_total_row = county_10_raw['Label'].str.contains('Total:')
is_asian_row = county_10_raw['Label'].str.contains('Asian alone')
county_10_raw = county_10_raw[is_total_row | is_asian_row]
county_10_raw

Unnamed: 0,Label,"Autauga County, Alabama","Baldwin County, Alabama","Barbour County, Alabama","Bibb County, Alabama","Blount County, Alabama","Bullock County, Alabama","Butler County, Alabama","Calhoun County, Alabama","Chambers County, Alabama","Cherokee County, Alabama","Chilton County, Alabama","Choctaw County, Alabama","Clarke County, Alabama","Clay County, Alabama","Cleburne County, Alabama","Coffee County, Alabama","Colbert County, Alabama","Conecuh County, Alabama","Coosa County, Alabama","Covington County, Alabama","Crenshaw County, Alabama","Cullman County, Alabama","Dale County, Alabama","Dallas County, Alabama","DeKalb County, Alabama","Elmore County, Alabama","Escambia County, Alabama","Etowah County, Alabama","Fayette County, Alabama","Franklin County, Alabama","Geneva County, Alabama","Greene County, Alabama","Hale County, Alabama","Henry County, Alabama","Houston County, Alabama","Jackson County, Alabama","Jefferson County, Alabama","Lamar County, Alabama","Lauderdale County, Alabama",...,"Juana Díaz Municipio, Puerto Rico","Juncos Municipio, Puerto Rico","Lajas Municipio, Puerto Rico","Lares Municipio, Puerto Rico","Las Marías Municipio, Puerto Rico","Las Piedras Municipio, Puerto Rico","Loíza Municipio, Puerto Rico","Luquillo Municipio, Puerto Rico","Manatí Municipio, Puerto Rico","Maricao Municipio, Puerto Rico","Maunabo Municipio, Puerto Rico","Mayagüez Municipio, Puerto Rico","Moca Municipio, Puerto Rico","Morovis Municipio, Puerto Rico","Naguabo Municipio, Puerto Rico","Naranjito Municipio, Puerto Rico","Orocovis Municipio, Puerto Rico","Patillas Municipio, Puerto Rico","Peñuelas Municipio, Puerto Rico","Ponce Municipio, Puerto Rico","Quebradillas Municipio, Puerto Rico","Rincón Municipio, Puerto Rico","Río Grande Municipio, Puerto Rico","Sabana Grande Municipio, Puerto Rico","Salinas Municipio, Puerto Rico","San Germán Municipio, Puerto Rico","San Juan Municipio, Puerto Rico","San Lorenzo Municipio, Puerto Rico","San Sebastián Municipio, 
Puerto Rico","Santa Isabel Municipio, Puerto Rico","Toa Alta Municipio, Puerto Rico","Toa Baja Municipio, Puerto Rico","Trujillo Alto Municipio, Puerto Rico","Utuado Municipio, Puerto Rico","Vega Alta Municipio, Puerto Rico","Vega Baja Municipio, Puerto Rico","Vieques Municipio, Puerto Rico","Villalba Municipio, Puerto Rico","Yabucoa Municipio, Puerto Rico","Yauco Municipio, Puerto Rico"
0,Total:,54571,182265,27457,22915,57322,10914,20947,118572,34215,25989,43643,13859,25833,13932,14972,49948,54428,13228,11539,37765,13906,80406,50251,43820,71109,79303,38319,104430,17241,31704,26790,9045,15760,17302,101547,53227,658466,14564,92709,...,50747,40290,25753,30753,9881,38675,30060,20068,44113,6276,12225,89080,40109,32610,26720,30402,23423,19277,24282,166327,25919,15200,54304,25265,31078,35527,395326,41058,42430,23274,74066,89609,74842,33149,39951,59662,9301,26073,37941,42043
5,Asian alone,474,1348,107,22,117,20,177,845,168,54,130,12,77,24,23,644,229,17,16,155,189,343,534,149,203,518,92,672,37,63,67,15,35,54,820,185,9158,4,685,...,44,29,23,22,1,40,36,36,42,3,18,189,32,26,50,23,18,14,42,296,37,27,129,8,34,41,1750,35,29,14,94,228,167,24,69,74,6,11,57,35


In [None]:
#Long format: one row per (Label, County) pair with the count in Value,
#so that the county name is in its own column
county_10_long = county_10_raw.melt(id_vars=['Label'], var_name='County', value_name='Value')

#Wide format: one row per county, with each Label (Asian / Total) in its own column
county_10 = county_10_long.pivot(index='County', columns='Label', values='Value').reset_index()

#Tidy the column names: spaces become underscores and colons are dropped
county_10.columns = county_10.columns.str.replace(' ','_').str.replace(':','')
county_10

Label,County,Asian_alone,Total
0,"Abbeville County, South Carolina",75,25417
1,"Acadia Parish, Louisiana",137,61773
2,"Accomack County, Virginia",183,33164
3,"Ada County, Idaho",9407,392365
4,"Adair County, Iowa",22,7682
...,...,...,...
3216,"Yuma County, Arizona",2324,195751
3217,"Yuma County, Colorado",23,10043
3218,"Zapata County, Texas",32,14018
3219,"Zavala County, Texas",4,11677


In [None]:
#Count how many 2010 county names also appear in the 2020 data
#(the 2020 side is df_merge, the dataframe that includes the state figures too)
found_in_2020 = county_10['County'].isin(df_merge['NAME'])
found_in_2020.value_counts()

True     3214
False       7
Name: County, dtype: int64

In [None]:
#Printing the names that appear in only one of the two years
names_2010 = county_10['County']
names_2020 = df_merge['NAME']

#2010 names missing from 2020, then 2020 names missing from 2010
mismatch_10_terms = county_10[~names_2010.isin(names_2020)]
mismatch_20_terms = df_merge[~names_2020.isin(names_2010)]
print ( mismatch_10_terms['County'] )
print ( mismatch_20_terms['NAME'] )

179                          Bedford city, Virginia
824     Dona Ana County, New Mexico Doña Ana County
1582                     La Salle Parish, Louisiana
2293                 Petersburg Census Area, Alaska
2646                   Shannon County, South Dakota
2947             Valdez-Cordova Census Area, Alaska
2982               Wade Hampton Census Area, Alaska
Name: County, dtype: object
72             Chugach Census Area, Alaska
73        Copper River Census Area, Alaska
83            Kusilvak Census Area, Alaska
89              Petersburg Borough, Alaska
1143             LaSalle Parish, Louisiana
1803           Doña Ana County, New Mexico
2413    Oglala Lakota County, South Dakota
Name: NAME, dtype: object


In [None]:
#Addressing the mismatches
#https://www.census.gov/programs-surveys/geography/technical-documentation/county-changes/2010.html
#Looked for terms using this: print ( df_merge[df_merge['NAME'].str.contains("Bedford")] )

#Map each mismatched 2010 name to the name used in the 2020 data.
#Series.replace with a dict swaps whole values only (exact match, no regex), so the
#result does not depend on the pandas version's default for str.replace(regex=...)
#and a name can never be altered through a partial match.
county_renames_10_to_20 = {
    #Bedford city became a part of Bedford County in 2013, added 6,222 population
    #NOTE: after this rename county_10 can hold two 'Bedford County, Virginia' rows;
    #they still need to be added together once the counts are numeric, otherwise
    #the merge with the 2020 data repeats that county.
    'Bedford city, Virginia': 'Bedford County, Virginia',

    #2010 label has the name without the tilde followed by the accented name
    'Dona Ana County, New Mexico Doña Ana County': 'Doña Ana County, New Mexico',

    #added space in 2010
    'La Salle Parish, Louisiana': 'LaSalle Parish, Louisiana',

    #Wrangell-Petersburg Census Area divided into the Wrangell Borough in 2008 and Petersburg Borough in 2013
    #In 2008, it was already counted in Census separately from Petersburg – renaming 2010 file which said "Census Area" instead of "Borough"
    'Petersburg Census Area, Alaska': 'Petersburg Borough, Alaska',

    #Shannon County (the 2010 name) was renamed Oglala Lakota County in 2015.
    #Only the 2010 -> 2020 direction is needed: the 2010 file has no Oglala Lakota row.
    'Shannon County, South Dakota': 'Oglala Lakota County, South Dakota',

    #Wade Hampton became Kusilvak Census Area in 2015
    'Wade Hampton Census Area, Alaska': 'Kusilvak Census Area, Alaska',
}
county_10['County'] = county_10['County'].replace(county_renames_10_to_20)

###Valdez-Cordova Census Area split into Chugach and Copper River.
###There is no one-to-one rename, so that 2010 row is left unmatched.

#2020 rows for every county touched by the changes above, for inspection
changed_2020_terms = [
    'Bedford',
    'Doña Ana',
    'LaSalle Parish',
    'Petersburg',
    'Oglala Lakota ',
    'Chugach',
    'Copper River',
    'Kusilvak',
]
#None of the terms contain regex metacharacters, so joining them with '|'
#matches a NAME containing any one of them
mismatch_10 = df_merge[df_merge['NAME'].str.contains('|'.join(changed_2020_terms))]

In [None]:
#Re-count matches between 2010 and 2020 county names after the renames
still_matched = county_10['County'].isin(df_merge['NAME'])
still_matched.value_counts()

True     3220
False       1
Name: County, dtype: int64

In [None]:
#Printing the names that are still unmatched after the renames
in_2020 = county_10['County'].isin(df_merge['NAME'])
in_2010 = df_merge['NAME'].isin(county_10['County'])

#2010 names missing from 2020, then 2020 names missing from 2010
mismatch_10_terms = county_10[~in_2020]
mismatch_20_terms = df_merge[~in_2010]
print ( mismatch_10_terms['County'] )
print ( mismatch_20_terms['NAME'] )

2947    Valdez-Cordova Census Area, Alaska
Name: County, dtype: object
72         Chugach Census Area, Alaska
73    Copper River Census Area, Alaska
Name: NAME, dtype: object


In [None]:
#Removing commas and other non-numeric characters so the counts can be converted into float.
#regex=True is required: since pandas 2.0 str.replace treats the pattern as a literal
#string by default, in which case r'\D+' would match nothing and the counts would be
#left unchanged.
for count_col in ('Asian_alone', 'Total'):
    county_10[count_col] = county_10[count_col].str.replace(r'\D+', '', regex=True)

In [None]:
#Cast both count columns to float so the percentage can be calculated
for count_col in ('Asian_alone', 'Total'):
    county_10[count_col] = county_10[count_col].astype(float)

In [None]:
#Combine rows that share a county name before calculating the percentage.
#An earlier cell renames 'Bedford city, Virginia' to 'Bedford County, Virginia', which
#can leave two rows with that name; summing their counts gives one 2010 row per county
#so the merge with the 2020 data does not repeat the county.
#sort=False keeps the existing row order; min_count=1 keeps a missing count as NaN
#instead of turning it into 0. Only the two count columns are carried over, so
#re-running this cell recomputes Pct_10 from scratch.
county_10 = (
    county_10
    .groupby('County', as_index=False, sort=False)[['Asian_alone', 'Total']]
    .sum(min_count=1)
)

#Calculate percent of Asian population in 2010 in each county
county_10['Pct_10'] = (county_10['Asian_alone']/county_10['Total'])*100
county_10.head(5)

Label,County,Asian_alone,Total,Pct_10
0,"Abbeville County, South Carolina",75.0,25417.0,0.295078
1,"Acadia Parish, Louisiana",137.0,61773.0,0.22178
2,"Accomack County, Virginia",183.0,33164.0,0.551803
3,"Ada County, Idaho",9407.0,392365.0,2.397513
4,"Adair County, Iowa",22.0,7682.0,0.286384


In [None]:
#Merge 2010 and 2020 data together on the county name, then give every
#year-specific column a _10 / _20 suffix
year_suffixed_names = {
    'P1_001N': 'Total_20',
    'P1_006N': 'Asian_20',
    'PCT': 'Pct_20',
    'PCT_STATE': 'PctState_20',
    'Asian_alone': 'Asian_10',
    'Total': 'Total_10',
}
df_1020 = (
    df_merge
    .merge(county_10, how="outer", left_on='NAME', right_on='County')
    .rename(columns=year_suffixed_names)
)
df_1020

Unnamed: 0,GEO_ID,NAME,Total_20,Asian_20,Pct_20,COUNTY,STATE,PctState_20,County,Asian_10,Total_10,Pct_10
0,0500000US01001,"Autauga County, Alabama",58805.0,881.0,1.498172,Autauga County,Alabama,1.525791,"Autauga County, Alabama",474.0,54571.0,0.868593
1,0500000US01003,"Baldwin County, Alabama",231767.0,2067.0,0.891844,Baldwin County,Alabama,1.525791,"Baldwin County, Alabama",1348.0,182265.0,0.739582
2,0500000US01005,"Barbour County, Alabama",25223.0,117.0,0.463862,Barbour County,Alabama,1.525791,"Barbour County, Alabama",107.0,27457.0,0.389700
3,0500000US01007,"Bibb County, Alabama",22293.0,32.0,0.143543,Bibb County,Alabama,1.525791,"Bibb County, Alabama",22.0,22915.0,0.096007
4,0500000US01009,"Blount County, Alabama",59134.0,178.0,0.301011,Blount County,Alabama,1.525791,"Blount County, Alabama",117.0,57322.0,0.204110
...,...,...,...,...,...,...,...,...,...,...,...,...
3218,0500000US72147,"Vieques Municipio, Puerto Rico",8249.0,7.0,0.084859,Vieques Municipio,Puerto Rico,0.121764,"Vieques Municipio, Puerto Rico",6.0,9301.0,0.064509
3219,0500000US72149,"Villalba Municipio, Puerto Rico",22093.0,10.0,0.045263,Villalba Municipio,Puerto Rico,0.121764,"Villalba Municipio, Puerto Rico",11.0,26073.0,0.042189
3220,0500000US72151,"Yabucoa Municipio, Puerto Rico",30426.0,8.0,0.026293,Yabucoa Municipio,Puerto Rico,0.121764,"Yabucoa Municipio, Puerto Rico",57.0,37941.0,0.150233
3221,0500000US72153,"Yauco Municipio, Puerto Rico",34172.0,25.0,0.073159,Yauco Municipio,Puerto Rico,0.121764,"Yauco Municipio, Puerto Rico",35.0,42043.0,0.083248


In [None]:
#Change in Asian population % from 2010 to 2020 (2020 value minus 2010 value)
df_1020['PCT_DIFF'] = df_1020['Pct_20'].sub(df_1020['Pct_10'])

#Show the 25 counties with the largest increase
largest_increase = df_1020.sort_values('PCT_DIFF', ascending=False)
largest_increase.head(25)

Unnamed: 0,GEO_ID,NAME,Total_20,Asian_20,Pct_20,COUNTY,STATE,PctState_20,County,Asian_10,Total_10,Pct_10,PCT_DIFF
239,0500000US06105,"Trinity County, California",16112.0,2222.0,13.790963,Trinity County,California,15.392566,"Trinity County, California",94.0,13786.0,0.681851,13.109112
445,0500000US13117,"Forsyth County, Georgia",251283.0,45203.0,17.988881,Forsyth County,Georgia,4.47192,"Forsyth County, Georgia",10925.0,175511.0,6.224681,11.7642
2364,0500000US46005,"Beadle County, South Dakota",19149.0,2071.0,10.815186,Beadle County,South Dakota,1.519849,"Beadle County, South Dakota",632.0,17398.0,3.632601,7.182585
229,0500000US06085,"Santa Clara County, California",1936259.0,759030.0,39.200851,Santa Clara County,California,15.392566,"Santa Clara County, California",570524.0,1781642.0,32.02237,7.17848
2874,0500000US51107,"Loudoun County, Virginia",420959.0,89779.0,21.327255,Loudoun County,Virginia,7.130205,"Loudoun County, Virginia",46033.0,312311.0,14.739474,6.587781
2566,0500000US48085,"Collin County, Texas",1064465.0,189209.0,17.775033,Collin County,Texas,5.439878,"Collin County, Texas",87752.0,782341.0,11.216592,6.55844
187,0500000US06001,"Alameda County, California",1682353.0,545261.0,32.410618,Alameda County,California,15.392566,"Alameda County, California",394560.0,1510271.0,26.125113,6.285505
1206,0500000US24027,"Howard County, Maryland",332317.0,66288.0,19.947219,Howard County,Maryland,6.814453,"Howard County, Maryland",41221.0,287085.0,14.358465,5.588754
1792,0500000US34035,"Somerset County, New Jersey",345361.0,67309.0,19.489462,Somerset County,New Jersey,10.228126,"Somerset County, New Jersey",45650.0,323444.0,14.113726,5.375736
2971,0500000US53033,"King County, Washington",2269675.0,452475.0,19.935674,King County,Washington,9.481757,"King County, Washington",282075.0,1931249.0,14.605833,5.329841


In [None]:
#GEO_ID is formatted as 0500000USXXXXX; keep only the last five characters
#as GEOID so it can be used as the merge key
geo_id_clean = df_1020['GEO_ID'].str.strip()
df_1020['GEOID'] = geo_id_clean.str.slice(-5)
df_1020.head(1)

Unnamed: 0,GEO_ID,NAME,Total_20,Asian_20,Pct_20,COUNTY,STATE,PctState_20,County,Asian_10,Total_10,Pct_10,PCT_DIFF,GEOID
0,0500000US01001,"Autauga County, Alabama",58805.0,881.0,1.498172,Autauga County,Alabama,1.525791,"Autauga County, Alabama",474.0,54571.0,0.868593,0.629579,1001


In [None]:
#df_1020.to_csv('df_1020.csv') 
#files.download('df_1020.csv')

Charting

Downloaded shapefile from COUNTY folder in FTP archive on [this site](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.2021.html)

In [None]:
#County shapefile (TIGER/Line 2021), read straight from the Census site
county_shapefile_url = 'https://www2.census.gov/geo/tiger/TIGER2021/COUNTY/tl_2021_us_county.zip'
usCty = gpd.read_file(county_shapefile_url)
#Other shape files I tested:
#https://www2.census.gov/geo/tiger/TIGER2020/COUNTY/tl_2020_us_county.zip
#https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip
usCty.head(1)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,GEOID,NAME,NAMELSAD,LSAD,CLASSFP,MTFCC,CSAFP,CBSAFP,METDIVFP,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,31,39,835841,31039,Cuming,Cuming County,6,H1,G4020,,,,A,1477645345,10690204,41.9158651,-96.7885168,"POLYGON ((-96.55515 41.91587, -96.55515 41.914..."


In [None]:
#Count how many shapefile GEOIDs are present in the 2010/2020 county data
geoid_found = usCty['GEOID'].isin(df_1020['GEOID'])
geoid_found.value_counts()

True     3219
False      14
Name: GEOID, dtype: int64

In [None]:
#Printing the rows that will not match in the merge.
#The merge key is GEOID, so the comparison is made on GEOID as well: county names are
#not unique across states, so comparing NAMELSAD with COUNTY can hide a mismatch
#(the name-based check listed 13 shapefile rows, the GEOID count above reports 14).
# NOTE(review): the name-based list contained only entries outside the 50 states;
# re-confirm that this still holds for the GEOID-based list before ignoring it.
us_geoid = usCty[~usCty['GEOID'].isin(df_1020['GEOID'])]
cty_geoid = df_1020[~df_1020['GEOID'].isin(usCty['GEOID'])]
print ( us_geoid[['GEOID', 'NAMELSAD']] )
print ( cty_geoid[['GEOID', 'NAME', 'County']] )

81                  St. Thomas Island
800                              Guam
891                 Rota Municipality
964                   Manu'a District
967               Tinian Municipality
1248                 St. Croix Island
1459                  St. John Island
1590                 Eastern District
1790                 Western District
1925    Northern Islands Municipality
2990              Saipan Municipality
3103                    Swains Island
3210                      Rose Island
Name: NAMELSAD, dtype: object
3222    NaN
Name: COUNTY, dtype: object


In [None]:
#Join the county shapes to the 2010/2020 figures; only GEOIDs present in both are kept
us_asian = usCty.merge(df_1020, how='inner', on='GEOID')

In [None]:
#County map coloured by PCT_DIFF, the change in Asian population % from 2010 to 2020.
#The legend and tooltip were titled 'Income', which does not describe the plotted
#field; they now name the quantity actually shown.
pct_diff_title = 'Change in Asian population %, 2010 to 2020 (percentage points)'

growth_map = alt.Chart(us_asian).mark_geoshape().encode(
    color=alt.Color('PCT_DIFF:Q', legend=alt.Legend(title=pct_diff_title)),
    tooltip=[alt.Tooltip('PCT_DIFF:Q',title=pct_diff_title)]
).transform_filter(
    #Only counties whose share did not decrease (PCT_DIFF >= 0) are drawn
    alt.FieldGTEPredicate(field='PCT_DIFF', gte=0)
).properties(
    width=850,
    height=600
)

In [None]:
#Write the map to an HTML file and download it through the Colab helper
chart_path = 'chart.html'
growth_map.save(chart_path)
files.download(chart_path)