In [1]:
import numpy as np
import pandas as pd

! git clone https://github.com/gsilver321/project_voting

Cloning into 'project_voting'...
remote: Enumerating objects: 102, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 102 (delta 10), reused 4 (delta 4), pack-reused 83[K
Receiving objects: 100% (102/102), 123.52 MiB | 11.91 MiB/s, done.
Resolving deltas: 100% (39/39), done.
Updating files: 100% (61/61), done.


In [2]:
# Provided county time series, kept for Virginia rows only.
tsdf = pd.read_csv(
    "project_voting/data/county_data/0002_ts_nominal_county.csv",
    encoding="latin-1",
    low_memory=False,
)
tsdf = tsdf[tsdf["STATE"].eq("Virginia")]

# Separately downloaded time series with additional demographic info,
# restricted to Virginia in the same way.
tsdf2 = pd.read_csv(
    "project_voting/data/county_data/nhgis0001_ts_nominal_county.csv",
    encoding="latin-1",
    low_memory=False,
)
tsdf2 = tsdf2[tsdf2["STATE"].eq("Virginia")]

In [3]:
def _numeric_columns(frame, code, periods, strip_code=False):
    """Select the columns ``code + period`` from ``frame`` and coerce them to numeric.

    Parameters
    ----------
    frame : pd.DataFrame
        Time-series table to read from (``tsdf`` or ``tsdf2``).
    code : str
        Variable-code prefix shared by the wanted columns, e.g. ``"B69AC"``.
    periods : sequence of str
        Column-name suffixes to select, in the order they should appear.
    strip_code : bool, default False
        When True, rename every column to its bare suffix so that frames built
        from different codes share labels and can be added column-wise.

    Returns
    -------
    pd.DataFrame
        One column per suffix, carrying the same row index as ``frame``.

    Raises
    ------
    KeyError
        If a requested column is missing from ``frame``.
    ValueError
        If a value cannot be parsed as a number (``pd.to_numeric`` default).
    """
    columns = [code + period for period in periods]
    selected = frame.loc[:, columns].apply(pd.to_numeric)
    if strip_code:
        selected = selected.rename(columns=dict(zip(columns, periods)))
    return selected


# Column suffixes available for each group of variables.
# NOTE(review): "125" and "195" are the suffixes the provided file uses for its
# two most recent periods - presumably multi-year survey windows; confirm
# against the data codebook.
_TS_PERIODS = ("1970", "1980", "1990", "2000", "125", "195")
_DECADES_TO_2000 = ("1970", "1980", "1990", "2000")
_DECADES_TO_2010 = _DECADES_TO_2000 + ("2010",)
_DECADES_TO_2020 = _DECADES_TO_2010 + ("2020",)

# The averages taken from tsdf2 are attached to a frame built from tsdf purely
# by position (`.values` below), so both tables must describe the same rows.
# NOTE(review): only the row count is checked here; this also assumes both
# files list the Virginia counties in the same order - confirm.
if len(tsdf2) != len(tsdf):
    raise ValueError(
        "tsdf and tsdf2 contain a different number of Virginia rows "
        f"({len(tsdf)} vs {len(tsdf2)}); their averages cannot be combined positionally"
    )

df_avg = tsdf[["COUNTY"]].copy()

# num of 25+ y.o. college-educated citizens in Virginia counties over time
df_edu = _numeric_columns(tsdf, "B69AC", _TS_PERIODS)

# num of 16+ y.o. workers in Virginia counties over time
df_work = _numeric_columns(tsdf, "B84AA", _TS_PERIODS)

# num of foreign-born citizens in Virginia counties over time
df_foreign = _numeric_columns(tsdf, "AT5AB", _TS_PERIODS)

# num of female citizens in Virginia counties over time
df_female = _numeric_columns(tsdf, "AV1AB", _TS_PERIODS)

# per capita income in Virginia counties over time (no 1970 column is used)
df_income = _numeric_columns(tsdf, "BD5AA", _TS_PERIODS[1:])

# num of citizens below poverty line in Virginia counties over time
df_poverty = _numeric_columns(tsdf, "CL6AA", _TS_PERIODS)

# num of citizens in rural areas in Virginia counties over time
df_rural = _numeric_columns(tsdf2, "A57AD", _DECADES_TO_2010)

# num of black or african american citizens in Virginia counties over time
df_black = _numeric_columns(tsdf2, "B18AB", _DECADES_TO_2020)

# num of hispanic or latinx citizens in Virginia counties over time
df_latinx = _numeric_columns(tsdf2, "A35AA", _DECADES_TO_2020)

# num of aapi citizens in Virginia counties over time
df_aapi = _numeric_columns(tsdf2, "B18AD", _DECADES_TO_2020)

# num of never married citizens in Virginia counties over time:
# the two source series are relabelled by year so they can be summed per year
df_marry_male = _numeric_columns(tsdf2, "BL1AA", _DECADES_TO_2000, strip_code=True)
df_marry_female = _numeric_columns(tsdf2, "BL1AG", _DECADES_TO_2000, strip_code=True)
df_marry = df_marry_male.add(df_marry_female)

# num of 65+ y.o. citizens in Virginia counties over time:
# three source series, relabelled by year and summed per year
df_65 = _numeric_columns(tsdf2, "B57AP", _DECADES_TO_2010, strip_code=True)
df_75 = _numeric_columns(tsdf2, "B57AQ", _DECADES_TO_2010, strip_code=True)
df_85 = _numeric_columns(tsdf2, "B57AR", _DECADES_TO_2010, strip_code=True)
df_over_65 = df_65.add(df_75).add(df_85)

# summarize time series for later model: one row-wise mean per variable.
# Dict order fixes the column order of df_avg.
_summaries = {
    "Avg College Degree": df_edu,
    "Avg Labor Force": df_work,
    "Avg Foreigners": df_foreign,
    "Avg Females": df_female,
    "Avg Income": df_income,
    "Avg Poverty": df_poverty,
    "Avg Rural": df_rural,
    "Avg Black or African American": df_black,
    "Avg Latinx": df_latinx,
    "Avg AAPI": df_aapi,
    "Avg Never Married": df_marry,
    "Avg 65+": df_over_65,
}
for _label, _frame in _summaries.items():
    # .values drops the index so tsdf2-derived means can be written onto the
    # tsdf-derived frame by position.
    df_avg[_label] = _frame.mean(axis=1).values

df_avg

Unnamed: 0,COUNTY,Avg College Degree,Avg Labor Force,Avg Foreigners,Avg Females,Avg Income,Avg Poverty,Avg Rural,Avg Black or African American,Avg Latinx,Avg AAPI,Avg Never Married,Avg 65+
2845,Accomack County,2776.833333,14774.166667,1195.166667,17024.666667,16231.0,6831.833333,30504.8,10547.500000,1539.000000,955.833333,5891.00,5649.6
2846,Albemarle County,21778.500000,37186.000000,5357.000000,38320.833333,27357.6,6379.666667,38497.0,7542.000000,2882.000000,4139.500000,14343.75,7596.6
2847,Alexandria city,46037.500000,79149.166667,24048.500000,65407.833333,38555.4,10788.666667,0.0,25727.500000,14676.500000,12749.500000,34072.75,10526.6
2848,Alleghany County,1129.500000,6103.833333,134.166667,7089.000000,17668.8,1751.000000,11519.4,466.000000,89.166667,56.000000,1975.75,1940.4
2849,Amelia County,771.166667,4833.833333,94.500000,5173.833333,18413.8,1284.333333,9774.8,3051.500000,156.833333,88.166667,1670.75,1317.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2977,Williamsburg city,2475.333333,5713.333333,693.500000,6382.833333,17896.2,1393.666667,0.0,2201.500000,459.500000,594.166667,4999.75,1263.6
2978,Winchester city,3404.666667,11633.833333,1596.000000,11685.666667,20015.4,3311.000000,0.0,2250.166667,1902.500000,1478.000000,4442.25,3115.8
2979,Wise County,2574.833333,14582.000000,295.333333,20012.833333,13916.2,8528.166667,28271.4,1184.333333,258.166667,206.500000,6671.50,4989.6
2980,Wythe County,2206.000000,12565.333333,148.166667,13704.833333,17017.2,3824.166667,18934.2,826.666667,158.833333,138.166667,3924.50,3837.0


In [5]:
# Write the per-county averages to disk, leaving out the row index.
df_avg.to_csv(path_or_buf="demographic_info.csv", index=False)