In [2]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Folder that holds every input and output file used by this notebook.
DRIVE_PATH = "/Users/jerrylin/Google Drive/My Drive/CS+/Data/"


def full_path(filename):
    """Return the location of ``filename`` inside the shared data folder.

    The name is appended directly to ``DRIVE_PATH`` (which already ends with a
    path separator); no normalisation or existence check is performed.
    """
    return "".join((DRIVE_PATH, filename))

# Merge Nested VTDs

There are 2692 VTDs (note that this is different from precincts, of which there are 2704). However, two of these are apparently nested, so the nested pairs have been combined, leaving 2690 VTDs. (See the Herschlag et al. GitLab README, which expands on this.)

In [4]:
# Census + election data from the MGGG GitHub repository.
# Link: https://github.com/mggg-states/NC-shapefiles
# The "VTD" column of this file matches the "GEOID10" column of the Census files.
# The location is built with full_path() so the data folder is defined in one
# place (DRIVE_PATH) instead of being repeated here.
df = gpd.read_file(full_path("raw_mggg_NC_VTD"))

In [5]:
# Read in the official Census shapefile, which is CRITICAL for getting the right
# VTD identifiers. Its row order corresponds to the VTD numbering used in the
# districting ensemble txts, i.e. the first VTD in this shapefile is VTD 1 in
# the districting ensemble.
# The location is built with full_path() so the data folder is defined in one
# place (DRIVE_PATH) instead of being repeated here.
shp = gpd.read_file(full_path("tl_2012_37_vtd10"))

In [6]:
# Give the MGGG identifier column the same name as the Census identifier column
# so the two tables can later be joined on "GEOID10".
df.rename({"VTD": "GEOID10"}, axis="columns", inplace=True)

## Change the Geometry to Be Correct

In [7]:
# Discard the geometry that came with the MGGG file; the geometry of the Census
# shapefile is the one carried forward from here on.
df.drop("geometry", axis="columns", inplace=True)

In [8]:
# Keep the Census rows in their original order (that order is the VTD numbering)
# but carry along only the identifier and the geometry.
geoid = shp[["GEOID10", "geometry"]]

In [9]:
# Attach the full demographic data to the Census rows. `geoid` is the left-hand
# frame, so the result follows the Census row order that the ensembles rely on.
# validate= rejects repeated GEOID10 keys, which would otherwise duplicate rows.
df2 = geoid.merge(df, on="GEOID10", validate="one_to_one")

# merge() defaults to an inner join, which silently drops Census rows that have
# no match in `df`. A single dropped row would shift every later VTD number, so
# fail loudly instead of producing a misnumbered table.
assert len(df2) == len(geoid), (
    "{} of {} Census VTDs have no match in the MGGG data; "
    "VTD numbering would be shifted".format(len(geoid) - len(df2), len(geoid))
)

# Increase the index by 1 to match the districting ensemble: index 1 now matches
# VTD number 1 in the ensembles.
df2.index += 1

## Unnest

In [11]:
def _merge_nested_vtds(frame, vtd_numbers):
    """Collapse the rows labelled ``vtd_numbers`` into one replacement row.

    The selected rows are grouped by ``County`` and every other column is
    summed; the first (county) group of that result becomes the returned row.
    Its ``geometry`` entry is then overwritten with the union of the selected
    rows' geometries.

    NOTE(review): the sum is applied to every column, so text columns are
    combined with ``+`` as well (string concatenation) -- confirm this is the
    intended treatment for identifier/name columns.

    Parameters
    ----------
    frame : GeoDataFrame indexed by VTD number.
    vtd_numbers : list of index labels to combine, in the order they should be
        combined.

    Returns
    -------
    A Series holding the merged row.
    """
    nested = frame.loc[vtd_numbers]
    merged = nested.groupby('County').agg(lambda x: x.sum()).reset_index().iloc[0]
    merged["geometry"] = nested.unary_union
    return merged


# Create merged objects for the nested VTDs (one per nested pair).
to_insert = _merge_nested_vtds(df2, [402, 316])
to_insert2 = _merge_nested_vtds(df2, [1677, 1658])

  results[key] = self.aggregate(func)


In [12]:
# Remove one member of each nested pair, then overwrite the remaining member
# with the merged entry built for that pair.
df2.drop(index=[316, 1658], inplace=True)
for vtd_number, merged_row in ((402, to_insert), (1677, to_insert2)):
    df2.loc[vtd_number] = merged_row

In [13]:
# Turn the VTD-number index into an ordinary column called "VTD_num"; the frame
# gets a fresh 0-based index in exchange.
df2 = df2.reset_index().rename(columns={'index': 'VTD_num'})
df2

Unnamed: 0,VTD_num,GEOID10,geometry,ALAND10,AWATER10,County,VTD_Key,VTD_Name,PL10AA_TOT,PL10VA_TOT,...,WVAP,BVAP,AMINVAP,ASIANVAP,NHPIVAP,OTHERVAP,2MOREVAP,HDIST,SEND,CD
0,1,37149WO06,"POLYGON ((-82.27895 35.30135, -82.27878 35.301...",112372241.0,565150.0,37149,37149WO06,WO06,2504,1964,...,1733,77,4,6,0,0,16,113,47,11
1,2,37149SA04,"POLYGON ((-82.35609 35.24454, -82.35517 35.244...",85577055.0,7670.0,37149,37149SA04,SA04,1972,1616,...,1561,19,2,3,0,2,7,113,47,11
2,3,37149TR123,"POLYGON ((-82.31243 35.22149, -82.31239 35.221...",45106392.0,20740.0,37149,37149TR123,TR123,3747,3217,...,2841,258,1,9,0,2,15,113,47,11
3,4,37149CL07,"POLYGON ((-82.24123 35.26694, -82.23969 35.267...",60133780.0,33643.0,37149,37149CL07,CL07,3294,2562,...,2373,26,2,6,0,2,23,113,47,11
4,5,37149GC09,"POLYGON ((-82.13863 35.19342, -82.13834 35.193...",133671985.0,47824.0,37149,37149GC09,GC09,3607,2814,...,2407,250,9,9,0,0,18,113,47,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2685,2688,37051G5,"POLYGON ((-79.08169 35.03270, -79.08069 35.032...",30284464.0,329736.0,37051,37051G5,G5,20447,14830,...,4920,7675,130,410,60,22,381,44,21,8
2686,2689,37173ALARKA,"POLYGON ((-83.48201 35.37736, -83.48195 35.377...",68373817.0,0.0,37173,37173ALARKA,ALARKA,962,760,...,726,0,14,0,2,0,16,119,50,11
2687,2690,37173BC2,"POLYGON ((-83.52304 35.45413, -83.52303 35.454...",173532693.0,1180244.0,37173,37173BC2,BC2,3109,2433,...,2194,17,112,22,0,0,32,119,50,11
2688,2691,37173BC1,"POLYGON ((-83.57351 35.42605, -83.57350 35.426...",167197388.0,6042784.0,37173,37173BC1,BC1,2918,2389,...,2169,5,116,8,0,0,40,119,50,11


### Rename Columns & Add Neighbors

In [15]:
# Only VTD_Key is renamed (to loc_prec); every other column keeps its MGGG name.
df2.rename({'VTD_Key': 'loc_prec'}, axis='columns', inplace=True)

# A wider rename to descriptive names is left disabled. Its mapping was:
#   County -> COUNTY_FIPS, VTD_Key -> loc_prec, PL10AA_TOT -> total_pop,
#   PL10VA_TOT -> total_18+, HVAP -> hispanic, WVAP -> white,
#   BVAP -> african_am, AMINVAP -> am_indian_, ASIANVAP -> asian,
#   NHPIVAP -> hawaii/pac, OTHERVAP -> other_race, 2MOREVAP -> 2+races
# Entries that had already been removed from that mapping:
#   EL16G_PR_D -> total_dem, EL16G_PR_R -> total_rep, EL16G_PR_L -> total_lib,
#   EL16G_PR_W -> writein, EL16G_PR_T -> total_vote


'df2.rename(columns={"County":"COUNTY_FIPS", \'VTD_Key\':\'loc_prec\', \'PL10AA_TOT\':\'total_pop\',\n                      \'PL10VA_TOT\':\'total_18+\', \n                      \'HVAP\':\'hispanic\', \'WVAP\':\'white\', \'BVAP\':\'african_am\', \'AMINVAP\':\'am_indian_\',\n                      \'ASIANVAP\':\'asian\', \'NHPIVAP\':\'hawaii/pac\', \'OTHERVAP\':\'other_race\', \n                       \'2MOREVAP\':\'2+races\'}, inplace=True)'

In [16]:
# Takes about 1 minute.
# For every VTD, record the loc_prec of each VTD whose geometry touches it,
# joined into one ", "-separated string.
# All strings are built first and the column is assigned once, so the frame is
# not modified while it is being iterated over (pandas warns against writing to
# a frame from inside an iterrows() loop).
df2["my_neighbors"] = [
    ", ".join(df2.loc[df2.geometry.touches(shape), "loc_prec"].tolist())
    for shape in df2["geometry"]
]

### Read in Mattingly's Congressional Election Data

In [15]:
def _read_house_results(year):
    """Load one US House results file and label its columns.

    Reads the tab-separated, header-less file
    ``code_data_NC_NCAbs_USHOUSEOFREPRESENTATIVES_<year>.txt`` from the data
    folder. Column 0 becomes ``VTD_num``, column 1 is discarded, and columns
    2-4 become ``hor<year>_dem``, ``hor<year>_rep`` and ``hor<year>_other``.

    Parameters
    ----------
    year : two-digit year as a string, e.g. ``"12"``.

    Returns
    -------
    DataFrame with ``VTD_num`` plus the three labelled result columns.
    """
    prefix = "hor" + year
    results = pd.read_csv(
        full_path("code_data_NC_NCAbs_USHOUSEOFREPRESENTATIVES_" + year + ".txt"),
        sep='\t',
        header=None,
    )
    results = results.rename(columns={
        0: "VTD_num",
        1: "dummy",
        2: prefix + "_dem",
        3: prefix + "_rep",
        4: prefix + "_other",
    })
    return results.drop(columns=['dummy'])


hor12 = _read_house_results("12")
hor16 = _read_house_results("16")

# Left joins keep every VTD in df2 even when a results file has no row for it.
# NOTE(review): if these files still list the two dropped nested VTDs
# separately, their values are not added to the merged VTDs here -- confirm
# against the source data.
df2 = df2.merge(hor12, on="VTD_num", how="left")
df2 = df2.merge(hor16, on="VTD_num", how="left")

### Save to File

In [20]:
# Write the finished table to the shared data folder.
# NOTE(review): if this is written with the ESRI Shapefile driver, field names
# longer than 10 characters (e.g. my_neighbors, hor12_other, hor16_other) are
# truncated on write -- confirm downstream readers expect the shortened names.
output_location = full_path('NCabs_VTD')
df2.to_file(output_location)

  """Entry point for launching an IPython kernel.


And voila! You're done. You have a dataset of all 2690 VTDs (after accounting for nested VTDs), with a wide array of election data at your disposal. The ensembles in Mattingly's GitLab are structured as VTD-district pairs; simply merge a districting text file with df2 on the "VTD_num" column! 