# Merging

In [1]:
import pandas as pd
import geopandas as gpd

In [2]:
# Input folders; each one holds a data file that shares the folder's name.
ELECTION_FOLDER = "ga_2020_gen_2020_blocks"
CENSUS_FOLDER = "ga_ds248_2020_block"
BOUNDARY_FOLDER = "ga_2020"  # 2020 precinct election data; only its geometries are needed

# Columns kept from each source after loading.
ELECTION_COLUMNS = [
    "GEOID20",
    "PRECINCTID",
    "G20PRERTRU",
    "G20PREDBID",
]
CENSUS_COLUMNS = [
    "GEOID20",
    "U7E001",
    "U7E002",
    "U7E005",
    "U7E006",
]
BOUNDARY_COLUMNS = [
    "PRECINCTID",  # named DISTRICT in the boundary file
    "geometry",
]

# Output files.
EXPORT_PRECINCTS = "ga_precinct_2020.csv"
EXPORT_EDGES = "ga_edges_2020.csv"

In [3]:
# Each source file sits at <folder>/<folder>.<extension>.
election_data = gpd.read_file(f'{ELECTION_FOLDER}/{ELECTION_FOLDER}.shp')
census_data = gpd.read_file(f'{CENSUS_FOLDER}/{CENSUS_FOLDER}.csv')  # CSV, also read through geopandas
boundary_data = gpd.read_file(f'{BOUNDARY_FOLDER}/{BOUNDARY_FOLDER}.shp')

In [4]:
# Preview the first five rows of the block-level election table.
election_data.head(n=5)

Unnamed: 0,GEOID20,STATEFP,COUNTYFP,PRECINCTID,VAP_MOD,G20PRERTRU,G20PREDBID,G20PRELJOR,G20USSRPER,G20USSDOSS,G20USSLHAZ,G20PSCRSHA,G20PSCDBRY,G20PSCLMEL,G20PSCRMCD,G20PSCDBLA,G20PSCLWIL,geometry
0,130019501001000,13,1,0011B,12,7.29,0.97,0.06,6.93,1.12,0.13,6.82,0.91,0.2,6.64,0.96,0.18,"POLYGON ((-82.34985 31.92087, -82.34960 31.920..."
1,130019501001001,13,1,0011B,9,5.46,0.73,0.04,5.2,0.84,0.1,5.11,0.68,0.15,4.98,0.72,0.13,"POLYGON ((-82.33439 31.90390, -82.33416 31.904..."
2,130019501001002,13,1,0011B,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"POLYGON ((-82.35306 31.93902, -82.34760 31.938..."
3,130019501001003,13,1,0011B,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"POLYGON ((-82.31536 31.94550, -82.31516 31.945..."
4,130019501001004,13,1,0011B,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"POLYGON ((-82.35489 31.93956, -82.35484 31.939..."


In [5]:
# Preview the first five rows of the block-level census table.
census_data.head(n=5)

Unnamed: 0,GISJOIN,YEAR,GEOID,GEOCODE,REGIONA,DIVISIONA,STATE,STATEA,COUNTY,COUNTYA,...,U7E065,U7E066,U7E067,U7E068,U7E069,U7E070,U7E071,U7E072,U7E073,geometry
0,G13000109501001000,2020,1010000US130019501001000,130020000000000.0,3,5,Georgia,13,Appling County,1,...,0,0,0,0,0,0,0,0,0,
1,G13000109501001001,2020,1010000US130019501001001,130020000000000.0,3,5,Georgia,13,Appling County,1,...,0,0,0,0,0,0,0,0,0,
2,G13000109501001002,2020,1010000US130019501001002,130020000000000.0,3,5,Georgia,13,Appling County,1,...,0,0,0,0,0,0,0,0,0,
3,G13000109501001003,2020,1010000US130019501001003,130020000000000.0,3,5,Georgia,13,Appling County,1,...,0,0,0,0,0,0,0,0,0,
4,G13000109501001004,2020,1010000US130019501001004,130020000000000.0,3,5,Georgia,13,Appling County,1,...,0,0,0,0,0,0,0,0,0,


In [6]:
# Preview the first five rows of the precinct boundary table.
boundary_data.head(n=5)

Unnamed: 0,DISTRICT,CTYSOSID,PRECINCT_I,PRECINCT_N,CTYNAME,CTYNUMBER,CTYNUMBER2,FIPS2,G20PRERTRU,G20PREDBID,...,G20PSCRMCD,G20PSCDBLA,G20PSCLWIL,R21USSRPER,R21USSDOSS,R21USSRLOE,R21USSDWAR,R21PSCRMCD,R21PSCDBLA,geometry
0,215122,215122,122,FIRST AFRICAN,MUSCOGEE,106,106,215,238,668,...,251,587,44,230,589,222,599,239,564,"POLYGON ((-84.96984 32.46725, -84.97031 32.467..."
1,215108,215108,108,ST MARK/HEIFERHORN,MUSCOGEE,106,106,215,3243,1676,...,3268,1456,122,3071,1484,3055,1499,3112,1397,"POLYGON ((-84.96552 32.53259, -84.96852 32.532..."
2,57031,57031,31,R T JONES,CHEROKEE,28,28,57,1021,513,...,998,461,60,891,455,879,467,902,434,"POLYGON ((-84.46579 34.25122, -84.46545 34.251..."
3,57033,57033,33,SALACOA,CHEROKEE,28,28,57,454,69,...,419,61,17,419,69,421,68,416,64,"POLYGON ((-84.53036 34.38103, -84.53047 34.380..."
4,1506,1506,6,CENTER,BARTOW,8,8,15,2312,568,...,2230,516,107,2026,507,2019,511,2032,478,"MULTIPOLYGON (((-84.65788 34.14247, -84.65830 ..."


In [7]:
# Census GEOIDs look like '<prefix>US<block id>'; keep the part after 'US'
# so it can be matched against GEOID20 in the election data.
census_data['GEOID20'] = [geoid.split('US')[1] for geoid in census_data['GEOID']]
# The boundary file calls the precinct identifier DISTRICT; rename it to the
# PRECINCTID name used by the election data.
boundary_data = boundary_data.rename(columns={'DISTRICT': 'PRECINCTID'})

In [8]:
# Trim every table down to the columns listed in the configuration cell.
election_data, census_data, boundary_data = (
    election_data[ELECTION_COLUMNS],
    census_data[CENSUS_COLUMNS],
    boundary_data[BOUNDARY_COLUMNS],
)

In [9]:
# Census columns that hold counts; GEOID20 is only the join key and is dropped
# right after the merge, so it does not need to be converted.
count_columns = [column for column in CENSUS_COLUMNS if column != 'GEOID20']

# Attach the census counts to the election rows on the shared block GEOID.
precinct_data = election_data.merge(census_data, on='GEOID20')
# Census values are listed as object dtype by default; make them numeric so
# they can be summed.
precinct_data[count_columns] = precinct_data[count_columns].astype('int64')
precinct_data = precinct_data.drop(columns='GEOID20')

# Aggregate the block-level rows up to one row per precinct.
precinct_data = precinct_data.groupby(by='PRECINCTID').sum()
# The last entry has PRECINCTID 'N/A' and no data. errors='ignore' keeps the
# cell runnable when the input has no such placeholder group.
precinct_data = precinct_data.drop(index='N/A', errors='ignore')

# Bring in the precinct geometries.
precinct_data = precinct_data.merge(boundary_data, on='PRECINCTID')

# Vote totals in the block file are fractional; store whole numbers per precinct.
for vote_column in ('G20PRERTRU', 'G20PREDBID'):
    precinct_data[vote_column] = precinct_data[vote_column].round().astype('int64')

precinct_data = precinct_data.rename(columns={'PRECINCTID': 'PRECINCT_ID',
                                              'G20PRERTRU': 'REPUBLICAN',
                                              'G20PREDBID': 'DEMOCRAT',
                                              'U7E001': 'TOTAL_POPULATION',
                                              'U7E002': 'HISPANIC',
                                              'U7E005': 'WHITE',
                                              'U7E006': 'BLACK',
                                              'geometry': 'GEOMETRY'})
# Re-wrap as a GeoDataFrame so the renamed GEOMETRY column is the active geometry.
precinct_data = gpd.GeoDataFrame(precinct_data, geometry='GEOMETRY')
precinct_data.head()

Unnamed: 0,PRECINCT_ID,REPUBLICAN,DEMOCRAT,TOTAL_POPULATION,HISPANIC,WHITE,BLACK,GEOMETRY
0,0011B,870,116,1433,46,1267,72,"POLYGON ((-82.31535 31.94637, -82.31508 31.946..."
1,0011C,594,44,1192,65,991,117,"POLYGON ((-82.43146 31.87040, -82.43131 31.870..."
2,0012,531,908,2639,264,955,1360,"POLYGON ((-82.36296 31.78329, -82.36294 31.783..."
3,0013A1,587,20,865,12,817,17,"POLYGON ((-82.28323 31.90526, -82.28321 31.905..."
4,0013C,843,288,1920,144,1247,457,"POLYGON ((-82.35010 31.77878, -82.34979 31.778..."


# Adjacency

In [10]:
def addEdge(edges, index1, index2):
    """Add the undirected edge between two indices to ``edges``.

    The pair is stored as a tuple with the smaller index first, so the same
    edge supplied in either order ends up as a single set entry.

    Args:
        edges: Set that collects the edge tuples; modified in place.
        index1: Index of one endpoint.
        index2: Index of the other endpoint.
    """
    pair = (index1, index2) if index1 < index2 else (index2, index1)
    edges.add(pair)

In [11]:
# Build the set of precinct adjacencies: two precincts are connected when
# their geometries touch.
edges = set()
failed_rows = []  # (row label, exception) for precincts whose touch test raised
geometries = precinct_data['GEOMETRY']
for i in precinct_data.index:
    try:
        touching = precinct_data[precinct_data.touches(geometries[i])].index
    except Exception as error:
        # Best effort: a geometry that cannot be tested is skipped, but it is
        # recorded rather than silently ignored. Catching Exception instead of
        # using a bare except also keeps the loop interruptible.
        failed_rows.append((i, error))
        continue
    for j in touching:
        addEdge(edges, i, j)

if failed_rows:
    # Edges originating from these rows were not computed; an edge to one of
    # them is still present if the neighbouring row's own test succeeded.
    print(f'Touch test failed for {len(failed_rows)} of {len(precinct_data)} rows: '
          f'{[row for row, _ in failed_rows]}')

TopologyException: side location conflict at -84.281015999999994 33.772047000000001
TopologyException: side location conflict at -84.281015999999994 33.772047000000001
TopologyException: side location conflict at -84.281015999999994 33.772047000000001
TopologyException: side location conflict at -84.281015999999994 33.772047000000001
TopologyException: side location conflict at -84.281015999999994 33.772047000000001
TopologyException: side location conflict at -84.281015999999994 33.772047000000001
TopologyException: side location conflict at -84.281015999999994 33.772047000000001
TopologyException: side location conflict at -84.281015999999994 33.772047000000001
TopologyException: side location conflict at -84.281015999999994 33.772047000000001
TopologyException: side location conflict at -84.281015999999994 33.772047000000001
TopologyException: side location conflict at -84.281015999999994 33.772047000000001


In [12]:
# A set has no meaningful order; sort the (smaller, larger) pairs so the edge
# table, and the CSV written from it, comes out in a stable order.
edges = pd.DataFrame(sorted(edges), columns = ['ROW_1','ROW_2'])

In [14]:
# Write the precinct table and the edge list to their configured CSV files.
for frame, destination in ((precinct_data, EXPORT_PRECINCTS), (edges, EXPORT_EDGES)):
    frame.to_csv(destination)