In [2]:
from shapely.wkt import dumps
from pyproj import CRS
from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import cascaded_union
from shapely import unary_union
from shapely.geometry import shape, mapping
from pymongo.errors import BulkWriteError
import matplotlib.pyplot as plt
import multiprocessing as mp
from pprint import pprint
from typing import Dict
import geopandas as gpd
from shapely import wkt
import pandas as pd
import numpy as np
import fiona
import os
import folium
import maup

# Useful functions

In [3]:
# Demographic Heat Map of Precincts
# + Election Results (Pres, Sen, Gov, etc.)
def block_to_precinct(block_data: gpd.GeoDataFrame,
                      precinct_data: gpd.GeoDataFrame):
    """Assign every block geometry to a precinct.

    Thin wrapper around ``maup.assign(block_data, precinct_data)``.

    Args:
        block_data: Block-level geometries (the units being assigned).
        precinct_data: Precinct-level geometries (the assignment targets).

    Returns:
        Whatever ``maup.assign`` returns for the two frames, unchanged.
    """
    return maup.assign(block_data, precinct_data)

def variable_grouping(assignment_data,
                      block_data,
                      precinct_data,
                      variables: list):
    """Sum block-level variables up to the units they were assigned to.

    Args:
        assignment_data: Grouping key passed to ``groupby`` (one label per
            row of ``block_data``).
        block_data: Frame holding the columns to aggregate.
        precinct_data: Frame that receives the aggregated columns; it is
            modified in place.
        variables: Column names to sum.

    Returns:
        ``precinct_data`` (the same object), with ``variables`` set to the
        per-group sums, aligned on its index.
    """
    group_totals = block_data[variables].groupby(assignment_data).sum()
    precinct_data[variables] = group_totals
    return precinct_data

### Demographic Heat Map of Districts ###
def precinct_to_district(precinct_data: gpd.GeoDataFrame,
                      district_data: gpd.GeoDataFrame):
    """Assign every precinct geometry to a district.

    Thin wrapper around ``maup.assign(precinct_data, district_data)``.

    Args:
        precinct_data: Precinct-level geometries (the units being assigned).
        district_data: District-level geometries (the assignment targets).

    Returns:
        Whatever ``maup.assign`` returns for the two frames, unchanged.
    """
    return maup.assign(precinct_data, district_data)

def geometry_to_wkt(geometry_data):
    """Replace the 'geometry' column with its WKT text representation.

    The frame is modified in place. Any exception raised during the
    conversion is pretty-printed instead of propagated, in which case the
    frame is returned without the conversion applied.

    Returns:
        ``geometry_data`` (the same object).
    """
    try:
        wkt_values = gpd.array.to_wkt(geometry_data['geometry'].values)
        geometry_data['geometry'] = wkt_values
    except Exception as e:
        # Best-effort: report the failure and fall through to the return.
        pprint(e)

    return geometry_data

def find_precinct_neighbors(precinct_data: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    '''Find the neighbors of each precinct and store them in a 'neighbors' column.

    ``maup.adjacencies`` describes adjacency as a series indexed by pairs
    ``(i, j)`` of adjacent rows, which cannot be assigned directly as a
    per-precinct column. The pairs are therefore expanded into one list of
    neighbor index labels per precinct.

    Args:
        precinct_data: Precinct geometries; modified in place.

    Returns:
        ``precinct_data`` (the same object) with a 'neighbors' column holding,
        for each row, the list of index labels of its adjacent rows (an empty
        list when it has none).
    '''
    adjacency = maup.adjacencies(precinct_data)

    # Start every precinct with an empty list so isolated ones are represented.
    neighbor_lists = {label: [] for label in precinct_data.index}
    for left, right in adjacency.index:
        # Each pair is reported once; record it in both directions.
        neighbor_lists.setdefault(left, []).append(right)
        neighbor_lists.setdefault(right, []).append(left)

    precinct_data['neighbors'] = [neighbor_lists[label] for label in precinct_data.index]
    return precinct_data

def rename_columns(dataframe: pd.DataFrame, columns_map) -> None:
    """Rename columns of ``dataframe`` in place.

    Args:
        dataframe: Frame whose column labels are changed.
        columns_map: Mapping (or callable) from old to new column labels, as
            accepted by ``DataFrame.rename``.

    Returns:
        None; the frame itself is mutated.
    """
    dataframe.rename(columns=columns_map, inplace=True)

def reorder_columns(dataframe: pd.DataFrame, columns: list):
    """Select ``columns`` from ``dataframe`` in the given order.

    Columns not named in ``columns`` are left out of the result.

    Returns:
        The result of ``dataframe[columns]``.
    """
    selected = dataframe[columns]
    return selected

def fill_missing_pop_w_zero(dataframe: pd.DataFrame,
                                  name: str,
                                  column: pd.Series
                                  ) -> pd.Series:
    '''
    Replace missing (NaN) entries of a population/vote column with 0.

    An earlier revision drew random fill values from a gamma distribution;
    that approach is no longer used and missing entries are simply zeroed.

    Args:
        dataframe: Frame that owns the column.
        name: Label of the column inside ``dataframe``.
        column: The column's values (normally ``dataframe[name]``).

    Returns:
        ``column`` with its missing entries set to 0. ``column`` is updated
        in place, and ``dataframe[name]`` is filled explicitly as well so the
        result does not depend on ``column`` being a view of the frame.
    '''
    is_null = column.isna()

    if is_null.any():
        column[is_null] = 0
        # Write back explicitly: under pandas copy-on-write the Series passed
        # in is not guaranteed to share memory with the frame.
        if name in dataframe.columns:
            dataframe[name] = dataframe[name].fillna(0)
    return column

def calculate_state_wide_stats(dataframe: pd.DataFrame, columns) -> pd.Series:
    """Total the selected columns over every row of ``dataframe``.

    Args:
        dataframe: Frame holding the per-unit values.
        columns: Column labels to total; their values are cast to ``int``
            before summing.

    Returns:
        For a list of labels, a Series indexed by column label with one
        total per column.
    """
    as_integers = dataframe[columns].astype(int)
    return as_integers.sum()

def combine_election_results(block_df: pd.DataFrame, precinct_df: pd.DataFrame) -> pd.DataFrame:
    """Sum block-level rows into the precincts they are assigned to.

    Args:
        block_df: Block-level frame; passed to ``maup.assign`` and then
            grouped and summed.
        precinct_df: Precinct-level frame used as the assignment target.

    Returns:
        ``block_df`` grouped by the maup assignment and summed.
    """
    block_precinct_assign = maup.assign(block_df, precinct_df)
    return block_df.groupby(block_precinct_assign).sum()

### TRANSFORM FUNCTIONS ###

def to_math_crs(geometry_data, crs=32030):
    """Reproject ``geometry_data`` in place to the CRS used for geometric work.

    Args:
        geometry_data: Object exposing ``to_crs``; it is reprojected in place.
        crs: Target CRS, 32030 by default.
            NOTE(review): confirm this code is the intended projected CRS
            for every state processed in this notebook.

    Returns:
        ``geometry_data`` (the same object).
    """
    geometry_data.to_crs(crs=crs, inplace=True)
    return geometry_data

def to_world_crs(geometry_data, crs=4326):
    """Reproject ``geometry_data`` in place to the CRS used for export.

    Args:
        geometry_data: Object exposing ``to_crs``; it is reprojected in place.
        crs: Target CRS, 4326 by default.

    Returns:
        ``geometry_data`` (the same object).
    """
    geometry_data.to_crs(crs=crs, inplace=True)
    return geometry_data

def simplify_geometry(geometry_data, tolerance=0.001):
    """Simplify the 'geometry' column of ``geometry_data`` in place.

    Args:
        geometry_data: Frame with a 'geometry' column supporting
            ``.simplify(tolerance)``.
        tolerance: Simplification tolerance, expressed in the units of the
            frame's current CRS. Defaults to 0.001, the value this notebook
            has always used.

    Returns:
        ``geometry_data`` (the same object).
    """
    geometry_data['geometry'] = geometry_data['geometry'].simplify(tolerance)
    return geometry_data

### Data Observation Functions ###
def common_columns(precinct_data: gpd.GeoDataFrame, block_data: gpd.GeoDataFrame) -> None:
    """Pretty-print the column labels the two frames have in common."""
    shared = precinct_data.columns.intersection(block_data.columns)
    pprint(shared)

def missing_data(precinct_data: gpd.GeoDataFrame) -> None:
    """Pretty-print the number of missing values in each column."""
    missing_per_column = precinct_data.isna().sum()
    pprint(missing_per_column)

def print_columns(precinct_data: gpd.GeoDataFrame) -> None:
    """Pretty-print the frame's column labels."""
    labels = precinct_data.columns
    pprint(labels)

def print_df(precinct_data: gpd.GeoDataFrame) -> None:
    """Pretty-print the result of ``precinct_data.head()``."""
    preview = precinct_data.head()
    pprint(preview)

def print_spacer() -> None:
    """Pretty-print a dashed separator between blocks of output."""
    separator = '------------'
    pprint(separator)

def print_crs(data: gpd.GeoDataFrame) -> None:
    """Pretty-print the frame's ``crs`` attribute."""
    current_crs = data.crs
    pprint(current_crs)

In [4]:
# Get the current working directory
# Resolve the 'Data' folder two levels above the current working directory
# and make it the working directory, so the relative paths below resolve.
# NOTE: this cell is not idempotent -- re-running it without restarting the
# kernel climbs two more levels from the new working directory.
current_directory = os.getcwd()
move_up = os.path.dirname(os.path.dirname(current_directory))
data_dir = os.path.join(move_up, 'Data')
# os.chdir() returns None, so its result is deliberately not bound to a name.
os.chdir(data_dir)
print(os.getcwd())

# Nevada Preprocessing

### District-level data

In [5]:
# Load the Nevada district shapefile (nv_sldl_2021), plot it, and list its columns.
nv_district_df = gpd.read_file('NV Data/nv_sldl_2021/nv_sldl_2021.shp')

nv_district_df.plot()
display(nv_district_df.columns)

### Precinct-level Election Data

We begin by aggregating data at the precinct level, merging the Presidential results with the Gubernatorial and US Senate results.

In [6]:
# Read the 2022 general-election precinct results (a CSV read through geopandas).
nv_2022_gen_elc = gpd.read_file('NV Data/nv_2022_gen_prec/nv_2022_gen_prec.csv')

# Select columns by position: the first four columns, columns 12-15 and
# columns 39 and 43. Presumably these are the identifier columns, the
# Governor results and the US Senate results -- verify against the CSV header.
nv_gov_vote_2022 = nv_2022_gen_elc.iloc[:, :4].join(nv_2022_gen_elc.iloc[:, 12:16]).join(nv_2022_gen_elc.iloc[:, [39,43]])
# Removing Non-Democratic/Non-Republican votes
nv_gov_vote_2022.drop(['G22GOVOWRI', 'G22GOVLDAV'], axis=1, inplace=True)
# Keep only the first run of digits of the precinct label (NaN when there is none).
nv_gov_vote_2022['PREC'] = nv_gov_vote_2022['PREC'].str.extract('(\\d+)').astype(float)
# Some precincts don't have numbers associated with them; on closer inspection
# they don't have any votes associated with them either, so we are safe to drop them.
nv_gov_vote_2022.dropna(inplace=True)
nv_gov_vote_2022['PREC'] = nv_gov_vote_2022['PREC'].astype(int)
# Creating a unique id ('<COUNTYFP>-<PREC>') to join the dataframes on
nv_gov_vote_2022['UNIQUE_ID'] = nv_gov_vote_2022['COUNTYFP'] + '-' + nv_gov_vote_2022['PREC'].astype(str)

Precinct-Level Gubernatorial and Attorney General Results from [Redistricting Data Hub](https://redistrictingdatahub.org/dataset/nevada-2022-general-election-precinct-level-results/)

In [44]:
# Load the VEST 2020 precinct shapes and results.
nv_vest_shp = gpd.read_file('NV Data/nv_vest_20/nv_vest_20.shp')

# Drop the placeholder rows; .copy() makes the result an independent frame so
# the column assignment below does not write into a slice of the original.
nv_vest_shp = nv_vest_shp[nv_vest_shp['NAME'] != 'Voting Districts Not Defined'].copy()
# Format the unique id column the same way as in the gubernatorial election
# dataframe: '<COUNTY>-<VTDST without leading zeros>'.
nv_vest_shp['UNIQUE_ID'] = nv_vest_shp['COUNTY'] + '-' + nv_vest_shp['VTDST'].str.lstrip('0')

In [45]:
# Run maup's doctor diagnostic on the precinct shapes before any repair.
maup.doctor(nv_vest_shp)

In [46]:
# Reproject in place (to_math_crs default CRS), then rebind the name to
# the output of maup.smart_repair.
to_math_crs(nv_vest_shp)
nv_vest_shp = maup.smart_repair(nv_vest_shp)

In [10]:
# Re-run the diagnostic on the repaired shapes.
maup.doctor(nv_vest_shp)

In [11]:
# Display the column labels of the repaired precinct frame.
nv_vest_shp.columns

Precinct-Level Presidential Data from [Redistricting Data Hub](https://redistrictingdatahub.org/dataset/vest-2020-nevada-precinct-boundaries-and-election-results-shapefile/)

### Merging Presidential, Gubernatorial, Attorney General Elections by Precinct

Using an outer join and removing precincts with missing geometry

We need the geometries in order to assign block data to precincts

In [12]:
# Outer-join the 2020 precinct shapes/results with the 2022 results on UNIQUE_ID;
# rows present on only one side are kept with missing values on the other.
nv_elections = pd.merge(nv_vest_shp, nv_gov_vote_2022, on='UNIQUE_ID', how='outer')

In [13]:
# Keep only the merged rows that carry a geometry. `.notna()` is the explicit
# missing-value test; comparing against None with `!=` depends on how the
# column's dtype implements element-wise comparison.
has_geometry = nv_elections['geometry'].notna()
nv_elections_precinct = gpd.GeoDataFrame(nv_elections[has_geometry])
nv_elections_precinct.reset_index(drop=True, inplace=True)

to_math_crs(nv_elections_precinct)
maup.doctor(nv_elections_precinct)

In [14]:
# Columns removed from the merged precinct frame in the next cell.
unnecessary_columns = ['COUNTYFP_x', 'CNTY_NAME', 'PREC', 'COUNTYFP_y', 'COUNTY',
                       'VTDST', 'NAME', 'G20PRELJOR', 'G20PREIBLA', 'G20PREONON']

In [15]:
# In-place drop: re-running this cell without re-creating the frame raises,
# because the columns are already gone.
nv_elections_precinct.drop(unnecessary_columns, axis=1, inplace=True)

### Block-level Demographic data

We process the block data to be assigned to precinct-level data using the maup library

In [16]:
# Load the Nevada block-level export from districtr.
districtr_block_df = gpd.read_file('NV Data/districtr_nv_blockDemo/export-2657.shp')

Block-Level Demographic Data from [districtr](https://districtr.org/plan)

In [17]:
# Put both layers in the same CRS before assigning blocks to precincts.
# (The precinct frame was already reprojected when it was built; repeating
# the call is harmless.)
to_math_crs(nv_elections_precinct)

to_math_crs(districtr_block_df)

In [18]:
# Assign every Nevada block to a precinct (maup.assign via block_to_precinct).
nv_blocks_to_precinct_assignments = block_to_precinct(districtr_block_df, nv_elections_precinct)

In [19]:
# Display the column labels available in the block-level frame.
districtr_block_df.columns

In [20]:
# Voting-age-population columns to sum from blocks up to precincts.
nv_block_columns = ['VAP', 'WVAP', 'BVAP', 'ASIANVAP', 'HVAP']
nv_elections_precinct = variable_grouping(
    assignment_data=nv_blocks_to_precinct_assignments,
    block_data=districtr_block_df,
    precinct_data=nv_elections_precinct,
    variables=nv_block_columns,
)

In [21]:
# Re-run the diagnostic on the precinct frame after adding the block totals.
maup.doctor(nv_elections_precinct)

In [22]:
# Columns whose missing values are replaced (2022 results and block totals).
columns_to_fill = ['G22GOVDSIS', 'G22GOVRLOM', 'G22USSDCOR',
                   'G22USSRLAX', 'VAP', 'WVAP', 'BVAP', 'ASIANVAP', 'HVAP']
# Columns converted to numeric values: the list above plus the 2020
# presidential results.
vote_results_to_int = ['G22GOVDSIS', 'G22GOVRLOM', 'G22USSDCOR', 'G22USSRLAX',
                       'G20PREDBID', 'G20PRERTRU', 'VAP', 'WVAP', 'BVAP', 'ASIANVAP', 'HVAP']

Before merging, some of the vote results were formatted as strings rather than integers. \
We reformat all of the voting results as numbers for sanity and completeness.

In [23]:
# Parse each results column as numbers; values that cannot be parsed become NaN.
for column in vote_results_to_int:
    raw_values = nv_elections_precinct[column]
    nv_elections_precinct[column] = pd.to_numeric(raw_values, errors='coerce')

Fill missing values with zero

In [24]:
# Zero-fill the missing entries of every listed column. The helper's return
# value is assigned back so the frame is updated even when the Series passed
# in does not share memory with it (e.g. under pandas copy-on-write).
for column_name in columns_to_fill:
    nv_elections_precinct[column_name] = fill_missing_pop_w_zero(
        dataframe=nv_elections_precinct,
        name=column_name,
        column=nv_elections_precinct[column_name])

Some values came out as decimals! We can't have partial votes! \
We reformat values into whole integers again

In [25]:
# Round every filled column up to the next whole number and store it as int.
nv_elections_precinct[columns_to_fill] = np.ceil(nv_elections_precinct[columns_to_fill]).astype(int)

We reorganize the column names for readability and formatting

In [26]:
# Names of the demographic share columns (each group's VAP divided by total VAP).
pct_votes = ['pct_wvap', 'pct_bvap', 'pct_asianvap', 'pct_hvap']

In [27]:
# Row-wise share of each group in the precinct's VAP. Rows whose VAP is 0
# do not produce a finite share; NaN results are replaced with 0 further down.
nv_elections_precinct[pct_votes] = (nv_elections_precinct[['WVAP', 'BVAP', 'ASIANVAP', 'HVAP']]
                                    .div(nv_elections_precinct['VAP'], axis=0)).astype(float)

In [28]:
# Names of the two-party vote-share columns added below.
vote_share = ['pct_bid', 'pct_tru', 'pct_cor', 'pct_lax']

# Two-party totals: 2020 President and 2022 US Senate.
nv_pres_two_party = nv_elections_precinct['G20PRERTRU'] + nv_elections_precinct['G20PREDBID']
nv_senate_two_party = nv_elections_precinct['G22USSRLAX'] + nv_elections_precinct['G22USSDCOR']

# Each share is one candidate's votes divided by the two-party total.
nv_elections_precinct = nv_elections_precinct.assign(
    pct_tru=(nv_elections_precinct['G20PRERTRU'] / nv_pres_two_party).astype(float),
    pct_bid=(nv_elections_precinct['G20PREDBID'] / nv_pres_two_party).astype(float),
    pct_cor=(nv_elections_precinct['G22USSDCOR'] / nv_senate_two_party).astype(float),
    pct_lax=(nv_elections_precinct['G22USSRLAX'] / nv_senate_two_party).astype(float),
)

In [29]:
# Final column selection and order for the precinct-level output.
new_column_order = ['UNIQUE_ID', 'STATEFP', 'G20PREDBID', 'G20PRERTRU', 'G22USSDCOR', 'G22USSRLAX', 
                    'pct_bid', 'pct_tru', 'pct_cor', 'pct_lax', 'VAP', 'WVAP', 'BVAP', 'ASIANVAP', 
                    'HVAP', 'pct_wvap', 'pct_bvap', 'pct_asianvap', 'pct_hvap', 'geometry']
# Old label -> output label. Entries mapping a label to itself are no-ops.
new_column_names = {
    'UNIQUE_ID': 'uniqueId',
    'STATEFP': 'state', 
    'G20PREDBID': 'PRE20D',
    'G20PRERTRU': 'PRE20R', 
    'G22USSDCOR': 'USS22D', 
    'G22USSRLAX': 'USS22R', 
    'VAP': 'vap', 
    'WVAP': 'wvap', 
    'BVAP': 'bvap', 
    'ASIANVAP': 'asianvap', 
    'HVAP': 'hvap',
    'pct_wvap' :'pct_wvap', 
    'pct_bvap' :'pct_bvap', 
    'pct_asianvap': 'pct_asianvap', 
    'pct_hvap': 'pct_hvap',
    'geometry': 'geometry'
}

# STATEFP is overwritten with the state's name; it is renamed to 'state' below.
nv_elections_precinct['STATEFP'] = 'Nevada'

# Select/reorder first, then rename the selected frame in place.
nv_precinct_level_data = reorder_columns(dataframe=nv_elections_precinct, columns=new_column_order)
rename_columns(dataframe=nv_precinct_level_data, columns_map=new_column_names)

Because some of the areas have ZERO population, the share calculations result in NaN values, \
so we will fill them with 0

In [30]:
# Replace NaN shares with 0 in both groups of share columns.
nv_precinct_level_data[pct_votes] = nv_precinct_level_data[pct_votes].fillna(0)
nv_precinct_level_data[vote_share] = nv_precinct_level_data[vote_share].fillna(0)

In [31]:
# Share columns to cast in the next cell. Despite the name, they are cast
# to float there, not to int.
pct_columns_to_int = ['pct_bid', 'pct_tru', 'pct_cor', 'pct_lax', 
                           'pct_wvap', 'pct_bvap', 'pct_asianvap', 'pct_hvap']

In [32]:
# Make sure every share column is stored as float.
for column in pct_columns_to_int:
    nv_precinct_level_data[column] = nv_precinct_level_data[column].astype(float)

In [33]:
# Reproject the final precinct frame and run the diagnostic on it.
to_math_crs(nv_precinct_level_data)
maup.doctor(nv_precinct_level_data)

In [34]:
# Compute precinct adjacencies with maup and display the result.
nv_prec_adj = maup.adjacencies(nv_precinct_level_data)
nv_prec_adj

### Cleaning the district-level

In [35]:
# Drop the attribute columns shipped with the district shapefile; the totals
# used here are re-aggregated from precincts below. In-place: re-running
# this cell without reloading the frame raises.
nv_district_df.drop(['ADJPOP', 'TAWHITEALN', 'TABLACKCMB', 'TAAIANCMB',
       'TAASIANCMB', 'TANHOPICMB', 'TAOTHERALN', 'TA2RACE', 'TAHISPANIC',
       'VAPERSONS', 'TOTVOTER20', 'DEMVOTER20', 'REPVOTER20', 'NPVOTER20',
       'OTHVOTER20', 'PRES20_TOT', 'PRES20_DEM', 'PRES20_REP', 'PRES20_OTH',
       'AG18_TOTAL', 'AG18_DEM', 'AG18_REP', 'AG18_OTH', 'SOS18_TOTA',
       'SOS18_DEM', 'SOS18_REP', 'SOS18_OTH', 'PRES16_TOT', 'PRES16_DEM',
       'PRES16_REP', 'PRES16_OTH'], inplace=True, axis=1)

In [36]:
# Put the districts in the same CRS as the precincts before assigning.
to_math_crs(nv_district_df)

In [37]:
# Assign every precinct to a district.
precinct_to_district_assignment = maup.assign(nv_precinct_level_data, nv_district_df)
# Add the assigned districts as a column of the `precincts` GeoDataFrame:
nv_precinct_level_data["districtNum"] = precinct_to_district_assignment
precinct_to_district_assignment.head()

In [38]:
# Precinct columns summed up to the district level.
nv_district_variables = ['PRE20D', 'PRE20R', 'USS22D', 'USS22R', 'vap', 'wvap', 'bvap', 'asianvap', 'hvap']
# variable_grouping writes the sums into nv_district_df in place, so the
# return value is not needed here.
variable_grouping(assignment_data=precinct_to_district_assignment,
                  block_data=nv_precinct_level_data, 
                  precinct_data=nv_district_df,
                  variables=nv_district_variables)

# District-level demographic shares and two-party presidential shares.
nv_district_df[pct_votes] = (nv_district_df[['wvap', 'bvap', 'asianvap', 'hvap']].div(nv_district_df['vap'], axis=0)).astype(float)
nv_district_df['state'] = 'Nevada'
nv_district_df = nv_district_df.assign(
                             pct_rep=lambda row: (row['PRE20R'] / (row['PRE20R'] + row['PRE20D'])).astype(float),
                             pct_dem=lambda row: (row['PRE20D'] / (row['PRE20R'] + row['PRE20D'])).astype(float))

In [39]:
# Display the district frame's columns after aggregation.
nv_district_df.columns

In [40]:
# Rename the district identifier to the label used on the precinct frame.
district_column_names = {
    'DISTRICTNO': 'districtNum'
}

rename_columns(dataframe=nv_district_df, columns_map=district_column_names)

In [41]:
# Make sure every district share column (demographic and party) is float.
for column in pct_votes + ['pct_rep', 'pct_dem']:
    nv_district_df[column] = nv_district_df[column].astype(float)

In [42]:
# Work on a copy so the geometry experiments below leave the precinct frame untouched.
testing = nv_precinct_level_data.copy()

In [43]:
# Reproject the copy to the UTM CRS geopandas estimates for its extent.
to_math_crs(testing, testing.estimate_utm_crs())

In [1182]:
# First repair pass on the copy.
testing_repaired = maup.smart_repair(testing)

In [1183]:
# Run the diagnostic on the first repair pass.
maup.doctor(testing_repaired)

In [1184]:
def close_holes(poly: Polygon) -> Polygon:
    """
    Return ``poly`` without its interior rings (holes).

    A polygon that has no interior rings is returned as-is; otherwise a new
    polygon is built from the exterior ring's coordinates only.

    Args:
        poly: Input shapely Polygon
    Example:
        df.geometry.apply(lambda p: close_holes(p))
    """
    if not poly.interiors:
        return poly
    exterior_coords = list(poly.exterior.coords)
    return Polygon(exterior_coords)

def hole_func(poly: MultiPolygon):
    """Return a MultiPolygon built from the exterior ring of every part.

    Each component polygon is rebuilt from its exterior only, which drops
    its interior rings (holes).
    """
    exteriors_only = (Polygon(part.exterior) for part in poly.geoms)
    return MultiPolygon(exteriors_only)

In [1186]:
# Fill holes in every geometry: MultiPolygons go through hole_func, everything
# else through close_holes. Note this rebinds testing_repaired to the result
# of applying over the geometry column only, not to the full frame.
testing_repaired = testing_repaired.geometry.apply(lambda p: hole_func(p) if p.geom_type == 'MultiPolygon' else close_holes(p))
# testing_repaired = testing_repaired.geometry.apply(lambda p: hole_func(p))

In [1187]:
# Run the diagnostic after the first hole-filling pass.
maup.doctor(testing_repaired)

In [1188]:
# Visual check of the hole-filled geometries.
testing_repaired.plot()

In [1189]:
# Second repair pass, applied to the hole-filled geometries.
testing_repaired2 = maup.smart_repair(testing_repaired)

In [1190]:
# Run the diagnostic on the second repair pass.
maup.doctor(testing_repaired2)

In [1194]:
# Second hole-filling pass, with the same dispatch as the first one.
testing_repaired2 = testing_repaired2.geometry.apply(lambda p: hole_func(p) if p.geom_type == 'MultiPolygon' else close_holes(p))
# testing_repaired = testing_repaired.geometry.apply(lambda p: hole_func(p))

In [1195]:
# Run the diagnostic after the second hole-filling pass.
maup.doctor(testing_repaired2)

In [1196]:
# Visual check after the second pass.
testing_repaired2.plot()

In [1192]:
# Third repair pass. NOTE(review): the result is not used anywhere in the
# visible part of the notebook.
testing_repaired3 = maup.smart_repair(testing_repaired2)

In [1075]:
# nv_district_df.to_file('nv_district_final_data.shp')
# nv_precinct_level_data.to_file('nv_precinct_final_data.shp')

In [None]:
# Reproject both Nevada outputs in place to the export CRS (to_world_crs default).
to_world_crs(nv_district_df)
to_world_crs(nv_precinct_level_data)

In [None]:
# Simplify both geometry columns in place before export.
simplify_geometry(nv_district_df)
simplify_geometry(nv_precinct_level_data)

In [None]:
# Convert both geometry columns to WKT text; the frames no longer hold
# geometry objects after this cell.
geometry_to_wkt(nv_district_df)
geometry_to_wkt(nv_precinct_level_data)

### Exporting Files

Before we export them, we must simplify the geometries and convert them to WKT; we also format them into dictionaries to be sent to the database

In [974]:
# Export the Nevada district table as CSV without the index column.
# The shapefile / GeoJSON exports are kept below for reference but disabled.
# nv_district_df.to_file('nv_district_final_data.shp')
# nv_district_df.to_file('nv_district_final_data.geojson', driver='GeoJSON')
nv_district_df.to_csv('nv_district_final_data.csv', index=False)

In [975]:
# Export the Nevada precinct table as CSV without the index column.
# The shapefile / GeoJSON exports are kept below for reference but disabled.
# nv_precinct_level_data.to_file('nv_precinct_final_data.shp')
# nv_precinct_level_data.to_file('nv_precinct_final_data.geojson', driver='GeoJSON')
nv_precinct_level_data.to_csv('nv_precinct_final_data.csv', index=False)

# Mississippi Preprocessing

### Importing Block-level data

This data includes block-level demographic values collected from MGGG's Districtr website, which in turn sources them from the US Census Bureau

In [983]:
# Load the Mississippi block-level export from districtr.
ms_districtr_block_df = gpd.read_file('MS Data/districtr_ms_blockDemo/export-3230.shp')

Block-Level Demographic Data from [districtr](https://districtr.org/plan)

In [984]:
# Reproject the blocks in place (to_math_crs default CRS).
to_math_crs(ms_districtr_block_df)

In [985]:
# Load the VEST 2020 Mississippi precinct shapes and results.
ms_vest_20 = gpd.read_file('MS Data/ms_vest_20/ms_vest_20.shp')

"VEST Presidential Election Data from [Redistricting Hub](https://redistrictingdatahub.org/dataset/vest-2020-mississippi-precinct-and-election-results/) \
Precinct-Level Presidential Data + Geometry

In [986]:
# Reproject in place, then keep maup's repaired output under a new name.
to_math_crs(ms_vest_20)
ms_vest_repaired = maup.smart_repair(ms_vest_20)

In [1096]:
# Build a '<NAME20>-<VTDST20>' identifier for every precinct.
ms_vest_repaired['UNIQUE_ID'] = ms_vest_repaired['NAME20'] + '-' + ms_vest_repaired['VTDST20'].astype(str)

Dropping unused columns

In [1097]:
# Result columns that are not used downstream, plus GEOID20.
ms_unused_prec_col = [ 'G20PRELJOR', 'G20PREGHAW', 'G20PREABLA', 
                      'G20PREOCAR', 'G20PREIWES', 'G20PREICOL', 
                      'G20PREIPIE','G20USSLEDW', 'GEOID20']
# Identifier columns converted to int below.
pres_str_to_int = ['STATEFP20', 'COUNTYFP20', 'VTDST20']
ms_pres_20 = ms_vest_repaired.drop(ms_unused_prec_col, axis=1)
ms_pres_20[pres_str_to_int] = ms_pres_20[pres_str_to_int].astype(int)
# Sorting in place reorders the rows but keeps their original index labels.
ms_pres_20.sort_values(by='COUNTYFP20', inplace=True)

In [1098]:
# Assign every Mississippi block to a precinct.
ms_blocks_to_precinct_assignments = block_to_precinct(ms_districtr_block_df, ms_pres_20)

In [1099]:
# Voting-age-population columns to sum from blocks up to precincts.
nv_block_columns = ['VAP', 'WVAP', 'BVAP', 'ASIANVAP', 'HVAP']
# Aggregate the *Mississippi* blocks with the *Mississippi* assignment.
# (This call previously passed the Nevada assignment and Nevada block frame,
# which summed Nevada populations into the Mississippi precincts.)
ms_precinct_data = variable_grouping(assignment_data=ms_blocks_to_precinct_assignments,
                  block_data=ms_districtr_block_df,
                  precinct_data=ms_pres_20,
                  variables=nv_block_columns)

We're starting to notice a pattern here with cleaning our data...

In [1100]:
# Rebinds columns_to_fill (used earlier for Nevada) to the Mississippi columns.
columns_to_fill = ['VAP', 'WVAP', 'BVAP', 'ASIANVAP', 'HVAP']

In [1101]:
# Zero-fill missing totals, then round up and store as int.
ms_precinct_data[columns_to_fill] = ms_precinct_data[columns_to_fill].fillna(0)
ms_precinct_data[columns_to_fill] = np.ceil(ms_precinct_data[columns_to_fill]).astype(int)

In [1102]:
# Row-wise share of each group in the precinct's VAP (pct_votes is defined
# in the Nevada section above).
ms_precinct_data[pct_votes] = (ms_precinct_data[['WVAP', 'BVAP', 'ASIANVAP', 'HVAP']]
                                    .div(ms_precinct_data['VAP'], axis=0)).astype(float)

In [1103]:
# Replace NaN shares with 0.
ms_precinct_data[pct_votes] = ms_precinct_data[pct_votes].fillna(0)

In [1104]:
# Names of the two-party vote-share columns added below.
vote_share = ['pct_bid', 'pct_tru', 'pct_hyd', 'pct_esp']

# Two-party totals: 2020 President and 2020 US Senate.
ms_pres_two_party = ms_precinct_data['G20PRERTRU'] + ms_precinct_data['G20PREDBID']
ms_senate_two_party = ms_precinct_data['G20USSDESP'] + ms_precinct_data['G20USSRHYD']

# Each share is one candidate's votes divided by the two-party total.
ms_precinct_data = ms_precinct_data.assign(
    pct_tru=(ms_precinct_data['G20PRERTRU'] / ms_pres_two_party).astype(float),
    pct_bid=(ms_precinct_data['G20PREDBID'] / ms_pres_two_party).astype(float),
    pct_hyd=(ms_precinct_data['G20USSRHYD'] / ms_senate_two_party).astype(float),
    pct_esp=(ms_precinct_data['G20USSDESP'] / ms_senate_two_party).astype(float),
)

In [1105]:
# Final column selection and order for the Mississippi precinct output.
new_column_order = ['UNIQUE_ID', 'STATEFP', 'G20PREDBID', 'G20PRERTRU', 'G20USSDESP', 'G20USSRHYD', 
                    'pct_bid', 'pct_tru', 'pct_hyd', 'pct_esp', 'VAP', 'WVAP', 'BVAP', 'ASIANVAP', 
                    'HVAP', 'pct_wvap', 'pct_bvap', 'pct_asianvap', 'pct_hvap', 'geometry']
# Old label -> output label. Entries mapping a label to itself are no-ops.
new_column_names = {
    'UNIQUE_ID': 'uniqueId',
    'STATEFP': 'state', 
    'G20PREDBID': 'PRE20D',
    'G20PRERTRU': 'PRE20R', 
    'G20USSDESP': 'USS20D', 
    'G20USSRHYD': 'USS20R', 
    'VAP': 'vap', 
    'WVAP': 'wvap', 
    'BVAP': 'bvap', 
    'ASIANVAP': 'asianvap', 
    'HVAP': 'hvap',
    'pct_wvap' :'pct_wvap', 
    'pct_bvap' :'pct_bvap', 
    'pct_asianvap': 'pct_asianvap', 
    'pct_hvap': 'pct_hvap',
    'geometry': 'geometry'
}

# The 'STATEFP' column is created here holding the state's name; it is
# renamed to 'state' below.
ms_precinct_data['STATEFP'] = 'Mississippi'

ms_precinct_level_data = reorder_columns(dataframe=ms_precinct_data, columns=new_column_order)
rename_columns(dataframe=ms_precinct_level_data, columns_map=new_column_names)
# reset_index without drop=True keeps the old index as a new 'index' column.
# NOTE(review): the Nevada pipeline uses drop=True -- confirm the extra
# column is wanted here.
ms_precinct_level_data.reset_index(inplace=True)

In [1106]:
# Display the final precinct column labels.
ms_precinct_level_data.columns

### Importing Mississippi District Data

We only need the geometry and the District Num associated with it, so we drop everything else

In [1107]:
# Load the Mississippi district shapefile (MS_HouseDists_Mar292022).
ms_districts_lower = gpd.read_file('MS Data/ms_sldl_adopted_2022/MS_HouseDists_Mar292022.shp')

District-level Data from [Redistricting Hub](https://redistrictingdatahub.org/dataset/2022-mississippi-senate-districts-approved-plan/)

In [1108]:
# Drop the attribute columns shipped with the district shapefile. In-place:
# re-running this cell without reloading the frame raises.
ms_districts_lower.drop(['ID', 'MEMBERS', 'LOCKED', 'NAME', 'POPULATION', 'WHITE', 'BLACK', 
                         'F18_POP', 'F18_WHT', 'F18_BLK', 'F18_AP_BLK', 'DEVIATION', 'F_DEVIATIO', 
                         'F_WHITE', 'F_BLACK', 'F_18_POP', 'F_18_WHT', 'F_18_BLK', 'MULTIPLE_F', 
                         'F_18_AP_BL', 'DISTRICT_L', 'Shade_Num', 'DISTRICT', 'AREA', 'IDEAL_VALU'], inplace=True, axis=1)

In [1109]:
# Put the districts in the same CRS as the precincts before assigning.
to_math_crs(ms_districts_lower)

In [1110]:
# Assign every Mississippi precinct to a district.
ms_precinct_to_district_assignment = maup.assign(ms_precinct_level_data, ms_districts_lower)
# Add the assigned districts as a column of the `precincts` GeoDataFrame.
# The key is spelled "districtNum" to match the Nevada precinct frame and the
# district frames (it was previously misspelled "distrctNum").
ms_precinct_level_data["districtNum"] = ms_precinct_to_district_assignment

In [1111]:
# Precinct columns summed up to the district level.
ms_district_variables = ['PRE20D', 'PRE20R', 'USS20D', 'USS20R',
                         'vap', 'wvap', 'bvap', 'asianvap', 'hvap']
# variable_grouping returns the frame passed as precinct_data, so
# ms_district_df refers to the same object as ms_districts_lower here.
ms_district_df = variable_grouping(assignment_data=ms_precinct_to_district_assignment,
                  block_data=ms_precinct_level_data, 
                  precinct_data=ms_districts_lower,
                  variables=ms_district_variables)

In [1112]:
# District-level demographic shares and two-party presidential shares.
ms_district_df[pct_votes] = (ms_district_df[['wvap', 'bvap', 'asianvap', 'hvap']].div(ms_district_df['vap'], axis=0)).astype(float)
ms_district_df['state'] = 'Mississippi'
ms_district_df = ms_district_df.assign(
                             pct_rep=lambda row: (row['PRE20R'] / (row['PRE20R'] + row['PRE20D'])).astype(float),
                             pct_dem=lambda row: (row['PRE20D'] / (row['PRE20R'] + row['PRE20D'])).astype(float))

In [1113]:
# Display the district frame's columns after aggregation.
ms_district_df.columns

In [1114]:
# Final column selection and order for the Mississippi district output.
new_column_order = ['Distnum', 'state', 'PRE20D', 'PRE20R', 'USS20D', 'USS20R', 'pct_rep', 'pct_dem', 'vap', 
                    'wvap', 'bvap', 'asianvap', 'hvap', 'pct_wvap', 'pct_bvap', 'pct_asianvap', 'pct_hvap', 'geometry']

# Rename the district identifier to the label used in the Nevada output.
district_column_names = {
    'Distnum': 'districtNum'
}

ms_district_df = reorder_columns(dataframe=ms_district_df, columns=new_column_order)
rename_columns(dataframe=ms_district_df, columns_map=district_column_names)
ms_district_df.sort_values(by='districtNum', inplace=True)

In [1117]:
# Write both Mississippi frames as shapefiles. This has to run while the
# geometry columns still hold geometries, i.e. before the WKT conversion below.
ms_district_df.to_file('ms_district_final_data.shp')
ms_precinct_level_data.to_file('ms_precinct_final_data.shp')

In [1095]:
# Reproject both Mississippi outputs in place to the export CRS.
to_world_crs(ms_district_df)
to_world_crs(ms_precinct_level_data)

In [None]:
# Simplify both geometry columns in place before export.
simplify_geometry(ms_district_df)
simplify_geometry(ms_precinct_level_data)

In [None]:
# Convert both geometry columns to WKT text; the frames no longer hold
# geometry objects after this cell.
geometry_to_wkt(ms_district_df)
geometry_to_wkt(ms_precinct_level_data)

### Exporting Mississippi Data

Just like before, we export the data to a csv for convenience

In [1033]:
# Export the Mississippi tables as CSV. index=False matches the Nevada
# exports and keeps the frame's row labels out of the files as an unnamed
# first column.
ms_district_df.to_csv('ms_district_final_data.csv', index=False)
ms_precinct_level_data.to_csv('ms_precinct_final_data.csv', index=False)

### Assigning Demographic data on the Block Level to Election Results on the Precinct Level

Using the Maup library we assign blocks to precincts

In [823]:
# Scratch export: re-read the Nevada district shapefile and dump it to Excel.
# NOTE(review): this does not match the section heading above and relies on
# the working directory set near the top of the notebook.
test = gpd.read_file('NV Data/nv_sldl_2021/nv_sldl_2021.shp')

test.to_excel('nv_districts.xlsx')