In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import folium
import os
import maup
import multiprocessing as mp
from pyproj import CRS

In [None]:
# Derive the sibling `client/public` folder from the notebook's working
# directory. The chdir below is left disabled, so the working directory is
# unchanged by this cell.
current_directory = os.getcwd()
move_up = os.path.dirname(current_directory)
client_dir = os.path.join(move_up, 'client')
data_dir = os.path.join(client_dir, 'public')
# new_path = os.chdir(os.path.join(data_dir, 'MS'))

# Sanity check: where are we, and which files are visible from here?
print(current_directory)
print(os.listdir(current_directory))

In [None]:
# Block-level shapefile from the `ms_pl2020_b` folder.
# NOTE(review): the `h1p5` suffix presumably identifies which census tables
# are included — confirm against the dataset's data dictionary.
block_h1p5_shp = 'ms_pl2020_b/ms_pl2020_h1p5_b.shp'
block_data_bigfile1 = gpd.read_file(block_h1p5_shp)

In [None]:
# Inspect the attribute schema of the block shapefile loaded above.
display(block_data_bigfile1.columns)

In [None]:
# Second block-level shapefile from the same `ms_pl2020_b` folder.
# NOTE(review): the `p1` suffix presumably refers to the race table — confirm.
block_p1_shp = 'ms_pl2020_b/ms_pl2020_p1_b.shp'
race_data_bigfile = gpd.read_file(block_p1_shp)

In [None]:
# Preview only the first rows. Rendering the whole block-level frame (one row
# and one geometry per census block) bloats the saved notebook without adding
# information; the shape line gives the overall size.
display(race_data_bigfile.head())
display(race_data_bigfile.shape)

# Data

---

- [ ] Find Precinct Data (Nevada + Mississippi)
- [ ] Find State Assembly Shapefile
- [ ] Demographic Population

# Pre-processing

---

- [ ] Read ShapeFile
- [ ] Convert ShapeFile -> GeoJSON
- [ ] Multi-threaded and Parallelize Code
- [ ] 

### Mississippi State Assembly Data Processing

In [None]:
# Load the district boundaries and take a quick look in their native CRS.
districts_geojson = 'ms_State_Assembly_2022.geojson'
ms_districts_df = gpd.read_file(districts_geojson)
ms_districts_df.plot()

# plt.show()

# Reproject to WGS84 lon/lat (EPSG:4326) by rebinding the name rather than
# mutating in place, then show the resulting CRS and schema.
ms_districts_df = ms_districts_df.to_crs(epsg=4326)
display(ms_districts_df.crs, ms_districts_df.columns)

# ms_districts_df.to_file('ms_State_Assembly_2022.geojson', driver='GeoJSON')


Data gathered from [Redistricting Data Hub](https://redistrictingdatahub.org/dataset/2022-mississippi-senate-districts-approved-plan/)

### Demographic Data on Block Level

In [None]:
# Block-level demographic layer (GeoJSON).
block_demo_geojson = 'ms-block-demo-2020.geojson'
ms_block_race_df = gpd.read_file(block_demo_geojson)

# ms_block_race_df['area'] = ms_block_race_df['geometry'].area
# ms_block_race_df.explore('area', legend=False)
# ms_block_race_df.explore('STATEFP20', legend=False)

# Quick visual check, then a preview of the rows and the full column list.
ms_block_race_df.plot()
display(ms_block_race_df.head(), ms_block_race_df.columns)

# Alternative block-group source that was evaluated (kept for provenance):
# https://redistrictingdatahub.org/dataset/mississippi-2022-select-race-data-from-the-american-community-survey-20182022-at-the-block-group-level/
# diff_block_df = gpd.read_file('MS/ms_race_2022_bg/ms_race_2022_bg.shp')
# display(diff_block_df.isna().sum())
# display(diff_block_df.columns)


# fig, (ax1,ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 16))
# ax1 = diff_block_df.plot(ax=ax1)
# ax2 = ms_block_race_df.plot(ax=ax2)

Data from [Districtr](https://districtr.org/plan)

### General Elections Precinct Data + Geometry

In [None]:
# 2022 general-election precinct boundaries and results.
prec_2022_shp = 'ms_gen_22_prec/ms_gen_22_prec.shp'
ms_prec_bound_2022_df = gpd.read_file(prec_2022_shp)
# ms_prec_bound_2022_df.to_file('msGenPrecBound2022', driver='GeoJSON')
# ms_prec_bound_2022_df.explore()

display(ms_prec_bound_2022_df.head())
# Show both CRSs together: the precinct and block layers must share a CRS
# before they can be overlaid.
display(ms_prec_bound_2022_df.crs, ms_block_race_df.crs)

# Side-by-side maps: precincts on the left, census blocks on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 16))
for frame, axis in ((ms_prec_bound_2022_df, ax1), (ms_block_race_df, ax2)):
    frame.plot(ax=axis)

In [None]:
# Rich display of the precinct GeoDataFrame loaded in the previous cell.
ms_prec_bound_2022_df

Data from [Redistricting Hub](https://redistrictingdatahub.org/dataset/mississippi-2022-general-election-precinct-level-results-and-boundaries/)

In [None]:
# Build the working precinct and block layers as reprojected COPIES.
# Plain assignment followed by `to_crs(..., inplace=True)` would only alias the
# frames loaded above and silently reproject them as well, leaving earlier
# outputs stale and making this cell depend on how often it has been run.
#
# NOTE(review): EPSG:32030 appears to be a NAD27 state-plane zone for Rhode
# Island rather than a Mississippi-specific projection — confirm. A later cell
# displays `estimate_utm_crs()`, which may be the better choice; if the code is
# changed, change it everywhere so both layers stay in the same CRS.
PROJECTED_EPSG = 32030

ms_prec_data = ms_prec_bound_2022_df.to_crs(PROJECTED_EPSG)
display(ms_prec_data.columns)

ms_block_data = ms_block_race_df.to_crs(PROJECTED_EPSG)
display(ms_block_data.columns)

display(ms_prec_data.crs)


In [None]:
# Run maup's diagnostic report on the precinct layer before it is used for
# block -> precinct assignment. The block-layer check and the raw per-row
# validity print are left disabled.
# maup.doctor(ms_block_data)
maup.doctor(ms_prec_data)
# print(ms_prec_data.geometry.is_valid)

In [None]:
# Show the UTM zone geopandas would pick for this layer, for comparison with
# the projected CRS actually used below.
display(ms_prec_data.estimate_utm_crs())

# Repair the precinct layer in the projected CRS. The reprojection is chained
# instead of done in place so that any other name bound to the same
# GeoDataFrame object is not modified as a side effect of this cell.
ms_prec_data = maup.smart_repair(ms_prec_data.to_crs(epsg=32030))

In [None]:
# Assign every census block to a precinct. The result is later used as the
# group key when block-level counts are summed per precinct.
# (A bare `ms_prec_data` expression used to open this cell; because it was not
# the last expression it displayed nothing, so it has been removed.)
block_prec_aggr = maup.assign(ms_block_data, ms_prec_data)

# Columns present on both layers: aggregating block values into the precinct
# frame would overwrite these, so list them before doing it.
display(ms_prec_data.columns.intersection(ms_block_data.columns))


In [None]:
# Show the precinct layer, then persist it as GeoJSON.
# NOTE(review): the file is written in whatever CRS the frame currently
# carries; if that is a projected CRS, consumers that expect lon/lat GeoJSON
# will need it reprojected — confirm what downstream readers expect.
fixed_prec_geojson = "fixed_ms_prec_geom.geojson"
display(ms_prec_data)
ms_prec_data.to_file(filename=fixed_prec_geojson, driver="GeoJSON")

In [None]:
# Show the precinct and block schemas together (precinct first, then block).
display(ms_prec_data.columns, ms_block_data.columns)

### Redistricting Hub Precinct Level Data + Districtr Block Demographic Data provided better geometry

In [None]:
# Demographic count columns read from the block layer.
variables = ['TOTPOP', 'WHITE', 'BLACK', 'ASIAN', 'HISP']

# Sum the block-level counts per precinct, using the block -> precinct
# assignment as the group key, and attach the totals to the precinct frame.
# The assignment aligns rows on the precinct index.
block_counts = ms_block_data[variables]
prec_counts = block_counts.groupby(block_prec_aggr).sum()
ms_prec_data[variables] = prec_counts


# display(ms_prec_data[variables].head())
# display(ms_prec_data.isna().sum())
# display(ms_prec_data)
# ms_prec_data.plot()

In [None]:
# Per-candidate vote columns, grouped by the party total they feed into.
# NOTE(review): the letter after the district number looks like a party code
# (D / R / L); 'L' likely stands for Libertarian, so the '2022_LIBERAL' column
# name may be a misnomer — confirm before renaming, downstream code may use it.
dem_cols = ['GCON01DBLA', 'GCON02DTHO', 'GCON03DYOU', 'GCON04DDUP']
rep_cols = ['GCON01RKEL', 'GCON02RFLO', 'GCON03RGUE', 'GCON04REZE']
lib_cols = ['GCON04LJOH']

# Row-wise party totals across the per-candidate columns.
ms_prec_data['2022_DEMOCRATIC'] = ms_prec_data[dem_cols].sum(axis=1)
ms_prec_data['2022_REPUBLICAN'] = ms_prec_data[rep_cols].sum(axis=1)
ms_prec_data['2022_LIBERAL'] = ms_prec_data[lib_cols].sum(axis=1)

# display(ms_prec_data)

# Each precinct geometry buffered by 200 units of the frame's current CRS.
ms_prec_data['border'] = ms_prec_data.buffer(200)

# ms_prec_data['border'].plot()
# One sub-frame per distinct index value.
rows = [group for _, group in ms_prec_data.groupby(ms_prec_data.index)]

# Slimmed-down copy without the per-candidate columns and two admin fields.
new_prec_data = ms_prec_data.drop(
    columns=dem_cols + rep_cols + lib_cols + ['POLL_LOC', 'CNTY_CODE']
)

new_prec_data

In [None]:
# Choropleth of the per-precinct 'WHITE' count, orange-red ramp with a legend.
new_prec_data.plot(column='WHITE', cmap='OrRd', legend=True)

# 2023 General Election Precinct-Level Results

In [32]:
# 2023 general-election precinct results (tabular CSV).
# NOTE(review): this is a plain CSV read through geopandas; the columns may
# come back as text and there is no real geometry — check dtypes before doing
# arithmetic on vote counts, or consider `pd.read_csv` (watch for ID columns
# with leading zeros).
gen_2023_csv = 'ms_gen_2023_prec/ms_gen_2023_prec.csv'
ms_gen_23 = gpd.read_file(gen_2023_csv)
ms_gen_23.shape

ATG and GOV data from [Redistricting Hub](https://redistrictingdatahub.org/dataset/mississippi-2023-general-election-precinct-level-results/)

In [33]:
# 2020 TIGER/Line voting-district (VTD) shapefile for state FIPS 28.
vtd_shp = 'tl_2020_28_vtd20/tl_2020_28_vtd20.shp'
ms_vtd = gpd.read_file(vtd_shp)
# display(ms_vtd.columns)
ms_vtd.shape

In [None]:
# Export the VTD attribute table to a spreadsheet for manual inspection.
# The active geometry column is dropped first: Excel writers cannot serialize
# shapely geometry objects, and polygon text would not fit in a cell anyway.
# The shapefile remains the source of truth for the shapes.
ms_vtd.drop(columns=ms_vtd.geometry.name).to_excel('ms_vtd.xlsx')

VTD Data from [Data.gov](https://catalog.data.gov/dataset/tiger-line-shapefile-2020-state-mississippi-voting-districts)

In [34]:
# VEST 2020 precinct boundaries with election results; list the schema.
vest_2020_shp = 'ms_vest_20/ms_vest_20.shp'
ms_vest_20 = gpd.read_file(vest_2020_shp)
ms_vest_20.columns

VEST Data from [Redistricting Hub](https://redistrictingdatahub.org/dataset/vest-2020-mississippi-precinct-and-election-results/)

# Exploratory Data Analysis

- [ ] Precinct Neighbors
- [ ] MGGG Algorithm
- [ ] Calculate Election Winners
- [ ] Identify Opportunity Districts
- [ ] **Ecological Inference**
