In [None]:
import glob
import pickle

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
import pandas as pd
import geopandas as gpd

from datetime import timedelta, datetime
from matplotlib.colors import ListedColormap
from PIL import Image

In [None]:
# Notebook-wide configuration

# --- Voter file ---
# Full path to the directory that holds every extract of the CO voter file.
voter_file_directory = r'D:\Users\bengen343\Documents\Registration'

# Most voter file fields are irrelevant here; these are the only ones we keep.
dimension_list = [
    'VOTER_ID', 'COUNTY', 'PRECINCT', 'PARTY', 'PREFERENCE',
    'CONGRESSIONAL', 'STATE_SENATE', 'STATE_HOUSE',
]

# Column layout of the fully merged frame: the new district assignments, the
# voter file fields kept above, the district performance history, and finally
# the voter's location.
merged_dimension_list = [
    # New district assignments
    'New Congressional', 'New State Senate', 'New State House',
    # Voter file fields
    *dimension_list,
    # Historic performance of the voter's current district
    'District',
    'rtla-12', 'rtla-14', 'rtla-16', 'rtla-18', 'rtla-20', 'rtla-trend',
    'r-margin-12', 'r-margin-14', 'r-margin-16', 'r-margin-18', 'r-margin-20',
    'Inflection',
    # Voter location
    'Latitude', 'Longitude', 'geometry',
]

# Performance history files
# These CSV files have the historic legislative electoral outcomes, RTLA measures, and implied
# inflection point for each district.
# The methodology for calculating this data is outlined in two posts:
# https://www.constellationpolitical.com/blog/sd20-fallacy/
# https://www.constellationpolitical.com/blog/how-republicans-will-lose-2020-reapportionment/
# NOTE(review): every path below is an absolute, machine-specific Windows path (D: and W: drives);
# the notebook will not run elsewhere until these are pointed at local copies.
hd_csv = r'D:\Users\bengen343\Documents\Constellation Political\Python\2020-co-redistricting\hd-rtla.csv'
sd_csv = r'D:\Users\bengen343\Documents\Constellation Political\Python\2020-co-redistricting\sd-rtla.csv'

# Map files
# Shapefiles read with geopandas further down: the three new district plans plus a county layer
# that create_map draws as faint background borders.
# NOTE(review): the congressional folder is spelled 'Conressional' -- presumably that matches the
# directory on disk; confirm before "correcting" it.
co_congress_shp = r'W:\My Documents\2020 Redistricting\Conressional - First Staff Plan - 20210902\First_Staff_Congressional_Final_20210902.shp'
co_senate_shp = r'D:\Users\bengen343\Documents\Constellation Political\Python\2020-co-redistricting\State Senate - First Staff Plan - 20210913\First_Senate_Staff_Plan_Final.shp'
co_house_shp = r'D:\Users\bengen343\Documents\Constellation Political\Python\2020-co-redistricting\State House - First Staff Plan - 20210913\First_House_Staff_Plan_Final.shp'
co_county_map = r'W:\My Documents\Counties84\Counties84.shp'

# Map Variables
# Logo image opened once here; create_map and create_histogram resize it and draw it on their
# figures as a semi-transparent watermark.
im = Image.open(r'D:\Users\bengen343\Documents\Constellation Political\Constellation Operations\Graphics\CPC Star in LIne.png')
    

In [None]:
# Function to take all the separate files of the current voter file and assemble them into one dataframe
def voterfile_to_df(voter_file_directory):
    """Read every ``*.txt`` extract in a directory into one combined DataFrame.

    Each file is parsed as comma-separated, cp437-encoded text with a header
    row. Malformed rows are skipped with a warning rather than aborting the
    load. Progress and the final row count are printed.

    Parameters
    ----------
    voter_file_directory : str
        Directory containing the voter file extracts.

    Returns
    -------
    pandas.DataFrame
        All extracts stacked in file-name order with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the directory contains no ``*.txt`` files.
    """
    print(f"Beginning to import current voter file at {datetime.now():%H:%M:%S}")

    # Sorted so the row order of the combined frame is the same on every run;
    # glob itself makes no ordering promise.
    _all_files = sorted(glob.glob(voter_file_directory + '/*.txt'))
    if not _all_files:
        # pd.concat([]) would fail with an unhelpful "No objects to concatenate"
        raise ValueError(f"No voter file extracts (*.txt) found in {voter_file_directory!r}")

    _read_options = dict(sep=',', encoding='cp437', index_col=None, header=0, low_memory=False)

    _frames = []
    for _file in _all_files:
        try:
            # pandas >= 1.3 spelling; 'warn' keeps the old "skip but report" behaviour
            _frame = pd.read_csv(_file, on_bad_lines='warn', **_read_options)
        except TypeError:
            # Older pandas does not know on_bad_lines; fall back to the legacy flag
            _frame = pd.read_csv(_file, error_bad_lines=False, **_read_options)
        _frames.append(_frame)

    _output_df = pd.concat(_frames, ignore_index=True)

    # Output the number of registered voters, should take 1-3 minutes probably
    print(f"Total Registration: {len(_output_df):,}")
    print(f"Current voter file loaded at {datetime.now():%H:%M:%S}")

    return _output_df

In [None]:
# Read the complete current voter file into a single data frame
voter_file_df = voterfile_to_df(voter_file_directory=voter_file_directory)

In [None]:
# Narrow the voter file data to only those fields that we're interested in.
# .copy() makes this an independent frame, so the in-place PARTY recode that
# follows edits real data instead of a slice of the full voter file (which
# pandas flags with SettingWithCopyWarning).
# NOTE(review): dimension_list has no 'Latitude'/'Longitude', yet the point
# geometry cell later reads both columns from this frame -- confirm where the
# coordinates are supposed to come from.
voter_file_df = voter_file_df[dimension_list].copy()

In [None]:
# Collapse every registration that is not REP, DEM or UAF into a single 'OTH' bucket
voter_file_df.loc[~voter_file_df['PARTY'].isin(['REP', 'DEM', 'UAF']), 'PARTY'] = 'OTH'

In [None]:
# Read the historic performance tables: one for House Districts (HD), one for Senate Districts (SD)
hd_df = pd.read_csv(hd_csv, sep=',', index_col=None, header=0, low_memory=False)
sd_df = pd.read_csv(sd_csv, sep=',', index_col=None, header=0, low_memory=False)

The core of this methodology is to break down a district's performance to its constituent elements -- its voters -- and then reassemble those into the newly drawn district to see how the new district would behave based on the prior behavior of its new voters. To do this, we'll start by matching each legislative district's past performance to each voter within that district.

I'm mostly going to rely on data from House Districts because they're the smallest geography that isn't impacted by external race dynamics.

In [None]:
# Match the historic HD data to each voter within the district.
# validate='many_to_one' makes pandas raise if a District appears more than
# once in hd_df; without it a duplicated district row would silently duplicate
# every voter in that district and inflate all later counts and averages.
voter_file_df = pd.merge(
    voter_file_df,
    hd_df,
    how='left',
    left_on='STATE_HOUSE',
    right_on='District',
    validate='many_to_one',
)

In [None]:
# Read the three district plans. Each shapefile's DISTRICT column is renamed to
# a plan-specific label so the plans stay distinguishable after joining.
co_congress_map = gpd.read_file(co_congress_shp).rename(columns={'DISTRICT': 'New Congressional'})
co_senate_map = gpd.read_file(co_senate_shp).rename(columns={'DISTRICT': 'New State Senate'})
co_house_map = gpd.read_file(co_house_shp).rename(columns={'DISTRICT': 'New State House'})

In [None]:
# Build a point geometry for every voter from their coordinates
# (x = Longitude, y = Latitude, declared as EPSG:4326)
voter_file_gpd = gpd.GeoDataFrame(
    voter_file_df,
    geometry=gpd.points_from_xy(
        x=voter_file_df['Longitude'],
        y=voter_file_df['Latitude'],
        crs='EPSG:4326',
    ),
)

In [None]:
# Match the voters into their new districts
voter_file_new_gpd = gpd.sjoin(co_congress_map, voter_file_gpd, how='right')

In [None]:
voter_file_new_gpd = gpd.sjoin(co_senate_map, voter_file_gpd, how='right')

In [None]:
voter_file_new_gpd = gpd.sjoin(co_house_map, voter_file_gpd, how='right')

In [None]:
# Keep only the columns named in merged_dimension_list, in that order.
# This raises KeyError if any listed column is absent from the joined frame.
voter_file_new_gpd = voter_file_new_gpd[merged_dimension_list]

In [None]:
# Average historic performance of the voters placed in each new congressional district
voter_file_new_gpd.groupby('New Congressional')[
    ['rtla-12', 'rtla-14', 'rtla-16', 'rtla-18', 'rtla-20', 'rtla-trend', 'Inflection']
].mean()

In [None]:
# Party registration counts within each new congressional district
voter_file_new_gpd.groupby(['New Congressional', 'PARTY'])[['VOTER_ID']].count()

In [None]:
# Average historic performance of the voters placed in each new state senate district
voter_file_new_gpd.groupby('New State Senate')[
    ['rtla-12', 'rtla-14', 'rtla-16', 'rtla-18', 'rtla-20', 'rtla-trend', 'Inflection']
].mean()

In [None]:
# Party registration counts within each new state senate district
voter_file_new_gpd.groupby(['New State Senate', 'PARTY'])[['VOTER_ID']].count()

In [None]:
# Average historic performance of the voters placed in each new state house district
voter_file_new_gpd.groupby('New State House')[
    ['rtla-12', 'rtla-14', 'rtla-16', 'rtla-18', 'rtla-20', 'rtla-trend', 'Inflection']
].mean()

In [None]:
# Party registration counts within each new state house district
voter_file_new_gpd.groupby(['New State House', 'PARTY'])[['VOTER_ID']].count()

In [None]:
# Define a function to map the outcome in different years.
def create_map(_map='', _column='', _year='22'):
    """Draw and save a map of the projected REP/DEM outcome for each new district.

    For every district in ``_map`` the voters assigned to it (taken from the
    module-level ``voter_file_new_gpd``) are averaged, RTLA is projected
    linearly from 2020 to ``_year`` using ``rtla-trend`` per two-year cycle,
    and the district is called 'DEM' when the projected RTLA exceeds the
    district's mean ``Inflection``, otherwise 'REP'.

    Also reads the module-level ``co_county_map`` (county shapefile path) and
    ``im`` (watermark image); neither is modified.

    Parameters
    ----------
    _map : geopandas.GeoDataFrame
        District polygons containing ``_column`` and a ``geometry`` column.
    _column : str
        District label column shared by ``_map`` and ``voter_file_new_gpd``,
        e.g. 'New Congressional'.
    _year : str
        Two-digit election year to project to, e.g. '22'.

    Returns
    -------
    None
        The figure is written to
        '<_year> <last word of _column, lower-cased>_outcome_map.png'.
    """
    # Red & blue for the REP/DEM outcome. Colors are matched to outcomes by
    # position, so the category order is pinned explicitly below; otherwise a
    # map on which only one outcome occurs would take the first color (blue)
    # even when that outcome is REP.
    _outcomes = ['DEM', 'REP']
    _cmap = ListedColormap(['#036ED2', '#D30803'], name='REP', N=2)

    # Initiate the plot for the map
    _f, _ax = plt.subplots(figsize=(13, 15), dpi=300)

    # Average the voter-level performance by new district and attach it to the polygons
    _performance_columns = ['rtla-12', 'rtla-14', 'rtla-16', 'rtla-18', 'rtla-20', 'rtla-trend', 'Inflection']
    _mean_df = voter_file_new_gpd[[_column] + _performance_columns].groupby(_column).mean()
    _map = pd.merge(_map, _mean_df, how='left', on=_column)

    # Calculate where RTLA will be in the year in question.
    # This is just the trend times the number of two-year cycles elapsed since 2020.
    _rtla_column = 'rtla-' + _year
    _outcome_column = 'outcome-' + _year
    _map[_rtla_column] = _map['rtla-20'] + (_map['rtla-trend'] * (int(_year) - 20) / 2)
    # NOTE: a district with no matched voters has NaN performance, and the
    # comparison below is then False, so it is drawn as 'REP'.
    _map[_outcome_column] = np.where(_map[_rtla_column] > _map['Inflection'], 'DEM', 'REP')

    # Plot the map for the districts/year in question
    _map.plot(ax=_ax, column=_outcome_column, categorical=True, categories=_outcomes,
              cmap=_cmap, linewidth=2.0, edgecolor='white')

    # Add in a faint map of the county borders to give better geographic context
    _counties = gpd.GeoDataFrame.from_file(co_county_map)
    _counties.plot(ax=_ax, facecolor='none', edgecolor='grey', linewidth=0.5)

    # Label each district at a point guaranteed to lie inside its polygon.
    # The label is passed positionally because the keyword was renamed between
    # matplotlib releases.
    _label_points = _map['geometry'].representative_point()
    for _district, _point in zip(_map[_column], _label_points):
        _label = _ax.annotate(_district, xy=(_point.x, _point.y),
                              horizontalalignment='center', fontsize=12)
        _label.set_path_effects([PathEffects.withStroke(linewidth=3, foreground='w')])

    # Set plot layout parameters
    _ax.set_axis_off()
    _f.tight_layout()

    # Set the plot title
    _ax.set_title("'" + _year + " Likely Election Results", fontname='Lato', fontsize='24')

    # Add watermark and webpage. The resized copy is kept local so the shared
    # image is never overwritten and every figure scales from the original.
    _watermark = im.resize((2000, 2000))
    _f.figimage(_watermark, 1000, 500, alpha=0.20, zorder=3)
    _f.text(0.5, 0.805, "www.ConstellationPolitical.com", ha='center', va='center',
            fontname='Lato', fontsize='14', color='#686C6D')

    # Export to image file
    _f.savefig(_year + ' ' + _column.split(' ')[-1].lower() + '_outcome_map.png',
               pad_inches=0, bbox_inches='tight')

In [None]:
# Projected '22 outcome for the new congressional districts
create_map(
    _map=co_congress_map,
    _column='New Congressional',
    _year='22',
)

In [None]:
# Projected '30 outcome for the new state senate districts
create_map(
    _map=co_senate_map,
    _column='New State Senate',
    _year='30',
)

In [None]:
# Projected '30 outcome for the new state house districts
create_map(
    _map=co_house_map,
    _column='New State House',
    _year='30',
)

In [None]:
# Create CPC histogram
def create_histogram(df='', race='', district='', rtla=''):
    """Draw and save a histogram of one performance measure within one district.

    The rows of ``df`` where ``df[race] == district`` are selected and the
    ``rtla`` column is binned into 20 bars. Vertical markers show the mean,
    the mean +/- one standard deviation, and the mean of the ``Inflection``
    column for the same rows.

    Reads the module-level ``im`` (watermark image) without modifying it.

    Parameters
    ----------
    df : pandas.DataFrame
        Voter-level data containing ``race``, ``rtla`` and 'Inflection' columns.
    race : str
        District label column to filter on, e.g. 'New Congressional'.
    district : scalar
        Value of ``race`` identifying the district to plot.
    rtla : str
        Name of the column whose distribution is plotted, e.g. 'rtla-20'.

    Returns
    -------
    None
        The figure is written to
        '<last word of race, lower-cased>-<district>-histogram.png'.

    Raises
    ------
    ValueError
        If no row of ``df`` matches ``district``.
    """
    # Scope data
    _district_df = df[df[race] == district]
    if _district_df.empty:
        # Otherwise a mistyped or wrongly-typed district silently saves a blank chart
        raise ValueError(f"No rows where {race!r} == {district!r}; cannot draw histogram")
    _column = _district_df[rtla]

    # Get some basic descriptive statistics
    _mean = _column.mean()
    _std = _column.std()
    _inflection = _district_df['Inflection'].mean()

    # Define the size of the plot area
    _fig, _ax = plt.subplots(figsize=(10, 6), dpi=75)

    # Plot data for y-axis
    _column.plot(kind='hist', ax=_ax, density=False, bins=20, color='#036ED2', edgecolor='white')

    # Plot descriptive statistic markers.
    # With this transform x is in data units and y is a fraction of the axes
    # height, so each label sits halfway up its line.
    _trans = _ax.get_xaxis_transform()
    _outline = [PathEffects.withStroke(linewidth=2, foreground='white')]
    # Mean
    _ax.axvline(x=_mean, color='#D30803')
    _ax.text(_mean + 0.5, 0.5, ('Mean: ' + "{0:,.1f}".format(_mean)), rotation=90,
             color='#D30803', transform=_trans).set_path_effects(_outline)
    # Standard Deviation
    _ax.axvline(x=(_mean + _std), linestyle='dashed', color='#D30803')
    _ax.axvline(x=(_mean - _std), linestyle='dashed', color='#D30803')
    _ax.text(_mean + _std + 0.5, 0.5, 'Std. Dev.', rotation=90,
             color='#D30803', transform=_trans).set_path_effects(_outline)
    # Inflection
    _ax.axvline(x=_inflection, color='#6103D3')
    _ax.text(_inflection + 0.5, 0.5, ('Inflection: ' + "{0:,.1f}".format(_inflection)), rotation=90,
             color='#6103D3', transform=_trans).set_path_effects(_outline)

    # Set the labels and formatting for y-axis
    _ax.set_ylabel("Count", fontname='Lato', fontsize='12')
    _ax.tick_params(axis='y', labelsize=12)
    _ax.yaxis.grid(which='major')
    _ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

    # Set x axes formatting
    _ax.set_xlabel("Bin", fontname='Lato', fontsize='12')
    _ax.tick_params(axis='x', labelsize=12)

    # Set plot layout parameters
    _fig.tight_layout()
    _fig.subplots_adjust(top=0.93)

    # Set plot title
    _fig.suptitle(race.split(' ')[-1] + '-' + str(district) + ' ' + _column.name.upper() + " Performance Distribution",
                  fontname='Lato', fontsize='16', x=0.56)
    _fig.text(0.56, 0.910, 'www.ConstellationPolitical.com', ha='center', va='center', fontname='Lato',
              fontsize='9', color='#686C6D')

    # Set watermark. The resized copy is kept local so the shared image is
    # never overwritten and every figure scales from the original.
    _watermark = im.resize((277, 277))
    _fig.figimage(_watermark, 280, 100, alpha=0.20, zorder=10)

    _fig.savefig(race.split(' ')[-1].lower() + '-' + str(district) + '-histogram.png')

In [None]:
# Distribution of 2020 RTLA among the voters placed in new Congressional District 8
create_histogram(
    df=voter_file_new_gpd,
    race='New Congressional',
    district=8,
    rtla='rtla-20',
)