Based on code for All Years_Judicial Voter Analysis_V2

In [13]:
#Check my working directory (notebook shell escape; the printed path is machine-specific)
!pwd

/Users/amy/code/injustice_watch/analysis


In [14]:
import pandas as pd

## Define Functions

In [15]:
def remove_ballot_measures(ballot_measures_df,df):
    """Return the rows of ``df`` whose RACE is not listed as a ballot measure.

    ballot_measures_df: frame whose RACE column holds the ballot-measure races.
    df: frame with a RACE column to be filtered.

    Returns the selection of ``df`` rows whose RACE value does not appear in
    ``ballot_measures_df.RACE``; the original row labels are kept.
    """
    is_ballot_measure = df['RACE'].isin(ballot_measures_df['RACE'])
    return df.loc[~is_ballot_measure]

In [16]:
def tag_subcircuit(df):
    """Label every row as a subcircuit race or not, based on its RACE text.

    A new 'Subcircuit' column is written onto ``df`` itself, holding either
    'Subcircuit' or 'Not Subcircuit', and the same frame is returned.
    """
    def label_for(race_name):
        #'Sub' is a prefix of 'Subcircuit', so this one test covers both the
        #spelled-out form and the abbreviated 2006 notation
        return 'Subcircuit' if 'Sub' in race_name else 'Not Subcircuit'

    df['Subcircuit'] = df['RACE'].apply(label_for)
    return df

In [17]:
def create_all_races(clean_df):
    """Aggregate the rows of ``clean_df`` to one row per (RACE, WARD) pair.

    clean_df: frame with RACE, WARD, REGISTERED VOTERS, BALLOTS CAST, VOTES
        and Subcircuit columns. It is not modified.

    Returns a frame indexed by 'key' (the RACE text followed by the WARD as
    text) with columns WARD, REGISTERED VOTERS, BALLOTS CAST and Subcircuit
    taken from the first row of each group and VOTES summed over the group.
    Rows are ordered by 'key'.
    """
    #Group on RACE and WARD as two separate keys. Gluing them into a single
    #string first is ambiguous: 'Sub 1' + '11' and 'Sub 11' + '1' both give
    #'Sub 111', which would silently merge two different races into one row.
    #Building the keys as local Series also keeps the caller's frame untouched.
    ward_text = clean_df['WARD'].map(str)
    by_race_and_ward = clean_df.groupby([clean_df['RACE'], ward_text], sort=False)
    all_races_df = by_race_and_ward.agg({'WARD': 'first','REGISTERED VOTERS': 'first',
                                         'BALLOTS CAST': 'first','VOTES': 'sum',
                                         'Subcircuit':'first'})

    #Flatten back to the single-level 'key' index callers already get, and
    #restore the key ordering (mergesort is stable, so colliding labels keep
    #their first-seen order)
    all_races_df.index = pd.Index([race + ward for race, ward in all_races_df.index],
                                  name='key', dtype=object)
    return all_races_df.sort_index(kind='mergesort')

In [18]:
def create_ward_df(all_races_df):
    """Summarise all judicial races by ward.

    all_races_df: frame with WARD, REGISTERED VOTERS, BALLOTS CAST and VOTES
        columns (one row per race per ward).

    Returns one row per WARD with 'Registered Voters' and 'Ballots Cast'
    (first value in the ward), 'Number of Races' (row count in the ward) and
    'Sum of Votes'. WARD is an ordinary column so the result can be merged on it.
    """
    how_to_aggregate = {'REGISTERED VOTERS': 'first','BALLOTS CAST': 'first',
                        'WARD':'count','VOTES': 'sum'}
    #Decapitalized output names; the WARD count becomes the number of races
    display_names = {'REGISTERED VOTERS': 'Registered Voters',
                     'BALLOTS CAST': 'Ballots Cast',
                     'WARD': 'Number of Races',
                     'VOTES': 'Sum of Votes'}

    per_ward = all_races_df.groupby(by=['WARD']).agg(how_to_aggregate)
    #The rename must come first: it frees the WARD name so the group index
    #can be moved back into a WARD column
    return per_ward.rename(columns=display_names).reset_index()

For subcircuits

In [19]:
def create_subcircuit_ward_df(subcircuit_races_df):
    """Summarise the subcircuit races by ward.

    subcircuit_races_df: frame with WARD, REGISTERED VOTERS, BALLOTS CAST and
        VOTES columns.

    Returns one row per WARD with 'Registered Voters' and 'Ballots Cast'
    (first value in the ward), 'Number of Subcircuit Races' (row count in the
    ward) and 'Sum of Subcircuit Votes'. WARD is an ordinary column.
    """
    how_to_aggregate = {'REGISTERED VOTERS': 'first','BALLOTS CAST': 'first',
                        'WARD':'count','VOTES': 'sum'}
    display_names = {'REGISTERED VOTERS': 'Registered Voters',
                     'BALLOTS CAST': 'Ballots Cast',
                     'WARD': 'Number of Subcircuit Races',
                     'VOTES': 'Sum of Subcircuit Votes'}

    per_ward = subcircuit_races_df.groupby(by=['WARD']).agg(how_to_aggregate)
    #Rename before reset_index so the WARD name is free for the index column
    return per_ward.rename(columns=display_names).reset_index()

For circuits

In [20]:
def create_circuit_ward_df(circuit_races_df):
    """Summarise the circuit (non-subcircuit) races by ward.

    circuit_races_df: frame with WARD, REGISTERED VOTERS, BALLOTS CAST and
        VOTES columns.

    Returns one row per WARD with 'Registered Voters' and 'Ballots Cast'
    (first value in the ward), 'Number of Circuit Races' (row count in the
    ward) and 'Sum of Circuit Votes'. WARD is an ordinary column.
    """
    how_to_aggregate = {'REGISTERED VOTERS': 'first','BALLOTS CAST': 'first',
                        'WARD':'count','VOTES': 'sum'}
    display_names = {'REGISTERED VOTERS': 'Registered Voters',
                     'BALLOTS CAST': 'Ballots Cast',
                     'WARD': 'Number of Circuit Races',
                     'VOTES': 'Sum of Circuit Votes'}

    per_ward = circuit_races_df.groupby(by=['WARD']).agg(how_to_aggregate)
    #Rename before reset_index so the WARD name is free for the index column
    return per_ward.rename(columns=display_names).reset_index()

Prepare demographics data and clean and join to final df

In [21]:
def prep_demos(final_df,demo_df):
    """Derive demographic columns and inner-join them onto ``final_df`` by ward.

    final_df: ward-level frame with a WARD column.
    demo_df: demographics frame with a ward column, 'Total Population', the
        four citizen-over-18 count columns, the four race/origin count columns
        and the household/income/tract columns dropped below.

    Neither input frame is modified. Returns the inner join of ``final_df``
    with the cleaned demographics (WARD == ward), which adds
    'Citizens Over 18' and the 'Pct White/Black/Latino/Asian' shares.

    Raises whatever ``astype(int)`` raises if a ward value cannot be cast.
    """
    #Work on a copy so the derived columns are not written into the caller's frame
    demo_df = demo_df.copy()

    citizen_cols = ['Native Born, Male, Over 18','Naturalized U.S. Citizen, Male, Over 18',
                    'Native Born, female, Over 18','Naturalized U.S. Citizen, Female, Over 18']
    race_cols = {'Pct White': 'Not Hispanic or Latino Origin, Whites',
                 'Pct Black': 'Not Hispanic or Latino Origin, Blacks',
                 'Pct Latino': 'Hispanic or Latino Origin',
                 'Pct Asian': 'Not Hispanic or Latino Origin, Asians'}
    other_unused_cols = ['number of tracts','Aggregate household income in the past 12 months',
                         'Total households','mean household income']

    #Add calculated VAP column (plain addition, so a missing component stays missing)
    demo_df['Citizens Over 18'] = (demo_df[citizen_cols[0]] + demo_df[citizen_cols[1]]
                                   + demo_df[citizen_cols[2]] + demo_df[citizen_cols[3]])

    #Add calculated race columns as a share of total population
    for pct_col, count_col in race_cols.items():
        demo_df[pct_col] = demo_df[count_col]/demo_df['Total Population']

    #Drop all absolute race columns, disaggregated citizen, income and household stuff, and number of tracts
    demo_df = demo_df.drop(citizen_cols + list(race_cols.values()) + other_unused_cols, axis=1)

    #Join demographics with final. astype returns a new Series, so the result
    #has to be assigned back for the join keys to actually share an integer dtype.
    final_df = final_df.assign(WARD=final_df['WARD'].astype(int))
    demo_df['ward'] = demo_df['ward'].astype(int)
    joined_df = pd.merge(final_df, demo_df, how='inner', left_on = 'WARD', right_on = 'ward')

    return joined_df

Max votes is the maximum number of votes if 100% of people who cast a ballot in an election voted in 100% of judicial races.

Theoretically possible votes is the maximum number of votes if 100% of voting-age citizens (the 'Citizens Over 18' column) voted in 100% of judicial races.

Judicial participation rate is the complement of the roll-off rate (1 − roll-off): the sum of votes cast in judicial races divided by max votes.

Turnout is ballots cast over total registered voters.

In [32]:
def add_calculations(joined_df):
    """Return a copy of ``joined_df`` with the calculated vote columns appended.

    For each scope (ALL, SUBCIRCUIT, CIRCUIT) the copy gains:
      'Max Votes <scope>'              = Ballots Cast * number of races
      'Theoretical Votes <scope>'      = Citizens Over 18 * number of races
      'Judicial Participation <scope>' = sum of votes / Max Votes
      'Judicial Turnout <scope>'       = sum of votes / Theoretical Votes
    plus 'Overall Turnout' = Ballots Cast / Registered Voters.

    ``joined_df`` itself is left unchanged.
    """
    full_df = joined_df.copy()

    #(column suffix, race-count column, vote-sum column) for each scope
    scopes = [('ALL', 'Number of Races', 'Sum of Votes'),
              ('SUBCIRCUIT', 'Number of Subcircuit Races', 'Sum of Subcircuit Votes'),
              ('CIRCUIT', 'Number of Circuit Races', 'Sum of Circuit Votes')]

    #One pass per metric (not per scope) so the new columns are appended in
    #metric order: all Max Votes, then all Theoretical Votes, and so on
    for suffix, races_col, _ in scopes:
        full_df['Max Votes ' + suffix] = full_df['Ballots Cast']*full_df[races_col]

    for suffix, races_col, _ in scopes:
        full_df['Theoretical Votes ' + suffix] = full_df['Citizens Over 18']*full_df[races_col]

    for suffix, _, votes_col in scopes:
        full_df['Judicial Participation ' + suffix] = full_df[votes_col]/full_df['Max Votes ' + suffix]

    for suffix, _, votes_col in scopes:
        full_df['Judicial Turnout ' + suffix] = full_df[votes_col]/full_df['Theoretical Votes ' + suffix]

    full_df['Overall Turnout'] = full_df['Ballots Cast']/full_df['Registered Voters']

    return full_df

## Loop through all years

Establish pathnames for batch loading

In [33]:
#Single place to change where the primary-election csv files live
PRIMARY_DATA_DIR = '/Users/amy/Code/injustice_watch/Judicial General Data/Dem Primaries'

#Election years that have both a results file and a demographics file
PRIMARY_YEARS = ['2006','2008','2010','2012','2014','2016','2018','2020']

#Results files, one per year, in the same order as PRIMARY_YEARS
judicial_list = [PRIMARY_DATA_DIR + '/judicial_democratic_primary_' + year + '.csv'
                 for year in PRIMARY_YEARS]

#Ward demographics files, one per year, in the same order as PRIMARY_YEARS
demographics_list = [PRIMARY_DATA_DIR + '/Demographics/ward_' + year + '_primary.csv'
                     for year in PRIMARY_YEARS]

Loop through all years. Return a dictionary with the year and the df.

In [34]:
#Create two dictionaries to store results in, keyed by election year
full_df_dict = {}
short_df_dict = {}

#Create a list of years to index for the file names
years = ['2006','2008','2010','2012','2014','2016','2018','2020']

#The results files, demographics files and years are consumed in parallel,
#so fail loudly if the three lists ever get out of step
assert len(judicial_list) == len(demographics_list) == len(years), \
    'judicial_list, demographics_list and years must have the same length'

#Columns ward_df already carries; dropped from the other ward frames before merging
shared_cols = ['Registered Voters','Ballots Cast']

#Columns left out of the short version: the duplicate join key and the max/theoretical counts
short_drop_cols = ['ward','Theoretical Votes ALL',
                   'Theoretical Votes SUBCIRCUIT','Theoretical Votes CIRCUIT',
                   'Max Votes ALL','Max Votes SUBCIRCUIT',
                   'Max Votes CIRCUIT']

#Loop through years and call on functions above
for index, (pathname, demo_pathname, current_year) in enumerate(zip(judicial_list, demographics_list, years)):
    print(index)

    #Load data
    df = pd.read_csv(pathname)

    #Clean data
    df = tag_subcircuit(df)

    #Create judicial races dataframes
    all_races_df = create_all_races(df)
    subcircuit_races_df = all_races_df[all_races_df['Subcircuit'] == 'Subcircuit']
    circuit_races_df = all_races_df[all_races_df['Subcircuit'] == 'Not Subcircuit']

    #Create ward dataframes
    ward_df = create_ward_df(all_races_df)
    subcircuit_ward_df = create_subcircuit_ward_df(subcircuit_races_df)
    circuit_ward_df = create_circuit_ward_df(circuit_races_df)

    #Join the subcircuit and circuit race counts and vote sums onto ward_df.
    #Outer joins keep wards that have no subcircuit (or no circuit) races; their
    #missing counts come through as NaN.
    merge1 = pd.merge(ward_df, subcircuit_ward_df.drop(shared_cols,axis=1), how='outer', on='WARD')
    final_df = pd.merge(merge1, circuit_ward_df.drop(shared_cols,axis=1), how='outer', on='WARD')

    #Load demographics data
    demo_df = pd.read_csv(demo_pathname)

    #Clean and join demographics data
    joined_df = prep_demos(final_df,demo_df)

    #Add calculations
    full_df = add_calculations(joined_df)

    #Replace NaN's from merges
    full_df = full_df.fillna('')

    #Create a short version with just demos and calculated columns by ward and not max/theoretical
    short_df = full_df.drop(short_drop_cols,axis=1)

    #Add this year's dataframe to the dictionary with the year as the key
    print(current_year)
    full_df_dict[current_year] = full_df
    short_df_dict[current_year] = short_df

0
2006
1
2008
2
2010
3
2012
4
2014
5
2016
6
2018
7
2020


In [40]:
#Inspect: spot-check the first rows of the 2010 short dataframe
test = short_df_dict['2010']
test.head()

Unnamed: 0,WARD,Registered Voters,Ballots Cast,Number of Races,Sum of Votes,Number of Subcircuit Races,Sum of Subcircuit Votes,Number of Circuit Races,Sum of Circuit Votes,Total Population,...,Pct Black,Pct Latino,Pct Asian,Judicial Participation ALL,Judicial Participation SUBCIRCUIT,Judicial Participation CIRCUIT,Judicial Turnout ALL,Judicial Turnout SUBCIRCUIT,Judicial Turnout CIRCUIT,Overall Turnout
0,1,30837,4933,11,36495,,,11,36495,54613,...,0.056433,0.358907,0.034607,0.672558,,0.672558,0.084204,,0.084204,0.15997
1,2,38872,9049,12,71480,1.0,4.0,11,71476,56360,...,0.419163,0.057488,0.12351,0.658268,0.000442038,0.71807,0.14068,9.44688e-05,0.15346,0.23279
2,3,24082,6382,12,51226,1.0,86.0,11,51140,36741,...,0.788846,0.064587,0.038622,0.668886,0.0134754,0.728469,0.169593,0.00341663,0.1847,0.265011
3,4,29166,10070,11,82429,,,11,82429,41945,...,0.742091,0.027608,0.062582,0.744146,,0.744146,0.253529,,0.253529,0.345265
4,5,29653,8770,14,71608,3.0,352.0,11,71256,41018,...,0.769272,0.023648,0.030621,0.583222,0.0133789,0.738634,0.173491,0.00397983,0.219721,0.295754


## Export to csv as 'primary df_[YEAR].csv'

Short df

In [41]:
#Write each year's short dataframe to its own csv, named by the dictionary key
for year_key, year_df in short_df_dict.items():
    csv_path = '/Users/amy/Code/injustice_watch/analysis/primary df_{}.csv'.format(year_key)
    year_df.to_csv(csv_path)