In [230]:
import csv
import collections
import os
import pickle

In [272]:
hor_results = collections.defaultdict(lambda: collections.defaultdict(collections.Counter))
precinct_results = collections.defaultdict(
        lambda: collections.defaultdict(
            collections.Counter
        )
)
fieldnames = ["county_abbr", "county_name", "unk", "election_date", "election", "precinct_number", 
              "polling_location", "unk", "unk", "unk", "unk", "ballot_measure", "district", "unk", 
              "candidate", "party", "unk", "unk", "vote_count"]

directory = "data"
for subdir, dirs, files in os.walk(directory):
    for f in files:
        if "2012" not in f:
            try:
                if f.endswith('.txt'):
                    with open(os.path.join(subdir, f), encoding = "ISO-8859-1") as csvfile:
                        dr = csv.DictReader(csvfile, fieldnames=fieldnames, delimiter='\t')
                        for row in dr:
                            if row["ballot_measure"] == "U.S. Representative" or row["ballot_measure"] == "Representative in Congress":
                                hor_results[row["election"]][row["district"]][row["party"]] += int(row["vote_count"])
                                precinct_number = int(float(row["precinct_number"]))

                                # The dataset subdivides precincts for some elections but not for others.
                                # To get precinct data across all elections, we unify the subdivisions.
                                precinct_results[
                                    row["county_abbr"] + str(precinct_number)][
                                    row["election"]][
                                    row["party"]] += int(row["vote_count"])
                                precinct_results[
                                    row["county_abbr"] + str(precinct_number)][
                                    row["election"]]["district"] = row["district"]
            except Exception as e:
                print("Corrupt file: {} - {}".format(f, e))


Corrupt file: BAK_PctResults20141104.txt - could not convert string to float: '1A'
Corrupt file: BRO_PctResults20141104.txt - could not convert string to float: 'A001'
Corrupt file: GIL_PctResults20141104.txt - could not convert string to float: '1A'
Corrupt file: UNI_PctResults20141104.txt - could not convert string to float: '1A & 1B'
Corrupt file: BAK_PctResults20161108.txt - could not convert string to float: '1A'
Corrupt file: BRO_PctResults20161108.txt - could not convert string to float: 'A001'
Corrupt file: GIL_PctResults20161108.txt - could not convert string to float: '1A'
Corrupt file: UNI_PctResults20161108.txt - could not convert string to float: '1A & 1B'


In [273]:
two_years_results = {key: results for key, results in precinct_results.items() if len(results) == 2}

In [274]:
to_del = set()
# We remove small precincts from the analysis
for key, value in two_years_results.items():
    for election, results in value.items():
        if results["DEM"] <= 5 and results["REP"] <= 5:
            to_del.add(key)

for key in to_del:
    del two_years_results[key]

In [275]:
for key, value in two_years_results.items():
    for election, results in value.items():
        if "2012 General Election" not in election:
            two_years_results[key][election]["total"] = results["DEM"] + results["REP"]
            two_years_results[key][election]["ratio"] = results["DEM"] / (results["DEM"] + results["REP"])
    total_dem_votes = sum([int(votes["DEM"]) for votes in value.values()])
    total_rep_votes = sum([int(votes["REP"]) for votes in value.values()])
    two_years_results[key]["ratio"] = total_dem_votes / (total_dem_votes + total_rep_votes)
    total_2014_votes = value["2014 General Election"]["total"]
    total_2016_votes = value["2016 General Election"]["total"]
    voter_increase = max(0, total_2016_votes - total_2014_votes)
    two_years_results[key]["voter_increase"] = voter_increase
    two_years_results[key]["midterm_voter_percentage"] = float(total_2014_votes)/total_2016_votes
    two_years_results[key]["dem_missing_voters"] = two_years_results[key]["ratio"] * voter_increase
    two_years_results[key]["rep_missing_voters"] = (1 - two_years_results[key]["ratio"]) * voter_increase
    two_years_results[key]["dem_delta"] = two_years_results[key]["dem_missing_voters"] - two_years_results[key]["rep_missing_voters"]
    two_years_results[key]["new_d_voter_odds"] = two_years_results[key]["dem_delta"]/value["2016 General Election"]["total"]

In [269]:
competitive_districts = [6, 15, 16, 18, 25, 26, 27]
comp_dist_names = [" District {}".format(dist) for dist in competitive_districts]
districts_dict = collections.defaultdict(dict)
for district_name in comp_dist_names:
    district_results = {key: value for key, value in two_years_results.items() if value["2016 General Election"]["district"] == district_name}
    districts_dict[district_name] = sorted(district_results.items(), key=lambda x: x[1]["dem_delta"]/x[1]["2016 General Election"]["total"], reverse=True)
    

In [249]:

for district, results in districts_dict.items():
    print(district)
    print(results[0])
    print()
    

 District 6
('VOL620', defaultdict(<class 'collections.Counter'>, {'2014 General Election': Counter({'REP': 6, 'district': ' District 6', 'DEM': 383, ' ': 10, 'total': 389, 'ratio': 0.9845758354755784}), '2016 General Election': Counter({'REP': 55, 'district': ' District 6', 'DEM': 1343, ' ': 57, 'total': 1398, 'ratio': 0.9606580829756796}), 'ratio': 0.965864577504197, 'voter_increase': 1009, 'midterm_voter_percentage': 0.2782546494992847, 'dem_missing_voters': 974.5573587017348, 'rep_missing_voters': 34.442641298265265, 'dem_delta': 940.1147174034695}))

 District 15
('HIL753', defaultdict(<class 'collections.Counter'>, {'2014 General Election': Counter({'REP': 372, 'district': ' District 15', 'DEM': 960, ' ': 64, 'total': 1332, 'ratio': 0.7207207207207207}), '2016 General Election': Counter({'REP': 691, 'district': ' District 15', 'DEM': 1522, ' ': 149, 'total': 2213, 'ratio': 0.6877541798463624}), 'ratio': 0.7001410437235543, 'voter_increase': 881, 'midterm_voter_percentage': 0.6018

In [262]:
fieldnames = [
    "Precinct", 
    "DEM votes 2014", 
    "REP votes 2014", 
    "DEM votes 2016", 
    "REP votes 2016", 
    "DEM vote share", 
    "midterm voter percentage", 
    "DEM missing voters", 
    "REP missing voters", 
    "DEM potential gain"
]
def dict_writer_formatter(pickups_list):
    output_rows = []
    for precinct_data in pickups_list:
        results = precinct_data[1]
        row_dict = {}
        row_dict["Precinct"] = precinct_data[0]
        row_dict["DEM votes 2014"] = results["2014 General Election"]["DEM"]
        row_dict["REP votes 2014"] = results["2014 General Election"]["REP"]
        row_dict["DEM votes 2016"] = results["2016 General Election"]["DEM"]
        row_dict["REP votes 2016"] = results["2016 General Election"]["REP"]
        row_dict["DEM vote share"] = results["ratio"]
        row_dict["midterm voter percentage"] = results["midterm_voter_percentage"]
        row_dict["DEM missing voters"] = results["dem_missing_voters"]
        row_dict["REP missing voters"] = results["rep_missing_voters"]
        row_dict["DEM potential gain"] = results["dem_delta"]
        output_rows.append(row_dict)
    return output_rows
    

In [263]:
dict_writer_formatter(districts_dict[" District 6"])

[{'Precinct': 'VOL620',
  'DEM votes 2014': 383,
  'REP votes 2014': 6,
  'DEM votes 2016': 1343,
  'REP votes 2016': 55,
  'DEM vote share': 0.965864577504197,
  'midterm voter percentage': 0.2782546494992847,
  'DEM missing voters': 974.5573587017348,
  'REP missing voters': 34.442641298265265,
  'DEM potential gain': 940.1147174034695},
 {'Precinct': 'VOL614',
  'DEM votes 2014': 1025,
  'REP votes 2014': 236,
  'DEM votes 2016': 1881,
  'REP votes 2016': 496,
  'DEM vote share': 0.7987905442550852,
  'midterm voter percentage': 0.5305006310475389,
  'DEM missing voters': 891.450247388675,
  'REP missing voters': 224.5497526113249,
  'DEM potential gain': 666.9004947773501},
 {'Precinct': 'VOL628',
  'DEM votes 2014': 1080,
  'REP votes 2014': 231,
  'DEM votes 2016': 1813,
  'REP votes 2016': 447,
  'DEM vote share': 0.8101372164659759,
  'midterm voter percentage': 0.5800884955752212,
  'DEM missing voters': 768.8202184262111,
  'REP missing voters': 180.17978157378886,
  'DEM pot

In [267]:
for dist_name in comp_dist_names:
    with open("dist_table{}.csv".format(dist_name.replace(" ", "_")), 'w') as csvfile:
        dictwriter = csv.DictWriter(csvfile, fieldnames)
        dictwriter.writeheader()
        for row in dict_writer_formatter(districts_dict[dist_name]):
            dictwriter.writerow(row)

In [270]:

for district, results in districts_dict.items():
    print(district)
    print(results[0])
    print()
    

 District 6
('VOL620', defaultdict(<class 'collections.Counter'>, {'2014 General Election': Counter({'REP': 6, 'district': ' District 6', 'DEM': 383, ' ': 10, 'total': 389, 'ratio': 0.9845758354755784}), '2016 General Election': Counter({'REP': 55, 'district': ' District 6', 'DEM': 1343, ' ': 57, 'total': 1398, 'ratio': 0.9606580829756796}), 'ratio': 0.965864577504197, 'voter_increase': 1009, 'midterm_voter_percentage': 0.2782546494992847, 'dem_missing_voters': 974.5573587017348, 'rep_missing_voters': 34.442641298265265, 'dem_delta': 940.1147174034695}))

 District 15
('HIL964', defaultdict(<class 'collections.Counter'>, {'2014 General Election': Counter({'REP': 23, 'district': ' District 15', 'DEM': 103, ' ': 5, 'total': 126, 'ratio': 0.8174603174603174}), '2016 General Election': Counter({'REP': 49, 'district': ' District 15', 'DEM': 235, ' ': 15, 'total': 284, 'ratio': 0.8274647887323944}), 'ratio': 0.824390243902439, 'voter_increase': 158, 'midterm_voter_percentage': 0.443661971830