Skip to content

Commit

Permalink
Loading baseline poll data using the interpolator.
Browse files Browse the repository at this point in the history
  • Loading branch information
j3camero committed Sep 30, 2015
1 parent bc6bff7 commit 188a87c
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 52 deletions.
41 changes: 35 additions & 6 deletions regional_baseline.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,35 @@
,ATL,QC,ON,SK_MB,AB,BC,Canada
cpc,37.46139295,16.5,44.4,54.7875,66.8,45.5,39.6
lpc,29.58301806,14.2,25.3,12.92142857,9.3,13.4,18.9
ndp,29.75,42.9,25.6,28.78883929,16.8,32.5,30.6
gpc,2.960361135,2.1,3.8,3.186160714,5.3,7.7,3.9
bq,0,23.4,0,0,0,0,0
2011-05-02,Canada,ndp,30.6
2011-05-02,Canada,cpc,39.6
2011-05-02,Canada,lpc,18.9
2011-05-02,Canada,bq,0.0
2011-05-02,Canada,gpc,3.9
2011-05-02,ON,ndp,25.6
2011-05-02,ON,cpc,44.4
2011-05-02,ON,lpc,25.3
2011-05-02,ON,bq,0.0
2011-05-02,ON,gpc,3.8
2011-05-02,AB,ndp,16.8
2011-05-02,AB,cpc,66.8
2011-05-02,AB,lpc,9.3
2011-05-02,AB,bq,0.0
2011-05-02,AB,gpc,5.3
2011-05-02,SK_MB,ndp,28.78883929
2011-05-02,SK_MB,cpc,54.7875
2011-05-02,SK_MB,lpc,12.92142857
2011-05-02,SK_MB,bq,0.0
2011-05-02,SK_MB,gpc,3.186160714
2011-05-02,BC,ndp,32.5
2011-05-02,BC,cpc,45.5
2011-05-02,BC,lpc,13.4
2011-05-02,BC,bq,0.0
2011-05-02,BC,gpc,7.7
2011-05-02,ATL,ndp,29.75
2011-05-02,ATL,cpc,37.46139295
2011-05-02,ATL,lpc,29.58301806
2011-05-02,ATL,bq,0.0
2011-05-02,ATL,gpc,2.960361135
2011-05-02,QC,ndp,42.9
2011-05-02,QC,cpc,16.5
2011-05-02,QC,lpc,14.2
2011-05-02,QC,bq,23.4
2011-05-02,QC,gpc,2.1
31 changes: 31 additions & 0 deletions regional_poll_interpolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
import datetime

class RegionalPollInterpolator(object):
"""Routines for interpolating a series of poll data."""

def __init__(self):
self.series_by_region_then_party = dict()

def LoadFromCsv(self, csv_filename):
"""Adds data from a csv file. Can be called multiple times."""
with open(csv_filename) as csv_file:
csv_reader = csv.reader(csv_file)
for row in csv_reader:
Expand All @@ -24,6 +27,7 @@ def LoadFromCsv(self, csv_filename):
series.sort()

def Interpolate(self, region, party, date):
"""Interpolate the loaded poll data."""
try:
series = self.series_by_region_then_party[region][party]
except:
Expand All @@ -41,7 +45,34 @@ def Interpolate(self, region, party, date):
return 0

def GetMostRecent(self, region, party):
    """Returns the most recent data point for a region and party.

    Reads the last entry of the series stored for the given region and
    party and returns the element at index 1 of that entry.

    Args:
      region: Key into self.series_by_region_then_party.
      party: Key into the per-region dictionary.

    Returns:
      The value of the last data point, or 0 when the region or party is
      unknown or the series is empty.
    """
    try:
        series = self.series_by_region_then_party[region][party]
        return series[-1][1]
    except (KeyError, IndexError):
        # Unknown region/party or an empty series: treat missing data as 0.
        # Only lookup failures are handled so unrelated errors still surface.
        return 0

def UniformSwingProjection(self, region, begin_date, begin_vector):
    """Projects a vote vector forward by adding each party's poll change.

    For every party in begin_vector, the difference between the most recent
    poll and the poll interpolated at begin_date is added to the party's
    starting value.

    Args:
      region: Region whose poll series are consulted.
      begin_date: Date at which the starting poll level is interpolated.
      begin_vector: Dictionary mapping party to its starting value.

    Returns:
      A new dictionary mapping each party to its shifted value.
    """
    swung = {}
    for party, starting_value in begin_vector.items():
        baseline = self.Interpolate(region, party, begin_date)
        latest = self.GetMostRecent(region, party)
        swung[party] = starting_value + latest - baseline
    return swung

def ProportionalSwingProjection(self, region, begin_date, begin_vector):
    """Projects a vote vector forward by scaling each party's share.

    Each party's starting value is multiplied by the ratio of its most
    recent poll to its poll interpolated at begin_date. The scaled values
    are then normalized so they sum to 1.

    Args:
      region: Region whose poll series are consulted.
      begin_date: Date at which the starting poll level is interpolated.
      begin_vector: Dictionary mapping party to its starting value.

    Returns:
      A new dictionary mapping each party to its normalized projection. If
      the scaled values sum to zero they are returned without normalizing.
    """
    projection = {}
    for party, starting_value in begin_vector.items():
        old_poll = self.Interpolate(region, party, begin_date)
        new_poll = self.GetMostRecent(region, party)
        # Without a positive starting poll there is no ratio to apply, so
        # the party's value is carried forward unchanged.
        gain = new_poll / old_poll if old_poll > 0 else 1
        projection[party] = starting_value * gain
    divisor = sum(projection.values())
    if not divisor:
        # Nothing to normalize (empty or all-zero vector); dividing would
        # raise ZeroDivisionError.
        return projection
    # Normalize so the projections sum to 1.
    return {party: value / divisor for party, value in projection.items()}
50 changes: 4 additions & 46 deletions riding_forecast.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import csv
import sys
import unicodedata
import datetime

from scipy.stats import norm

Expand Down Expand Up @@ -83,44 +82,6 @@ def WhichProvince(s):
return abbr
return None

def LoadMatrix(filename):
    """Loads a table of numbers from a CSV file.

    The table of numbers should have labeled columns and rows. The first row
    of the CSV file will contain column labels. The first cell in each row
    thereafter will be a label for that row. The first column of the first
    row must be blank. All other cells in the CSV file should contain numbers.

    The returned table is indexed first by column label then by row label.

    Example file format:
        ,ColumnOne,ColumnTwo
        RowOne,1,2
        RowTwo,3,4

    Example usage:
        m = LoadMatrix('example.csv')
        print(m['ColumnTwo']['RowOne'])
        # Prints 2.0

    Args:
      filename: Path of the CSV file to read.

    Returns:
      A dictionary of dictionaries of floats: matrix[column][row]. Cells
      that are blank, missing or not numeric are stored as 0.
    """
    matrix = {}
    with open(filename) as csv_file:
        for row in csv.DictReader(csv_file):
            # The blank header of the first column holds the row label.
            row_label = row['']
            for column_label, value in row.items():
                if not column_label:
                    # Skip the row-label cell itself (and any surplus cells,
                    # which DictReader files under the key None).
                    continue
                try:
                    number = float(value)
                except (TypeError, ValueError):
                    # Blank, missing or non-numeric cells default to zero.
                    number = 0
                matrix.setdefault(column_label, {})[row_label] = number
    return matrix

def NormalizeDictVector(d):
"""Adjusts numerical values so they add up to 1."""
normalized = {}
Expand All @@ -146,14 +107,11 @@ def KeyWithHighestValue(d, forbidden_keys=[]):
mv = v
return mk

def RemoveAccentsFromText(s):
    """Returns s with combining accent marks stripped.

    The text is decomposed with Unicode NFD normalization, then every
    character in the 'Mn' (nonspacing mark) category is dropped.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

# Load regional polling data.
regional_support_before = LoadMatrix('regional_baseline.csv')
interpolator = RegionalPollInterpolator()
interpolator.LoadFromCsv('regional_poll_averages.csv')
interpolator.LoadFromCsv('regional_baseline.csv')
baseline_date = datetime.datetime(2011, 5, 2)

# Load and process per-riding election results from 2011.
old_ridings = {}
Expand All @@ -169,7 +127,7 @@ def RemoveAccentsFromText(s):
province = WhichProvince(row['Province'])
region = WhichRegion(row['Province'])
assert region
before = regional_support_before[region][party]
before = interpolator.Interpolate(region, party, baseline_date)
after = interpolator.GetMostRecent(region, party)
projected_gain = after / before
projection = popular_vote * projected_gain
Expand Down

0 comments on commit 188a87c

Please sign in to comment.