diff --git a/regional_baseline.csv b/regional_baseline.csv index fe2b014..d1e7cb0 100644 --- a/regional_baseline.csv +++ b/regional_baseline.csv @@ -1,6 +1,35 @@ -,ATL,QC,ON,SK_MB,AB,BC,Canada -cpc,37.46139295,16.5,44.4,54.7875,66.8,45.5,39.6 -lpc,29.58301806,14.2,25.3,12.92142857,9.3,13.4,18.9 -ndp,29.75,42.9,25.6,28.78883929,16.8,32.5,30.6 -gpc,2.960361135,2.1,3.8,3.186160714,5.3,7.7,3.9 -bq,0,23.4,0,0,0,0,0 +2011-05-02,Canada,ndp,30.6 +2011-05-02,Canada,cpc,39.6 +2011-05-02,Canada,lpc,18.9 +2011-05-02,Canada,bq,0.0 +2011-05-02,Canada,gpc,3.9 +2011-05-02,ON,ndp,25.6 +2011-05-02,ON,cpc,44.4 +2011-05-02,ON,lpc,25.3 +2011-05-02,ON,bq,0.0 +2011-05-02,ON,gpc,3.8 +2011-05-02,AB,ndp,16.8 +2011-05-02,AB,cpc,66.8 +2011-05-02,AB,lpc,9.3 +2011-05-02,AB,bq,0.0 +2011-05-02,AB,gpc,5.3 +2011-05-02,SK_MB,ndp,28.78883929 +2011-05-02,SK_MB,cpc,54.7875 +2011-05-02,SK_MB,lpc,12.92142857 +2011-05-02,SK_MB,bq,0.0 +2011-05-02,SK_MB,gpc,3.186160714 +2011-05-02,BC,ndp,32.5 +2011-05-02,BC,cpc,45.5 +2011-05-02,BC,lpc,13.4 +2011-05-02,BC,bq,0.0 +2011-05-02,BC,gpc,7.7 +2011-05-02,ATL,ndp,29.75 +2011-05-02,ATL,cpc,37.46139295 +2011-05-02,ATL,lpc,29.58301806 +2011-05-02,ATL,bq,0.0 +2011-05-02,ATL,gpc,2.960361135 +2011-05-02,QC,ndp,42.9 +2011-05-02,QC,cpc,16.5 +2011-05-02,QC,lpc,14.2 +2011-05-02,QC,bq,23.4 +2011-05-02,QC,gpc,2.1 diff --git a/regional_poll_interpolator.py b/regional_poll_interpolator.py index a43adce..6b24d20 100644 --- a/regional_poll_interpolator.py +++ b/regional_poll_interpolator.py @@ -2,10 +2,13 @@ import datetime class RegionalPollInterpolator(object): + """Routines for interpolating a series of poll data.""" + def __init__(self): self.series_by_region_then_party = dict() def LoadFromCsv(self, csv_filename): + """Adds data from a csv file. Can be called multiple times.""" with open(csv_filename) as csv_file: csv_reader = csv.reader(csv_file) for row in csv_reader: @@ -24,6 +27,7 @@ def LoadFromCsv(self, csv_filename): series.sort() def Interpolate(self, region, party, date): + """Interpolate the loaded poll data.""" try: series = self.series_by_region_then_party[region][party] except: @@ -41,7 +45,34 @@ def Interpolate(self, region, party, date): return 0 def GetMostRecent(self, region, party): + """Returns the most recent data point for a region and party.""" try: return self.series_by_region_then_party[region][party][-1][1] except: return 0 + + def UniformSwingProjection(self, region, begin_date, begin_vector): + """Projects forward a vector of popular votes.""" + projection = {} + for party in begin_vector: + old_poll = self.Interpolate(region, party, begin_date) + new_poll = self.GetMostRecent(region, party) + projection[party] = begin_vector[party] + new_poll - old_poll + return projection + + def ProportionalSwingProjection(self, region, begin_date, begin_vector): + """Projects forward a vector of popular votes.""" + projection = {} + for party in begin_vector: + old_poll = self.Interpolate(region, party, begin_date) + new_poll = self.GetMostRecent(region, party) + if old_poll > 0: + gain = new_poll / old_poll + else: + gain = 1 + projection[party] = begin_vector[party] * gain + # Normalize so the projections sum to 1. + divisor = sum(projection.values()) + for k, v in projection.items(): + projection[k] = v / divisor + return projection diff --git a/riding_forecast.py b/riding_forecast.py index da632c3..6867426 100644 --- a/riding_forecast.py +++ b/riding_forecast.py @@ -1,6 +1,5 @@ import csv -import sys -import unicodedata +import datetime from scipy.stats import norm @@ -83,44 +82,6 @@ def WhichProvince(s): return abbr return None -def LoadMatrix(filename): - """Loads a table of numbers from a CSV file. - - The table of numbers should have labeled columns and rows. The first row - of the CSV file will contain column labels. The first cell in each row - thereafter will be a label for that row. The first column of the first - row must be blank. All other cells in the CSV file should contain numbers. - - The returned table is indexed first by column label then by row label. - - Example file format: - ,ColumnOne,ColumnTwo - RowOne,1,2 - RowTwo,3,4 - - Example usage: - m = LoadMatrix('example.csv') - print m['ColumnTwo']['RowOne'] - # Prints 2 - """ - matrix = {} - with open(filename) as csv_file: - reader = csv.DictReader(csv_file) - for row in reader: - row_label = row[''] - for column_label, value in row.items(): - if not column_label: - continue - if column_label not in matrix: - matrix[column_label] = {} - try: - value = float(value) - except: - # Blank values default to zero. - value = 0 - matrix[column_label][row_label] = value - return matrix - def NormalizeDictVector(d): """Adjusts numerical values so they add up to 1.""" normalized = {} @@ -146,14 +107,11 @@ def KeyWithHighestValue(d, forbidden_keys=[]): mv = v return mk -def RemoveAccentsFromText(s): - return ''.join((c for c in unicodedata.normalize('NFD', s) - if unicodedata.category(c) != 'Mn')) - # Load regional polling data. -regional_support_before = LoadMatrix('regional_baseline.csv') interpolator = RegionalPollInterpolator() interpolator.LoadFromCsv('regional_poll_averages.csv') +interpolator.LoadFromCsv('regional_baseline.csv') +baseline_date = datetime.datetime(2011, 5, 2) # Load and process per-riding election results from 2011. old_ridings = {} @@ -169,7 +127,7 @@ def RemoveAccentsFromText(s): province = WhichProvince(row['Province']) region = WhichRegion(row['Province']) assert region - before = regional_support_before[region][party] + before = interpolator.Interpolate(region, party, baseline_date) after = interpolator.GetMostRecent(region, party) projected_gain = after / before projection = popular_vote * projected_gain