# Simple state space model for 2pp voting intention

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Python-setup" data-toc-modified-id="Python-setup-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Python setup</a></span><ul class="toc-item"><li><span><a href="#Set-up-Analysis-constants" data-toc-modified-id="Set-up-Analysis-constants-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Set up Analysis constants</a></span></li></ul></li><li><span><a href="#Load-2pp-data-and-prepare-for-analysis" data-toc-modified-id="Load-2pp-data-and-prepare-for-analysis-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Load 2pp data and prepare for analysis</a></span></li><li><span><a href="#The-model" data-toc-modified-id="The-model-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>The model</a></span><ul class="toc-item"><li><span><a href="#Fit-data-to-model" data-toc-modified-id="Fit-data-to-model-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Fit data to model</a></span></li><li><span><a href="#Diagnostics" data-toc-modified-id="Diagnostics-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Diagnostics</a></span></li><li><span><a href="#Plot-results" data-toc-modified-id="Plot-results-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Plot results</a></span></li></ul></li><li><span><a href="#Warnings" data-toc-modified-id="Warnings-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Warnings</a></span></li></ul></div>

## Python setup

In [1]:
# Data science imports
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe
import arviz as az
import stan # install with pip - conda is problematic on the M1

# local imports
import common
from common import COLOR_COALITION, P_COLOR_COALITION

In [2]:
# Record the library versions this notebook was run with
for label, module in (
    ('numpy', np),
    ('pandas', pd),
    ('matplotlib', mpl),
    ('stan', stan),
    ('arviz', az),
):
    print(f'{label}: {module.__version__}')

numpy: 1.21.5
pandas: 1.3.5
matplotlib: 3.5.1
stan: 3.3.0
arviz: 0.11.4


In [3]:
# enable stan on jupyter notebook
# NOTE(review): nest_asyncio.apply() is presumably needed because the notebook
# kernel already runs an asyncio event loop and stan wants to run its own -
# confirm against the pystan documentation.
import nest_asyncio
nest_asyncio.apply()

In [4]:
# path to the model data - assume it exists
# (the trailing slash matters: file names are appended directly to this
#  string when the data file path is built further down)
DATA_DIR = '../data/'

In [5]:
# plotting stuff - apply the local matplotlib style sheet
# (relative path: the kernel's working directory must contain
#  markgraph.mplstyle)
plt.style.use('./markgraph.mplstyle')

### Set up Analysis constants

In [6]:
# Analysis based on this data ...
data_file = f'{DATA_DIR}primary_vote.csv' # f'{DATA_DIR}two_party_preferred.csv'
POLL_COLUMN = 'Calculated 2pp L/NP' #'2pp vote L/NP'

# Minimum polls requirement for a pollster
# The one ANU poll since 2019 looks like a rogue outlier, so let's exclude it:
# a pollster with a single poll falls below this minimum.
MINIMUM_POLLS_FROM_POLLSTER = 2
# Backward-compatible alias: the constant was originally spelled with a typo
# and the filtering cell further down still refers to the old spelling.
MIMIMUM_POLLS_FROM_POLLSTER = MINIMUM_POLLS_FROM_POLLSTER

# Plot left-side footnotes
LFOOTER = (
    'Polling data from Wikipedia; '
    '2pp calculated from primary votes and 2019 preference flows; '
    f'Pollsters ignored with < {MINIMUM_POLLS_FROM_POLLSTER} polls.'
)

## Load 2pp data and prepare for analysis

In [7]:
# Check the data file has been updated today
# NOTE(review): the string is presumably the hint reported when the file is
# stale - confirm in common.check_file_current.
common.check_file_current(data_file, 
    'Make sure you run the data capture notebook first')

In [8]:
# Load the polling data: the first CSV column becomes the index and
# 'Mean Date' is parsed as a date. Then list the column names as a reminder.
tpp = pd.read_csv(
    data_file,
    index_col=0,
    parse_dates=['Mean Date'],
)
tpp.columns

Index(['Primary vote L/NP', 'Primary vote ALP', 'Primary vote GRN',
       'Primary vote ONP', 'Primary vote OTH', 'Date', 'Mean Date', 'Brand',
       'Interview mode', 'Sample size', 'Calculated 2pp L/NP',
       'Calculated 2pp ALP'],
      dtype='object')

In [9]:
# Number of polls (rows in the frame) before any pollsters are filtered out
len(tpp)

109

In [10]:
# Number of polls from each pollster
# (kept in brand_counts because the filtering step below reuses it)
brand_counts = tpp['Brand'].value_counts()
brand_counts

Newspoll-YouGov                   40
Essential                         40
Roy Morgan                        20
Resolve Strategic                  8
Australian National University     1
Name: Brand, dtype: int64

In [11]:
# Drop pollsters that have not published enough polls, then re-tally
has_enough_polls = brand_counts >= MIMIMUM_POLLS_FROM_POLLSTER
keep = brand_counts.loc[has_enough_polls].index
tpp = tpp.loc[tpp['Brand'].isin(keep)]
tpp['Brand'].value_counts()

Newspoll-YouGov      40
Essential            40
Roy Morgan           20
Resolve Strategic     8
Name: Brand, dtype: int64

In [12]:
# Number of polls remaining after the low-volume pollsters were removed
len(tpp)

108

## The model

### Fit data to model

In [13]:
# this step typically takes around 3 minutes
# The three results (fit, first_day, brand_map) are reused by the diagnostics
# and plotting cells below.
# NOTE(review): no random seed is passed here - check whether
# common.bayes_poll_aggregation fixes one; otherwise runs are not repeatable.
# NOTE(review): assumed_sample_size is presumably used in place of the
# per-poll 'Sample size' column - confirm in common.
fit, first_day, brand_map = common.bayes_poll_aggregation(tpp, 
                              poll_column=POLL_COLUMN,
                              date_column='Mean Date',
                              firm_column='Brand',
                              assumed_sample_size=1000,
                              num_chains=4,
                              num_samples=2_500) 

Stan version: 3.3.0
[36mBuilding:[0m 0.2s
[1A[0J[36mBuilding:[0m 0.3s
[1A[0J[36mBuilding:[0m 0.4s
[1A[0J[36mBuilding:[0m 0.5s


In file included from /Users/bryanpalmer/Library/Caches/httpstan/4.6.1/models/7jptub3f/model_7jptub3f.cpp:2:
In file included from /Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:4:
In file included from /Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/httpstan/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/httpstan/include/Eigen/Dense:1:
In file included from /Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/httpstan/include/Eigen/Core:96:
In file included from /Users/bryanpalmer/miniforge3/bin/../include/c++/v1/complex:237:
/Users/bryanpalmer/miniforge3/bin/../include/c++/v1/cmath:321:9: error: no member named '

[1A[0J[36mBuilding:[0m 0.6s
[1A[0J[36mBuilding:[0m 0.7s
[1A[0J[36mBuilding:[0m 0.8s
[1A[0J[36mBuilding:[0m 0.9s
[1A[0J[36mBuilding:[0m 1.0s
[1A[0J[36mBuilding:[0m 1.1s
[1A[0J[36mBuilding:[0m 1.2s
[1A[0J[36mBuilding:[0m 1.3s
[1A[0J[36mBuilding:[0m 1.4s
[1A[0J[36mBuilding:[0m 1.5s
[1A[0J[36mBuilding:[0m 1.6s
[1A[0J[36mBuilding:[0m 1.7s
[1A[0J[36mBuilding:[0m 1.8s
[1A[0J[36mBuilding:[0m 1.9s
[1A[0J[36mBuilding:[0m 2.0s
[1A[0J[36mBuilding:[0m 2.1s
[1A[0J[36mBuilding:[0m 2.2s
[1A[0J[36mBuilding:[0m 2.3s
[1A[0J[36mBuilding:[0m 2.4s
[1A[0J[36mBuilding:[0m 2.5s
[1A[0J[36mBuilding:[0m 2.6s
[1A[0J[36mBuilding:[0m 2.7s
[1A[0J[36mBuilding:[0m 2.8s
[1A[0J[36mBuilding:[0m 2.9s
[1A[0J[36mBuilding:[0m 3.0s
[1A[0J[36mBuilding:[0m 3.1s
[1A[0J[36mBuilding:[0m 3.2s
[1A[0J[36mBuilding:[0m 3.3s
[1A[0J[36mBuilding:[0m 3.4s
[1A[0J[36mBuilding:[0m 3.5s
[1A[0J[36mBuilding:[0m 3.6s
[1A[0J

20 errors generated.


RuntimeError: Exception while building model extension module: `CompileError(DistutilsExecError("command '/Users/bryanpalmer/miniforge3/bin/x86_64-apple-darwin13.4.0-clang' failed with exit code 1"))`, traceback: `['  File "/Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/httpstan/views.py", line 114, in handle_create_model\n    compiler_output = await httpstan.models.build_services_extension_module(program_code)\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/httpstan/models.py", line 168, in build_services_extension_module\n    compiler_output = await asyncio.get_running_loop().run_in_executor(\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/asyncio/futures.py", line 284, in __await__\n    yield self  # This tells Task to wait for completion.\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/asyncio/tasks.py", line 328, in __wakeup\n    future.result()\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/asyncio/futures.py", line 201, in result\n    raise self._exception\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/concurrent/futures/thread.py", line 58, in run\n    result = self.fn(*self.args, **self.kwargs)\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/httpstan/build_ext.py", line 80, in run_build_ext\n    build_extension.run()\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/setuptools/_distutils/command/build_ext.py", line 339, in run\n    self.build_extensions()\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/setuptools/_distutils/command/build_ext.py", line 448, in build_extensions\n    self._build_extensions_serial()\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/setuptools/_distutils/command/build_ext.py", line 473, in _build_extensions_serial\n    self.build_extension(ext)\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/setuptools/_distutils/command/build_ext.py", line 528, in 
build_extension\n    objects = self.compiler.compile(sources,\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/setuptools/_distutils/ccompiler.py", line 574, in compile\n    self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)\n', '  File "/Users/bryanpalmer/miniforge3/lib/python3.9/site-packages/setuptools/_distutils/unixccompiler.py", line 120, in _compile\n    raise CompileError(msg)\n']`

### Diagnostics

In [None]:
# ArviZ summary table for the fitted model
az.summary(fit)

In [None]:
# Estimated Bayesian Fraction of Missing Information ...
# Good if close to one; bad if close to zero ...
# The cut-off is a named constant, in the same way as RHAT_THRESHOLD and
# ESS_THRESHOLD in the convergence check, so it is easy to find and tune.
BFMI_THRESHOLD = 0.4
bfmi = az.bfmi(fit)
# Warn if any of the returned values is at or below the cut-off
if (bfmi <= BFMI_THRESHOLD).any():
    common.warn(f'Note: the BFMI could be problematic ({bfmi})')

In [None]:
# Convergence checks on the two model variables of interest:
#   R-hat - chain convergence diagnostic
#   ESS   - effective sample size (used to be n_eff)
RHAT_THRESHOLD = 1.05
ESS_THRESHOLD = 1000
rhat, ess = az.rhat(fit), az.ess(fit)
for var in ('houseEffect', 'hidden_vote_share'):
    # flag the variable if any element has an R-hat above the threshold
    poorly_mixed = rhat[var] > RHAT_THRESHOLD
    if poorly_mixed.any():
        common.warn(f'Check {var} - chain convergence')
    # flag the variable if any element has an ESS below the threshold
    too_few_draws = ess[var] < ESS_THRESHOLD
    if too_few_draws.any():
        common.warn(f'Check {var} - effective sample size')

In [None]:
# Trace plot for the house effects; the trailing semicolon stops the
# notebook echoing the call's return value underneath the figure.
az.plot_trace(fit, var_names=['houseEffect']);

### Plot results

In [None]:
# Plot the results of the model fit for the Coalition 2pp series, passing
# the same frame and column names that were used for the fit.
# The title's capitalisation is corrected ('State Space Model').
# NOTE(review): if common.bayes_poll_aggregation_plots derives a file name
# from the title, the saved chart's name changes with it - confirm.
common.bayes_poll_aggregation_plots(df=tpp,
                                    fit=fit,
                                    first_day=first_day,
                                    brand_map=brand_map,
                                    poll_column=POLL_COLUMN,
                                    date_column='Mean Date',
                                    firm_column='Brand',
                                    party='Coalition',
                                    title='2pp Vote Share (State Space Model)',
                                    line_color=COLOR_COALITION,
                                    point_color=P_COLOR_COALITION,
                                    lfooter=LFOOTER)


## Warnings

In [None]:
# NOTE(review): presumably prints the messages recorded earlier through
# common.warn() - confirm in the common module.
common.print_warnings()