<a href="https://colab.research.google.com/github/ellenwterry/PoliticalAnalysis/blob/main/Bayesian_Campaign_Planning_Priors_Deep_Dive.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats

!pip install nest-asyncio
import nest_asyncio
nest_asyncio.apply()

import patsy
from sklearn.linear_model import LogisticRegression

!pip install pystan
!pip install corner
import stan

import plotly.express as px
import plotly.graph_objects as go

!pip install geopy
from geopy.geocoders import Nominatim
import matplotlib.pyplot as plt
!pip install pygris
# import matplotlib.pyplot as plt
from pygris import core_based_statistical_areas
from pygris import tracts

from google.colab import files


import geopandas as gpd
import folium
# from google.colab import files


In [None]:
# Read the base voter file directly from the project's GitHub repository.
url = (
    'https://raw.githubusercontent.com/ellenwterry/PoliticalAnalysis/'
    'main/BaseVote.csv'
)
VoteBase = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Clean up the data a bit: replace the survey's text labels with integer codes.

import warnings

from sklearn import preprocessing

# NOTE(review): this encoder is fitted on Sex, but none of the encodings in this
# cell use it (they all go through the explicit code tables below). It is kept
# only in case a later cell still refers to `le` - confirm and remove if unused.
le = preprocessing.LabelEncoder()
le.fit(VoteBase['Sex'])

VoteBase['Age'] = VoteBase['Age'].astype('int32')

# Text label -> integer code, one table per categorical column.
CATEGORY_CODES = {
    'Sex': {'NR': 0, 'M': 1, 'F': 2},
    'LastPrimary': {'NR': 0, 'R': 1, 'D': 2},
    'Education': {'NR': 0, 'HS': 1, 'Some College': 2, 'Bachelor': 3, 'Masters': 4, 'Doctorate': 5},
    'HHIncome': {'NR': 0, 'Under 50k': 1, '50k-100k': 2, '100k-200k': 3, '200k-300k': 4, '300k-500k': 5, 'Over 500k': 6},
    'ReligiousAffil': {'NR': 0, 'Protestant': 1, 'Catholic': 2, 'Other': 3, 'None': 4},
    # NOTE: NAs were excluded from sample so that algorithms could score using
    # logistic scale - 2nd pass will use imputed values
    'Support24': {'R': 0, 'D': 1},
    'TopIssue': {'NR': 0, 'RFree': 1, 'Crime': 2, 'Parents': 3, 'Economy': 4, 'Womens': 5, 'Education': 6, 'Democracy': 7},
    # This is for the second data source (later)
    'RRPetition': {'NR': 0, 'Signed': 1},
}

for column, codes in CATEGORY_CODES.items():
    labels = VoteBase[column]
    if pd.api.types.is_numeric_dtype(labels):
        # Already encoded by an earlier run of this cell. Mapping integer codes
        # through the label table again would turn every value into NaN.
        continue
    encoded = labels.map(codes)
    # Series.map yields NaN for any label missing from the table; surface those
    # instead of letting them disappear silently.
    unknown = labels[encoded.isna() & labels.notna()].unique()
    if len(unknown) > 0:
        warnings.warn(
            f"{column}: labels with no code were set to NaN: {sorted(map(str, unknown))}"
        )
    VoteBase[column] = encoded

In [None]:
# Build the design matrix from the encoded voter file; patsy prepends an
# intercept column to the eight predictors named in the formula.
Xmatrix = patsy.dmatrix(
    formula_like='Age + Sex + Education + HHIncome+ ReligiousAffil + LastPrimary + TopIssue + RRPetition',
    data=VoteBase,
)

In [None]:
# Spot-check one row of the design matrix (row 1154, all columns).
Xmatrix[1154, :]

array([ 1., 53.,  2.,  3.,  4.,  1.,  0.,  0.,  0.])

In [None]:
# Show the voter-file rows at positions 1154 and 464, the two records that are
# scored in the coefficient cell further down.
(VoteBase.iloc[1154], VoteBase.iloc[464])

(ID                          1154
 LastName                  Nelson
 FirstName                   Lisa
 Address           45 HUSTED LANE
 Town                   Greenwich
 Sex                            F
 Age                           53
 LastPrimary                   NR
 TurnoutScore                0.11
 SupportScore                0.47
 Latitude                41.05322
 Longitude              -73.63014
 Education               Bachelor
 HHIncome               200k-300k
 ReligiousAffil        Protestant
 Support24                      D
 TopIssue                      NR
 RRPetition                    NR
 Name: 1154, dtype: object,
 ID                                 464
 LastName                      Fletcher
 FirstName                       Joseph
 Address           154 BYRAM SHORE ROAD
 Town                         Greenwich
 Sex                                  M
 Age                                 35
 LastPrimary                         NR
 TurnoutScore                      0.23


array([ 1., 40.,  0.,  3.,  4.,  2.,  0.,  0.])

In [None]:

# Three coefficient vectors, one entry per design-matrix column. The first
# entry of each lines up with the intercept column; the rest follow the formula
# order. priors2 has one extra trailing entry for the RRPetition column.
priors = np.array([
    -4.171538879,      # Intercept
    -0.007734962083,   # Age
    0.1191608616,      # Sex
    0.08832740723,     # Education
    0.7838277788,      # HHIncome
    0.5216263998,      # ReligiousAffil
    0.2825111121,      # LastPrimary
    0.2004093626,      # TopIssue
])
Ipriors = np.array([
    -4.175475702,      # Intercept
    -0.01344553919,    # Age
    0.3878384869,      # Sex
    0.08312154144,     # Education
    0.7756178787,      # HHIncome
    0.5169169459,      # ReligiousAffil
    0.2823963907,      # LastPrimary
    0.201670231,       # TopIssue
])
priors2 = np.array([
    -4.179523767,      # Intercept
    -0.01324793534,    # Age
    0.3755421152,      # Sex
    0.07952228111,     # Education
    0.7722277551,      # HHIncome
    0.5138709413,      # ReligiousAffil
    0.2827103348,      # LastPrimary
    0.2011010537,      # TopIssue
    0.4062255315,      # RRPetition
])

# Design-matrix rows for the two example records: the "a" slices keep the first
# 8 columns (no RRPetition), the "b" slices keep all 9.
testRecord1a = Xmatrix[1154, :8]
testRecord1b = Xmatrix[1154, :9]
testRecord2a = Xmatrix[464, :8]
testRecord2b = Xmatrix[464, :9]

def stable_sigmoid(x):
  """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

  np.where evaluates both of its branch arguments for every element, so
  selecting between 1 / (1 + exp(-x)) and exp(x) / (1 + exp(x)) still
  overflows in the discarded branch and emits RuntimeWarnings. Here only
  exp(-|x|) is ever computed; it always lies in (0, 1], so neither branch
  can overflow.

  Parameters
  ----------
  x : scalar or array-like of real numbers

  Returns
  -------
  numpy.ndarray
      Sigmoid of each element, same shape as ``x`` (0-d for a scalar input).
  """
  x = np.asarray(x, dtype=float)
  decay = np.exp(-np.abs(x))  # exp(-x) where x >= 0, exp(x) where x < 0
  return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

def _score(coefs, record):
  # Sigmoid of the dot product between a coefficient vector and one record.
  return stable_sigmoid(np.dot(coefs, record.T).T)

# Probabilities for record 1154 under each coefficient vector
rec1prob1 = _score(priors, testRecord1a)
rec1Iprob = _score(Ipriors, testRecord1a)
rec1prob2 = _score(priors2, testRecord1b)

# Probabilities for record 464 under each coefficient vector
rec2prob1 = _score(priors, testRecord2a)
rec2Iprob = _score(Ipriors, testRecord2a)
rec2prob2 = _score(priors2, testRecord2b)

# One output line per record: priors, Ipriors, priors2
for _row in ((rec1prob1, rec1Iprob, rec1prob2), (rec2prob1, rec2Iprob, rec2prob2)):
  print(*_row)



0.39621757687305553 0.4393847914263568 0.42819206683830935
0.40036168554057106 0.40372622664595204 0.49426831646681907


In [None]:
# Display the design matrix.
# NOTE(review): the saved output for this cell lists 27 categorical columns,
# which does not match the 9-value numeric row shown for Xmatrix[1154,] earlier;
# the saved outputs appear to come from different runs - re-run the notebook top
# to bottom to confirm.
Xmatrix


DesignMatrix with shape (2498, 27)
  Columns:
    ['Intercept',
     'Sex[T.M]',
     'Education[T.Doctorate]',
     'Education[T.HS]',
     'Education[T.Masters]',
     'Education[T.NR]',
     'Education[T.Some College]',
     'HHIncome[T.200k-300k]',
     'HHIncome[T.300k-500k]',
     'HHIncome[T.50k-100k]',
     'HHIncome[T.NR]',
     'HHIncome[T.Over 500k]',
     'HHIncome[T.Under 50k]',
     'ReligiousAffil[T.NR]',
     'ReligiousAffil[T.None]',
     'ReligiousAffil[T.Other]',
     'ReligiousAffil[T.Protestant]',
     'LastPrimary[T.NR]',
     'LastPrimary[T.R]',
     'TopIssue[T.Democracy]',
     'TopIssue[T.Economy]',
     'TopIssue[T.Education]',
     'TopIssue[T.NR]',
     'TopIssue[T.Parents]',
     'TopIssue[T.RFree]',
     'TopIssue[T.Womens]',
     'Age']
  Terms:
    'Intercept' (column 0)
    'Sex' (column 1)
    'Education' (columns 2:7)
    'HHIncome' (columns 7:13)
    'ReligiousAffil' (columns 13:17)
    'LastPrimary' (columns 17:19)
    'TopIssue' (columns 19:26)
  