In [None]:
import pandas as pd

from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import HuberRegressor, LogisticRegression
from sklearn.ensemble import RandomForestRegressor
# from util import run_election_model
import numpy as np
from joblib import Parallel, delayed
import tqdm
from util_model_v1 import run_election_model

In [116]:
# Head-to-head poll shares per ethnic group, stored as
# [unity-party share, PN share, unsure share].

# PH vs PN
# PH, PN, Unsure
malay_1 = [0.39, 0.53, 0.08]
chinese_1 = [0.62, 0.09, 0.29]
indian_1 = [0.68, 0.21, 0.11]

# BN vs PN
# BN, PN, Unsure
# NOTE(review): malay_2 sums to 0.99 rather than 1.00 — presumably rounding in the source poll; confirm.
malay_2 = [0.35, 0.57, 0.07]
chinese_2 = [0.41, 0.28, 0.31]
indian_2 = [0.46, 0.37, 0.17]


def compute_outcome(unity_party, num_malay, num_chinese, num_indian, tilt=[0.5, 0.5, 0.5], turnout = [0.79, 0.69, 0.78]):
    """Estimate unity-party and PN votes for one seat from its ethnic make-up.

    Only Malay, Chinese and Indian voters contribute to the totals.

    Parameters
    ----------
    unity_party : str
        "PH" selects the PH-vs-PN poll tables, "BN" the BN-vs-PN tables.
    num_malay, num_chinese, num_indian : number
        Size of each group (counts or percentages; the result is in the same unit).
    tilt : sequence of 3 floats
        Per group (Malay, Chinese, Indian), the fraction of "unsure" voters
        credited to the unity party; the remainder goes to PN.
    turnout : sequence of 3 floats
        Per group (Malay, Chinese, Indian) turnout multiplier.

    Returns
    -------
    tuple
        (unity_votes, pn_votes)

    Raises
    ------
    ValueError
        If unity_party is neither "PH" nor "BN".
    """
    # The list defaults are only unpacked, never mutated, so sharing them
    # between calls is harmless.
    tilt_malay, tilt_chinese, tilt_indian = tilt
    turnout_malay, turnout_chinese, turnout_indian = turnout

    # The poll tables are read-only here, so no defensive copies are needed.
    if unity_party == "PH":
        malay_k, chinese_k, indian_k = malay_1, chinese_1, indian_1
    elif unity_party == "BN":
        malay_k, chinese_k, indian_k = malay_2, chinese_2, indian_2
    else:
        # A bare `raise` with no active exception would surface as an
        # unrelated "No active exception to re-raise" RuntimeError.
        raise ValueError(f"unity_party must be 'PH' or 'BN', got {unity_party!r}")

    # Decided share plus the tilted part of the unsure share.
    malay_unity = malay_k[0] + malay_k[2] * tilt_malay
    chinese_unity = chinese_k[0] + chinese_k[2] * tilt_chinese
    indian_unity = indian_k[0] + indian_k[2] * tilt_indian

    # PN receives whatever part of the unsure share the unity party did not.
    malay_pn = malay_k[1] + malay_k[2] * (1 - tilt_malay)
    chinese_pn = chinese_k[1] + chinese_k[2] * (1 - tilt_chinese)
    indian_pn = indian_k[1] + indian_k[2] * (1 - tilt_indian)

    unity_votes = (
        num_malay * malay_unity * turnout_malay
        + num_chinese * chinese_unity * turnout_chinese
        + num_indian * indian_unity * turnout_indian
    )
    pn_votes = (
        num_malay * malay_pn * turnout_malay
        + num_chinese * chinese_pn * turnout_chinese
        + num_indian * indian_pn * turnout_indian
    )
    return unity_votes, pn_votes

In [117]:
# dun_comp_df

In [118]:
# Load the per-seat composition table and add short alias columns next to
# the raw percentage columns.
dun_comp_df = (
    pd.read_csv('data/SELANGOR_2023_DUN_COMPOSITION_custom_done.csv')
    .set_index('STATE CONSTITUENCY NAME')
    .assign(
        # 'Bumi' pools the three non-Malay bumiputera percentage columns.
        Bumi=lambda df: df[['ORANG ASLI (%)', 'BUMIPUTERA SABAH (%)', 'BUMIPUTERA SARAWAK (%)']].sum(axis=1),
        Malay=lambda df: df['MALAY (%)'],
        Chinese=lambda df: df['CHINESE (%)'],
        Indian=lambda df: df['INDIANS (%)'],
        Other=lambda df: df['OTHERS (%)'],
        # Young1 is the 18-20 band; Young2 adds the 21-29 band on top of it.
        Young1=lambda df: df['18-20 (%)'],
        Young2=lambda df: df['18-20 (%)'] + df['21-29 (%)'],
    )
)

# Working copy restricted to the alias columns plus the 'Party 2' label.
dun_comp_sub_df = dun_comp_df.loc[
    :, ['Bumi', 'Malay', 'Chinese', 'Indian', 'Other', 'Young1', 'Young2', 'Party 2']
].copy()

In [153]:
# Scenario inputs, ordered (Malay, Chinese, Indian).
tilt = [0.0, 0.5, 0.5]
# Alternative turnout that this scenario overrides: [0.79, 0.69, 0.78]
turnout = [0.9, 0.6, 0.7]

# Score every seat with compute_outcome and record the margin and winner flag.
for seat in dun_comp_sub_df.index:
    seat_row = dun_comp_sub_df.loc[seat]
    unity_votes, pn_votes = compute_outcome(
        unity_party=seat_row['Party 2'],
        num_malay=seat_row['Malay'],
        num_chinese=seat_row['Chinese'],
        num_indian=seat_row['Indian'],
        tilt=tilt,
        turnout=turnout,
    )
    dun_comp_sub_df.loc[seat, 'Majority'] = abs(unity_votes - pn_votes)
    # A tie counts as a loss for the unity party (strict comparison).
    dun_comp_sub_df.loc[seat, 'Unity_IsWinner'] = 1 if unity_votes > pn_votes else 0

# (share of seats won, number of seats won)
dun_comp_sub_df['Unity_IsWinner'].mean(), dun_comp_sub_df['Unity_IsWinner'].sum()

(0.5178571428571429, 29.0)

In [146]:
# Count seats for every (unity party, win flag) combination.
dun_comp_sub_df.value_counts(subset=['Party 2', 'Unity_IsWinner']).reset_index()

Unnamed: 0,Party 2,Unity_IsWinner,0
0,PH,1.0,34
1,BN,0.0,12
2,PH,0.0,10


In [140]:
# List seats from the smallest projected margin upward, with ties ordered by party.
dun_comp_sub_df.sort_values(by=['Majority', 'Party 2'])

Unnamed: 0_level_0,Bumi,Malay,Chinese,Indian,Other,Young1,Young2,Party 2,Unity_IsWinner,Majority
STATE CONSTITUENCY NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
TAMAN MEDAN,0.55,65.09,12.03,19.92,2.41,5.9,25.6,PH,1.0,0.389401
SUNGAI KANDIS,0.94,69.05,12.98,15.17,1.86,9.0,34.1,PH,0.0,1.692782
PELABUHAN KLANG,1.6,61.96,18.07,16.39,1.98,8.1,31.4,PH,1.0,1.848125
PERMATANG,0.13,63.23,21.51,13.81,1.32,6.0,26.3,PH,1.0,1.939579
HULU BERNAM,0.89,70.13,14.82,13.07,1.11,5.7,24.9,PH,0.0,1.977458
PAYA JARAS,0.81,66.42,15.54,9.1,8.12,9.5,36.9,PH,0.0,2.524758
KOTA DAMANSARA,1.69,60.55,24.69,11.02,2.05,9.3,35.7,PH,1.0,2.545475
MORIB,1.07,71.12,10.64,15.44,1.73,7.1,30.8,PH,0.0,2.809304
LEMBAH JAYA,0.64,56.91,22.92,12.11,7.41,6.1,27.7,PH,1.0,2.930412
IJOK,0.2,61.63,13.18,24.2,0.8,6.4,27.8,PH,1.0,2.980352


In [None]:
# Group the seat table by the unity party contesting each seat.
dun_comp_sub_df.sort_values(by="Party 2")

In [None]:
# Show up to 50 columns when rendering wide frames.
pd.options.display.max_columns = 50

In [None]:
# Baseline run of the election model for one state.
# NOTE(review): this call passes four arguments (state, lost_faith, bn_to_ph,
# ph_to_bn), while get_unity_win / get_unity_win_23 further down pass five
# (state, bn_to_ph, bn_to_pn, ph_to_bn, ph_to_pn) — confirm which signature
# util_model_v1.run_election_model actually exposes.
# NOTE(review): state is spelled 'Selangor' here but 'SELANGOR' in a later
# cell — confirm which spelling the model expects.
state = 'Selangor'
lost_faith = 0
bn_to_ph = 0.15
ph_to_bn = 0.24
kdf, scenario_df = run_election_model(state, lost_faith, bn_to_ph, ph_to_bn)

In [None]:
# Group sizes used by the transfer calculation in the next cell.
# Presumably percentage shares of a single constituency — TODO confirm which seat.
num_malay, num_chinese, num_indian = 84.36, 12.18, 2.73

In [None]:
# Unity-party poll shares per group; these match the first entry of the
# PH-vs-PN (*_1) and BN-vs-PN (*_2) poll tables defined with compute_outcome.
malay_ph = 0.39
malay_bn = 0.35
chinese_ph = 0.62
chinese_bn = 0.41
# The original cell left the next four assignments empty (a SyntaxError).
# The Indian shares follow the same pattern as the Malay/Chinese ones
# (indian_1[0] and indian_2[0]).
indian_ph = 0.68
indian_bn = 0.46
# TODO(review): placeholder transfer rates copied from ph_to_bn / bn_to_ph in
# the baseline model-run cell — confirm these are the intended values.
ph_bn_transfer = 0.24
bn_ph_transfer = 0.15

# BN-led ticket: BN supporters stay in full, PH supporters follow at
# ph_bn_transfer. The Indian term previously repeated the PH-led formula
# (copy-paste slip); it now mirrors the Malay and Chinese terms.
bn_led_unity_votes = (
    num_malay * (malay_ph * ph_bn_transfer + malay_bn * 1)
    + num_chinese * (chinese_ph * ph_bn_transfer + chinese_bn * 1)
    + num_indian * (indian_ph * ph_bn_transfer + indian_bn * 1)
)
# PH-led ticket: PH supporters stay in full, BN supporters follow at
# bn_ph_transfer.
ph_led_unity_votes = (
    num_malay * (malay_ph * 1 + malay_bn * bn_ph_transfer)
    + num_chinese * (chinese_ph * 1 + chinese_bn * bn_ph_transfer)
    + num_indian * (indian_ph * 1 + indian_bn * bn_ph_transfer)
)


In [None]:
# Place the model output and the seat composition side by side, aligned on index.
fdf = pd.concat(objs=[kdf, dun_comp_sub_df], axis='columns')
fdf

In [None]:
def get_unity_win(state, bn_to_ph, ph_to_bn, threshold=0.5):
    """Return whether Unity wins more than `threshold` of the seats in one model run.

    Parameters
    ----------
    state : str
        Passed through to run_election_model unchanged.
    bn_to_ph, ph_to_bn : float
        Transfer rates; each is clamped to the range [0, 0.95] before use.
    threshold : float, default 0.5
        Share of seats (rows with Winner_S2 == 'Unity') that must be strictly
        exceeded. The default reproduces the original simple-majority check.

    Returns
    -------
    bool-like
        True when the Unity seat share is strictly greater than `threshold`.
    """
    # Clamp so that the complementary "to PN" rates below stay non-negative.
    bn_to_ph = min(max(bn_to_ph, 0.0), 0.95)
    ph_to_bn = min(max(ph_to_bn, 0.0), 0.95)

    # A fixed 0.05 is withheld from each transfer split; the rest goes to PN.
    bn_to_pn = 1-bn_to_ph-0.05
    ph_to_pn = 1-ph_to_bn-0.05
    # NOTE(review): an earlier cell calls run_election_model with four
    # arguments (state, lost_faith, bn_to_ph, ph_to_bn) — confirm which
    # signature the imported function has.
    df, scenario_df = run_election_model(state, bn_to_ph, bn_to_pn, ph_to_bn, ph_to_pn)
    return (df['Winner_S2'] == 'Unity').mean() > threshold

In [None]:
def get_unity_win_23(state, bn_to_ph, ph_to_bn, threshold=0.6666):
    """Return whether Unity wins more than `threshold` of the seats in one model run.

    Same computation as get_unity_win, but with the bar defaulting to 0.6666
    (roughly a two-thirds share of seats).

    Parameters
    ----------
    state : str
        Passed through to run_election_model unchanged.
    bn_to_ph, ph_to_bn : float
        Transfer rates; each is clamped to the range [0, 0.95] before use.
    threshold : float, default 0.6666
        Share of seats (rows with Winner_S2 == 'Unity') that must be strictly
        exceeded. The default reproduces the original hard-coded value.

    Returns
    -------
    bool-like
        True when the Unity seat share is strictly greater than `threshold`.
    """
    # Clamp so that the complementary "to PN" rates below stay non-negative.
    bn_to_ph = min(max(bn_to_ph, 0.0), 0.95)
    ph_to_bn = min(max(ph_to_bn, 0.0), 0.95)

    # A fixed 0.05 is withheld from each transfer split; the rest goes to PN.
    bn_to_pn = 1-bn_to_ph-0.05
    ph_to_pn = 1-ph_to_bn-0.05
    # NOTE(review): an earlier cell calls run_election_model with four
    # arguments (state, lost_faith, bn_to_ph, ph_to_bn) — confirm which
    # signature the imported function has.
    df, scenario_df = run_election_model(state, bn_to_ph, bn_to_pn, ph_to_bn, ph_to_pn)
    return (df['Winner_S2'] == 'Unity').mean() > threshold

In [None]:
# NOTE(review): scratch arithmetic (≈ 0.0912). 1.645 is presumably the
# one-sided 95% normal quantile, i.e. this backs out a standard deviation
# from a 0.15 offset — confirm intent or delete the cell.
-0.15/-1.645


In [None]:
# Single-scenario check; `state` set here is also what the sampled sweeps
# in the cells below pass to get_unity_win / get_unity_win_23.
state = 'SELANGOR'

bn_to_ph = 0.0
ph_to_bn = 0.24
# NOTE(review): bn_to_pn / ph_to_pn are not passed to get_unity_win (it
# derives its own from the clamped rates); the only other visible use is a
# commented-out run_election_model call at the end of the notebook.
bn_to_pn = 1-bn_to_ph-0.05
ph_to_pn = 1-ph_to_bn-0.05

get_unity_win(state, bn_to_ph, ph_to_bn)

In [None]:
# Sampled transfer-rate scenarios for the sweep below.
SEED = 42      # fixed so Restart & Run All reproduces the same draws
N_DRAWS = 250  # draws per rate; the sweep evaluates N_DRAWS * N_DRAWS pairs
n = 5  # NOTE(review): not used by any visible cell — confirm before removing

# One generator for both draws keeps the whole cell reproducible from SEED.
rng = np.random.default_rng(SEED)
bn_to_phs = rng.normal(0.15, 0.05, N_DRAWS)  # mean 0.15, sd 0.05
ph_to_bns = rng.normal(0.24, 0.05, N_DRAWS)  # mean 0.24, sd 0.05



In [None]:
# pd.Series(bn_to_phs).hist()

In [None]:
%%time
# Evaluate the simple-majority check for every (bn_to_ph, ph_to_bn) pair.
# The grid is flattened first so that one progress bar covers the whole
# sweep; wrapping only the inner iterable started a fresh bar for every
# outer value. Pair order matches the original nested loops.
res = Parallel(n_jobs=12)(
    delayed(get_unity_win)(state, bn_to_ph, ph_to_bn)
    for bn_to_ph, ph_to_bn in tqdm.tqdm(
        [(outer, inner) for outer in bn_to_phs for inner in ph_to_bns],
        position=0,
    )
)

In [None]:
# Fraction of sampled scenarios in which get_unity_win returned True.
np.asarray(res).mean()

In [None]:
%%time
# Evaluate the higher-threshold check for every (bn_to_ph, ph_to_bn) pair.
# The grid is flattened first so that one progress bar covers the whole
# sweep; wrapping only the inner iterable started a fresh bar for every
# outer value. Pair order matches the original nested loops.
res2 = Parallel(n_jobs=12)(
    delayed(get_unity_win_23)(state, bn_to_ph, ph_to_bn)
    for bn_to_ph, ph_to_bn in tqdm.tqdm(
        [(outer, inner) for outer in bn_to_phs for inner in ph_to_bns],
        position=0,
    )
)

In [None]:
# Fraction of sampled scenarios in which get_unity_win_23 returned True.
np.asarray(res2).mean()

In [None]:
# %%time
# df, scenario_df = run_election_model(state, bn_to_ph, bn_to_pn, ph_to_bn, ph_to_pn)

In [None]:
# pd.read_csv(f'{state}_2018_DUN_RESULTS.csv')