In [3]:
import sys 
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'src')))

from data_loading import load_data
from create_weight_matrices import yea_nay_weight_matrices
from random_walk import rwhg
from helpers import evaluate_rwhg

import pandas as pd
import numpy as np
import csv
from sklearn.model_selection import KFold

ModuleNotFoundError: No module named 'src'

### 1) Within-Matrix Prediction

In [None]:
# Within-matrix prediction for one legislative period.
#
# Runs 5-fold cross-validation over the rows of `votes`: each fold's
# (councillor, affair) pairs are handed to RWHG as the entries to predict, once
# per weight-matrix approach, and the per-fold metrics are appended to a CSV.
#
# NOTE(review): re-running this cell appends another set of fold rows to an
# existing results file; delete the file first if a clean run is wanted.

# 49th legislative period
period = [49]

# output path
results_path = f'../results/within/results_{period[0]}.csv'

# make sure the output directory exists before the first write
os.makedirs(os.path.dirname(results_path), exist_ok=True)

# create CSV file with header if it doesn't exist
# NOTE(review): the header names the per-class columns in the order EH, Yes, No,
# while the printed classification report lists EH, No, Yes — confirm the order
# of the tuples returned by evaluate_rwhg matches the header.
if not os.path.exists(results_path):
    with open(results_path, mode='w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([
            'period', 'fold', 'approach', 'accuracy',
            'f1_EH', 'f1_Yes', 'f1_No',
            'precision_EH', 'precision_Yes', 'precision_No',
            'recall_EH', 'recall_Yes', 'recall_No',
            # column for the trailing value of each row (the 5th value returned
            # by evaluate_rwhg, presumably the weighted F1 — confirm)
            'weighted_f1'
        ])

# load main data
votes, affairs, councillors = load_data(period)

# create mappings from ids to matrix indices (sorted for a stable ordering)
ordered_c_ids = sorted(set(councillors['elanId']))
ordered_a_ids = sorted(set(affairs['id']))

c_id2idx = {id_: i for i, id_ in enumerate(ordered_c_ids)}
a_id2idx = {id_: i for i, id_ in enumerate(ordered_a_ids)}

# load weight matrices
# NOTE(review): these paths are relative to './' while results_path and the
# sys.path entry use '../' — confirm which working directory is intended.

# approach I
W_x1 = pd.read_csv(f'./data/clean/weight_matrices/{period[0]}/W_x1_{period[0]}.csv').to_numpy()
W_y1 = pd.read_csv(f'./data/clean/weight_matrices/{period[0]}/W_y1_{period[0]}.csv').to_numpy()
W_yea_xy1, W_nay_xy1 = yea_nay_weight_matrices(votes, c_id2idx, a_id2idx)

# approach II (same call and inputs as approach I for the yea/nay matrices)
W_x2 = pd.read_csv(f'./data/clean/weight_matrices/{period[0]}/W_x2_{period[0]}.csv').to_numpy()
W_y2 = pd.read_csv(f'./data/clean/weight_matrices/{period[0]}/W_y2_{period[0]}.csv').to_numpy()
W_yea_xy2, W_nay_xy2 = yea_nay_weight_matrices(votes, c_id2idx, a_id2idx)

# hyperparameters
b = 1
gamma = 6/7
alpha_x = 1/2
alpha_y = 1/2
K_x = 6
K_y = 6

# create folds
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for fold, (train_idx, test_idx) in enumerate(kf.split(votes)):
    print(f"\n--- Fold {fold + 1} ---")

    # KFold yields *positional* indices, so select rows with .iloc; .loc would
    # look them up as index labels and pick wrong rows (or raise) whenever
    # `votes` does not carry a default 0..n-1 index.
    test_rows = votes.iloc[test_idx]
    test_c_id = test_rows['elanId']
    test_a_id = test_rows['id']
    test_decision = test_rows['decision']

    # translate ids to matrix indices
    test_c_idx = test_c_id.map(c_id2idx)
    test_a_idx = test_a_id.map(a_id2idx)

    # an id missing from the mappings becomes NaN, which astype(int) would
    # silently turn into a meaningless index — fail loudly instead
    if test_c_idx.isna().any() or test_a_idx.isna().any():
        raise ValueError(
            "votes contains councillor or affair ids that are missing from "
            "the councillors/affairs tables"
        )

    # stack them for input into RWHG (one independent array per approach)
    test1 = np.column_stack((test_c_idx, test_a_idx)).astype(int)
    test2 = test1.copy()

    # record ground truth for eval
    ground_truth = list(zip(test_c_idx.values, test_a_idx.values, test_decision.values))

    # run RWHG for both approaches; copies are passed so the loaded matrices
    # are the same at the start of every fold
    results1 = rwhg(W_yea_xy1.copy(),W_nay_xy1.copy(),W_x1.copy(),W_y1.copy(),test1,b,gamma,alpha_x,alpha_y,K_x, K_y,abstention=True)
    results2 = rwhg(W_yea_xy2.copy(),W_nay_xy2.copy(),W_x2.copy(),W_y2.copy(),test2,b,gamma,alpha_x,alpha_y,K_x, K_y,abstention=True)

    # evaluate
    acc1, f1_1, p1, r1, wf1_1 = evaluate_rwhg(results=results1, ground_truth=ground_truth)
    acc2, f1_2, p2, r2, wf1_2 = evaluate_rwhg(results=results2, ground_truth=ground_truth)

    # Append results (one row per approach, matching the header's column order)
    with open(results_path, mode='a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([
            period[0], fold + 1, 'approach1', acc1,
            *f1_1, *p1, *r1, wf1_1
        ])
        writer.writerow([
            period[0], fold + 1, 'approach2', acc2,
            *f1_2, *p2, *r2, wf1_2
        ])
        


--- Fold 1 ---
Iteration 1 of 27283
Iteration 3032 of 27283
Iteration 6063 of 27283
Iteration 9095 of 27283
Iteration 12126 of 27283
Iteration 15157 of 27283
Iteration 18189 of 27283
Iteration 21220 of 27283
Iteration 24251 of 27283
Iteration 27283 of 27283
              precision    recall  f1-score   support

          EH       0.08      0.04      0.05       711
          No       0.92      0.93      0.93     13773
         Yes       0.92      0.93      0.92     12799

    accuracy                           0.91     27283
   macro avg       0.64      0.63      0.63     27283
weighted avg       0.90      0.91      0.90     27283


--- Fold 2 ---
Iteration 1 of 27283
Iteration 3032 of 27283
Iteration 6063 of 27283
Iteration 9095 of 27283
Iteration 12126 of 27283
Iteration 15157 of 27283
Iteration 18189 of 27283
Iteration 21220 of 27283
Iteration 24251 of 27283
Iteration 27283 of 27283
              precision    recall  f1-score   support

          EH       0.07      0.03      0.05   