In [None]:
from data_loading import load_data
from helpers import get_gower_weights
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

# --- 1) Per Period Gower Weights --- #

# Periods handed to load_data; votes and affairs are loaded alongside the
# councillors even though only councillors is passed on to get_gower_weights.
periods = list(range(49, 53))
votes, affairs, councillors = load_data(periods)

# Target passed to get_gower_weights (presumably aggregated co-sponsorship
# counts, judging by the file name -- TODO confirm).
target = pd.read_csv('../data/clean/cospon_count_agg.csv')

# Councillor columns for which Gower weights are requested.
features = [
    'degree_class', 'profession_class', 'gender', 'average_age',
    'lang_region', 'military_rank_ordinal', 'faction_ordinal',
]

# Model identifiers; get_gower_weights is run once per entry.
models = ['logistic', 'poisson', 'randomForestReg', 'randomForestClass']

# Filled in below: model name -> weights, or an "ERROR: ..." string on failure.
results = dict()

def run_model(model):
    """Compute Gower weights for one model and report failures as a value.

    Calls ``get_gower_weights`` with the module-level ``councillors``,
    ``features`` and ``target`` and a fixed ``s=0.3``.

    Parameters
    ----------
    model : str
        Model identifier forwarded to ``get_gower_weights``.

    Returns
    -------
    tuple
        ``(model, weights)`` on success, or ``(model, "ERROR: <message>")``
        when ``get_gower_weights`` raises an ``Exception``. The exception is
        not re-raised, so callers must inspect the second element.
    """
    try:
        fitted = get_gower_weights(councillors, features, target, model=model,s=0.3)
    except Exception as exc:
        # Hand the failure back as data instead of letting it escape the worker.
        return model, f"ERROR: {str(exc)}"
    else:
        return model, fitted

# Run every model on the thread pool. executor.map yields the
# (model, weights) pairs in the same order as `models`, so `results`
# is populated in that order.
with ThreadPoolExecutor() as executor:
    results.update(executor.map(run_model, models))

# Display results
for model in results:
    print(f"\nModel: {model}")
    print(results[model])

# Save results to CSV
for model, weights in results.items():
    # run_model returns an error string instead of a Series when a model fails.
    if not isinstance(weights, pd.Series):
        print(f"Skipping saving weights for model '{model}' due to error: {weights}")
        continue
    weights.to_csv(f'weights_{model}.csv', index=True)


In [1]:
from data_loading import load_data
from helpers import get_gower_weights
import pandas as pd
import numpy as np

# --- 2) Intra Period Gower Weights --- #

# load data
periods = [51]
votes, affairs, councillors = load_data(periods)
target = pd.read_csv('../data/clean/cospon_count_intra_period.csv')

# Define your features
features = [
    'degree_class', 
    'profession_class', 
    'gender',
    'average_age', 
    'lang_region', 
    'military_rank_ordinal', 
    'faction_ordinal'
]

# create k splits of sorted data
k = 5
votes = votes.sort_values('date_clean')
# Split the row positions rather than the DataFrame itself: np.array_split on
# a DataFrame goes through the deprecated DataFrame.swapaxes and emits a
# FutureWarning. Slicing with .iloc yields the same k consecutive chunks.
split_positions = np.array_split(np.arange(len(votes)), k)
splits = [votes.iloc[positions] for positions in split_positions]

# Create cumulative combinations of splits:
# start from 2 to get split1+2, up to split1+2+...+k
cumulative_splits = [pd.concat(splits[:i]) for i in range(2, k + 1)]

# compute gower weights for each split
for i, split in enumerate(cumulative_splits):
    print(f'iteration {i+1} out of {len(cumulative_splits)}')
    # keep only councillors that appear in the votes of this cumulative split
    councillors_filt = councillors[councillors['elanId'].isin(split['elanId'].unique())]
    # filter target
    # NOTE(review): the suffix i+1 is the position within cumulative_splits
    # (1..k-1), not the number of chunks combined (2..k) -- confirm this
    # matches the legislative_period labels in the target file.
    leg_period = f'{periods[0]}_{i+1}'
    target_filt = target[target['legislative_period'] == leg_period]
    # compute gower weights
    result = get_gower_weights(councillors_filt, features, target_filt, 'randomForestReg', s=0.25)
    # save
    result.to_csv(f'../data/clean/gower_weights/oom_tuning/gower_weights_{leg_period}.csv')

  return bound(*args, **kwds)


iteration 1 out of 4
iteration 2 out of 4
iteration 3 out of 4
iteration 4 out of 4
