# Simulation

In [1]:
import pandas as pd
import numpy as np

import logging
import sys
sys.path.insert(0, '../src')

import utils.methods as al
from utils.logger import logger
from models.XPLORE import XPLORE

### Load Data

In [2]:
# Directory that holds the input CSV files.
folder_name = '../data/'
# Strip the trailing separator before joining so the paths below read
# '../data/<file>' instead of '../data//<file>'.
data_dir = folder_name.rstrip('/')

# Every CSV is read with its first column as the index.
voters = pd.read_csv(f'{data_dir}/voters_reactions.csv', index_col=0)
candidates = pd.read_csv(f'{data_dir}/candidates_reactions.csv', index_col=0)
# Empty frame that only carries the candidates' columns; it is registered
# further down as the 'Coldstart' dataset.
coldstart = pd.DataFrame([], columns=candidates.columns)
gptdata = pd.read_csv(f'{data_dir}/coldstart.csv', index_col=0)
statements = pd.read_csv(f'{data_dir}/questions.csv', index_col=0)
gptvoters = pd.read_csv(f'{data_dir}/gpt_voters.csv', index_col=0)

# Fixed-seed sample of ten voters for the quick framework check below.
test_reactions = voters.sample(n=10, random_state=0)
test_reactions


Unnamed: 0_level_0,32214,32215,32216,32217,32218,32219,32220,32221,32222,32223,...,32279,32280,32281,32282,32283,32284,32285,32286,32287,32288
voter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1160049,1.0,0.25,0.75,0.0,0.25,1.0,0.75,0.0,0.25,0.25,...,0.17,0.67,0.75,0.75,0.25,0.75,0.5,0.25,0.25,0.75
717263,1.0,0.75,0.75,0.0,0.75,0.25,1.0,1.0,0.25,0.0,...,0.83,0.67,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
231494,0.0,1.0,1.0,0.0,1.0,1.0,0.75,1.0,0.0,0.75,...,0.17,0.83,1.0,1.0,0.25,1.0,0.0,0.5,0.5,0.75
1444728,0.75,0.25,1.0,0.0,0.25,1.0,0.75,0.75,0.25,0.25,...,0.67,0.83,0.75,0.5,0.5,0.75,0.5,0.5,0.25,0.5
1208012,0.25,0.75,0.0,0.25,0.75,0.75,1.0,1.0,0.75,0.25,...,0.33,0.83,0.75,1.0,0.5,0.5,0.25,0.5,0.5,1.0
525723,1.0,0.0,1.0,1.0,0.75,0.25,1.0,1.0,1.0,0.0,...,0.67,1.0,0.5,0.75,0.5,0.5,0.0,0.5,0.75,0.75
792976,0.25,0.75,1.0,0.25,1.0,0.75,0.75,0.75,0.0,0.0,...,0.17,0.83,0.75,0.75,0.5,0.75,0.25,0.5,0.5,0.75
229866,0.75,1.0,1.0,0.0,1.0,1.0,0.25,1.0,0.75,0.0,...,0.5,1.0,1.0,0.75,0.75,1.0,0.25,0.5,0.0,0.5
528515,0.75,0.75,0.75,0.25,0.0,0.0,0.75,0.0,0.75,0.75,...,1.0,0.33,0.25,0.5,0.5,0.5,0.75,0.75,0.5,0.25
492506,0.25,1.0,0.75,0.0,1.0,1.0,0.25,1.0,0.25,0.0,...,0.17,0.83,0.75,0.75,0.25,1.0,0.25,0.25,0.25,0.5


### Prepare Model

In [3]:
# Build the XPLORE model from the candidates' reactions, using a diagonal
# 2x2 prior covariance with 0.1 on the diagonal.
xplore = XPLORE(
    candidates,
    prior_cov=np.diag([.1, .1]),
)
# Show the first rows of the model's item table.
xplore.items.head()

Unnamed: 0,beta1,beta2,alpha
32214,1.617838,-3.421553,-0.418051
32215,-3.599476,1.281332,0.446724
32216,-0.647695,0.816033,1.080903
32217,1.951557,-3.968032,-0.153674
32218,-4.754032,0.321055,0.356404


In [4]:
# Evaluate the model built above; the cell output is a pair of numbers
# (presumably RMSE and CRA, the metrics the simulation logs — TODO confirm).
xplore.evaluate()

(0.2564247407867207, 0.7786727567217363)

### Implement Framework

In [6]:
# Quick check of the framework on the ten sampled voters: random query
# selection on top of the model built above, with number_queries=31.
method = al.RandomSelection(model=xplore)
simulation = al.ColdStartSimulation(method, test_reactions, candidates, number_queries=31)
simulation.run()

INFO - User 0: 1160049 gives 0.32 RMSE and 0.47% CRA.
INFO - User 1: 717263 gives 0.30 RMSE and 0.50% CRA.
INFO - User 2: 231494 gives 0.30 RMSE and 0.28% CRA.
INFO - User 3: 1444728 gives 0.25 RMSE and 0.33% CRA.
INFO - User 4: 1208012 gives 0.34 RMSE and 0.39% CRA.
DEBUG - Upgrade with batch sparsity: 0.59
INFO - Initialized model with 1034 users.
INFO - User 5: 525723 gives 0.35 RMSE and 0.42% CRA.
INFO - User 6: 792976 gives 0.20 RMSE and 0.42% CRA.
INFO - User 7: 229866 gives 0.36 RMSE and 0.50% CRA.
INFO - User 8: 528515 gives 0.26 RMSE and 0.50% CRA.
INFO - User 9: 492506 gives 0.16 RMSE and 0.61% CRA.


In [7]:
# Store the results of the run above. Called without arguments here, whereas
# the later cells pass a result name and a seed suffix. The cell displays the
# per-user table that the call returns.
simulation.save_results()

Unnamed: 0,User,RMSE,CRA,ModelVersion,TimeStamp,Queries
0,1160049,0.322238,0.472222,0,2024-09-27 09:54:04.838231,"[32216, 32220, 32223, 32227, 32229, 32230, 322..."
1,717263,0.302676,0.5,0,2024-09-27 09:54:04.857870,"[32216, 32220, 32221, 32224, 32225, 32229, 322..."
2,231494,0.2968,0.277778,0,2024-09-27 09:54:04.877784,"[32215, 32217, 32220, 32222, 32223, 32224, 322..."
3,1444728,0.252943,0.333333,0,2024-09-27 09:54:04.896835,"[32216, 32218, 32219, 32220, 32221, 32222, 322..."
4,1208012,0.338624,0.388889,0,2024-09-27 09:54:04.916255,"[32216, 32218, 32220, 32225, 32227, 32229, 322..."
5,525723,0.34638,0.416667,5,2024-09-27 09:54:05.058116,"[32217, 32218, 32219, 32224, 32225, 32227, 322..."
6,792976,0.204851,0.416667,5,2024-09-27 09:54:05.076614,"[32216, 32219, 32223, 32227, 32228, 32229, 322..."
7,229866,0.363285,0.5,5,2024-09-27 09:54:05.095522,"[32214, 32218, 32220, 32221, 32223, 32228, 322..."
8,528515,0.263387,0.5,5,2024-09-27 09:54:05.114057,"[32214, 32219, 32222, 32224, 32235, 32236, 322..."
9,492506,0.15699,0.611111,5,2024-09-27 09:54:05.132725,"[32219, 32221, 32225, 32226, 32228, 32230, 322..."


## Run Simulations

In [7]:
# Registry of the data sets a model can be initialised from, keyed by the name
# that run_simulation() receives and that ends up in the saved result name.
# Insertion order matters: the sweep further down iterates over the keys.
datasets = {'Coldstart': coldstart,  # empty frame, columns only
            'GPT': gptdata,
            'GPTmeans':gptdata.groupby(level=0).mean(),  # one averaged row per index value
            'GPTvoters': gptvoters.sample(n=1000, random_state=42),  # fixed-seed subsample
            'Candidates': candidates,
            # 'Voters': voters.sample(n=1000, random_state=42)
            }


In [4]:
logger.setLevel(logging.WARNING)

def run_simulation(dataset: str, method: type[al.SelectionMethod], n_queries:int, n_voters: int, random_state: int, verbose=40):
    """Run one cold-start simulation and save its results.

    Reads the notebook-level ``voters``, ``candidates`` and ``datasets``
    objects, so those cells must have been executed first.

    Args:
        dataset: Key into ``datasets`` selecting the data the model is
            initialised with; also used as the name of the saved results.
        method: Selection-method *class* (e.g. ``al.Uncertainty``), not an
            instance. It is instantiated here as ``method(model=model)``.
        n_queries: Passed to ``al.ColdStartSimulation`` as ``number_queries``.
        n_voters: Number of rows sampled from ``voters`` as test users.
        random_state: Seed for the voter sample; also appended to the saved
            results as the suffix ``_S<random_state>``.
        verbose: Forwarded unchanged to ``simulation.run``.

    Raises:
        KeyError: If ``dataset`` is not a key of ``datasets``.
    """
    # Fail early with a readable message instead of a bare KeyError.
    if dataset not in datasets:
        raise KeyError(f'Unknown dataset {dataset!r}; available: {list(datasets)}')

    sampled_voters = voters.sample(n=n_voters, random_state=random_state)
    model = XPLORE(datasets[dataset], prior_cov=np.array([[.075, 0], [0, .075]]))
    # Keep the class and its instance under separate names rather than
    # rebinding the ``method`` parameter.
    selection_method = method(model=model)
    simulation = al.ColdStartSimulation(selection_method, sampled_voters, candidates, number_queries=n_queries)
    simulation.run(verbose=verbose)
    simulation.save_results(f'{dataset}', suffix=f'_S{random_state}')

In [None]:
# One run on the 'GPTvoters' data set: uncertainty-based selection,
# 10 queries, 1200 sampled voters, seed 0.
run_simulation(
    dataset='GPTvoters',
    method=al.Uncertainty,
    n_queries=10,
    n_voters=1200,
    random_state=0,
    verbose=75,
)



In [9]:
# Raise the log threshold so only the progress prints below appear.
logger.setLevel(logging.CRITICAL)

# Ten repetitions on the empty 'Coldstart' data set, one per seed 0..9.
for seed in range(10):
    print(f'Starting simulation {seed}.')
    run_simulation(
        dataset='Coldstart',
        method=al.Uncertainty,
        n_queries=45,
        n_voters=1200,
        random_state=seed,
        verbose=None,
    )

Starting simulation 0.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation 1.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation 2.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation 3.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation 4.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation 5.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation 6.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation 7.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation 8.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation 9.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


In [9]:
# Raise the log threshold so only the progress prints below appear.
logger.setLevel(logging.CRITICAL)

# Sweep every registered data set over the query counts and seeds below.
start_time = pd.Timestamp.now()
for data in datasets:
    for K in range(45, 46, 5):  # currently the single value 45
        for random_state in range(90, 100):
            # total_seconds() covers the whole elapsed span. Timedelta.seconds
            # is only the seconds component and wraps every 24 hours.
            elapsed = (pd.Timestamp.now() - start_time).total_seconds()
            print(f'Starting simulation with {data}, {K} responses, and seed {random_state} after {round(elapsed/60)} minutes.')
            run_simulation(dataset=data,
                           method=al.Uncertainty,
                           n_queries=K,
                           n_voters=1000,
                           random_state=random_state,
                           verbose=None)

Starting simulation with Coldstart, 45 responses, and seed 90 after 0 minutes.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation with Coldstart, 45 responses, and seed 91 after 8 minutes.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation with Coldstart, 45 responses, and seed 92 after 15 minutes.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation with Coldstart, 45 responses, and seed 93 after 23 minutes.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation with Coldstart, 45 responses, and seed 94 after 30 minutes.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation with Coldstart, 45 responses, and seed 95 after 37 minutes.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation with Coldstart, 45 responses, and seed 96 after 44 minutes.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation with Coldstart, 45 responses, and seed 97 after 51 minutes.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation with Coldstart, 45 responses, and seed 98 after 59 minutes.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation with Coldstart, 45 responses, and seed 99 after 66 minutes.


  new_reactions = pd.concat([self.method.model.fit_reactions, batch_data])


Starting simulation with GPT, 45 responses, and seed 90 after 73 minutes.
Starting simulation with GPT, 45 responses, and seed 91 after 80 minutes.
Starting simulation with GPT, 45 responses, and seed 92 after 88 minutes.
Starting simulation with GPT, 45 responses, and seed 93 after 95 minutes.
Starting simulation with GPT, 45 responses, and seed 94 after 102 minutes.
Starting simulation with GPT, 45 responses, and seed 95 after 110 minutes.
Starting simulation with GPT, 45 responses, and seed 96 after 117 minutes.
Starting simulation with GPT, 45 responses, and seed 97 after 124 minutes.
Starting simulation with GPT, 45 responses, and seed 98 after 132 minutes.
Starting simulation with GPT, 45 responses, and seed 99 after 139 minutes.
Starting simulation with GPTmeans, 45 responses, and seed 90 after 146 minutes.
Starting simulation with GPTmeans, 45 responses, and seed 91 after 154 minutes.
Starting simulation with GPTmeans, 45 responses, and seed 92 after 161 minutes.
Starting simul

### Replace Strategy

In [6]:
# Replace strategy on the GPT data: sweep the forget cap and forget step for
# each seed, saving one result set per (cap, step, seed) combination.
start_time = pd.Timestamp.now()
for random_state in range(90,100):
    for forget_step in [2, 4, 6, 8, 10]:
        for forget_max in [360, 400]:
            # total_seconds() covers the whole elapsed span. Timedelta.seconds
            # is only the seconds component and wraps every 24 hours.
            elapsed = (pd.Timestamp.now() - start_time).total_seconds()
            print(f'Starting simulation with cap {forget_max}, step {forget_step}, and seed {random_state} after {round(elapsed/60)} minutes.')
            test_reactions = voters.sample(n=1200, random_state=random_state)
            # A fresh model and selection method are built for every run.
            model = XPLORE(gptdata, prior_cov=np.array([[.075, 0], [0, .075]]))
            method = al.Uncertainty(model=model)
            simulation = al.ColdStartSimulation(method, test_reactions, candidates, number_queries=30, forget=forget_max, forget_step=forget_step)
            simulation.run(verbose=100)
            simulation.save_results(f'GPT-Replace-{forget_max}-{forget_step}', suffix=f'_S{random_state}')

Starting simulation with cap 360, step 2, and seed 90 after 0 minutes.
Starting simulation with cap 400, step 2, and seed 90 after 6 minutes.
Starting simulation with cap 360, step 4, and seed 90 after 11 minutes.
Starting simulation with cap 400, step 4, and seed 90 after 16 minutes.
Starting simulation with cap 360, step 6, and seed 90 after 22 minutes.
Starting simulation with cap 400, step 6, and seed 90 after 28 minutes.
Starting simulation with cap 360, step 8, and seed 90 after 33 minutes.
Starting simulation with cap 400, step 8, and seed 90 after 39 minutes.
Starting simulation with cap 360, step 10, and seed 90 after 44 minutes.
Starting simulation with cap 400, step 10, and seed 90 after 50 minutes.
Starting simulation with cap 360, step 2, and seed 91 after 56 minutes.
Starting simulation with cap 400, step 2, and seed 91 after 61 minutes.
Starting simulation with cap 360, step 4, and seed 91 after 67 minutes.
Starting simulation with cap 400, step 4, and seed 91 after 72 m

In [8]:
# Replace strategy on the GPT data with larger forget steps (20 and 40) at a
# fixed cap of 400, saving one result set per (cap, step, seed) combination.
start_time = pd.Timestamp.now()
for random_state in range(90,100):
    for forget_step in [20, 40]:
        for forget_max in [400]:
            # total_seconds() covers the whole elapsed span. Timedelta.seconds
            # is only the seconds component and wraps every 24 hours.
            elapsed = (pd.Timestamp.now() - start_time).total_seconds()
            print(f'Starting simulation with cap {forget_max}, step {forget_step}, and seed {random_state} after {round(elapsed/60)} minutes.')
            test_reactions = voters.sample(n=1200, random_state=random_state)
            # A fresh model and selection method are built for every run.
            model = XPLORE(gptdata, prior_cov=np.array([[.075, 0], [0, .075]]))
            method = al.Uncertainty(model=model)
            simulation = al.ColdStartSimulation(method, test_reactions, candidates, number_queries=30, forget=forget_max, forget_step=forget_step)
            simulation.run(verbose=100)
            simulation.save_results(f'GPT-Replace-{forget_max}-{forget_step}', suffix=f'_S{random_state}')

Starting simulation with cap 400, step 20, and seed 90 after 0 minutes.
Starting simulation with cap 400, step 40, and seed 90 after 6 minutes.
Starting simulation with cap 400, step 20, and seed 91 after 11 minutes.
Starting simulation with cap 400, step 40, and seed 91 after 16 minutes.
Starting simulation with cap 400, step 20, and seed 92 after 22 minutes.
Starting simulation with cap 400, step 40, and seed 92 after 27 minutes.
Starting simulation with cap 400, step 20, and seed 93 after 33 minutes.
Starting simulation with cap 400, step 40, and seed 93 after 38 minutes.
Starting simulation with cap 400, step 20, and seed 94 after 44 minutes.
Starting simulation with cap 400, step 40, and seed 94 after 49 minutes.
Starting simulation with cap 400, step 20, and seed 95 after 55 minutes.
Starting simulation with cap 400, step 40, and seed 95 after 61 minutes.
Starting simulation with cap 400, step 20, and seed 96 after 67 minutes.
Starting simulation with cap 400, step 40, and seed 9

In [None]:
# Replace strategy starting from the GPT voter sample stacked on top of the
# per-index GPT means.
dataset = pd.concat([gptvoters.sample(n=1000, random_state=42),gptdata.groupby(level=0).mean()])

# Forget settings, named so the saved result name reflects them in the same
# '<cap>-<step>' form as the GPT-Replace runs. The original f-string
# interpolated the literal `...`, which saved as 'GPTvoters-Replace-Ellipsis'.
forget_max = 1200
forget_step = 15

for random_state in range(90,100):
    print(f'Starting simulation {random_state}.')
    test_reactions = voters.sample(n=1200, random_state=random_state)
    # A fresh model and selection method are built for every seed.
    model = XPLORE(dataset, prior_cov=np.array([[.075, 0], [0, .075]]))
    method = al.Uncertainty(model=model)
    simulation = al.ColdStartSimulation(method, test_reactions, candidates, number_queries=30, forget=forget_max, forget_step=forget_step)
    simulation.run(verbose=100)
    simulation.save_results(f'GPTvoters-Replace-{forget_max}-{forget_step}', suffix=f'_S{random_state}')

(5.953333333333333, 2.815)