In [1]:
import os
import pandas as pd
from time import sleep

In [2]:
# Generate and save inferred model if does not exist
if not os.path.isfile(f"bayesian_model/glmm.pickle"):
    os.system("python3 infer_bayesian_model.py")

In [3]:
def run_command_gnome(command, title="test", keep_window=False, test=False):
    if keep_window:
        command_suffix = "; $SHELL"
    else:
        command_suffix = ""
        
    if test:
        return f"""gnome-terminal --title="{title}" --command="bash -c '{command} {command_suffix}'" """
    else:
        os.system(f"""gnome-terminal --title="{title}" --command="bash -c '{command} {command_suffix}'" """)

In [8]:
# Run simulations
def run_simulations():
    setting_files = [os.path.isfile(f"data/{sim_name}/sim_settings_{i}.csv") for i in range(N_PROCESSES)]
    process_active = [os.path.isfile(f"data/{sim_name}/process_{i}_running") for i in range(N_PROCESSES)]
    while sum(setting_files)>0:
        for i in range(N_PROCESSES):
            if setting_files[i] and not process_active[i]:
                source = "source ~/anaconda3/etc/profile.d/conda.sh\n"
                conda = "conda activate bayes_topic_measures\n"
                c = f"""python3 perform_sig_test.py --trials_per_sim {trials_per_sim} --process {i} --sim_name {sim_name} --n_runs 10 --optimal_alloc {optimal_alloc}"""
                run_command_gnome(source+conda+c, title=f"p{i}", keep_window=False)

        sleep(10)
        setting_files = [os.path.isfile(f"data/{sim_name}/sim_settings_{i}.csv") for i in range(N_PROCESSES)]
        process_active = [os.path.isfile(f"data/{sim_name}/process_{i}_running") for i in range(N_PROCESSES)]

In [5]:
# Settings for hoyle's significance testing
hoyle_total_scores = 50*26
hoyle_p_diff = 0.055
hoyle_n_raters = 38
hoyle_scores_per_r = hoyle_total_scores//hoyle_n_raters + 1
optimal_alloc = False

## Significance

In [7]:
# Varying the number of raters
N_PROCESSES = 6
sim_name = "sig_n_raters"
n_sims = 1_000
trials_per_sim = 1

p_diff = hoyle_p_diff
n_raters = "(15, 70)"
scores_per_r = hoyle_scores_per_r

# Removing all produced files
os.system(f"""rm data/{sim_name}/*""")

# Generate settings file
os.system(f"""
    python3 generate_sim_settings.py --n_raters "{n_raters}" --scores_per_r {scores_per_r} \
    --n_sims {n_sims} --trials_per_sim {trials_per_sim} --sim_name {sim_name}
""")

run_simulations()

In [7]:
# Varying the number of raters
N_PROCESSES = 6
sim_name = "sig_n_raters2"
n_sims = 1_000
trials_per_sim = 1

p_diff = hoyle_p_diff
n_raters = "(20, 150)"
scores_per_r = hoyle_scores_per_r

# Removing all produced files
os.system(f"""rm data/{sim_name}/*""")

# Generate settings file
os.system(f"""
    python3 generate_sim_settings.py --n_raters "{n_raters}" --scores_per_r {scores_per_r} \
    --n_sims {n_sims} --trials_per_sim {trials_per_sim} --sim_name {sim_name}
""")

run_simulations()

In [None]:
# Varying the topics_per_r
N_PROCESSES = 6
sim_name = "sig_scores_per_r"
n_sims = 1_000
trials_per_sim = 1

p_diff = hoyle_p_diff
scores_per_r = "(10, 100)"
total_scores = hoyle_total_scores

# Removing all produced files
if os.path.isfile(f"data/{sim_name}/sim_settings.csv"):
    os.system(f"""rm data/{sim_name}/*""")

# Generate settings file
os.system(f"""
    python3 generate_sim_settings.py --scores_per_r "{scores_per_r}" --total_scores {total_scores}\
    --n_sims {n_sims} --trials_per_sim {trials_per_sim} --sim_name {sim_name}
""")

run_simulations()

In [16]:
# Varying the topics_per_r
N_PROCESSES = 6
sim_name = "test"
n_sims = 100
trials_per_sim = 1

p_diff = hoyle_p_diff
scores_per_r = "(10, 100)"
total_scores = hoyle_total_scores

# Removing all produced files
if os.path.isfile(f"data/{sim_name}/sim_settings.csv"):
    os.system(f"""rm data/{sim_name}/*""")

# Generate settings file
os.system(f"""
    python3 generate_sim_settings.py --scores_per_r "{scores_per_r}" --total_scores {total_scores}\
    --n_sims {n_sims} --trials_per_sim {trials_per_sim} --sim_name {sim_name}
""")

run_simulations()


    p_diff=0.055, <class 'float'>
    n_raters=None, <class 'NoneType'>
    scores_per_r=(10, 100), <class 'tuple'>
    total_scores=1300, <class 'int'>
    n_sims=1000, <class 'int'>
    trials_per_sim=1
    seed=42, <class 'int'>
    sim_name=sig_scores_per_r, <class 'str'>
    chain_method=vectorized, <class 'str'>
    clean_dir=True, <class 'bool'>
    


# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to termin

In [20]:
# Varying the number of raters
N_PROCESSES = 6
sim_name = "sig_p_n_raters"
n_sims = 1_000
trials_per_sim = 1

p_diff = "(0.02, 0.2)"
n_raters = "(15, 70)"
scores_per_r = hoyle_scores_per_r

# Removing all produced files
if os.path.isfile(f"data/{sim_name}/sim_settings.csv"):
    os.system(f"""rm data/{sim_name}/*""")

# Generate settings file
os.system(f"""
    python3 generate_sim_settings.py --p_diff "{p_diff}" --n_raters "{n_raters}" \
    --scores_per_r {scores_per_r} --n_sims {n_sims} --trials_per_sim {trials_per_sim} \
    --sim_name {sim_name}
""")

run_simulations()


    p_diff=(0.02, 0.2), <class 'tuple'>
    n_raters=(15, 70), <class 'tuple'>
    scores_per_r=35, <class 'int'>
    total_scores=None, <class 'NoneType'>
    n_sims=1000, <class 'int'>
    trials_per_sim=1
    seed=42, <class 'int'>
    sim_name=sig_p_n_raters, <class 'str'>
    chain_method=vectorized, <class 'str'>
    clean_dir=True, <class 'bool'>
    


# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to termin

In [21]:
# Varying the topics_per_r and p_diff
N_PROCESSES = 6
sim_name = "sig_p_scores_per_r"
n_sims = 1_000
trials_per_sim = 1

p_diff = "(0.02, 0.2)"
scores_per_r = "(10, 100)"
total_scores = hoyle_total_scores

# Removing all produced files
if os.path.isfile(f"data/{sim_name}/sim_settings.csv"):
    os.system(f"""rm data/{sim_name}/*""")

# Generate settings file
os.system(f"""
    python3 generate_sim_settings.py --p_diff "{p_diff}" --scores_per_r "{scores_per_r}" \
    --total_scores {total_scores} --n_sims {n_sims} --trials_per_sim {trials_per_sim} \
    --sim_name {sim_name}
""")

run_simulations()


    p_diff=(0.02, 0.2), <class 'tuple'>
    n_raters=None, <class 'NoneType'>
    scores_per_r=(10, 100), <class 'tuple'>
    total_scores=1300, <class 'int'>
    n_sims=1000, <class 'int'>
    trials_per_sim=1
    seed=42, <class 'int'>
    sim_name=sig_p_scores_per_r, <class 'str'>
    chain_method=vectorized, <class 'str'>
    clean_dir=True, <class 'bool'>
    


# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to termin

In [21]:
# Varying the topics_per_r and p_diff
N_PROCESSES = 6
sim_name = "sig_p_diff"
n_sims = 1_000
trials_per_sim = 1

p_diff = "(0.04, 0.08)"
scores_per_r = hoyle_scores_per_r
total_scores = hoyle_total_scores

# Removing all produced files
if os.path.isfile(f"data/{sim_name}/sim_settings.csv"):
    os.system(f"""rm data/{sim_name}/*""")

# Generate settings file
os.system(f"""
    python3 generate_sim_settings.py --p_diff "{p_diff}" --scores_per_r "{scores_per_r}" \
    --total_scores {total_scores} --n_sims {n_sims} --trials_per_sim {trials_per_sim} \
    --sim_name {sim_name}
""")

run_simulations()


    p_diff=(0.02, 0.2), <class 'tuple'>
    n_raters=None, <class 'NoneType'>
    scores_per_r=(10, 100), <class 'tuple'>
    total_scores=1300, <class 'int'>
    n_sims=1000, <class 'int'>
    trials_per_sim=1
    seed=42, <class 'int'>
    sim_name=sig_p_scores_per_r, <class 'str'>
    chain_method=vectorized, <class 'str'>
    clean_dir=True, <class 'bool'>
    


# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to termin

## Power

In [10]:
# Varying the topics_per_r and p_diff
N_PROCESSES = 6
sim_name = "power_n_raters"
n_sims = 50
trials_per_sim = 200

p_diff = hoyle_p_diff
n_raterss = range(50, 150, 2)
scores_per_r = hoyle_scores_per_r
total_scores = None

# Removing all produced files
if os.path.isfile(f"data/{sim_name}/sim_settings.csv"):
    os.system(f"""rm data/{sim_name}/*""")

# Generate settings file
for n_raters in n_raterss:
    os.system(f"""
        python3 generate_sim_settings.py --p_diff "{p_diff}" --n_raters {n_raters} \
        --scores_per_r "{scores_per_r}" --n_sims {n_sims} --trials_per_sim {trials_per_sim} \
        --sim_name {sim_name} --append "True"
    """)



# run_simulations()


    p_diff=0.055, <class 'float'>
    n_raters=50, <class 'int'>
    scores_per_r=35, <class 'int'>
    total_scores=None, <class 'NoneType'>
    n_sims=50, <class 'int'>
    trials_per_sim=200
    seed=42, <class 'int'>
    sim_name=power_n_raters, <class 'str'>
    chain_method=vectorized, <class 'str'>
    append=False, <class 'bool'>
    

    p_diff=0.055, <class 'float'>
    n_raters=52, <class 'int'>
    scores_per_r=35, <class 'int'>
    total_scores=None, <class 'NoneType'>
    n_sims=50, <class 'int'>
    trials_per_sim=200
    seed=42, <class 'int'>
    sim_name=power_n_raters, <class 'str'>
    chain_method=vectorized, <class 'str'>
    append=False, <class 'bool'>
    

    p_diff=0.055, <class 'float'>
    n_raters=54, <class 'int'>
    scores_per_r=35, <class 'int'>
    total_scores=None, <class 'NoneType'>
    n_sims=50, <class 'int'>
    trials_per_sim=200
    seed=42, <class 'int'>
    sim_name=power_n_raters, <class 'str'>
    chain_method=vectorized, <class 'str'>



    p_diff=0.055, <class 'float'>
    n_raters=98, <class 'int'>
    scores_per_r=35, <class 'int'>
    total_scores=None, <class 'NoneType'>
    n_sims=50, <class 'int'>
    trials_per_sim=200
    seed=42, <class 'int'>
    sim_name=power_n_raters, <class 'str'>
    chain_method=vectorized, <class 'str'>
    append=False, <class 'bool'>
    

    p_diff=0.055, <class 'float'>
    n_raters=100, <class 'int'>
    scores_per_r=35, <class 'int'>
    total_scores=None, <class 'NoneType'>
    n_sims=50, <class 'int'>
    trials_per_sim=200
    seed=42, <class 'int'>
    sim_name=power_n_raters, <class 'str'>
    chain_method=vectorized, <class 'str'>
    append=False, <class 'bool'>
    

    p_diff=0.055, <class 'float'>
    n_raters=102, <class 'int'>
    scores_per_r=35, <class 'int'>
    total_scores=None, <class 'NoneType'>
    n_sims=50, <class 'int'>
    trials_per_sim=200
    seed=42, <class 'int'>
    sim_name=power_n_raters, <class 'str'>
    chain_method=vectorized, <class 'str'


    p_diff=0.055, <class 'float'>
    n_raters=146, <class 'int'>
    scores_per_r=35, <class 'int'>
    total_scores=None, <class 'NoneType'>
    n_sims=50, <class 'int'>
    trials_per_sim=200
    seed=42, <class 'int'>
    sim_name=power_n_raters, <class 'str'>
    chain_method=vectorized, <class 'str'>
    append=False, <class 'bool'>
    

    p_diff=0.055, <class 'float'>
    n_raters=148, <class 'int'>
    scores_per_r=35, <class 'int'>
    total_scores=None, <class 'NoneType'>
    n_sims=50, <class 'int'>
    trials_per_sim=200
    seed=42, <class 'int'>
    sim_name=power_n_raters, <class 'str'>
    chain_method=vectorized, <class 'str'>
    append=False, <class 'bool'>
    


## Optimal Allocation

In [19]:
# Varying the number of raters
N_PROCESSES = 4
sim_name0 = "oa0_test"
sim_name1 = "oa1_test"
n_sims = 100
trials_per_sim = 1

p_diff = hoyle_p_diff
n_raters = "(20, 150)"
scores_per_r = hoyle_scores_per_r

# Removing all produced files
os.system(f"""rm data/simulations/{sim_name0}.csv""")
os.system(f"""rm data/simulations/{sim_name1}.csv""")
os.system(f"""rm data/{sim_name0}/*""")
os.system(f"""rm -rf data/{sim_name1}""")

# Generate settings file
os.system(f"""
    python3 generate_sim_settings.py --n_raters "{n_raters}" --scores_per_r {scores_per_r} \
    --n_sims {n_sims} --trials_per_sim {trials_per_sim} --sim_name {sim_name0} 
""")

os.system(f"cp -r data/{sim_name0} data/{sim_name1}")
os.system(f"cp data/simulations/{sim_name0}.csv data/simulations/{sim_name1}.csv")


    p_diff=0.055, <class 'float'>
    n_raters=(20, 150), <class 'tuple'>
    scores_per_r=35, <class 'int'>
    total_scores=None, <class 'NoneType'>
    n_sims=100, <class 'int'>
    trials_per_sim=1
    seed=42, <class 'int'>
    sim_name=oa0_test, <class 'str'>
    chain_method=vectorized, <class 'str'>
    clean_dir=True, <class 'bool'>
    


0

In [20]:
sim_name = sim_name0
optimal_alloc = False
run_simulations()

# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to termin

In [21]:
sim_name = sim_name1
optimal_alloc = True
run_simulations()

# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to terminate the options and put the command line to execute after it.
# Option “--command” is deprecated and might be removed in a later version of gnome-terminal.
# Use “-- ” to termin