In [None]:
"""
This notebook generates simulation data, using empirical data to adjust free parameters
"""

In [None]:
%load_ext autoreload

In [None]:
%autoreload

import pandas as pd
import numpy as np

from utils import generate_data

In [None]:
# Read the experiment CSV and add a "last" column that is the inverse of
# "did_continue_eval".
raw_df = pd.read_csv(
    "~/Desktop/thesis/data/Adams_experiment_cleaned_filtered.csv"
).assign(last=lambda frame: ~frame["did_continue_eval"])

# Size argument handed to generate_data in the cells below. The empirical
# alternative would be len(raw_df["subject"].unique()).
n = 10_000

# Mean "s2_value" per "word", with "word" kept as a regular column.
months = raw_df.groupby("word")["s2_value"].mean().reset_index()

In [None]:
# random based on real-life average stopping probabilities based on number evaluated so far

# Mean of the "last" flag for each distinct "order" value, as a NumPy array.
empirical_stop_proba = raw_df.groupby("order")["last"].mean().values


def stop_proba_random(_):
    """Return the empirical stopping probabilities; the argument is ignored."""
    return empirical_stop_proba

# Generate data with the empirical stopping rule and write it to disk.
# NOTE(review): the name `random` would shadow the stdlib `random` module if
# that module is ever imported in this notebook; kept for compatibility.
random = generate_data(n, months, stop_proba_random)
random.to_csv("~/Desktop/thesis/data/generated_random.csv")

# The preview must be the last expression of the cell, otherwise the notebook
# evaluates it and silently discards the result.
random.head(16)

In [None]:
# optimal with no revisiting (assumption you can't go back)
# Explore until explore_index, then take next that is at least as good

def stop_proba_no_revisiting(df, explore_index=3):
    """Stopping flags for the "explore, then take the next good one" rule.

    The first ``explore_index`` rows (in positional order) are only observed.
    Every later row whose value is at least as high as the best value seen
    during exploration is flagged with 1; all other rows get 0. If no row
    qualifies, the final row is flagged instead, since without revisiting the
    last option is the only one left.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``"value"`` column.
    explore_index : int, default 3
        Number of leading rows used purely for exploration, i.e. 1 + the
        zero-indexed position of the last exploration row. Must be >= 1.

    Returns
    -------
    list of int
        One 0/1 entry per row of ``df``; an empty list when ``df`` has no rows.

    Raises
    ------
    ValueError
        If ``explore_index`` is smaller than 1.
    """
    if explore_index < 1:
        raise ValueError("explore_index must be at least 1")

    values = list(df["value"])
    if not values:
        # Nothing to evaluate; max() over an empty slice would raise otherwise.
        return []

    highest = max(values[:explore_index])
    stop_proba = [
        1 if i >= explore_index and x >= highest else 0
        for i, x in enumerate(values)
    ]
    if not any(stop_proba):
        # No later option matched the exploration best: stop on the last one.
        stop_proba[-1] = 1
    return stop_proba

# Generate data with the no-revisiting stopping rule and write it to disk.
optimal_no_revisit = generate_data(n, months, stop_proba_no_revisiting)
optimal_no_revisit.to_csv("~/Desktop/thesis/data/generated_optimal_no_revisit.csv")

# Author's note (figure not recomputed here): 169 have their best value among
# the first 3 evaluated and thus have to go through all the months.
# TODO: count - compute this number in code instead of recording it by hand.

# The preview must be the last expression of the cell, otherwise the notebook
# evaluates it and silently discards the result.
optimal_no_revisit.head(16)

In [None]:
# optimal as a function of most recent value?
# - need to assume some cost of time...


In [None]:
# optimal based on limited knowledge/set of variables?
# as a function of best value so far (interaction with order)


In [None]:
"""
This section of the notebook generates data without reference to the empirical data.

Assumes:
- Values are normally distributed.
- No correlation between order/context-free value and context-specific value (independent random draws).
"""

In [None]:
%autoreload

from dmaker.environment import DecisionEnvironment, DecisionEnvironmentGrid
from dmaker.decision_maker import DynamicDecisionMaker

In [None]:
# Build the environment and the decision maker for the simulated run.
env = DecisionEnvironment(
    N=100,
    num_trials=100,
    sigma=1,
    mu=0,
    tau=1,
)
dm = DynamicDecisionMaker(
    env=env,
    num_samples=1000,
    cost_eval=None,
)

# Call decide(), then show the experiment data as the cell output.
dm.decide()
dm.experiment_data

In [None]:
# Write the experiment data currently held by `dm` to CSV.
# NOTE(review): this relies on `dm` from the preceding cell; `dm` is reassigned
# further down the notebook, so run the cells in order before exporting.
dm.experiment_data.to_csv("~/Desktop/thesis/data/generated_gaussian_optimal.csv")

In [None]:
# Second configuration; `num_trials` comes from the notebook-level `n`.
# NOTE(review): sigma=0 here versus sigma=1 in the earlier environment -
# confirm this is intended.
env = DecisionEnvironment(
    N=12,
    num_trials=n,
    sigma=0,
    mu=13,
    tau=8,
)
dm = DynamicDecisionMaker(
    env=env,
    num_samples=1000,
    cost_eval=0.2,
)