---
## Notes
-------


# Data Structure
    - Look at the unit tests for the data class.
    - Consider DataFrames instead of dictionaries; they are easier to reason about.
    
# RL models
    - Logical structure.
    -

In [1]:
import plotly.graph_objects as go
import plotly.express as px
from tqdm import tqdm
import pandas as pd

import numpy as np
import statistics
import scipy.io
import logging
import pprint
import os
from collections import Counter
import time
import sys
import mlflow
# internals
from src.helpers import log_rat_metadata, log_sequence_data, load_config
from src.query_dataset import (QuerySequenceData, load_data)

In [2]:
from src.rescorla_wagner_model import (RoscorlaWagner)
from src.rescorla_wagner_model_plots import (RescorlaWagnerPlots)
from src.rescorla_wagner_model_simulation import (RescorlaWagnerSimulate)
from src.rescorla_wagner_model_diagnostics import (RoscorlaWagerModelDiagnostics)

In [3]:
from src.cog_sci_random_response_model import (RandomResponseModel)
from src.cog_sci_win_stay_lose_shift_model import (WinStayLoseShiftModel)
from src.cog_sci_learning_model_base import (MultiArmedBanditModels)
from src.cog_sci_roscorla_wagner_model import RoscorlaWagnerModel

np.random.seed(2024) # set seed for reproducibility

In [6]:
# load yaml config
# `load_config` is the project helper imported from src.helpers; it is given
# the path 'config.yaml' relative to the notebook's working directory.
_config = load_config('config.yaml')
# Configure the root logger so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# Bare expression: the notebook displays the loaded config as the cell output.
_config

{'processed_data': {'location': './data/processed_data/',
  'rat_experiment': {'metadata': 'attention_behaviorals_metadata.csv',
   'trials': 'attention_behaviorals_actions.pkl'},
  'human_pilot_experiment': {'metadata': 'attention_behaviorals_human_pilot_metadata.csv',
   'trials': 'attention_behaviorals_human_pilot_actions.pkl'},
  'human_motivation_experiment': {'metadata': 'attention_behaviorals_human_motivation_metadata.csv',
   'trials': 'attention_behaviorals_human_motivation_actions.pkl'}}}

----
# Experiment Tracker
----

In [None]:
class Tracking_Experiments:
    """Accumulate per-experiment results as a column-oriented dict of lists.

    Every key returned by ``init_results`` maps to a list. Each call to
    ``update_results`` appends one value to the list of every key supplied.
    """

    def __init__(self) -> None:
        self._results = self.init_results()

    def update_results(self, results):
        """Append one value per key in ``results`` to the stored columns.

        All keys are validated before anything is appended, so a rejected
        update leaves every stored column untouched (no half-written rows).

        Args:
            results: Mapping of column name to the value to append.

        Returns:
            This tracker, so calls can be chained.

        Raises:
            ValueError: If any key in ``results`` is not a known column.
        """
        # Validate first: appending while validating would leave the columns
        # with unequal lengths when a later key turns out to be unknown.
        for key in results:
            if key not in self._results:
                raise ValueError(f"Key {key} not found in results")

        for key, val in results.items():
            self._results[key].append(val)
        return self

    @staticmethod
    def init_results():
        """Return a fresh mapping of every tracked column name to an empty list."""
        return {
            # describe experiment
            'experiment_ID': [],
            'experiment': [],

            # describe input data
            'reward_rate': [],
            'action_rate': [],
            'corr_action_reward': [],
            'corr_stim_resp': [],

            'model_1_b_pred': [],
            'model_1_negLL': [],
            'model_1_BIC': [],

            'model_2_epsilon_pred': [],
            'model_2_negLL': [],
            'model_2_BIC': [],

            'model_3_alpha_pred': [],
            'model_3_theta_pred': [],
            'model_3_negLL': [],
            'model_3_BIC': [],
            'model_3_opt_init_params': []
        }


----
# Experiment Class
----


In [9]:
# Set up a local directory for MLflow tracking
def configure_mlflow_tracking(
        mlflow_tracking_dir = "./mlruns",
        experiment_name = "RL Experiment - Rat Data - EDS baseline"):
    """Point MLflow at a local, file-based tracking directory.

    Creates the directory if needed, sets it as the MLflow tracking URI and
    selects the active experiment.

    Args:
        mlflow_tracking_dir: Directory used as the file store; created
            (including parents) when it does not exist.
        experiment_name: Name passed to ``mlflow.set_experiment``.
    """
    # Local import so the block does not depend on a notebook-level import.
    from pathlib import Path

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(mlflow_tracking_dir, exist_ok=True)

    print('Warning: using file based method for mlflow tracking.')
    # as_uri() builds a well-formed file URI (escaping, Windows drive letters)
    # instead of gluing "file://" onto a raw path string.
    tracking_uri = Path(os.path.abspath(mlflow_tracking_dir)).as_uri()
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(experiment_name)

In [None]:
# qsd = QuerySequenceData(StimCode, RespCode)

In [7]:

def run_experiment_model_1(action_vector, reward_vector, init_guess=None):
    """Fit model 1 (Random Response Model) to one experiment.

    Args:
        action_vector: Actions passed to the loss function.
        reward_vector: Rewards passed to the loss function.
        init_guess: Starting parameter guess for the optimizer. ``None``
            (the default) means ``[0.5]``.

    Returns:
        Whatever ``RandomResponseModel.optimize_scikit`` returns.
    """
    # Build the default per call: a list literal in the signature would be a
    # single object shared (and possibly mutated) across every call.
    if init_guess is None:
        init_guess = [0.5]

    rrm = RandomResponseModel()
    results = rrm.optimize_scikit(
        loss_function=rrm.neg_log_likelihood,
        init_guess=init_guess,
        args=(action_vector, reward_vector), bounds=[(0,1)])
    return results

def run_experiment_model_2(action_vector, reward_vector, init_guess=None):
    """Fit model 2 (Win-Stay-Lose-Shift Model) to one experiment.

    Args:
        action_vector: Actions passed to the loss function.
        reward_vector: Rewards passed to the loss function.
        init_guess: Starting parameter guess for the optimizer. ``None``
            (the default) means ``[0.5]``.

    Returns:
        Whatever ``WinStayLoseShiftModel.optimize_scikit`` returns.
    """
    # Build the default per call: a list literal in the signature would be a
    # single object shared (and possibly mutated) across every call.
    if init_guess is None:
        init_guess = [0.5]

    wsls = WinStayLoseShiftModel()
    results = wsls.optimize_scikit(
        loss_function=wsls.neg_log_likelihood,
        init_guess=init_guess,
        args=(action_vector, reward_vector), bounds=[(0,1)])
    return results

def run_experiment_model_3(action_vector, reward_vector):
    """Fit model 3 (``RoscorlaWagnerModel``) over a grid of initial parameters.

    The search starts from 5 evenly spaced alpha values in [0, 1] and 7 evenly
    spaced theta values in [0.1, 10], with the same intervals used as bounds.

    Args:
        action_vector: Actions handed to the model as ``actions``.
        reward_vector: Rewards handed to the model as ``rewards``.

    Returns:
        Tuple ``(negLL, params_opt, BIC, optimal_init_params)`` unpacked from
        the model's result.
    """
    alpha_starts = np.linspace(0, 1, 5)
    theta_starts = np.linspace(.1, 10, 7)

    fit = RoscorlaWagnerModel().optimize_scikit_model_over_init_parameters(
        actions=action_vector,
        rewards=reward_vector,
        loss_function=None,
        alpha_init_range=alpha_starts,
        theta_init_range=theta_starts,
        bounds=((0,1), (0.1, 10)),
        log_progress=False,
    )

    # Unpack explicitly so a result of the wrong length fails here.
    neg_log_lik, fitted_params, bic, best_init = fit
    return neg_log_lik, fitted_params, bic, best_init

In [8]:
def compute_experiment_features(qsd):
    """Summarise one experiment's stimulus/response codes, actions and rewards.

    Args:
        qsd: Object exposing ``_stimCodeFlat``, ``_respCodeFlat``, ``_action``
            and ``_reward``.

    Returns:
        An 8-tuple: stimulus-code counts, response-code counts, the action
        sequence, the reward sequence, mean action, mean reward, the
        action/reward correlation and the stimulus/response correlation.
    """
    stim_codes = qsd._stimCodeFlat
    resp_codes = qsd._respCodeFlat
    action = qsd._action
    reward = qsd._reward

    return (
        Counter(stim_codes),
        Counter(resp_codes),
        action,
        reward,
        np.mean(action),
        np.mean(reward),
        # [0, 1] picks the off-diagonal entry of the 2x2 correlation matrix.
        np.corrcoef(action, reward)[0, 1],
        np.corrcoef(stim_codes, resp_codes)[0, 1],
    )


In [None]:

def generate_experiment(experiments):
    """Lazily yield ``(index, experiment)`` pairs, counting from zero."""
    yield from enumerate(experiments)


In [None]:
def print_progress_bar(iteration, total, length=50):
    """Redraw a one-line text progress bar on stdout.

    The line starts with a carriage return so successive calls overwrite each
    other; no newline is written.

    Args:
        iteration: Number of completed steps. Values outside ``[0, total]``
            are clamped so the bar never overflows or goes negative.
        total: Expected number of steps. A non-positive total is drawn as a
            finished bar instead of raising ``ZeroDivisionError``.
        length: Width of the bar in characters.
    """
    if total <= 0:
        # Nothing to wait for: show a finished bar rather than divide by zero.
        iteration, total = 1, 1
    else:
        iteration = min(max(iteration, 0), total)

    percent = ("{0:.1f}").format(100 * (iteration / float(total)))
    filled_length = int(length * iteration // total)
    bar = '█' * filled_length + '-' * (length - filled_length)
    sys.stdout.write(f'\r|{bar}| {percent}% Complete')
    sys.stdout.flush()

# # Example loop with manual progress bar
# total = 100
# for i in range(total):
#     time.sleep(0.1)  # Simulate work being done
#     print_progress_bar(i + 1, total)

# print()  # Move to the next line after the progress bar is complete

def generate_experiment(experiments):
    """Yield each experiment paired with its zero-based position."""
    position = 0
    for item in experiments:
        yield position, item
        position += 1

def run_experiment(idx, exp, StimCode, RespCode, drop_n_trails=0):
    """Fit all three models to one experiment and return a flat result record.

    Args:
        idx: Identifier stored under ``'experiment_ID'``.
        exp: Experiment descriptor; forwarded to ``extract_experiment`` and
            stored under ``'experiment'``.
        StimCode: Forwarded to ``extract_experiment``.
        RespCode: Forwarded to ``extract_experiment``.
        drop_n_trails: When positive, that many leading entries are removed
            from the action and reward vectors before the models are fitted.

    Returns:
        Dict with one entry per tracked column: experiment identifiers, input
        summary statistics and the fitted values of models 1-3.
    """
    qsd, action_vector, reward_vector = extract_experiment(exp, StimCode, RespCode)

    # Optionally skip the first trials before fitting.
    if drop_n_trails > 0:
        action_vector, reward_vector = (
            action_vector[drop_n_trails:],
            reward_vector[drop_n_trails:],
        )

    # Only the four summary statistics are used; counts and raw vectors are ignored.
    # NOTE(review): these statistics are computed from `qsd`, not from the
    # trimmed vectors above — confirm they are meant to ignore drop_n_trails.
    (_, _, _, _,
     action_mean, reward_mean,
     action_reward_corr, stim_resp_corr) = compute_experiment_features(qsd)

    # Fit the three models on the (possibly trimmed) vectors.
    fit_1 = run_experiment_model_1(action_vector, reward_vector)
    fit_2 = run_experiment_model_2(action_vector, reward_vector)
    fit_3 = run_experiment_model_3(action_vector, reward_vector)

    # Model 3 returns a 4-sequence rather than a dict.
    negLL, params_opt, BIC, optimal_init_params = fit_3

    del qsd

    record = {
        # describe experiment
        'experiment_ID': idx,
        'experiment': exp,
        # describe input data
        'reward_rate': reward_mean,
        'action_rate': action_mean,
        'corr_action_reward': action_reward_corr,
        'corr_stim_resp': stim_resp_corr,
    }
    record.update({
        'model_1_b_pred': fit_1['param_opt'][0],
        'model_1_negLL': fit_1['negLL'],
        'model_1_BIC': fit_1['BIC'],
    })
    record.update({
        'model_2_epsilon_pred': fit_2['param_opt'][0],
        'model_2_negLL': fit_2['negLL'],
        'model_2_BIC': fit_2['BIC'],
    })
    record.update({
        'model_3_alpha_pred': params_opt[0],
        'model_3_theta_pred': params_opt[1],
        'model_3_negLL': negLL,
        'model_3_BIC': BIC,
        'model_3_opt_init_params': optimal_init_params,
    })
    return record

def run_experiment_suite(exp_gen, StimCode, RespCode, n_experiments=100, drop_n_trails=0, mlflow_tracking=True):
    """Run every experiment from ``exp_gen`` and collect the results in a DataFrame.

    Args:
        exp_gen: Iterable of ``(idx, experiment)`` pairs.
        StimCode: Forwarded to ``run_experiment``.
        RespCode: Forwarded to ``run_experiment``.
        n_experiments: Expected number of experiments; only used to scale the
            progress bar.
        drop_n_trails: Forwarded to ``run_experiment``.
        mlflow_tracking: When true, each experiment is also logged to MLflow
            as a nested run.

    Returns:
        ``pandas.DataFrame`` with one row per experiment.
    """
    # Use the notebook's Tracking_Experiments store: free functions named
    # init_results / update_results are not defined anywhere in this notebook.
    tracker = Tracking_Experiments()

    for idx, exp in exp_gen:
        print_progress_bar(idx + 1, n_experiments)

        _result = run_experiment(idx, exp, StimCode, RespCode, drop_n_trails=drop_n_trails)
        tracker.update_results(_result)

        if mlflow_tracking:
            with mlflow.start_run(run_name=f'Experiment {idx}', nested=True):
                # Plain numbers become metrics; everything else is logged as
                # a stringified parameter.
                _metrics = {k:v for k,v in _result.items() if isinstance(v, (int, float))}
                _params = {k:str(v) for k,v in _result.items() if not isinstance(v, (int, float))}

                # Log only the non-numeric entries as params; logging the whole
                # result would duplicate every metric as a parameter.
                mlflow.log_params(_params)
                mlflow.log_metrics(_metrics)

                mlflow.log_param('experiment_ID', idx)
                mlflow.log_param('experiment', exp)

    print('Runtime complete :).')
    return pd.DataFrame(tracker._results)


In [None]:
def print_progress_bar(iteration, total, length=50):
    """Redraw a one-line text progress bar on stdout.

    The line starts with a carriage return so successive calls overwrite each
    other; no newline is written.

    Args:
        iteration: Number of completed steps. Values outside ``[0, total]``
            are clamped so the bar never overflows or goes negative.
        total: Expected number of steps. A non-positive total is drawn as a
            finished bar instead of raising ``ZeroDivisionError``.
        length: Width of the bar in characters.
    """
    if total <= 0:
        # Nothing to wait for: show a finished bar rather than divide by zero.
        iteration, total = 1, 1
    else:
        iteration = min(max(iteration, 0), total)

    percent = ("{0:.1f}").format(100 * (iteration / float(total)))
    filled_length = int(length * iteration // total)
    bar = '█' * filled_length + '-' * (length - filled_length)
    sys.stdout.write(f'\r|{bar}| {percent}% Complete')
    sys.stdout.flush()


In [5]:

class Experiments:
    """Empty class; it defines no attributes or methods."""

SyntaxError: expected ':' (295747153.py, line 1)