# Exploratory Data Analysis
----

Objective: Determine if the data is encoded as expected.

----
```
: 26.06.24
: zach.wolpe@medibio.com.au
```
----

In [1]:
import pandas as pd
import numpy as np
import scipy.io
import logging
import pprint
import os
import plotly.graph_objects as go
import plotly.express as px
from tqdm import tqdm


# src
from src.helpers import log_rat_metadata, log_sequence_data, load_config
from src.query_dataset import (QuerySequenceData, load_data)

In [2]:
from src.rescorla_wagner_model import (RoscorlaWagner)
from src.rescorla_wagner_model_plots import (RescorlaWagnerPlots)
from src.rescorla_wagner_model_simulation import (RescorlaWagnerSimulate)
from src.rescorla_wagner_model_diagnostics import (RoscorlaWagerModelDiagnostics)

In [3]:
# load yaml config
_config = load_config('config.yaml')
logging.basicConfig(level=logging.INFO)

----
# Human Data: Pilot
----

In [16]:
(data,
 sequence_data,
 meta_data,
 StimCode,
 RespCode
 ) = load_data(config=_config, experiment_class='human_pilot_experiment')

INFO:root:StimCode (284): [0. 1.]
INFO:root:RespCode (284): [1. 4.]


In [17]:
def calc_reward_rate(
        qsd,
        subject_ID = 1,
        session_Type = 'training',
        session_Num = None):
    
    KEY = (subject_ID, session_Type, session_Num)

    # filter sequence data based on subjectID and sessionType
    # qsd.filter_sequences(subjectID=KEY[0], sessionType=KEY[1], sessionNum=KEY[2])
    
    # extract stimCode and respCode
    qsd.filter_sequences(*KEY)
    # qsd.filter_sequences(KEY[0], KEY[1], KEY[2])


    # qsd.return_experiment_vectors(**KEY)

    # Extract stimData & respData: (assuming sorting isn't required.)
    qsd.extract_stim_resp_data()

    # infer action / reward pairs
    qsd.infer_action_reward_pairs()
    # print('Overall Reward Rate: ', np.mean(qsd._reward))
    return np.mean(qsd._reward), qsd


# example usage
qsd = QuerySequenceData(StimCode, RespCode)
calc_reward_rate(qsd, subject_ID=1, session_Type='training', session_Num=1)

(0.6, <src.query_dataset.QuerySequenceData at 0x178ea5c10>)

### Calc Reward Rate per experiment

In [18]:

def unique_experiments(StimCode):
    subjects = list(np.unique([i[0] for i in StimCode.keys()]))
    session_types = list(np.unique([i[1] for i in StimCode.keys()]))
    session_numbers = list(np.unique([i[2] for i in StimCode.keys()]))
    return subjects, session_types, session_numbers


def generate_all_experiments(StimCode):
    """Generate all unique experiment sets."""
    subjects, session_types, session_numbers = unique_experiments(StimCode)
    for _sub in subjects:
        for _sess in session_types:
            for _num in session_numbers:
                yield (_sub, _sess, _num)


def calc_reward_for_experiment(qsd, subject_ID, session_Type, session_Num):
    """Calculate reward rate for a single experiment."""
    _reward_rate, qsd = calc_reward_rate(qsd, subject_ID=subject_ID, session_Type=session_Type, session_Num=session_Num)
    return _reward_rate, qsd


def reward_per_experiment(qsd, StimCode):
    results = {'subject':[], 'sessionType':[],'sessionNum':[], 'reward_rate':[]}

    # create experiment geneator
    experiments = generate_all_experiments(StimCode)

    # run experiments
    for _sub, _sess, _num in tqdm(experiments):
        _reward_rate, qsd = calc_reward_for_experiment(qsd, subject_ID=_sub, session_Type=_sess, session_Num=_num)
        # print(f'Subject: {_sub}, Session Type: {_sess}, Session Number: {_num}, Reward Rate: {_reward_rate}')
        results['subject'].append(_sub)
        results['sessionType'].append(_sess)
        results['sessionNum'].append(_num)
        results['reward_rate'].append(_reward_rate)

    return pd.DataFrame(results), qsd

    # return pd.DataFrame(results), qsd


In [19]:
# experiments = generate_all_experiments(StimCode)
# for (_sub, _sess, _num) in experiments:
#     print(f'Subject: {_sub}, Session Type: {_sess}, Session Number: {_num}.')


#     # get experiment
#     qsd = QuerySequenceData(StimCode, RespCode)
#     qsd.filter_sequences(subjectID=_sub, sessionType=_sess, sessionNum=_num)
#     # qsd._stimCode
#     # qsd._respCode
#     # _reward, _ = calc_reward_for_experiment(qsd, subject_ID=_sub, session_Type=_sess, session_Num=_num)
#     break
    
# qsd._stimCode.keys()
    

---
# Assess Learning Rates
----
### Calc Reward Rate per session

In [20]:
(data,
 sequence_data,
 meta_data,
 StimCode,
 RespCode
 ) = load_data(config=_config, experiment_class='human_pilot_experiment')
qsd = QuerySequenceData(StimCode, RespCode)
rewards, _qsd = reward_per_experiment(qsd, StimCode)

# drop na
rewards = rewards[~rewards.reward_rate.isna()]

INFO:root:StimCode (284): [0. 1.]
INFO:root:RespCode (284): [1. 4.]

Mean of empty slice.


invalid value encountered in scalar divide

1620it [00:00, 17398.05it/s]


In [28]:
rewards.groupby(['subject', 'sessionType'])[['reward_rate']].mean().sort_values(by='reward_rate', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,reward_rate
subject,sessionType,Unnamed: 2_level_1
3,ids1,1.000000
17,ams2,1.000000
15,eds2,1.000000
3,ams1,1.000000
19,ams1,0.991667
...,...,...
18,ids2,0.500000
18,staircase2,0.500000
9,eds1,0.483333
22,ams2,0.441667


In [9]:

def plot_reward_experiment(rewards, title='Reward Rates'):
    fig = px.scatter(rewards, x='sessionNum', y='reward_rate', color='subject', facet_col='sessionType')
    # fig = px.scatter(rewards, x='sessionNum', y='reward_rate', color='sessionType')
    fig.update_layout(template='plotly_dark', title=title)
    # fig.update_xaxes(title_text='Session Number')
    # fig.update_yaxes(title_text='Reward Rate')
    
    # remove multiple x axes titles
    # fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    # fig.for_each_annotation(lambda a: None)

    return fig

plot_reward_experiment(rewards, title='Reward Rates: Human Pilot Experiment')


In [10]:
def plot_boxes(rewards, title='Distribution of Reward Rates'):
    fig = px.box(rewards, x='sessionType', y='reward_rate', color='subject')
    fig.update_layout(template='plotly_dark', title=title)
    fig.update_xaxes(title='Session Type')
    fig.update_yaxes(title='Reward Rate')

    return fig



plot_boxes(rewards, title='Distribution of Reward Rates: Human Pilot Experiment')

In [11]:
def plot_box(subject=1, rewards=rewards, title=None):
    """
    Plot box plot of reward rates for a given subject.
    """
    if title is None:
        title = f'Distribution of Reward Rates for Subject {subject}'
    _df = pd.DataFrame(rewards)
    _df = _df[_df['subject'] == subject]
    fig = go.Figure()
    fig.add_trace(go.Box(x=_df['sessionType'], y=_df['reward_rate'], name='Reward Rate'))
    fig.update_layout(template='plotly_dark', title=title)
    fig.update_xaxes(title='Session Type')
    fig.update_yaxes(title='Reward Rate')
    return fig

plot_box(1).show()
plot_box(2).show()


---
# Human Data: Motivation
----

In [12]:
(data,
 sequence_data,
 meta_data,
 StimCode,
 RespCode
 ) = load_data(config=_config, experiment_class='human_motivation_experiment')
qsd = QuerySequenceData(StimCode, RespCode)
rewards, _qsd = reward_per_experiment(qsd, StimCode)

# drop na
rewards = rewards[~rewards.reward_rate.isna()]

# plot
plot_reward_experiment(rewards, title='Reward Rates: Human Motivation Experiment')

INFO:root:StimCode (245): [0. 1.]
INFO:root:RespCode (245): [1. 4.]

Mean of empty slice.


invalid value encountered in scalar divide

2268it [00:00, 23377.79it/s]


----
# Rat Data
----

In [13]:
(data,
 sequence_data,
 meta_data,
 StimCode,
 RespCode
 ) = load_data(config=_config, experiment_class='rat_experiment')
qsd = QuerySequenceData(StimCode, RespCode)
rewards, _qsd = reward_per_experiment(qsd, StimCode)

# drop na
rewards = rewards[~rewards.reward_rate.isna()]

# plot
plot_reward_experiment(rewards, title='Reward Rates: Rat Experiment')



INFO:root:StimCode (571): [0. 1.]
INFO:root:RespCode (571): [1. 2. 3. 4.]

Mean of empty slice.


invalid value encountered in scalar divide

2730it [00:00, 3682.06it/s]
