In [18]:
import random
import numpy as np
import pandas as pd
import pymc as pm
import arviz as az

from scipy.optimize import minimize

import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel

import json

In [19]:
dfBlInfo = pd.read_csv("combined_participant_info.csv")
dfBlTrial = pd.read_csv("combined_trials_new.csv")
dfRtInfo = pd.read_csv("participant_info.csv")
dfRtTrial = pd.read_csv("participant_trials.csv")

In [20]:
dfBlTrial

Unnamed: 0,id,trial,prob1,prob2,mag1,mag2,stimID,choice,happy_rating,gain_loss,group,context
0,P_MoodFlexi_01_Behavior10_01s2v0,1,0.2,0.8,20.0,40.0,2.0,1.0,,40.0,s2v,stable
1,P_MoodFlexi_01_Behavior10_01s2v0,2,0.2,0.8,10.0,60.0,2.0,1.0,,60.0,s2v,stable
2,P_MoodFlexi_01_Behavior10_01s2v0,3,0.2,0.8,40.0,20.0,1.0,0.0,,0.0,s2v,stable
3,P_MoodFlexi_01_Behavior10_01s2v0,4,0.2,0.8,10.0,80.0,2.0,1.0,0.225260,0.0,s2v,stable
4,P_MoodFlexi_01_Behavior10_01s2v0,5,0.2,0.8,40.0,10.0,1.0,0.0,,0.0,s2v,stable
...,...,...,...,...,...,...,...,...,...,...,...,...
11995,P_MoodFlexi_01_Behavior62_01v2s37,156,0.2,0.8,20.0,40.0,2.0,1.0,,40.0,v2s,stable
11996,P_MoodFlexi_01_Behavior62_01v2s37,157,0.2,0.8,60.0,10.0,2.0,1.0,0.647135,10.0,v2s,stable
11997,P_MoodFlexi_01_Behavior62_01v2s37,158,0.2,0.8,10.0,40.0,2.0,1.0,,40.0,v2s,stable
11998,P_MoodFlexi_01_Behavior62_01v2s37,159,0.2,0.8,40.0,40.0,2.0,1.0,,40.0,v2s,stable


### Blain

In [21]:
print(dfBlInfo.shape)

(75, 16)


In [22]:
dfBlInfo['id'].nunique()

75

In [23]:
# depression 확인: 우울 평가가 어렵다는 한계 보유
def categorize_bdi(score):
    if pd.isna(score):
        return "missing"
    elif score <= 13:
        return "minimal"
    elif score <= 19:
        return "mild"
    elif score <= 28:
        return "moderate"
    else:
        return "severe"

# Apply categorization
dfBlInfo['BDI_category'] = dfBlInfo['BDI_bdi_score'].apply(categorize_bdi)

# Count each category
bdi_counts = dfBlInfo['BDI_category'].value_counts()
bdi_counts

BDI_category
minimal     55
mild         9
moderate     9
severe       2
Name: count, dtype: int64

In [24]:
# PHQ로도 우울집단이 적음
def categorize_phq(score):
    if pd.isna(score):
        return "missing"
    elif score <= 4:
        return "minimal"
    elif score <= 9:
        return "mild"
    elif score <= 14:
        return "moderate"
    elif score <= 19:
        return "moderateSevere"
    else:
        return "severe"

# Apply categorization
dfBlInfo['PHQ_category'] = dfBlInfo['PHQ_PHQ_score'].apply(categorize_phq)

# Count each category
bdi_counts = dfBlInfo['PHQ_category'].value_counts()
bdi_counts

PHQ_category
minimal           38
mild              25
moderate           9
moderateSevere     2
severe             1
Name: count, dtype: int64

In [25]:
# 모델 피팅을 위해 CR 추가 (실험에 없으니 0으로 첨부 - 보장된 보상 없음)
dfBlTrial["CR"] = 0

In [26]:
print(dfBlTrial.shape) # 75*160

(12000, 13)


### Rutledge

In [10]:
dfRtInfo.shape

(1858, 21)

In [11]:
dfRtInfo.head()

Unnamed: 0,id,age,isFemale,location,lifeSatisfaction,education,nativeLanguage,deviceType,nPlays,timesPlayed,...,timeOfDay,designVersion,depStatus,depEpisodes,depYears,depMeds,depFamily,bdiDayNumber,bdiRaw,bdiTotal
0,89,5,1,400,7,1,1.0,i,2,1,...,0.991713,1,2,5,5,4,1,242,0,6
1,92,3,0,3,6,3,1.0,i,4,2,...,0.898831,1,2,5,5,4,2,229,0,6
2,106,4,1,3,7,3,1.0,i,7,1,...,0.039907,1,2,1,5,4,3,335,0,1
3,113,4,1,400,7,3,1.0,i,2,1,...,0.079664,1,3,5,5,3,2,268,0,2
4,194,6,0,900,6,3,1.0,i,2,1,...,0.324479,1,3,5,5,4,3,332,0,5


In [12]:
# Apply categorization
dfRtInfo['BDI_category'] = dfRtInfo['bdiTotal'].apply(categorize_bdi)

# Count each category
bdi_counts = dfRtInfo['BDI_category'].value_counts()
print(bdi_counts)

age_counts = dfRtInfo['age'].value_counts()
print(age_counts)

fm_counts = dfRtInfo['isFemale'].value_counts()
print(fm_counts)

fm_counts = dfRtInfo['isFemale'].value_counts()
print(fm_counts)

BDI_category
minimal     1224
mild         230
moderate     228
severe       176
Name: count, dtype: int64
age
3    511
4    375
1    319
2    283
5    228
6    129
7     13
Name: count, dtype: int64
isFemale
1    931
0    927
Name: count, dtype: int64
isFemale
1    931
0    927
Name: count, dtype: int64


In [13]:
dfRtTrial.shape

(55740, 14)

In [14]:
dfRtTrial['prob1'] = 0.5
dfRtTrial['prob2'] = 0.5


In [75]:
dfRtTrial.head()

Unnamed: 0,id,nTrial,riskySide,certainValue,winValue,loseValue,choseRisky,outcome,choiceRT,happiness,startValue,happinessRT,spinDuration,spinAngle,prob1,prob2
0,89,1.0,1.0,0.0,40.0,-14.0,1.0,40.0,9.784,70.0,50.0,7.467,4.382,6.239,0.5,0.5
1,89,2.0,1.0,55.0,97.0,0.0,1.0,97.0,5.083,,,,4.383,3.348,0.5,0.5
2,89,3.0,1.0,-45.0,0.0,-122.0,0.0,-45.0,9.6,49.0,50.0,7.349,,,0.5,0.5
3,89,4.0,1.0,-45.0,0.0,-90.0,1.0,-90.0,5.484,,,,4.383,2.72,0.5,0.5
4,89,5.0,1.0,55.0,124.0,0.0,1.0,124.0,4.384,,,,4.383,4.982,0.5,0.5


## Data Modification
- Rutledge
1. 확률 추가: certain(1), gamble(0.5) - prob1, prob2
2. 참가자별 Happiness 정규화

- Blain
1. outcome 열 명시적으로 

### Rutledge

In [76]:
# 1. 0이 아닌 값 추출 → value1, value2 설정
def assign_values(row):
    values = [row['certainValue'], row['winValue'], row['loseValue']]
    non_zero_vals = [v for v in values if v != 0]

    # 비어 있지 않다면 앞에서 두 개를 value1, value2로 사용
    if len(non_zero_vals) >= 2:
        return pd.Series({'value1': non_zero_vals[0], 'value2': non_zero_vals[1]})
    else:
        return pd.Series({'value1': row['winValue'], 'value2': row['loseValue']}) # gamble choice = 0 경우

dfRtTrial[['value1', 'value2']] = dfRtTrial.apply(assign_values, axis=1)

In [77]:
# 2. case 정의: CertainValue의 부호 기준
def determine_case(val):
    if val > 0:
        return 1
    elif val < 0:
        return 3 # certain gamble
    else:
        return 2 # gamble

dfRtTrial['case'] = dfRtTrial['certainValue'].apply(determine_case)

In [78]:
# # 3. prob1, prob2 설정
# def assign_probs(row):
#     if row['case'] in [1, 3]:
#         return pd.Series({'prob1': 1.0, 'prob2': 0.5})
#     elif row['case'] == 2:
#         return pd.Series({'prob1': 0.5, 'prob2': 0.5})
#     else:
#         return pd.Series({'prob1': np.nan, 'prob2': np.nan})

# dfRtTrial[['prob1', 'prob2']] = dfRtTrial.apply(assign_probs, axis=1)

In [79]:
# 4. winLose 정의: 1 win, 0 lose, -1 not gamble
def determine_case(row):
    if row["case"] == -1:            # gamble을 선택하지 않음
        return -1
    elif row["outcome"] == row["winValue"]:  # gamble 결과가 win
        return 1
    else:                            # gamble 결과가 lose
        return 0

dfRtTrial["winLose"] = dfRtTrial.apply(determine_case, axis=1)

In [80]:
# 5. happiness 정규화 → 'happiness_z' 열로 저장
dfRtTrial['happiness_z'] = (
    dfRtTrial['happiness'] - dfRtTrial['happiness'].mean()
    ) / dfRtTrial['happiness'].std()

In [81]:
dfRtTrial = dfRtTrial.rename(columns={"certainValue": "CR"})
dfRtTrial = dfRtTrial.rename(columns={"winValue": "mag1"})
dfRtTrial = dfRtTrial.rename(columns={"loseValue": "mag2"})

dfRtTrial

Unnamed: 0,id,nTrial,riskySide,CR,mag1,mag2,choseRisky,outcome,choiceRT,happiness,...,happinessRT,spinDuration,spinAngle,prob1,prob2,value1,value2,case,winLose,happiness_z
0,89,1.0,1.0,0.0,40.0,-14.0,1.0,40.0,9.784,70.0,...,7.467,4.382,6.239,0.5,0.5,40.0,-14.0,2,1,0.762380
1,89,2.0,1.0,55.0,97.0,0.0,1.0,97.0,5.083,,...,,4.383,3.348,0.5,0.5,55.0,97.0,1,1,
2,89,3.0,1.0,-45.0,0.0,-122.0,0.0,-45.0,9.600,49.0,...,7.349,,,0.5,0.5,-45.0,-122.0,3,0,-0.372028
3,89,4.0,1.0,-45.0,0.0,-90.0,1.0,-90.0,5.484,,...,,4.383,2.720,0.5,0.5,-45.0,-90.0,3,0,
4,89,5.0,1.0,55.0,124.0,0.0,1.0,124.0,4.384,,...,,4.383,4.982,0.5,0.5,55.0,124.0,1,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55735,44455,26.0,1.0,55.0,85.0,0.0,0.0,55.0,3.532,,...,,,,0.5,0.5,55.0,85.0,1,0,
55736,44455,27.0,1.0,0.0,55.0,-63.0,0.0,0.0,2.865,,...,,,,0.5,0.5,55.0,-63.0,2,0,
55737,44455,28.0,1.0,45.0,79.0,0.0,0.0,45.0,4.566,59.0,...,2.250,,,0.5,0.5,45.0,79.0,1,0,0.168166
55738,44455,29.0,1.0,-55.0,0.0,-79.0,1.0,0.0,3.284,,...,,-5.582,3.603,0.5,0.5,-55.0,-79.0,3,1,


### Blain

In [27]:
# dfBlTrial['outcome'] = np.where(dfBlTrial['choice'] == 0, dfBlTrial['mag1'], dfBlTrial['mag2']) : gainloss임
dfBlTrial['winLose'] = np.where(dfBlTrial['gain_loss'] > 0, 1, 0)

In [28]:
dfBlTrial["choseRisky"] = 1.0
dfBlTrial

Unnamed: 0,id,trial,prob1,prob2,mag1,mag2,stimID,choice,happy_rating,gain_loss,group,context,CR,winLose,choseRisky
0,P_MoodFlexi_01_Behavior10_01s2v0,1,0.2,0.8,20.0,40.0,2.0,1.0,,40.0,s2v,stable,0,1,1.0
1,P_MoodFlexi_01_Behavior10_01s2v0,2,0.2,0.8,10.0,60.0,2.0,1.0,,60.0,s2v,stable,0,1,1.0
2,P_MoodFlexi_01_Behavior10_01s2v0,3,0.2,0.8,40.0,20.0,1.0,0.0,,0.0,s2v,stable,0,0,1.0
3,P_MoodFlexi_01_Behavior10_01s2v0,4,0.2,0.8,10.0,80.0,2.0,1.0,0.225260,0.0,s2v,stable,0,0,1.0
4,P_MoodFlexi_01_Behavior10_01s2v0,5,0.2,0.8,40.0,10.0,1.0,0.0,,0.0,s2v,stable,0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11995,P_MoodFlexi_01_Behavior62_01v2s37,156,0.2,0.8,20.0,40.0,2.0,1.0,,40.0,v2s,stable,0,1,1.0
11996,P_MoodFlexi_01_Behavior62_01v2s37,157,0.2,0.8,60.0,10.0,2.0,1.0,0.647135,10.0,v2s,stable,0,1,1.0
11997,P_MoodFlexi_01_Behavior62_01v2s37,158,0.2,0.8,10.0,40.0,2.0,1.0,,40.0,v2s,stable,0,1,1.0
11998,P_MoodFlexi_01_Behavior62_01v2s37,159,0.2,0.8,40.0,40.0,2.0,1.0,,40.0,v2s,stable,0,1,1.0


In [29]:
# 2. Expected reward 정규화
# Inspect reward magnitude columns (mag1, mag2)
mag1_min, mag1_max = dfBlTrial['mag1'].min(), dfBlTrial['mag1'].max()
mag2_min, mag2_max = dfBlTrial['mag2'].min(), dfBlTrial['mag2'].max()

# Apply min-max normalization to [0, 1] scale
dfBlTrial['mag1_norm'] = (dfBlTrial['mag1'] - mag1_min) / (mag1_max - mag1_min)
dfBlTrial['mag2_norm'] = (dfBlTrial['mag2'] - mag2_min) / (mag2_max - mag2_min)

dfBlTrial["outcome"] = np.where(
    dfBlTrial["gain_loss"] == 0, 
    0,
    np.where(dfBlTrial["choice"] == 0, dfBlTrial["mag1_norm"], dfBlTrial["mag2_norm"])
)

In [30]:
dfBlTrial = dfBlTrial.rename(columns={"happy_rating": "happiness"})


In [31]:
# 5. 저장 - Norm, Norm
dfBlTrial.to_csv("Blain_trials_modified.csv", index=False)

In [32]:
dfBlTrial

Unnamed: 0,id,trial,prob1,prob2,mag1,mag2,stimID,choice,happiness,gain_loss,group,context,CR,winLose,choseRisky,mag1_norm,mag2_norm,outcome
0,P_MoodFlexi_01_Behavior10_01s2v0,1,0.2,0.8,20.0,40.0,2.0,1.0,,40.0,s2v,stable,0,1,1.0,0.142857,0.428571,0.428571
1,P_MoodFlexi_01_Behavior10_01s2v0,2,0.2,0.8,10.0,60.0,2.0,1.0,,60.0,s2v,stable,0,1,1.0,0.000000,0.714286,0.714286
2,P_MoodFlexi_01_Behavior10_01s2v0,3,0.2,0.8,40.0,20.0,1.0,0.0,,0.0,s2v,stable,0,0,1.0,0.428571,0.142857,0.000000
3,P_MoodFlexi_01_Behavior10_01s2v0,4,0.2,0.8,10.0,80.0,2.0,1.0,0.225260,0.0,s2v,stable,0,0,1.0,0.000000,1.000000,0.000000
4,P_MoodFlexi_01_Behavior10_01s2v0,5,0.2,0.8,40.0,10.0,1.0,0.0,,0.0,s2v,stable,0,0,1.0,0.428571,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11995,P_MoodFlexi_01_Behavior62_01v2s37,156,0.2,0.8,20.0,40.0,2.0,1.0,,40.0,v2s,stable,0,1,1.0,0.142857,0.428571,0.428571
11996,P_MoodFlexi_01_Behavior62_01v2s37,157,0.2,0.8,60.0,10.0,2.0,1.0,0.647135,10.0,v2s,stable,0,1,1.0,0.714286,0.000000,0.000000
11997,P_MoodFlexi_01_Behavior62_01v2s37,158,0.2,0.8,10.0,40.0,2.0,1.0,,40.0,v2s,stable,0,1,1.0,0.000000,0.428571,0.428571
11998,P_MoodFlexi_01_Behavior62_01v2s37,159,0.2,0.8,40.0,40.0,2.0,1.0,,40.0,v2s,stable,0,1,1.0,0.428571,0.428571,0.428571


### Rescale 커지게

In [33]:
dfBlCopy = dfBlTrial.copy()

In [35]:
dfBlCopy["mag1_norm"] = dfBlTrial["mag1_norm"] * 100 
dfBlCopy["mag2_norm"] = dfBlTrial["mag2_norm"] * 100 
dfBlCopy["outcome"] = dfBlTrial["outcome"] * 100 

In [37]:
dfBlCopy["happiness"]= dfBlTrial["happiness"] * 100 - 50

In [38]:
dfBlCopy

Unnamed: 0,id,trial,prob1,prob2,mag1,mag2,stimID,choice,happiness,gain_loss,group,context,CR,winLose,choseRisky,mag1_norm,mag2_norm,outcome
0,P_MoodFlexi_01_Behavior10_01s2v0,1,0.2,0.8,20.0,40.0,2.0,1.0,,40.0,s2v,stable,0,1,1.0,14.285714,42.857143,42.857143
1,P_MoodFlexi_01_Behavior10_01s2v0,2,0.2,0.8,10.0,60.0,2.0,1.0,,60.0,s2v,stable,0,1,1.0,0.000000,71.428571,71.428571
2,P_MoodFlexi_01_Behavior10_01s2v0,3,0.2,0.8,40.0,20.0,1.0,0.0,,0.0,s2v,stable,0,0,1.0,42.857143,14.285714,0.000000
3,P_MoodFlexi_01_Behavior10_01s2v0,4,0.2,0.8,10.0,80.0,2.0,1.0,-27.473958,0.0,s2v,stable,0,0,1.0,0.000000,100.000000,0.000000
4,P_MoodFlexi_01_Behavior10_01s2v0,5,0.2,0.8,40.0,10.0,1.0,0.0,,0.0,s2v,stable,0,0,1.0,42.857143,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11995,P_MoodFlexi_01_Behavior62_01v2s37,156,0.2,0.8,20.0,40.0,2.0,1.0,,40.0,v2s,stable,0,1,1.0,14.285714,42.857143,42.857143
11996,P_MoodFlexi_01_Behavior62_01v2s37,157,0.2,0.8,60.0,10.0,2.0,1.0,14.713542,10.0,v2s,stable,0,1,1.0,71.428571,0.000000,0.000000
11997,P_MoodFlexi_01_Behavior62_01v2s37,158,0.2,0.8,10.0,40.0,2.0,1.0,,40.0,v2s,stable,0,1,1.0,0.000000,42.857143,42.857143
11998,P_MoodFlexi_01_Behavior62_01v2s37,159,0.2,0.8,40.0,40.0,2.0,1.0,,40.0,v2s,stable,0,1,1.0,42.857143,42.857143,42.857143


In [40]:
dfBlCopy.to_csv("Blain_trials_rescaled.csv", index=False)

### Rutledge 줄이기

In [None]:
# 2. Categorize BDI into clinical categories
def categorize_bdi(score):
    if score <= 13:
        return "minimal"
    elif score <= 19:
        return "mild"
    elif score <= 28:
        return "moderate"
    else:
        return "severe"

dfRtInfo["BDI_category"] = dfRtInfo["bdiTotal"].apply(categorize_bdi)

# 3. Collapse age group: 1–2 = A, 3–4 = B, 5–7 = C
def collapse_age(age_code):
    if age_code in [1, 2]:
        return "A"
    elif age_code in [3, 4]:
        return "B"
    else:
        return "C"

dfRtInfo["age_group"] = dfRtInfo["age"].apply(collapse_age)

# 4. Create stratification variable
dfRtInfo["strata"] = dfRtInfo["isFemale"].astype(str) + "_" + dfRtInfo["BDI_category"] + "_" + dfRtInfo["age_group"]

# 5. Drop missing values
dfRtInfo_clean = dfRtInfo.dropna(subset=["strata"])
dfRtInfo_unique = dfRtInfo_clean.drop_duplicates(subset="id")

# 6. Stratified sampling (500 participants)
sss = StratifiedShuffleSplit(n_splits=1, test_size=200, random_state=42)
for _, sample_idx in sss.split(dfRtInfo_unique, dfRtInfo_unique["strata"]):
    sampled_ids = dfRtInfo_unique.iloc[sample_idx]["id"].values

# 7. Subset full trials to only include those participants
dfRtTrial_sampled = dfRtTrial[dfRtTrial["id"].isin(sampled_ids)].copy()
dfRtInfo_sampled = dfRtInfo[dfRtInfo["id"].isin(sampled_ids)].copy()

# 8. (Optional) Save results
dfRtTrial_sampled.to_csv("Rutledge_sampled_trials.csv", index=False)
dfRtInfo_sampled.to_csv("Rutledge_sampled_info.csv", index=False)


In [None]:
print(dfRtTrial_sampled.shape)
print(dfRtInfo_sampled.shape)

In [None]:
# Apply categorization
dfRtInfo = dfRtInfo_sampled
dfRtInfo['BDI_category'] = dfRtInfo['bdiTotal'].apply(categorize_bdi)

# Count each category
bdi_counts = dfRtInfo['BDI_category'].value_counts()
print(bdi_counts)

age_counts = dfRtInfo['age'].value_counts()
print(age_counts)

fm_counts = dfRtInfo['isFemale'].value_counts()
print(fm_counts)

fm_counts = dfRtInfo['isFemale'].value_counts()
print(fm_counts)

In [None]:
# 저장
dfRtTrial_sampled.to_csv("Rutledge_trials_modified.csv", index=False)
dfRtInfo_sampled.to_csv("Rutledge_info_modified.csv", index=False)

### For mini

In [None]:
# 6. Stratified sampling (500 participants)
sss = StratifiedShuffleSplit(n_splits=1, test_size=24, random_state=42)
for _, sample_idx in sss.split(dfRtInfo_unique, dfRtInfo_unique["strata"]):
    sampled_ids = dfRtInfo_unique.iloc[sample_idx]["id"].values
    
# 7. Subset full trials to only include those participants
dfRtTrial_sampled = dfRtTrial[dfRtTrial["id"].isin(sampled_ids)].copy()
dfRtInfo_sampled = dfRtInfo[dfRtInfo["id"].isin(sampled_ids)].copy()

# 8. (Optional) Save results
dfRtTrial_sampled.to_csv("Rutledge_trials_fortest.csv", index=False)
dfRtInfo_sampled.to_csv("Rutledge_info_fortest.csv", index=False)

print(dfRtTrial_sampled.shape)
print(dfRtInfo_sampled.shape)

## Data Split

In [3]:
dfRtInfo = pd.read_csv("Rutledge_info_modified.csv")
dfRtTrial = pd.read_csv("Rutledge_trials_modified.csv")

In [25]:
dfRtInfo_low = dfRtInfo[(dfRtInfo["BDI_category"] == "minimal") | (dfRtInfo["BDI_category"] == "mild")]
dfRtInfo_low["BDI_category"].value_counts()

BDI_category
minimal    133
mild        24
Name: count, dtype: int64

In [26]:
dfRtInfo_high = dfRtInfo[(dfRtInfo["BDI_category"] != "minimal") & (dfRtInfo["BDI_category"] != "mild")]
dfRtInfo_high["BDI_category"].value_counts()

BDI_category
moderate    25
severe      18
Name: count, dtype: int64

In [27]:
# 1. 낮은 BDI 그룹의 ID 목록 추출
low_ids = dfRtInfo_low["id"].unique()

# 2. dfRtTrial에서 해당 ID에 해당하는 행만 필터링
dfRtTrial_low = dfRtTrial[dfRtTrial["id"].isin(low_ids)]
dfRtTrial_low

Unnamed: 0,id,nTrial,riskySide,CR,mag1,mag2,choseRisky,outcome,choiceRT,happiness,...,happinessRT,spinDuration,spinAngle,prob1,prob2,value1,value2,case,winLose,happiness_z
0,1105,1.0,1.0,0.0,55.0,-35.0,1.0,55.0,8.293,79.0,...,7.600,4.396,4.945,0.5,0.5,55.0,-35.0,2,1,1.248555
1,1105,2.0,1.0,-45.0,0.0,-85.0,0.0,-45.0,7.499,,...,,,,0.5,0.5,-45.0,-85.0,3,0,
2,1105,3.0,1.0,-30.0,0.0,-96.0,0.0,-30.0,4.440,77.0,...,5.922,,,0.5,0.5,-30.0,-96.0,3,0,1.140516
3,1105,4.0,1.0,45.0,85.0,0.0,1.0,85.0,3.820,,...,,4.416,3.186,0.5,0.5,45.0,85.0,1,1,
4,1105,5.0,1.0,55.0,103.0,0.0,0.0,55.0,6.981,,...,,,,0.5,0.5,55.0,103.0,1,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5965,44160,26.0,1.0,0.0,75.0,-37.0,1.0,75.0,3.167,,...,,-4.083,4.256,0.5,0.5,75.0,-37.0,2,1,
5966,44160,27.0,1.0,-35.0,0.0,-66.0,1.0,-66.0,3.033,,...,,-5.434,2.801,0.5,0.5,-35.0,-66.0,3,0,
5967,44160,28.0,1.0,55.0,92.0,0.0,1.0,92.0,3.317,64.0,...,2.433,-5.367,4.665,0.5,0.5,55.0,92.0,1,1,0.438263
5968,44160,29.0,1.0,-35.0,0.0,-66.0,1.0,-66.0,5.133,,...,,-4.202,1.026,0.5,0.5,-35.0,-66.0,3,0,


In [28]:
# 1. 낮은 BDI 그룹의 ID 목록 추출
high_ids = dfRtInfo_high["id"].unique()

# 2. dfRtTrial에서 해당 ID에 해당하는 행만 필터링
dfRtTrial_high = dfRtTrial[dfRtTrial["id"].isin(high_ids)]
dfRtTrial_high

Unnamed: 0,id,nTrial,riskySide,CR,mag1,mag2,choseRisky,outcome,choiceRT,happiness,...,happinessRT,spinDuration,spinAngle,prob1,prob2,value1,value2,case,winLose,happiness_z
180,4344,1.0,1.0,-35.0,0.0,-64.0,1.0,0.0,3.985,84.0,...,5.883,4.382,5.359,0.5,0.5,-35.0,-64.0,3,1,1.518652
181,4344,2.0,1.0,-30.0,0.0,-96.0,1.0,0.0,2.401,,...,,4.382,4.228,0.5,0.5,-30.0,-96.0,3,1,
182,4344,3.0,1.0,-35.0,0.0,-112.0,1.0,0.0,5.383,89.0,...,3.800,4.383,5.233,0.5,0.5,-35.0,-112.0,3,1,1.788749
183,4344,4.0,1.0,0.0,40.0,-8.0,0.0,0.0,2.116,,...,,,,0.5,0.5,40.0,-8.0,2,0,
184,4344,5.0,1.0,0.0,40.0,-8.0,1.0,-8.0,1.601,,...,,4.382,0.207,0.5,0.5,40.0,-8.0,2,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5995,44217,26.0,1.0,55.0,105.0,0.0,1.0,105.0,4.450,,...,,-4.917,6.134,0.5,0.5,55.0,105.0,1,1,
5996,44217,27.0,1.0,-55.0,0.0,-66.0,1.0,-66.0,3.460,,...,,3.935,2.081,0.5,0.5,-55.0,-66.0,3,0,
5997,44217,28.0,1.0,45.0,124.0,0.0,1.0,124.0,3.432,74.0,...,5.999,4.434,5.958,0.5,0.5,45.0,124.0,1,1,0.978457
5998,44217,29.0,1.0,45.0,85.0,0.0,1.0,85.0,3.419,,...,,-4.099,3.873,0.5,0.5,45.0,85.0,1,1,


In [29]:
# 저장
dfRtTrial_low.to_csv("Rutledge_trials_lowBDI.csv", index=False)
dfRtInfo_low.to_csv("Rutledge_info_lowBDI.csv", index=False)

dfRtTrial_high.to_csv("Rutledge_trials_highBDI.csv", index=False)
dfRtInfo_high.to_csv("Rutledge_info_highBDI.csv", index=False)

## Then, use separately for the best model

### Blain

In [16]:
dfBlInfoS2v = pd.read_csv("stable2volatile_participant_info.csv")
dfBlTrialS2v = pd.read_csv("s2v_new_trial_data.csv")

In [17]:
dfBlInfoV2s = pd.read_csv("volatile2stable_participant_info.csv")
dfBlTrialV2s = pd.read_csv("v2s_new_trial_data.csv")

In [None]:
## Then, use it as mu = w0 * bdi_group[i] + ... for the best model