In [3]:
import pandas as pd

# ---- Inputs ----------------------------------------------------------------
# Dataset 1 holds the ACE_Group assignment per Participant.ID;
# Dataset 2 holds the trial-level observations.
dataset1_path = '/scratch/connectome/justin/ACE_dataset.csv'
dataset2_path = '/scratch/connectome/justin/ACE_trial_data.csv'

df_group = pd.read_csv(dataset1_path)
df_trials = pd.read_csv(dataset2_path)

# ---- Step 1: recode the "L" marker in "Apple_return" as 0 -------------------
df_trials.loc[df_trials["Apple_return"].eq("L"), "Apple_return"] = 0

# ---- Step 2: split the trials by group assignment ---------------------------
# ACE_Group == 0 feeds the "group 1" file, ACE_Group == 1 feeds the "group 2" file.
group_1_participants = df_group.loc[df_group["ACE_Group"] == 0, "Participant.ID"].tolist()
group_2_participants = df_group.loc[df_group["ACE_Group"] == 1, "Participant.ID"].tolist()

df_group1 = df_trials[df_trials["Participant.ID"].isin(group_1_participants)]
df_group2 = df_trials[df_trials["Participant.ID"].isin(group_2_participants)]

# Only these columns are written to the per-group files.
selected_columns = ["Trial_number", "Participant.ID", "Environment", "Response", "Apple_return"]
df_group11 = df_group1[selected_columns]
df_group22 = df_group2[selected_columns]

# ---- Outputs: one CSV per group, without the pandas index -------------------
df_group11.to_csv("/scratch/connectome/justin/group1_dataset.csv", index=False)
df_group22.to_csv("/scratch/connectome/justin/group2_dataset.csv", index=False)


In [4]:
# Preview the first five rows of the group-1 subset.
df_group11.head(n=5)

Unnamed: 0,Trial_number,Participant.ID,Environment,Response,Apple_return
173,3.0,1350320,1,S,9
174,5.0,1350320,1,S,8
175,7.0,1350320,1,S,8
176,9.0,1350320,1,S,7
177,11.0,1350320,1,S,7


In [13]:
import gymnasium as gym
from gymnasium import spaces
import pandas as pd
import numpy as np

class PFEnv(gym.Env):
    """Replay recorded trials as a gym-style environment.

    The environment loads a trial-level CSV and walks through it one
    participant at a time, advancing one recorded trial per ``step``. The
    ``action`` passed to ``step`` does not influence which row comes next:
    every step reports the ``Environment`` of the next recorded trial as the
    observation and its ``Apple_return`` as the reward.

    The CSV must provide the columns ``Participant.ID``, ``Environment`` and
    ``Apple_return``.

    Attributes:
        df: The loaded dataset.
        participant_ids: Distinct ``Participant.ID`` values, in order of first
            appearance in the file.
        current_participant: Position of the active participant in
            ``participant_ids`` (a 0-based index, not an ID).
        current_trial: Position of the active trial among that participant's
            rows (a 0-based index, not a ``Trial_number`` value).
        state: Most recent observation (0 or 1), or ``None`` if no data.
        total_return: Sum of the rewards returned by ``step`` since ``reset``.
        init_seed: The ``seed`` value given to the constructor.

    NOTE(review): rows are replayed in file order within each participant.
    Sort the CSV upstream if it is not already in trial order.

    NOTE(review): ``reset`` returns only the observation and ``step`` returns
    a 4-tuple, exactly as before. The Gymnasium ``Env`` API describes
    ``reset -> (obs, info)`` and
    ``step -> (obs, reward, terminated, truncated, info)``; confirm which
    convention the training code expects before changing the return shapes.
    """

    def __init__(self, dataset_path, seed=2023):
        """Load the dataset and position the environment on the first trial.

        Args:
            dataset_path: Path of the trial-level CSV file.
            seed: Kept on ``init_seed``. The replay itself draws no random
                numbers, so the value does not change the trajectory.
        """
        # Load the dataset
        self.df = pd.read_csv(dataset_path)
        self.init_seed = seed

        # Split the rows per participant once, so that ``step`` does not have
        # to re-filter the whole frame on every call. ``sort=False`` keeps the
        # participants in order of first appearance.
        self.participant_ids = []
        self._participant_trials = []
        for participant_id, rows in self.df.groupby("Participant.ID", sort=False):
            self.participant_ids.append(participant_id)
            self._participant_trials.append(rows.reset_index(drop=True))

        # Define the action and observation space
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Discrete(2)

        # Initialize the positional counters and bookkeeping
        self.current_trial = 0
        self.current_participant = 0
        self.total_return = 0
        self.state = None

        # Initialize the environment
        self.reset()

    def _current_row(self):
        """Return the active trial as a one-row DataFrame (empty if out of range)."""
        if not 0 <= self.current_participant < len(self._participant_trials):
            return self.df.iloc[0:0]
        trials = self._participant_trials[self.current_participant]
        return trials.iloc[self.current_trial:self.current_trial + 1]

    def reset(self, *, seed=None, options=None):
        """Rewind to the first trial of the first participant.

        Args:
            seed: If given, forwarded to ``self.seed``.
            options: Accepted for call compatibility; not used.

        Returns:
            The observation of the first trial, or ``None`` when the dataset
            contains no trials.
        """
        if seed is not None:
            self.seed(seed)

        # Reset the environment to start a new episode
        self.current_trial = 0
        self.current_participant = 0
        self.total_return = 0

        self.state = self._get_state(self._current_row())
        return self.state

    def _get_state(self, data):
        """Map the ``Environment`` value of the first row of ``data`` to 0 or 1.

        Returns ``None`` when ``data`` is empty.
        """
        # Check if data is empty
        if data.empty:
            return None

        # "Environment_1" maps to 0 and everything else to 1. The preview of
        # the group-1 file shows the column holding a bare 1, so that spelling
        # is accepted as well.
        # NOTE(review): confirm the actual encoding of the Environment column.
        label = str(data["Environment"].values[0])
        return 0 if label in ("Environment_1", "1", "1.0") else 1

    def step(self, action):
        """Advance to the next recorded trial.

        Args:
            action: Ignored; the next row is determined by the dataset.

        Returns:
            ``(next_state, reward, done, info)``. Once every participant's
            trials are used up, ``done`` is ``True``, ``reward`` is 0 and
            ``next_state`` repeats the last observation.
        """
        participant_count = len(self._participant_trials)

        # Already past the end: keep reporting a terminal step instead of
        # indexing into rows that do not exist.
        if self.current_participant >= participant_count:
            return self.state, 0, True, {}

        self.current_trial += 1

        # Move on to the next participant when the current one has no more trials
        if self.current_trial >= len(self._participant_trials[self.current_participant]):
            self.current_trial = 0
            self.current_participant += 1
            if self.current_participant >= participant_count:
                return self.state, 0, True, {}

        # Get the next state and reward
        row = self._current_row()
        self.state = self._get_state(row)
        reward = row["Apple_return"].values[0]

        # Update the total return
        self.total_return += reward

        return self.state, reward, False, {}

    def render(self, mode='human'):
        """Print the current position, observation and accumulated return."""
        print(f"Participant: {self.current_participant}, Trial: {self.current_trial}, State: {self.state}, Total Return: {self.total_return}")

    def seed(self, seed):
        """Seed NumPy's global random number generator."""
        np.random.seed(seed)

In [14]:
# Build a single replay environment from the group-1 trials file.
dataset_path = "/scratch/connectome/justin/group1_dataset.csv"
env = PFEnv(dataset_path=dataset_path, seed=2023)

In [23]:
import tianshou as ts

# Vectorised environments: 10 copies for training and 100 for testing.
# Each factory call constructs its own PFEnv from the same CSV.
train_envs = ts.env.DummyVectorEnv([lambda: PFEnv(dataset_path, seed=2023)] * 10)
test_envs = ts.env.DummyVectorEnv([lambda: PFEnv(dataset_path, seed=2023)] * 100)

# `.n` is the size reported by each space of the single `env` built earlier.
action_shape = env.action_space.n
state_shape = env.observation_space.n

print(state_shape, action_shape, sep="\n")