In [3]:
import pandas as pd
import numpy as np
import os
import shinobi_behav
import glob
import os.path as op
from shinobi_behav import DATA_PATH, FIG_PATH
import nibabel as nib
import json
import matplotlib.pyplot as plt
import retro
from datetime import datetime
from itertools import product
import dataframe_image as dfi

def count_frames(bk2_path):
    movie = retro.Movie()
    frame_count = 0
    while movie.step():
        frame_count += 1
    return frame_count

def get_rep_info(bk2_path):
    json_fname = bk2_path.replace(".bk2", ".json")
    with open(json_fname) as f:
        sidecar = json.load(f)
        duration = sidecar["duration"]
        cleared = sidecar["cleared"]
        kills = sidecar["enemies killed"]
        healthlosses = sidecar["total health lost"]
    return duration, cleared, kills, healthlosses




In [5]:
replayfile_list = sorted(glob.glob(op.join(DATA_PATH, "shinobi_released", "shinobi", "*", "*", "*", "*.bk2")))

sub_list = []
run_list = []
ses_list = []
duration_dict = []
clear_dict = []
kill_dict = []
healthloss_dict = []

data_dict = {"subject":[],
             "session":[],
             "run":[],
             "level":[],
             "N":[],
             "duration (h:m:s)":[],
             "cleared":[],
             "kills":[],
             "healthlosses":[]}
for replayfile in replayfile_list:
    if op.isfile(replayfile.replace(".bk2", ".json")):
        duration, cleared, kills, healthlosses = get_rep_info(replayfile)
        data_dict["subject"].append(replayfile.split('/')[7])
        data_dict["session"].append(replayfile.split('/')[8])
        data_dict["run"].append(replayfile.split('/')[-1].split('_')[3].split('-')[1])
        data_dict["level"].append(replayfile.split('/')[-1].split('_')[-2].replace('-', ' '))
        data_dict["duration (h:m:s)"].append(float(duration))
        data_dict["cleared"].append(int(cleared))
        data_dict["kills"].append(int(kills))
        data_dict["healthlosses"].append(int(healthlosses))
        data_dict["N"].append(1)

data_df = pd.DataFrame(data_dict)    
#data_df['duration'] = pd.to_timedelta(data_df['duration'], unit='s').dt.components[['hours', 'minutes', 'seconds']].astype(str).agg(':'.join, axis=1)
dataset_fname = op.join(DATA_PATH, "processed", "descriptive_scan_full.csv")
data_df.to_csv(op.join(DATA_PATH, "processed", "descriptive_scan_full.csv"), index=False)
#return data_df



In [9]:
# Pivot table
data_pivot = data_df.pivot_table(index='subject', columns='level', aggfunc=np.sum).swaplevel(axis=1).sort_index(axis=1)

# Add total column
total_df = data_df.groupby("subject").sum()
multiindex_tuples = [x for x in product(['Total'], total_df.columns)]
total_df.columns = pd.MultiIndex.from_tuples(multiindex_tuples)
data_pivot = pd.concat([data_pivot, total_df], axis=1)

# Add total row
data_pivot.loc['Total'] = data_pivot.sum()


# Convert time
def convert_seconds_to_time(df, col):
    df[col] = pd.to_timedelta(df[col], unit='s').dt.components[['hours', 'minutes', 'seconds']].astype(str).agg(':'.join, axis=1)
# Apply conversion function to each sub-column
for col in data_pivot.columns:
    if col[1] == 'duration (h:m:s)':
        convert_seconds_to_time(data_pivot, col)

for col in data_pivot.select_dtypes(include=['float64']):
    data_pivot[col] = data_pivot[col].astype(int)
    
data_pivot.to_csv(op.join(DATA_PATH, "processed", "descriptive_scan_pivot.csv"))
data_pivot

level,level 1,level 1,level 1,level 1,level 1,level 4,level 4,level 4,level 4,level 4,level 5,level 5,level 5,level 5,level 5,Total,Total,Total,Total,Total
Unnamed: 0_level_1,N,cleared,duration (h:m:s),healthlosses,kills,N,cleared,duration (h:m:s),healthlosses,kills,N,cleared,duration (h:m:s),healthlosses,kills,N,duration (h:m:s),cleared,kills,healthlosses
subject,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
sub-01,89,84,3:57:26,126,2539,64,44,1:59:50,61,671,53,53,2:5:44,46,1712,206,8:3:1,181,4922,233
sub-02,81,76,3:41:53,108,2529,58,30,2:11:49,45,1085,55,47,2:37:34,192,2050,194,8:31:16,153,5664,345
sub-04,70,67,3:26:50,129,2255,56,29,2:6:0,41,1046,53,47,2:39:47,86,2090,179,8:12:39,143,5391,256
sub-06,46,45,3:29:24,64,1540,42,23,2:44:22,51,984,13,12,0:48:59,17,520,101,7:2:45,80,3044,132
Total,286,272,14:35:34,427,8863,220,126,9:2:2,198,3786,174,159,8:12:6,341,6372,680,7:49:43,557,19021,966


In [7]:
dfi.export(data_pivot, op.join(FIG_PATH, 'repetitions_description_scan.png'))

OSError: Chrome executable not able to be found on your machine

In [None]:


# Convert the styled DataFrame to an image
df_styled_to_image(data_pivot, op.join(FIG_PATH, 'repetitions_description_scan.png'))

In [None]:
# Number of usable repetitions

usable = np.sum(df["nrep_usable"])
total = np.sum(df["nrep_total"])
print(f"{usable}/{total}")

In [None]:
# total number of repetitions : 
grouped = df[["sub", "nlvl1", "nlvl4", "nlvl5"]].groupby('sub').sum()
grouped.loc['Total'] = grouped.sum(axis=0)
grouped['Total'] = grouped.sum(axis=1)
grouped

In [None]:
df_without_total = grouped.drop(columns='Total')
df_cumsum = df_without_total.cumsum(axis=1)

fig, ax = plt.subplots(figsize=(10, 6))

# Create a bar for each level, with the bottom at the cumulative sum of the previous levels
for i, col in enumerate(df_without_total.columns):
    ax.bar(df_without_total.index, df_without_total[col], bottom=(df_cumsum.iloc[:, i-1] if i>0 else 0), label=col)

plt.xlabel('Subject')
plt.ylabel('Count')
plt.title('Number of repetitions per level and per subject')
plt.legend()
plt.show()

In [None]:

eventsfile_list = sorted(glob.glob(op.join(DATA_PATH, "shinobi_released", "shinobi", "*", "*", "*", "*_desc-annotated_events.tsv")))
#def build_dataset_description(dataset_fname):
sub_list = []
run_list = []
ses_list = []
fmrifile_list = []

rep_dict = {"level 1":[],
            "level 4":[],
            "level 5":[]}
duration_dict = {"level 1":[],
                "level 4":[],
                "level 5":[]}
clear_dict = {"level 1":[],
              "level 4":[],
              "level 5":[]}
kill_dict = {"level 1":[],
             "level 4":[],
             "level 5":[]}
healthloss_dict = {"level 1":[],
                   "level 4":[],
                   "level 5":[]}



for events_file in eventsfile_list:
    print(events_file)
    # Get general info
    sub = events_file.split('/')[7]
    ses = events_file.split('/')[8]
    run = events_file.split('/')[-1].split('_')[3].split('-')[1][-1]

    # Check if fmrifile exists
    fmri_file = op.join(DATA_PATH, "shinobi.fmriprep", sub, ses, "func", f"{sub}_{ses}_task-shinobi_run-{run}_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz")
    fmrifile_ok = op.isfile(fmri_file)
    if fmrifile_ok:
        nvol = nib.load(fmri_file).shape[-1]
        events = pd.read_csv(events_file, sep="\t")
        reps_df = events[events["trial_type"]=="gym-retro_game"]
        ntotreps = len(reps_df)
        # Init 
        usable_reps = 0
        nframes = {"level 1":0,
                   "level 4":0,
                   "level 5":0}
        clear = {"level 1":0,
                 "level 4":0,
                 "level 5":0}
        repetitions = {"level 1":0,
                       "level 4":0,
                       "level 5":0}
        kills = {"level 1":0,
                 "level 4":0,
                 "level 5":0}
        healthloss = {"level 1":0,
                      "level 4":0,
                      "level 5":0}
        
        for _, rep in reps_df.iterrows():
            if type(rep["stim_file"]) == str and not "Missing file" in rep["stim_file"]:
                bk2_path = op.join(DATA_PATH, "shinobi", rep["stim_file"])
                frame_count = count_frames(bk2_path)
                usable_reps += 1
                if rep["level"] == "level-1":
                    nlvl1 += 1
                    nframes_lvl1 += frame_count
                elif rep["level"] == "level-4":
                    nlvl4 += 1
                    nframes_lvl4 += frame_count
                elif rep["level"] == "level-5":
                    nlvl5 += 1
                    nframes_lvl5 += frame_count
                    
                json_fname = rep["stim_file"].replace(".bk2", ".json")
                with open(op.join(DATA_PATH, "shinobi", json_fname)) as f:
                    sidecar = json.load(f)
                nclear += int(sidecar["cleared"])
                nhealthloss += int(sidecar["total health lost"])
                nkill += int(sidecar["enemies killed"])

        sub_list.append(sub)
        ses_list.append(ses)
        run_list.append(f"run-0{run}")
        fmrifile_list.append(fmri_file)
        nvol_list.append(nvol)
        nrep_total_list.append(ntotreps)
        nrep_usable_list.append(usable_reps)
        nlvl1_list.append(nlvl1)
        nlvl4_list.append(nlvl4)
        nlvl5_list.append(nlvl5)
        nframes_lvl1_list.append(nframes_lvl1)
        nframes_lvl4_list.append(nframes_lvl4)
        nframes_lvl5_list.append(nframes_lvl5)
        
        nclear_list.append(nclear)
        nhealthloss_list.append(nhealthloss)
        nkill_list.append(nkill)



data_df = pd.DataFrame({
    "sub" : sub_list,
    "ses" : ses_list,
    "run" : run_list,
    "fmri_file" : fmrifile_list,
    "nvol" : nvol_list,
    "nrep_total" : nrep_total_list,
    "nrep_usable" : nrep_usable_list,
    "nlvl1" : nlvl1_list,
    "nlvl4" : nlvl4_list,
    "nlvl5" : nlvl5_list,
    "nclear" : nclear_list,
    "nhealthloss" : nhealthloss_list,
    "nkill" : nkill_list,

})
data_df.to_csv(dataset_fname, index=False)
    #return data_df


dataset_fname = op.join(DATA_PATH, "processed", "shinobi_dataset_description_test.csv")

#if not op.isfile(dataset_fname):
#    data_df = build_dataset_description(dataset_fname)
#else:
#    data_df = pd.read_csv(dataset_fname)