# Global Analysis Pipeline

## Imports

In [44]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
pd.set_option('display.max_columns', None)

## Global variables / settings for this notebook

In [45]:
# subject-independant directory and filename of raw-data
input_subject_folder = f"..\\data\\preprocessed\\"
# path to stimuli of experiment
exp_stimuli_folder = "..\\stimuli\\"
# set global colormap var
cmp = plt.cm.viridis_r
# display resolution
display = {"width": 1920, "height": 1080}
# Define the folder name
plt_folder_name = f'..\\plots\\global'# Define the global folder name

In [46]:
li = []

for filename in ["subject-6.tsv", "subject-3.tsv"]:
    df = pd.read_csv(input_subject_folder + filename, sep="\t", header=0)
    li.append(df)

data = pd.concat(li, axis=0, ignore_index=True)

# clean up sentence_id
for index, value in data['SENTENCE_ID'].items():
    if pd.notna(value):
        if type(value) == float:
            data.at[index, "SENTENCE_ID"] = str(int(data.at[index, "SENTENCE_ID"]))
        elif ".0" in value:
            data.at[index, "SENTENCE_ID"] = str(data.at[index, "SENTENCE_ID"].split('.')[0])

  df = pd.read_csv(input_subject_folder + filename, sep="\t", header=0)
  df = pd.read_csv(input_subject_folder + filename, sep="\t", header=0)


In [47]:
def bpog_in_target_bbox(bpogx, bpogy, pos):

    width = display["width"]
    height = display["height"]

    x = bpogx * width
    y = bpogy * height
    
    if pos == "TL":
        relpos = (width/4, height/4)
    elif pos == "TR":
        relpos = (width*(3/4), height/4)
    elif pos == "BL":
        relpos = (width/4, height*(3/4))
    elif pos == "BR":
        relpos = (width*(3/4), height*(3/4))
    else:
        relpos = -1

    pos_x_l = relpos[0] - 200
    pos_x_r = relpos[0] + 200
    pos_y_d = relpos[1] - 200
    pos_y_u = relpos[1] + 200

    if (x > pos_x_l) and (x < pos_x_r):
        if(y > pos_y_d) and (y < pos_y_u):
            return True

    return False


def get_bounding_box_coords(pos):
    
    relpos = 0    
    width = display["width"]
    height = display["height"]


    if pos == "TL":
        relpos = (width/4, height/4)
    elif pos == "TR":
        relpos = (width*(3/4), height/4)
    elif pos == "BL":
        relpos = (width/4, height*(3/4))
    elif pos == "BR":
        relpos = (width*(3/4), height*(3/4))
    else:
        relpos = -1

    pos_x_l = relpos[0] - 200
    pos_x_r = relpos[0] + 200
    pos_y_d = relpos[1] - 200
    pos_y_u = relpos[1] + 200
    
    return (pos_x_l, pos_x_r, pos_y_d, pos_y_u)

In [48]:
data.query("SENTENCE_ID=='2'").head(3)

Unnamed: 0,TIME,BPOGX,BPOGY,BPOGV,SUBJECT,EVENT,TRIAL,GROUP,SENTENCE_ID,SENTENCE,CONDITION,STIMULUS_NAME,AUDIO,SSTIM_TL,SSTIM_TR,SSTIM_BL,SSTIM_BR,TARGET_POS,LOCATION_CUE,VERB_CUE_TIMING,VERB_CUE,TARGET_CUE_TIMING
162,218.055642,0.52477,0.53737,0,6.0,FIXATION,0.0,CHICKEN,2,"IN THE COUNTRYSIDE, THE GIRL RODE A HORSE.",STATIC,HORSE,02_HORSE.OGG,HORSE,NOVEL,PAPERCLIP,OVEN,TL,COUNTRYSIDE,1655.0,RODE,2600.0
163,218.062308,0.52477,0.53737,0,6.0,FIXATION,0.0,CHICKEN,2,"IN THE COUNTRYSIDE, THE GIRL RODE A HORSE.",STATIC,HORSE,02_HORSE.OGG,HORSE,NOVEL,PAPERCLIP,OVEN,TL,COUNTRYSIDE,1655.0,RODE,2600.0
164,218.068975,0.52477,0.53737,0,6.0,FIXATION,0.0,CHICKEN,2,"IN THE COUNTRYSIDE, THE GIRL RODE A HORSE.",STATIC,HORSE,02_HORSE.OGG,HORSE,NOVEL,PAPERCLIP,OVEN,TL,COUNTRYSIDE,1655.0,RODE,2600.0


In [78]:
def calc_tr_ntr(data, highlight, sentence_id):

    # get samples with specific id
    data_subset = data.query(f"SENTENCE_ID=='{sentence_id}'")
    data_subset = data_subset.query(f"EVENT=='AUDIOSTART' or EVENT=='VERBONSET' or EVENT=='TARGETONSET' or EVENT=='PAUSE'")
    
    # filter invalid samples
    data_subset = data_subset.query("BPOGV==1 and BPOGX>=0 and BPOGY >=0")
    
    # reset index
    data_subset = data_subset.reset_index(drop=True)

    # get cues
    audiostart = data_subset.query("EVENT=='AUDIOSTART'").sort_values(by="TIME", ascending=True).reset_index(drop=True).loc[0, "TIME"]
    verb_onset = data_subset.query("EVENT=='VERBONSET'").sort_values(by="TIME", ascending=True).reset_index(drop=True).loc[0, "TIME"]
    target_onset = data_subset.query("EVENT=='TARGETONSET'").sort_values(by="TIME", ascending=True).reset_index(drop=True).loc[0, "TIME"]
    pause = data_subset.query("EVENT=='PAUSE'").sort_values(by="TIME", ascending=True).reset_index(drop=True).loc[0, "TIME"]

    # get first row of dataframe
    first_row = data_subset.iloc[0]

    stc = first_row["SENTENCE"]
    stc_id = first_row["SENTENCE_ID"]
    # condition = data_subset.loc[0, "CONDITION"]
    t_pos = first_row["TARGET_POS"]
    
    # determin other AOIs
    all_t_pos = ["TL", "TR", "BL", "BR"]
    all_t_pos.remove(t_pos)


    dsub_wolf = data_subset.query("GROUP=='WOLF'").sort_values(by="TIME", ascending=True).reset_index(drop=True).copy()
    dsub_chic = data_subset.query("GROUP=='CHICKEN'").sort_values(by="TIME", ascending=True).reset_index(drop=True).copy()

    for index, row in dsub_wolf.iterrows():
        
        idx = index + 1

        x = row["BPOGX"]
        y = row["BPOGY"]
        in_b_box = bpog_in_target_bbox(x, y, t_pos)
        in_b_box_other = bpog_in_target_bbox(x, y, all_t_pos[0]) or bpog_in_target_bbox(x, y,  all_t_pos[1]) or bpog_in_target_bbox(x, y,  all_t_pos[2])
        
        # if first index
        if index == 0:
            if in_b_box:
                # TR
                dsub_wolf.at[index, "TR_SMPL"] = 1
                dsub_wolf.at[index, "TR"] = 1
                # NTR
                dsub_wolf.at[index, "NTR_SMPL"] = 0
                dsub_wolf.at[index, "NTR"] = 0
            else:
                dsub_wolf.at[index, "TR_SMPL"] = 0
                dsub_wolf.at[index, "TR"] = 0 
                if in_b_box_other:
                    dsub_wolf.at[index, "NTR_SMPL"] = 1           
                    dsub_wolf.at[index, "NTR"] = 1
                else:
                    dsub_wolf.at[index, "NTR_SMPL"] = 0
                    dsub_wolf.at[index, "NTR"] = 0
        else:
            if in_b_box:
                dsub_wolf.at[index, "TR_SMPL"] = dsub_wolf.at[index-1, "TR_SMPL"] + 1
                dsub_wolf.at[index, "NTR_SMPL"] = dsub_wolf.at[index-1, "NTR_SMPL"]
            else:
                dsub_wolf.at[index, "TR_SMPL"] = dsub_wolf.at[index-1, "TR_SMPL"]
                if in_b_box_other:
                    dsub_wolf.at[index, "NTR_SMPL"] = dsub_wolf.at[index-1, "NTR_SMPL"] + 1
                else:
                    dsub_wolf.at[index, "NTR_SMPL"] = dsub_wolf.at[index-1, "NTR_SMPL"]

            dsub_wolf.at[index, "TR"] = dsub_wolf.at[index, "TR_SMPL"]/idx
            dsub_wolf.at[index, "NTR"] = dsub_wolf.at[index, "NTR_SMPL"]/idx
    
    for index, row in dsub_chic.iterrows():
        
        idx = index + 1

        x = row["BPOGX"]
        y = row["BPOGY"]
        in_b_box = bpog_in_target_bbox(x, y, t_pos)
        in_b_box_other = bpog_in_target_bbox(x, y, all_t_pos[0]) or bpog_in_target_bbox(x, y,  all_t_pos[1]) or bpog_in_target_bbox(x, y,  all_t_pos[2])
        
        # if first index
        if index == 0:
            if in_b_box:
                # TR
                dsub_chic.at[index, "TR_SMPL"] = 1
                dsub_chic.at[index, "TR"] = 1
                # NTR
                dsub_chic.at[index, "NTR_SMPL"] = 0
                dsub_chic.at[index, "NTR"] = 0
            else:
                dsub_chic.at[index, "TR_SMPL"] = 0
                dsub_chic.at[index, "TR"] = 0 
                if in_b_box_other:
                    dsub_chic.at[index, "NTR_SMPL"] = 1           
                    dsub_chic.at[index, "NTR"] = 1
                else:
                    dsub_chic.at[index, "NTR_SMPL"] = 0
                    dsub_chic.at[index, "NTR"] = 0
        else:
            if in_b_box:
                dsub_chic.at[index, "TR_SMPL"] = dsub_chic.at[index-1, "TR_SMPL"] + 1
                dsub_chic.at[index, "NTR_SMPL"] = dsub_chic.at[index-1, "NTR_SMPL"]
            else:
                dsub_chic.at[index, "TR_SMPL"] = dsub_chic.at[index-1, "TR_SMPL"]
                if in_b_box_other:
                    dsub_chic.at[index, "NTR_SMPL"] = dsub_chic.at[index-1, "NTR_SMPL"] + 1
                else:
                    dsub_chic.at[index, "NTR_SMPL"] = dsub_chic.at[index-1, "NTR_SMPL"]

            dsub_chic.at[index, "TR"] = dsub_chic.at[index, "TR_SMPL"]/idx
            dsub_chic.at[index, "NTR"] = dsub_chic.at[index, "NTR_SMPL"]/idx


    # plot
    fig = plt.figure(figsize=(10,5))
    
    dlt = dsub_wolf.loc[0, "TIME"] - dsub_chic.loc[0, "TIME"]

    
    plt.scatter(dsub_wolf["TIME"]-dlt,dsub_wolf["NTR"], s=2,color="lightgreen",label="$ntr_{static}$")
    plt.scatter(dsub_wolf["TIME"]-dlt,dsub_wolf["TR"], s=2,color="darkgreen",label="$tr_{static}$")
    plt.scatter(dsub_chic["TIME"],dsub_chic["NTR"], s=2,color="#b8c1f2",label="$ntr_{motion}$")
    plt.scatter(dsub_chic["TIME"],dsub_chic["TR"], s=2,color="darkblue",label="$tr_{motion}$")
    
    # labels and ticks    
    plt.axvline(x = audiostart, color = 'r' if 'a' in highlight else 'k')
    plt.text(audiostart-0.02, 1.06, 'AUDIOSTART', color = 'r' if 'a' in highlight else 'k')
    plt.axvline(x = verb_onset, color = 'r' if 'v' in highlight else 'k')
    plt.text(verb_onset-0.02, 1.06, 'VERBONSET', color = 'r' if 'v' in highlight else 'k')
    plt.axvline(x = target_onset, color = 'r' if 't' in highlight else 'k')
    plt.text(target_onset-0.02, 1.06, 'TARGETONSET', color = 'r' if 't' in highlight else 'k')

    plt.title(f"Metrics of interest for subject 6 and 7 and sentence (ID: {stc_id}): {stc}\n")
    plt.xlim((audiostart-0.1, pause))
    plt.ylim((-0.05,1.05))
    plt.ylabel("Metrics")
    plt.xlabel("Time in seconds")
    plt.legend(loc='lower right')

    # display plot
    plt.show()

    # save plot to assets
    fig.savefig(f'{plt_folder_name}/4-analysis-metrics_{str(sentence_id)}.png')


calc_tr_ntr(data, "", 4)

KeyError: 0