### Libraries

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os

### Functions

In [14]:
def find_resultCSV(dir, task_ids=('dew6', 'sg16', 'asre', '7wjx', 'ucr5')):
    """Return the result CSV files directly inside ``dir`` that match a task id.

    Parameters
    ----------
    dir : str
        Directory to search (non-recursively) for ``*.csv`` files.
    task_ids : str or iterable of str, optional
        Substrings identifying the wanted files. A file is kept when its
        *file name* contains at least one of them. Defaults to the ids
        'dew6', 'sg16', 'asre', '7wjx' and 'ucr5'.

    Returns
    -------
    list of str
        Matching paths, sorted, with backslashes converted to forward slashes.
        Empty when nothing matches.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(task_ids, str):
        task_ids = (task_ids,)
    task_ids = tuple(task_ids)

    # glob.escape keeps characters such as '[' or '*' in the directory name
    # from being interpreted as wildcard syntax.
    csv_files = glob.glob(glob.escape(dir) + "/*.csv")
    normalized = [path.replace('\\', '/') for path in csv_files]

    # Match on the file name only: testing the whole path would let every CSV
    # through whenever a parent directory happens to contain one of the ids.
    result_files = [
        path for path in normalized
        if any(task_id in os.path.basename(path) for task_id in task_ids)
    ]

    # glob order is platform-dependent; sort so repeated runs agree.
    return sorted(result_files)

def calculate_correctness(df):
    """Score each (stimulus, participant) pair of a PA-task trial table.

    A trial counts as correct when its 'Response' value equals its
    'Absent_Present' value.

    Parameters
    ----------
    df : pandas.DataFrame
        Trial table with the columns 'Response', 'Absent_Present', 'Stim_Ind'
        and 'Participant Private ID'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per ('Stim_Ind', 'Participant Private ID') combination with a
        'Percent_Correctness' column. Despite the name, the value is the mean
        of the 0/1 trial scores, i.e. a fraction between 0 and 1 rather than a
        percentage.
    """
    # 1 when the response matches the expected answer, 0 otherwise.
    is_correct = (df['Response'] == df['Absent_Present']).astype(int)

    # assign() works on a copy, so the caller's frame never gains a 'result'
    # column and slices of a larger frame do not raise SettingWithCopyWarning.
    correctness = (
        df.assign(result=is_correct)
        .groupby(['Stim_Ind', 'Participant Private ID'])['result']
        .mean()
        .reset_index()
        .rename(columns={'result': 'Percent_Correctness'})
    )

    return correctness

def count(dir, output_dir):
    """Compute per-stimulus, per-participant correctness and export it as CSV.

    Reads every result CSV selected by ``find_resultCSV(dir)``, drops rows that
    are not scorable responses, splits the trials by attended condition, scores
    each part with ``calculate_correctness`` and writes two files into
    ``output_dir``:

    * ``PA_correctness_complexTone.csv`` - trials whose 'Attend_Condition' is
      'complex_tones'
    * ``PA_correctness_other.csv`` - all remaining trials

    Parameters
    ----------
    dir : str
        Directory containing the result CSV files.
    output_dir : str
        Directory that receives the two CSV files; created when missing.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(complex_tone_correctness, other_correctness)``, the same tables that
        were written to disk.

    Raises
    ------
    ValueError
        If ``dir`` contains no matching result CSV file.
    """
    result_files = find_resultCSV(dir)
    if not result_files:
        # pd.concat([]) would raise a ValueError with an unhelpful message.
        raise ValueError(f"No matching result CSV files found in {dir!r}")

    # 1. Load only the columns needed for scoring from every result file.
    columns = ['Participant Private ID', 'Absent_Present', 'Attend_Condition',
               'Response', 'Sequence_Names', 'Switch_Names', 'Stim_Ind']
    trials = pd.concat(
        [pd.read_csv(file, usecols=columns) for file in result_files],
        ignore_index=True,
    )

    # 2. Keep scorable rows: drop the "AUDIO PLAY REQUESTED" rows and rows
    #    without a response, then drop the trials labelled "absent".
    trials = trials.query('Response != "AUDIO PLAY REQUESTED"').dropna(subset=['Response'])
    trials = trials.query('Absent_Present != "absent"')

    # 3. Split by attended condition. The explicit copies keep the two parts
    #    independent of `trials`, so downstream column assignment is safe.
    #    No sorting is needed: the grouped means do not depend on row order.
    is_complex_tone = trials['Attend_Condition'] == 'complex_tones'
    complex_tone_trials = trials[is_complex_tone].copy()
    other_trials = trials[~is_complex_tone].copy()

    # 4. Score each part.
    correctness_complex_tone = calculate_correctness(complex_tone_trials)
    correctness_other = calculate_correctness(other_trials)

    # 5. Export to CSV files.
    os.makedirs(output_dir, exist_ok=True)
    correctness_complex_tone.to_csv(os.path.join(output_dir, 'PA_correctness_complexTone.csv'), index=False)
    correctness_other.to_csv(os.path.join(output_dir, 'PA_correctness_other.csv'), index=False)

    return correctness_complex_tone, correctness_other


### This code extracts the sequence information and the performance (% correctness) for each sequence

#### Experiment 2a Everyday sound snippets vs Complex tones

In [15]:
# Input directories holding the exported result CSVs for Experiment 2a.
dir_mac1 = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Result & Analysis/7.replicate-len4-Complextone-Everydaysounds/data/prolific-8-29/data_exp_141875-v2"
# Alternative inputs, currently disabled:
# dir_mac2 = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Result & Analysis/4.replicate-len6-Complextone-Everydaysounds/data/Joys-8-1"
# dir_mac3 = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Result & Analysis/4.replicate-len6-Complextone-Everydaysounds/data/Perceptual Organization Pilot - Audio - ES"
dir1 = [dir_mac1]

# Directory that receives the correctness CSV files written by count().
save_dir_mac1 = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Result & Analysis/10. serial position analysis/Everyday_CompexTone_result"
# exist_ok=True creates the directory when it is missing and does nothing
# otherwise, replacing the separate check-then-create step.
os.makedirs(save_dir_mac1, exist_ok=True)

# NOTE: count() always writes the same two file names, so with more than one
# entry in dir1 only the last directory's results would remain on disk.
for input_dir in dir1:
    count(input_dir, save_dir_mac1)

#### Experiment 2b Cat Meows vs Complex tones

In [16]:
# Input directories holding the exported result CSVs for Experiment 2b.
dir_mac2 = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Result & Analysis/6.replicate-len4-Complextone-Catsounds/data/prolific-8-31/data_exp_141161-v2"
# NOTE(review): the disabled paths below point at the len6 Everyday-sounds
# data rather than cat sounds, and the first one reuses the name dir_mac2 -
# they look like copy-paste leftovers; confirm before re-enabling.
# dir_mac2 = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Result & Analysis/4.replicate-len6-Complextone-Everydaysounds/data/Joys-8-1"
# dir_mac3 = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Result & Analysis/4.replicate-len6-Complextone-Everydaysounds/data/Perceptual Organization Pilot - Audio - ES"
dir2 = [dir_mac2]

# Directory that receives the correctness CSV files written by count().
save_dir_mac2 = "/Volumes/T7/CMU LiMN Research/Perceptual Organization/Result & Analysis/10. serial position analysis/Cat_CompexTone_result"
# exist_ok=True creates the directory when it is missing and does nothing
# otherwise, replacing the separate check-then-create step.
os.makedirs(save_dir_mac2, exist_ok=True)

# NOTE: count() always writes the same two file names, so with more than one
# entry in dir2 only the last directory's results would remain on disk.
for input_dir in dir2:
    count(input_dir, save_dir_mac2)
