In [1]:
import pandas as pd

In [2]:
import glob

In [3]:
import re

In [4]:
data_list = glob.glob('*_data.csv')
data_list

['123_data.csv',
 '6_data.csv',
 '7_data.csv',
 '746_data.csv',
 '14_data.csv',
 '15_data.csv',
 '772_data.csv',
 '1_data.csv',
 '13_data.csv',
 '12_data.csv',
 '970_data.csv',
 '963_data.csv',
 '5_data.csv',
 '181_data.csv',
 '8_data.csv',
 '10_data.csv',
 '11_data.csv',
 '88_data.csv']

In [5]:
form_list = list()
for item in data_list:
    form_list.append(item.replace('data', 'form'))
form_list

['123_form.csv',
 '6_form.csv',
 '7_form.csv',
 '746_form.csv',
 '14_form.csv',
 '15_form.csv',
 '772_form.csv',
 '1_form.csv',
 '13_form.csv',
 '12_form.csv',
 '970_form.csv',
 '963_form.csv',
 '5_form.csv',
 '181_form.csv',
 '8_form.csv',
 '10_form.csv',
 '11_form.csv',
 '88_form.csv']

In [6]:
data_df = pd.DataFrame({'Hit': pd.Series(dtype='int'),
                        'False Alarm': pd.Series(dtype='int'),
                        'No Response': pd.Series(dtype='int'),
                        'Data File': pd.Series(dtype='string')})

for item in data_list:    
    participant_df = pd.read_csv(item)
    hit = 0
    noAnswer = 0
    falseAlarm = 0

    for index, row in participant_df.iterrows():
        if row['Signal'] == 0:
            if row['Response'] == '[]':
                noAnswer += 1
            elif int(row['Response']) == 1:
                hit += 1
        else:
            if row['Response'] == '[]':
                noAnswer += 1
            elif int(row['Response']) == 1:
                falseAlarm += 1
    
    new_row = pd.Series({'Hit': hit,
                        'False Alarm': falseAlarm,
                        'No Response': noAnswer,
                        'Data File': item})
    data_df = pd.concat([data_df, new_row.to_frame().T], ignore_index=True)

    
data_df

Unnamed: 0,Hit,False Alarm,No Response,Data File
0,13,7,1,123_data.csv
1,7,5,0,6_data.csv
2,10,2,0,7_data.csv
3,7,4,1,746_data.csv
4,12,6,0,14_data.csv
5,8,4,1,15_data.csv
6,6,3,0,772_data.csv
7,6,5,0,1_data.csv
8,10,6,2,13_data.csv
9,10,4,0,12_data.csv


In [7]:
def clean_string(input_string):
    cleaned_string = re.sub(r'[^0-9,]', '', input_string)
    return cleaned_string

In [8]:
def sum_integers(input_string):
    components = input_string.split(',')
    return sum(int(component) for component in components if component.isdigit())

In [9]:
form_df = pd.DataFrame()
for item in form_list:
    new_row = pd.read_csv(item)
    new_row['Form File'] = [item]

    # Processing
    new_row.at[0, 'prof_list'] = clean_string(str(new_row.iloc[0]['prof_list']))
    new_row.at[0, 'sum_proficiency'] = sum_integers(str(new_row.iloc[0]['prof_list']))

    form_df = pd.concat([form_df, new_row], ignore_index=True)

form_df = form_df.drop(form_df.columns[0], axis=1)
form_df

Unnamed: 0,n_languages,lang_list,prof_list,sum_proficiency,isMono,Form File
0,2,"Chinese, English",108,18,True,123_form.csv
1,2,"English, Gujurati",105,15,False,6_form.csv
2,3,"english,spanish,Italian",1044,18,True,7_form.csv
3,2,"English, French",105,15,False,746_form.csv
4,2,"English, French",104,14,False,14_form.csv
5,2,"English, Japanese",96,15,True,15_form.csv
6,3,"Chinese, English, Spanish",1082,20,False,772_form.csv
7,2,"English, Farsi",106,16,True,1_form.csv
8,2,"English, Korean",1010,20,True,13_form.csv
9,2,"English, German",106,16,False,12_form.csv


In [10]:
result_df = pd.concat([data_df, form_df], axis=1)
result_df

Unnamed: 0,Hit,False Alarm,No Response,Data File,n_languages,lang_list,prof_list,sum_proficiency,isMono,Form File
0,13,7,1,123_data.csv,2,"Chinese, English",108,18,True,123_form.csv
1,7,5,0,6_data.csv,2,"English, Gujurati",105,15,False,6_form.csv
2,10,2,0,7_data.csv,3,"english,spanish,Italian",1044,18,True,7_form.csv
3,7,4,1,746_data.csv,2,"English, French",105,15,False,746_form.csv
4,12,6,0,14_data.csv,2,"English, French",104,14,False,14_form.csv
5,8,4,1,15_data.csv,2,"English, Japanese",96,15,True,15_form.csv
6,6,3,0,772_data.csv,3,"Chinese, English, Spanish",1082,20,False,772_form.csv
7,6,5,0,1_data.csv,2,"English, Farsi",106,16,True,1_form.csv
8,10,6,2,13_data.csv,2,"English, Korean",1010,20,True,13_form.csv
9,10,4,0,12_data.csv,2,"English, German",106,16,False,12_form.csv


In [11]:
result_df.to_csv('result.csv')