In [None]:
import os
import ast 
import pandas as pd

# Working Dir.
# All relative paths used below ('test_data/...', 'output/...') are resolved
# against this folder. It defaults to the original hard-coded location; set the
# AI_PROJECT_DIR environment variable to run the notebook from another checkout.
PROJECT_DIR = os.environ.get(
    'AI_PROJECT_DIR',
    '/Users/fogellmcmuffin/Documents/ra/team_discussions/AI/',
)
os.chdir(PROJECT_DIR)

In [None]:
###############
## Functions ##
###############

def _find_dict_literal(text, search_from=0):
    """Parse the dict literal opening at the first '{' at or after ``search_from``.

    Each following '}' is tried in turn as the closing brace until the slice
    evaluates to a dict, so nested dicts and braces inside string values are
    handled. Returns ``(parsed_dict, open_index)`` or ``None`` if nothing parses.
    """
    open_index = text.find('{', search_from)
    if open_index == -1:
        return None

    close_index = text.find('}', open_index)
    while close_index != -1:
        try:
            parsed = ast.literal_eval(text[open_index:close_index + 1])
        except (ValueError, SyntaxError, TypeError):
            parsed = None  # not a complete literal yet; try the next '}'
        if isinstance(parsed, dict):
            return parsed, open_index
        close_index = text.find('}', close_index + 1)
    return None


def extract_dict_from_file(file_path):
    """Extract the category dictionary and reasoning text from a GPT response file.

    The file is read as UTF-8 text. The reasoning is the text between the
    markers "Step-by-step Reasoning: " and "Python Dictionary:"; the dictionary
    is a Python dict literal that must contain an 'assigned_categories' list.

    Parameters
    ----------
    file_path : str or path-like
        Path of the response text file.

    Returns
    -------
    dict
        'gpt_reasoning' first, then every key of the parsed dictionary, then
        one ``category: 1`` entry for each item of 'assigned_categories'.

    Raises
    ------
    ValueError
        If the file contains no parseable dict literal.
    KeyError
        If the parsed dictionary has no 'assigned_categories' key.
    """
    start_keyword = "Step-by-step Reasoning: "
    end_keyword = "Python Dictionary:"

    with open(file_path, 'r', encoding='utf-8') as file:    # Opening response file
        text = file.read()

    # str.find returns -1 for a missing marker; using that value directly would
    # silently cut the text at the wrong place, so fall back to the file start.
    start_index = text.find(start_keyword)
    start_index = 0 if start_index == -1 else start_index + len(start_keyword)
    end_index = text.find(end_keyword, start_index)

    # Extracting dictionary: prefer the literal that follows the
    # "Python Dictionary:" marker so braces inside the reasoning are not
    # mistaken for it; otherwise fall back to the first '{' in the file.
    found = None
    if end_index != -1:
        found = _find_dict_literal(text, end_index + len(end_keyword))
    if found is None:
        found = _find_dict_literal(text)
    if found is None:
        raise ValueError(f'No dictionary literal found in response file {file_path!r}')
    cat_dict, dict_index = found

    # Extracting GPT reasoning (without the end marker it stops at the dictionary)
    if end_index == -1:
        end_index = dict_index
    reasoning = text[start_index:end_index].strip()

    data_dict = {'gpt_reasoning': reasoning}  # Making sure gpt_reasoning is the first key
    data_dict.update(cat_dict)

    for category in cat_dict['assigned_categories']:  # Making a binary key for each assigned category
        data_dict[category] = 1

    return data_dict


def response_df(response_dir, test_df):
    """Turn a folder of GPT response files into a GPT-coded dataframe.

    Every regular, non-hidden file in ``response_dir`` is parsed with
    ``extract_dict_from_file``; the resulting records are combined into one
    frame and outer-merged with ``test_df`` on 'window_number'.

    Parameters
    ----------
    response_dir : str or path-like
        Directory holding one response text file per window.
    test_df : pandas.DataFrame
        Frame with a 'window_number' column to merge the responses onto.

    Returns
    -------
    pandas.DataFrame
        ``test_df`` columns followed by 'gpt_reasoning' and one 0/1 column per
        category. Windows without a response file keep NaN in those columns.
    """
    resp_list = []

    # sorted(): os.listdir returns entries in arbitrary order, and the order in
    # which records arrive decides the order of the category columns.
    for file_name in sorted(os.listdir(response_dir)):
        file_path = os.path.join(response_dir, file_name)
        # Skip hidden entries and sub-directories (e.g. .DS_Store,
        # .ipynb_checkpoints) that are not GPT responses.
        if file_name.startswith('.') or not os.path.isfile(file_path):
            continue
        resp_list.append(extract_dict_from_file(file_path))

    df = pd.DataFrame.from_records(resp_list)
    df = df.drop(columns=['assigned_categories'])
    df = df.fillna(0)  # categories not assigned to a window become 0

    return pd.merge(test_df, df, on='window_number', how='outer')


def ucoop_udef_rename(df, prefix):
    """Add ``prefix`` plus an underscore to every created category column.

    A category column is any column left over once the fixed, non-category
    columns listed below are removed. Returns a renamed copy; ``df`` itself is
    not modified. Raises KeyError if one of the fixed columns is missing.
    """
    fixed_columns = [
        'summary', 'unilateral_cooperation', 'window_number', 'gpt_reasoning',
        'a', 'b', 'c', 'd', 'e',
    ]

    # drop() is used (instead of filtering names) so that a missing fixed
    # column still raises KeyError.
    categories_only = df.drop(columns=fixed_columns)

    prefixed_names = {}
    for column in categories_only.columns:
        prefixed_names[column] = f'{prefix}_{column}'

    return df.rename(columns=prefixed_names)


In [None]:
#########################
## GPT Coded DataFrame ##
#########################

treatment = 'noise'
test = 'test_10'
N_TEST_WINDOWS = 20  # only the first 20 rows of each test file are used


def _load_test_data(csv_path, unilateral_cooperation, n_rows=N_TEST_WINDOWS):
    """Read the first ``n_rows`` rows of a test CSV and tag them.

    'window_number' is cast to int and 'unilateral_cooperation' is set to the
    given flag. The slice is copied before columns are assigned so the
    assignments do not hit a view of the full frame (SettingWithCopyWarning).
    """
    data = pd.read_csv(csv_path).head(n_rows).copy()
    data['window_number'] = data['window_number'].astype(int)
    data['unilateral_cooperation'] = unilateral_cooperation
    return data


ucoop_test_data = _load_test_data(f'test_data/RAsum_{treatment}_ucoop.csv', 1)
udef_test_data = _load_test_data(f'test_data/RAsum_{treatment}_udef.csv', 0)

ucoop_response_dir = f'output/{test}/stage_2_ucoop/responses/'
udef_response_dir = f'output/{test}/stage_2_udef/responses/'

ucoop_df = response_df(ucoop_response_dir, ucoop_test_data)
ucoop_df = ucoop_udef_rename(ucoop_df, 'ucoop')

udef_df = response_df(udef_response_dir, udef_test_data)
udef_df = ucoop_udef_rename(udef_df, 'udef')

# A category column exists only in the frame whose prefix it carries, so
# concat leaves NaN in the other frame's rows; those are filled with 0.
GPT_df = pd.concat([ucoop_df, udef_df], ignore_index=True, sort=False)
GPT_df = GPT_df.fillna(0)

# test[5:] strips the leading 'test_' (e.g. 'test_10' -> '10').
GPT_df.to_csv(f'output/{test}/t{test[5:]}_final_output.csv', index=False)
