# Coherence Score Refinement

In [49]:
# Merge coherence score datasets
import pandas as pd
import numpy as np

cw_grading = pd.read_excel('cw_graded.xlsx')

# Limit columns but leave in response
first_10 = pd.read_csv('predictions_based_on_first_10.csv')[['model_task_method', 'conversation_number', 'Prediction', 'response']]
# Rename column Prediction to Prediction_Based_On_First_10
first_10.rename(columns={'Prediction': 'Prediction_Based_On_First_10'}, inplace=True)
# Rename column response to response_first_10
first_10.rename(columns={'response': 'response_first_10'}, inplace=True)

last_10 = pd.read_csv('predictions_based_on_last_10.csv')[['model_task_method', 'conversation_number', 'Prediction', 'response']]
# Rename column Prediction to Prediction_Based_On_Last_10
last_10.rename(columns={'Prediction': 'Prediction_Based_On_Last_10'}, inplace=True)
# Rename column response to response_last_10
last_10.rename(columns={'response': 'response_last_10'}, inplace=True)


In [50]:
# Merge on model_task_method and conversation_number
cw_grading_w_pred = cw_grading.merge(first_10, on=['model_task_method', 'conversation_number'], how = 'left')
cw_grading_w_pred = cw_grading_w_pred.merge(last_10, on=['model_task_method', 'conversation_number'], how = 'left')


In [51]:
# Create aggregated response column by taking response_first_10, response_last_10 if it is missing
cw_grading_w_pred['response'] = cw_grading_w_pred['response_first_10'].fillna(cw_grading_w_pred['response_last_10'])



In [52]:
# Remove {"Score": "
# Remove "}
cw_grading_w_pred['Prediction_Based_On_First_10'] = cw_grading_w_pred['Prediction_Based_On_First_10'].str.replace('{"Score": "', '')
cw_grading_w_pred['Prediction_Based_On_First_10'] = cw_grading_w_pred['Prediction_Based_On_First_10'].str.replace('"}', '')
cw_grading_w_pred['Prediction_Based_On_Last_10'] = cw_grading_w_pred['Prediction_Based_On_Last_10'].str.replace('{"Score": "', '')
cw_grading_w_pred['Prediction_Based_On_Last_10'] = cw_grading_w_pred['Prediction_Based_On_Last_10'].str.replace('"}', '')

# Convert score to float
cw_grading_w_pred['Prediction_Based_On_First_10'] = cw_grading_w_pred['Prediction_Based_On_First_10'].astype(float)
cw_grading_w_pred['Prediction_Based_On_Last_10'] = cw_grading_w_pred['Prediction_Based_On_Last_10'].astype(float)

cw_grading_w_pred


Unnamed: 0,model_task_method,conversation_number,coherence_1_incoherent_10_very_coherent,task_constraints_followed_0_not_followed_1_followed,ease_of_review_1_easy_10_hard,Prediction_Based_On_First_10,response_first_10,Prediction_Based_On_Last_10,response_last_10,response
0,td3_cw_direct_prompting_responses,1,1,1.0,,,,1.0,Learning to do a handstand is a fun activity f...,Learning to do a handstand is a fun activity f...
1,td3_cw_direct_prompting_responses,2,7,0.0,,,,7.0,The hawk was used to hunting what he needed fo...,The hawk was used to hunting what he needed fo...
2,td3_cw_direct_prompting_responses,3,1,1.0,,,,1.0,I love the smell of roasting almonds in the ki...,I love the smell of roasting almonds in the ki...
3,td3_cw_direct_prompting_responses,4,10,1.0,,,,7.0,Ralph's bedroom was routinely filled up with s...,Ralph's bedroom was routinely filled up with s...
4,td3_cw_direct_prompting_responses,5,4,1.0,,,,1.0,Joe had a unique way of dealing with the hospi...,Joe had a unique way of dealing with the hospi...
...,...,...,...,...,...,...,...,...,...,...
1595,gpt4_cw_manual_cot_responses,96,10,,,8.0,"For as long as I can remember, my brother had ...",,,"For as long as I can remember, my brother had ..."
1596,gpt4_cw_manual_cot_responses,97,7,,,7.0,Ideas:\n1. Make the passage about learning les...,,,Ideas:\n1. Make the passage about learning les...
1597,gpt4_cw_manual_cot_responses,98,7,,,7.0,Shawn was lagging behind in his political scie...,,,Shawn was lagging behind in his political scie...
1598,gpt4_cw_manual_cot_responses,99,7,,,7.0,Ideas:\n1. Make the passage about a child play...,,,Ideas:\n1. Make the passage about a child play...


In [53]:
# Create mean/aggregated prediction
cw_grading_w_pred['Aggegated_Prediction'] = cw_grading_w_pred[['Prediction_Based_On_First_10', 'Prediction_Based_On_Last_10']].mean(axis=1)
cw_grading_w_pred['Aggegated_Prediction'] = cw_grading_w_pred['Aggegated_Prediction'].fillna(cw_grading_w_pred['Prediction_Based_On_First_10'])
cw_grading_w_pred['Aggegated_Prediction'] = cw_grading_w_pred['Aggegated_Prediction'].fillna(cw_grading_w_pred['Prediction_Based_On_Last_10'])


In [54]:
# Order columns
# model_task_method, conversation_number, response, coherence_1_incoherent_10_very_coherent, Aggregated_Prediction, diff
# Create diff, abs_diff column as difference, abs difference between coherence score and prediction
cw_grading_w_pred['diff'] = cw_grading_w_pred['coherence_1_incoherent_10_very_coherent'] - cw_grading_w_pred['Aggegated_Prediction']
cw_grading_w_pred['abs_diff'] = np.abs(cw_grading_w_pred['coherence_1_incoherent_10_very_coherent'] - cw_grading_w_pred['Aggegated_Prediction'])
cw_grading_w_pred_output = cw_grading_w_pred[['model_task_method', 'conversation_number', 'response', 'coherence_1_incoherent_10_very_coherent', 'Aggegated_Prediction', 'diff', 'abs_diff']]


In [55]:
# Sort by absolute value of diff descending and output to Excel
cw_grading_w_pred_output.sort_values(by='abs_diff', ascending=False, inplace=True)
cw_grading_w_pred_output.to_excel('cw_grading_w_pred.xlsx', index=False)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cw_grading_w_pred_output.sort_values(by='abs_diff', ascending=False, inplace=True)


In [56]:
# Load in Combined_Data.xlsx
combined_data = pd.read_excel('Combined_Data.xlsx')


In [57]:
# Also create LP version
combined_data['LP_diff'] = combined_data['coherence_1_incoherent_10_very_coherent'] - combined_data['Aggregated_Prediction_LP']
combined_data['LP_abs_diff'] = np.abs(combined_data['coherence_1_incoherent_10_very_coherent'] - combined_data['Aggregated_Prediction_LP'])


In [58]:
# Limit columns
combined_data_output = combined_data[['model_task_method', 'conversation_number', 'response_LP', 'coherence_1_incoherent_10_very_coherent', 'Aggregated_Prediction_LP', 'LP_diff', 'LP_abs_diff']]


In [59]:
# Sort by absolute value of diff descending and output to Excel
combined_data_output.sort_values(by='LP_abs_diff', ascending=False, inplace=True)
combined_data_output.to_excel('cw_grading_w_pred_LP.xlsx', index=False)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  combined_data_output.sort_values(by='LP_abs_diff', ascending=False, inplace=True)
