# Run Regressions.ipynb

Logistic regressions of GSM8K accuracy on length and complexity variables, quadratic terms, clustering by identity of the question and method (task, conversation_id, method).

Regression of CW scores similarly.

Add complexity of provided answers as an interaction term in GSM8K regressions.

Add model as an interaction term.

Model and question (conversation_number by task) controls can soak up additional variation.

In [1]:
from stargazer.stargazer import Stargazer
import statsmodels.api as sm
import pandas as pd
import statsmodels.formula.api as smf


In [2]:
# Load Combined_Data.xlsx
df = pd.read_excel('Combined_Data.xlsx')

print(df.columns)

df


Index(['model_task_method', 'conversation_number',
       'coherence_1_incoherent_10_very_coherent', 'compliance_OLD',
       'ease_of_review_1_easy_10_hard', 'correct',
       'Prediction_Based_On_First_10', 'Prediction_Based_On_Last_10',
       'Aggregated_Prediction', 'Prediction_Based_On_First_10_LP',
       'response_Based_On_First_10_LP', 'Prediction_Based_On_Last_10_LP',
       'response_Based_On_Last_10_LP', 'response_LP',
       'Aggregated_Prediction_LP', 'Prediction_Based_On_First_50_LP',
       'response_Based_On_First_50_LP', 'Prediction_Based_On_Last_50_LP',
       'response_Based_On_Last_50_LP', 'Aggregated_Prediction_50_LP',
       'Prediction_Based_On_random_50_LP_1',
       'response_Based_On_random_50_LP_1',
       'Prediction_Based_On_random_50_LP_2',
       'response_Based_On_random_50_LP_2',
       'Aggregated_Prediction_random_50_LP', 'Unnamed: 0_x', 'response_x',
       'replace_slash_n_slash_n_with_newline_x',
       'replace_slash_n_slash_n_with_newline_values

Unnamed: 0,model_task_method,conversation_number,coherence_1_incoherent_10_very_coherent,compliance_OLD,ease_of_review_1_easy_10_hard,correct,Prediction_Based_On_First_10,Prediction_Based_On_Last_10,Aggregated_Prediction,Prediction_Based_On_First_10_LP,...,num_linebreaks_prompts_diff,num_sentences_prompts_diff,num_step_i_prompts_diff,num_1_dot_etc_prompts_diff,sentence_length_prompts_diff,fres_prompts_diff,num_linebreaks_provided_diff,num_sentences_provided_diff,num_step_i_provided_diff,num_1_dot_etc_provided_diff
0,td3_cw_direct_prompting_responses,1,1.0,1.0,1.0,,,1.0,1.0,,...,1,2,0,-2,5.971429,-12.31,,,,
1,td3_cw_direct_prompting_responses,2,7.0,0.0,1.0,,,7.0,7.0,,...,1,3,0,-2,9.350000,1.05,,,,
2,td3_cw_direct_prompting_responses,3,1.0,1.0,1.0,,,1.0,1.0,,...,1,4,0,-2,4.533333,6.64,,,,
3,td3_cw_direct_prompting_responses,4,10.0,1.0,1.0,,,7.0,7.0,,...,1,1,0,-2,9.533333,9.31,,,,
4,td3_cw_direct_prompting_responses,5,4.0,1.0,1.0,,,1.0,1.0,,...,1,2,0,-2,7.828571,-13.64,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,gpt4_gsm8k_manual_cot_responses,96,,,,1.0,,,,,...,-16,-39,0,-17,,,-4.0,1.0,0.0,2.0
3196,gpt4_gsm8k_manual_cot_responses,97,,,,1.0,,,,,...,-16,-43,0,-19,,,-2.0,0.0,0.0,1.0
3197,gpt4_gsm8k_manual_cot_responses,98,,,,1.0,,,,,...,-16,-44,0,-20,,,-3.0,-1.0,0.0,0.0
3198,gpt4_gsm8k_manual_cot_responses,99,,,,1.0,,,,,...,-15,-44,0,-19,,,-1.0,0.0,0.0,1.0


In [3]:
# Transformation - divide conversation length by 1000 to get effect per 1000K tokens
df['conversation_length_thousands'] = df['conversation_length']/1000


In [4]:
# Create quadratics
df['conversation_length_thousands_2'] = df['conversation_length_thousands']**2
df['consolidated_num_steps_ideas_2'] = df['consolidated_num_steps_ideas']**2
df['fres_2'] = df['fres']**2


In [5]:
# Create clustering variable
# Concatenate task, conversation_number, method
df['task_conversation_method'] = df['task'].astype(str) + "_" + df['conversation_number'].astype(str) + "_" + df['method'].astype(str)


In [6]:
# Task by conversation variable as a control
df['task_conversation'] = df['task'].astype(str) + "_" + df['conversation_number'].astype(str)


In [7]:
# Split data
gsm8k_data = df[df['task'] == 'gsm8k']
cw_data = df[df['task'] == 'cw']


### Some Checks

In [8]:
# Get values of correct in gsm8k_data
gsm8k_correct = gsm8k_data['correct'].values
print(set(gsm8k_correct))

# Print cases where correct is not 0 or 1
#print(gsm8k_data[gsm8k_data['correct'] != 0 & gsm8k_data['correct'] != 1])


{0.0, 1.0}


### Functions for table creation

In [9]:
# Create logit df
def create_logit_results_df(model, marginal_effects, title):
    #coef_names = model.params.index
    #print(coef_names)
    coef_names_with_mes = list(model.params.index)[1:]
    #print(coef_names_with_mes)
    me_values = marginal_effects.margeff
    #print(me_values)
    me_ses = marginal_effects.margeff_se
    # print(me_ses)
    p_values = marginal_effects.pvalues
    # print(p_values)
    # Loop over items in coef_names_with_mes, enumerated
    string_entries = []
    for i, coef_name in enumerate(coef_names_with_mes):
        # Print coef_name, me_values[i], me_ses[i], p_values[i]
        #print(coef_name, me_values[i], me_ses[i], p_values[i])
        # Consolidate me_values[i], me_ses[i], p_values[i] into a string
        # me_value* (me_se), where the star is if p_value < 0.05
        if p_values[i] < 0.05:
            string_entries.append(str(round(me_values[i], 3)) + "* (" + str(round(me_ses[i], 3)) + ")")
        else:
            string_entries.append(str(round(me_values[i], 3)) + " (" + str(round(me_ses[i], 3)) + ")")

    # Make a dataframe with one row with columns of coef_names_with_mes and values of string_entries
    # Print the dataframe
    model_df = pd.DataFrame([string_entries], columns=coef_names_with_mes)
    # Add column Title
    model_df['Title'] = title
    return model_df

# Create linear regression df
def create_linear_results_df(model, title):
    coef_names = model.params.index
    # print(coef_names)
    coef_values = model.params.values
    # print(coef_values)
    sds = model.bse.values
    # print(sds)
    p_values = model.pvalues.values
    # print(p_values)

    # Loop over items in coef_names, enumerated
    string_entries = []
    for i, coef_name in enumerate(coef_names):
        # Print coef_name, coef_values[i], sds[i], p_values[i]
        #print(coef_name, coef_values[i], sds[i], p_values[i])
        # Consolidate coef_values[i], sds[i], p_values[i] into a string
        # coef_value* (sd), where the star is if p_value < 0.05
        if p_values[i] < 0.05:
            string_entries.append(str(round(coef_values[i], 3)) + "* (" + str(round(sds[i], 3)) + ")")
        else:
            string_entries.append(str(round(coef_values[i], 3)) + " (" + str(round(sds[i], 3)) + ")")

    # Make a dataframe with one row with columns of coef_names and values of string_entries
    # Print the dataframe
    model_df = pd.DataFrame([string_entries], columns=coef_names)
    # Add column Title
    model_df['Title'] = title
    #print(lpm_with_clustering_gsm8k_df)
    return model_df



### GSM8K Regressions

#### Logistic Regression (no clustering)

In [10]:
# Define the logistic regression model[T.td3][T.td3][T.td3]
logit_no_clustering_gsm8k = smf.logit('correct ~ conversation_length_thousands + consolidated_num_steps_ideas + conversation_length_thousands_2 + consolidated_num_steps_ideas_2 + model', data=gsm8k_data).fit(cov_type='HC3')

# Display the summary
print(logit_no_clustering_gsm8k.summary())

# Marginal effects
logit_no_clustering_gsm8k_marginal_effects = logit_no_clustering_gsm8k.get_margeff(at='overall')
print(logit_no_clustering_gsm8k_marginal_effects.summary())

# # Results for table
# # Get conversation_length_thousands marginal effect, star for significant, sd
# #clt2 = logit_no_clustering_gsm8k_marginal_effects
# #print(clt2)
# # Also get consolidated_num_steps_ideas marginal effect, star for significant, sd
# # Also get conversation_length_thousands_2 marginal effect, star for significant, sd
# # Also get consolidated_num_steps_ideas_2 marginal effect, star for significant, sd
# # Title: "GSM8K Correct, Logit"
# coef_names = logit_no_clustering_gsm8k.params.index
# #print(coef_names)
# coef_names_with_mes = list(logit_no_clustering_gsm8k.params.index)[1:]
# #print(coef_names_with_mes)
# me_values = logit_no_clustering_gsm8k_marginal_effects.margeff
# #print(me_values)
# me_ses = logit_no_clustering_gsm8k_marginal_effects.margeff_se
# # print(me_ses)
# p_values = logit_no_clustering_gsm8k_marginal_effects.pvalues
# # print(p_values)
# # Loop over items in coef_names_with_mes, enumerated
# string_entries = []
# for i, coef_name in enumerate(coef_names_with_mes):
#     # Print coef_name, me_values[i], me_ses[i], p_values[i]
#     #print(coef_name, me_values[i], me_ses[i], p_values[i])
#     # Consolidate me_values[i], me_ses[i], p_values[i] into a string
#     # me_value* (me_se), where the star is if p_value < 0.05
#     if p_values[i] < 0.05:
#         string_entries.append(str(round(me_values[i], 3)) + "* (" + str(round(me_ses[i], 3)) + ")")
#     else:
#         string_entries.append(str(round(me_values[i], 3)) + " (" + str(round(me_ses[i], 3)) + ")")

# # Make a dataframe with one row with columns of coef_names_with_mes and values of string_entries
# # Print the dataframe
# logit_no_clustering_gsm8k_marginal_effects_df = pd.DataFrame([string_entries], columns=coef_names_with_mes)
# # Add column Title
# logit_no_clustering_gsm8k_marginal_effects_df['Title'] = "GSM8K Correct, Logit"
# print(logit_no_clustering_gsm8k_marginal_effects_df)

# Use function to create results df
logit_no_clustering_gsm8k_df = create_logit_results_df(logit_no_clustering_gsm8k, logit_no_clustering_gsm8k_marginal_effects, "GSM8K Correct, Logit")
print(logit_no_clustering_gsm8k_df)


Optimization terminated successfully.
         Current function value: 0.576353
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                correct   No. Observations:                 1600
Model:                          Logit   Df Residuals:                     1594
Method:                           MLE   Df Model:                            5
Date:                Mon, 11 Dec 2023   Pseudo R-squ.:                  0.1496
Time:                        15:44:17   Log-Likelihood:                -922.17
converged:                       True   LL-Null:                       -1084.4
Covariance Type:                  HC3   LLR p-value:                 5.423e-68
                                      coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
Intercept                           1.2184      0.179      6.792  

#### Linear Probability Model with Clustering

In [11]:
# Define and fit the OLS model with clustered standard errors
lpm_with_clustering_gsm8k = smf.ols('correct ~ conversation_length_thousands + consolidated_num_steps_ideas + conversation_length_thousands_2 + consolidated_num_steps_ideas_2 + task_conversation + model', data=gsm8k_data).fit(cov_type='cluster', cov_kwds={'groups': gsm8k_data['task_conversation_method']})

# Print the model summary
print(lpm_with_clustering_gsm8k.summary())

# # Results for table
# # Get conversation_length_thousands coefficient, star for significant, sd
# # Also get consolidated_num_steps_ideas coefficient, star for significant, sd
# # Also get conversation_length_thousands_2 coefficient, star for significant, sd
# # Also get consolidated_num_steps_ideas_2 coefficient, star for significant, sd
# # Title: "GSM8K Correct, Linear"
# coef_names = lpm_with_clustering_gsm8k.params.index
# # print(coef_names)
# coef_values = lpm_with_clustering_gsm8k.params.values
# # print(coef_values)
# sds = lpm_with_clustering_gsm8k.bse.values
# # print(sds)
# p_values = lpm_with_clustering_gsm8k.pvalues.values
# # print(p_values)

# # Loop over items in coef_names, enumerated
# string_entries = []
# for i, coef_name in enumerate(coef_names):
#     # Print coef_name, coef_values[i], sds[i], p_values[i]
#     #print(coef_name, coef_values[i], sds[i], p_values[i])
#     # Consolidate coef_values[i], sds[i], p_values[i] into a string
#     # coef_value* (sd), where the star is if p_value < 0.05
#     if p_values[i] < 0.05:
#         string_entries.append(str(round(coef_values[i], 3)) + "* (" + str(round(sds[i], 3)) + ")")
#     else:
#         string_entries.append(str(round(coef_values[i], 3)) + " (" + str(round(sds[i], 3)) + ")")

# # Make a dataframe with one row with columns of coef_names and values of string_entries
# # Print the dataframe
# lpm_with_clustering_gsm8k_df = pd.DataFrame([string_entries], columns=coef_names)
# # Add column Title
# lpm_with_clustering_gsm8k_df['Title'] = "GSM8K Correct, Linear"
# print(lpm_with_clustering_gsm8k_df)

# Use function to create results df
lpm_with_clustering_gsm8k_df = create_linear_results_df(lpm_with_clustering_gsm8k, "GSM8K Correct, Linear")
print(lpm_with_clustering_gsm8k_df)


                            OLS Regression Results                            
Dep. Variable:                correct   R-squared:                       0.348
Model:                            OLS   Adj. R-squared:                  0.303
Method:                 Least Squares   F-statistic:                     32.04
Date:                Mon, 11 Dec 2023   Prob (F-statistic):          1.17e-222
Time:                        15:44:17   Log-Likelihood:                -793.62
No. Observations:                1600   AIC:                             1797.
Df Residuals:                    1495   BIC:                             2362.
Df Model:                         104                                         
Covariance Type:              cluster                                         
                                      coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
Intercept 

### CW Regressions

In [12]:
# Check
# Is avg_inter_paragraph_cosine_sim always present
#print(cw_data[cw_data['avg_inter_paragraph_cosine_sim'].isnull()])

# Check length
#print(len(cw_data['avg_inter_paragraph_cosine_sim']))
#print(len(cw_data['fres'].dropna()))

# Limit cw_data to rows where avg_inter_paragraph_cosine_sim is not null
cw_data = cw_data[cw_data['avg_inter_paragraph_cosine_sim'].notnull()]


#### Preferred cosine similarity measure

In [13]:
# Define and fit the OLS model with clustered standard errors
reg_with_clustering_cw = smf.ols("avg_inter_paragraph_cosine_sim ~ conversation_length_thousands + consolidated_num_steps_ideas + conversation_length_thousands_2 + consolidated_num_steps_ideas_2 + fres + fres_2 + task_conversation + model",
                                 data=cw_data).fit(cov_type='cluster', cov_kwds={'groups': cw_data['task_conversation_method']})

# Print the model summary
print(reg_with_clustering_cw.summary())

# # Results for table
# # Get conversation_length_thousands coefficient, star for significant, sd
# # Also get consolidated_num_steps_ideas coefficient, star for significant, sd
# # Also get conversation_length_thousands_2 coefficient, star for significant, sd
# # Also get consolidated_num_steps_ideas_2 coefficient, star for significant, sd
# # Also get fres coefficient, star for significant, sd
# # Also get fres_2 coefficient, star for significant, sd
# # Title "Creative Writing Cosine Similarity"
# coef_names = reg_with_clustering_cw.params.index
# #print(coef_names)
# coef_values = reg_with_clustering_cw.params.values
# #print(coef_values)
# sds = reg_with_clustering_cw.bse.values
# #print(sds)
# p_values = reg_with_clustering_cw.pvalues.values
# #print(p_values)

# # Loop over items in coef_names, enumerated
# string_entries = []
# for i, coef_name in enumerate(coef_names):
#     # Print coef_name, coef_values[i], sds[i], p_values[i]
#     #print(coef_name, coef_values[i], sds[i], p_values[i])
#     # Consolidate coef_values[i], sds[i], p_values[i] into a string
#     # coef_value* (sd), where the star is if p_value < 0.05
#     if p_values[i] < 0.05:
#         string_entries.append(str(round(coef_values[i], 3)) + "* (" + str(round(sds[i], 3)) + ")")
#     else:
#         string_entries.append(str(round(coef_values[i], 3)) + " (" + str(round(sds[i], 3)) + ")")

# # Make a dataframe with one row with columns of coef_names and values of string_entries
# # Print the dataframe
# reg_with_clustering_cw_df = pd.DataFrame([string_entries], columns=coef_names)
# # Add column Title
# reg_with_clustering_cw_df['Title'] = "GSM8K Correct, Linear"
# print(reg_with_clustering_cw)

# Use function to create results df
reg_with_clustering_cw_df = create_linear_results_df(reg_with_clustering_cw, "Creative Writing Cosine Similarity")
print(reg_with_clustering_cw_df)


                                  OLS Regression Results                                  
Dep. Variable:     avg_inter_paragraph_cosine_sim   R-squared:                       0.427
Model:                                        OLS   Adj. R-squared:                  0.381
Method:                             Least Squares   F-statistic:                     20.48
Date:                            Mon, 11 Dec 2023   Prob (F-statistic):          2.79e-163
Time:                                    15:44:18   Log-Likelihood:                 927.69
No. Observations:                            1434   AIC:                            -1641.
Df Residuals:                                1327   BIC:                            -1078.
Df Model:                                     106                                         
Covariance Type:                          cluster                                         
                                      coef    std err          z      P>|z|      [0.025   

#### Check task compliance as well

In [14]:
print(list(cw_data.columns))


['model_task_method', 'conversation_number', 'coherence_1_incoherent_10_very_coherent', 'compliance_OLD', 'ease_of_review_1_easy_10_hard', 'correct', 'Prediction_Based_On_First_10', 'Prediction_Based_On_Last_10', 'Aggregated_Prediction', 'Prediction_Based_On_First_10_LP', 'response_Based_On_First_10_LP', 'Prediction_Based_On_Last_10_LP', 'response_Based_On_Last_10_LP', 'response_LP', 'Aggregated_Prediction_LP', 'Prediction_Based_On_First_50_LP', 'response_Based_On_First_50_LP', 'Prediction_Based_On_Last_50_LP', 'response_Based_On_Last_50_LP', 'Aggregated_Prediction_50_LP', 'Prediction_Based_On_random_50_LP_1', 'response_Based_On_random_50_LP_1', 'Prediction_Based_On_random_50_LP_2', 'response_Based_On_random_50_LP_2', 'Aggregated_Prediction_random_50_LP', 'Unnamed: 0_x', 'response_x', 'replace_slash_n_slash_n_with_newline_x', 'replace_slash_n_slash_n_with_newline_values_x', 'replace_slash_n_with_newline_x', 'replace_slash_n_with_newline_values_x', 'avg_cosine_sim', 'num_sentences_x', '

In [15]:
# Define the logistic regression model
logit_no_clustering_cw_compliance = smf.logit('compliance ~ conversation_length_thousands + consolidated_num_steps_ideas + conversation_length_thousands_2 + consolidated_num_steps_ideas_2 + fres + fres_2 + model', data=cw_data).fit(cov_type='HC3')

print('logit no clustering cw compliance')
# Display the summary
print(logit_no_clustering_cw_compliance.summary())

# Marginal effects
logit_no_clustering_cw_compliance_marginal_effects = logit_no_clustering_cw_compliance.get_margeff(at='overall')
print(logit_no_clustering_cw_compliance_marginal_effects.summary())

# # Results for table
# # Get conversation_length_thousands marginal effect, star for significant, sd
# # Also get consolidated_num_steps_ideas marginal effect, star for significant, sd
# # Also get conversation_length_thousands_2 marginal effect, star for significant, sd
# # Also get consolidated_num_steps_ideas_2 marginal effect, star for significant, sd
# # Also get fres marginal effect, star for significant, sd
# # Also get fres_2 marginal effect, star for significant, sd
# # Title: "Creative Writing Compliance, Logit"
# coef_names = logit_no_clustering_cw_compliance.params.index
# #print(coef_names)
# coef_names_with_mes = list(logit_no_clustering_cw_compliance.params.index)[1:]
# #print(coef_names_with_mes)
# me_values = logit_no_clustering_cw_compliance_marginal_effects.margeff
# #print(me_values)
# me_ses = logit_no_clustering_cw_compliance_marginal_effects.margeff_se
# #print(me_ses)
# p_values = logit_no_clustering_cw_compliance_marginal_effects.pvalues
# #print(p_values)
# # Loop over items in coef_names_with_mes, enumerated
# string_entries = []

# for i, coef_name in enumerate(coef_names_with_mes):
#     # Print coef_name, me_values[i], me_ses[i], p_values[i]
#     #print(coef_name, me_values[i], me_ses[i], p_values[i])
#     # Consolidate me_values[i], me_ses[i], p_values[i] into a string
#     # me_value* (me_se), where the star is if p_value < 0.05
#     if p_values[i] < 0.05:
#         string_entries.append(str(round(me_values[i], 3)) + "* (" + str(round(me_ses[i], 3)) + ")")
#     else:
#         string_entries.append(str(round(me_values[i], 3)) + " (" + str(round(me_ses[i], 3)) + ")")

# # Make a dataframe with one row with columns of coef_names_with_mes and values of string_entries
# # Print the dataframe
# logit_no_clustering_cw_compliance_marginal_effects_df = pd.DataFrame([string_entries], columns=coef_names_with_mes)
# # Add column Title
# logit_no_clustering_cw_compliance_marginal_effects_df['Title'] = "Creative Writing Compliance, Logit"
# print(logit_no_clustering_cw_compliance_marginal_effects_df)

# Use function to create results df
logit_no_clustering_cw_compliance_df = create_logit_results_df(logit_no_clustering_cw_compliance, logit_no_clustering_cw_compliance_marginal_effects, "Creative Writing Compliance, Logit")
print(logit_no_clustering_cw_compliance_df)


Optimization terminated successfully.
         Current function value: 0.654309
         Iterations 5
logit no clustering cw compliance
                           Logit Regression Results                           
Dep. Variable:             compliance   No. Observations:                 1434
Model:                          Logit   Df Residuals:                     1426
Method:                           MLE   Df Model:                            7
Date:                Mon, 11 Dec 2023   Pseudo R-squ.:                 0.05297
Time:                        15:44:18   Log-Likelihood:                -938.28
converged:                       True   LL-Null:                       -990.76
Covariance Type:                  HC3   LLR p-value:                 1.019e-19
                                      coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
Intercept                       

In [16]:
# Define and fit the OLS model with clustered standard errors
lpm_with_clustering_cw_compliance = smf.ols('compliance ~ conversation_length_thousands + consolidated_num_steps_ideas + conversation_length_thousands_2 + consolidated_num_steps_ideas_2 + fres + fres_2 + task_conversation + model', data=cw_data).fit(cov_type='cluster', cov_kwds={'groups': cw_data['task_conversation_method']})

# Print the model summary
print('lpm with clustering cw compliance')
print(lpm_with_clustering_cw_compliance.summary())

# # Results for table
# # Get conversation_length_thousands coefficient, star for significant, sd
# # Also get consolidated_num_steps_ideas coefficient, star for significant, sd
# # Also get conversation_length_thousands_2 coefficient, star for significant, sd
# # Also get consolidated_num_steps_ideas_2 coefficient, star for significant, sd
# # Also get fres coefficient, star for significant, sd
# # Also get fres_2 coefficient, star for significant, sd
# # Title: "Creative Writing Compliance, Linear"
# coef_names = lpm_with_clustering_cw_compliance.params.index
# #print(coef_names)
# coef_values = lpm_with_clustering_cw_compliance.params.values
# #print(coef_values)
# sds = lpm_with_clustering_cw_compliance.bse.values
# #print(sds)
# p_values = lpm_with_clustering_cw_compliance.pvalues.values
# #print(p_values)

# # Loop over items in coef_names, enumerated
# string_entries = []
# for i, coef_name in enumerate(coef_names):
#     # Print coef_name, coef_values[i], sds[i], p_values[i]
#     #print(coef_name, coef_values[i], sds[i], p_values[i])
#     # Consolidate coef_values[i], sds[i], p_values[i] into a string
#     # coef_value* (sd), where the star is if p_value < 0.05
#     if p_values[i] < 0.05:
#         string_entries.append(str(round(coef_values[i], 3)) + "* (" + str(round(sds[i], 3)) + ")")
#     else:
#         string_entries.append(str(round(coef_values[i], 3)) + " (" + str(round(sds[i], 3)) + ")")

# # Make a dataframe with one row with columns of coef_names and values of string_entries
# # Print the dataframe
# lpm_with_clustering_cw_compliance_df = pd.DataFrame([string_entries], columns=coef_names)
# # Add column Title
# lpm_with_clustering_cw_compliance_df['Title'] = "GSM8K Correct, Linear"
# print(lpm_with_clustering_cw_compliance_df)

# Use function to create results df
lpm_with_clustering_cw_compliance_df = create_linear_results_df(lpm_with_clustering_cw_compliance, "Creative Writing Compliance, Linear")
print(lpm_with_clustering_cw_compliance_df)


lpm with clustering cw compliance
                            OLS Regression Results                            
Dep. Variable:             compliance   R-squared:                       0.203
Model:                            OLS   Adj. R-squared:                  0.140
Method:                 Least Squares   F-statistic:                     12.22
Date:                Mon, 11 Dec 2023   Prob (F-statistic):          5.08e-108
Time:                        15:44:18   Log-Likelihood:                -874.55
No. Observations:                1434   AIC:                             1963.
Df Residuals:                    1327   BIC:                             2527.
Df Model:                         106                                         
Covariance Type:              cluster                                         
                                      coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------

In [17]:
# Stack results dataframes together for output
results_df = pd.concat([logit_no_clustering_gsm8k_df, lpm_with_clustering_gsm8k_df, reg_with_clustering_cw_df, logit_no_clustering_cw_compliance_df, lpm_with_clustering_cw_compliance_df], ignore_index=True)

# Limit columns to Title, conversation_length_thousands, consolidated_num_steps_ideas, conversation_length_thousands_2, consolidated_num_steps_ideas_2, fres, fres_2
results_df = results_df[['Title', 'conversation_length_thousands', 'conversation_length_thousands_2', 'consolidated_num_steps_ideas', 'consolidated_num_steps_ideas_2', 'fres', 'fres_2']]

# Replace NaN with blanks
results_df = results_df.fillna('')

# Rename Title to Model, rename conversation_length_thousands to Conversation Length, rename consolidated_num_steps_ideas to Number of Steps/Ideas, rename conversation_length_thousands_2 to Conversation Length Squared, rename consolidated_num_steps_ideas_2 to Number of Steps/Ideas Squared, rename fres to FRES, rename fres_2 to FRES Squared
results_df = results_df.rename(columns={'Title': 'Model', 'conversation_length_thousands': 'Conversation Length (Thousands of Tokens)', 'consolidated_num_steps_ideas': 'Number of Steps/Ideas', 'conversation_length_thousands_2': 'Conversation Length (Thousands of Tokens) Squared', 'consolidated_num_steps_ideas_2': 'Number of Steps/Ideas Squared', 'fres': 'Flesch Reading Ease', 'fres_2': 'Flesch Reading Ease Squared'})

# Output to latex. center columns, wrap text, and remove index
latex_string = results_df.to_latex(index=False, 
                      column_format='x{1.5cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}', 
                      #booktabs = True
                      #longtable = True
                      )

# Add lines between rows
lines = latex_string.split('\n')
new_lines = []
for line in lines:
    new_lines.append(line)
    if '\\' in line and '&' in line:  # Identifies a row of the table
        new_lines.append('\\hline')
# Insert \\hline after \toprule
new_lines.insert(2, '\\hline')

# Rejoin the modified lines
modified_latex_table = '\n'.join(new_lines)

print(modified_latex_table)

# Save string to file
with open('../Output/regressions.tex', 'w') as f:
    f.write(modified_latex_table)

# Print results_df
print(results_df)

# Print modifed latex table
print(modified_latex_table)


\begin{tabular}{x{1.5cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}}
\toprule
\hline
Model & Conversation Length (Thousands of Tokens) & Conversation Length (Thousands of Tokens) Squared & Number of Steps/Ideas & Number of Steps/Ideas Squared & Flesch Reading Ease & Flesch Reading Ease Squared \\
\hline
\midrule
GSM8K Correct, Logit & 0.277 (0.143) & -0.498* (0.127) & 0.029* (0.007) & -0.002* (0.0) &  &  \\
\hline
GSM8K Correct, Linear & 0.282* (0.114) & -0.418* (0.091) & 0.043* (0.006) & -0.002* (0.0) &  &  \\
\hline
Creative Writing Cosine Similarity & 0.212* (0.045) & -0.119* (0.03) & 0.01* (0.004) & -0.0 (0.001) & -0.011* (0.004) & 0.0* (0.0) \\
\hline
Creative Writing Compliance, Logit & -0.557* (0.161) & 0.23* (0.11) & -0.025 (0.018) & 0.005 (0.003) & 0.041* (0.017) & -0.0* (0.0) \\
\hline
Creative Writing Compliance, Linear & -0.531* (0.158) & 0.226* (0.107) & -0.033* (0.016) & 0.006* (0.003) & 0.045* (0.016) & -0.0* (0.0) \\
\hline
\bottomrule
\end{tabular}

  

### GSM8K Regression with Provided Answer Complexity Interaction

In [18]:
print(list(gsm8k_data.columns))

# Length_provided in thousands
gsm8k_data['length_provided_thousands'] = gsm8k_data['length_provided']/1000

# Squared length_provided in thousands
gsm8k_data['length_provided_thousands_2'] = gsm8k_data['length_provided_thousands']**2


['model_task_method', 'conversation_number', 'coherence_1_incoherent_10_very_coherent', 'compliance_OLD', 'ease_of_review_1_easy_10_hard', 'correct', 'Prediction_Based_On_First_10', 'Prediction_Based_On_Last_10', 'Aggregated_Prediction', 'Prediction_Based_On_First_10_LP', 'response_Based_On_First_10_LP', 'Prediction_Based_On_Last_10_LP', 'response_Based_On_Last_10_LP', 'response_LP', 'Aggregated_Prediction_LP', 'Prediction_Based_On_First_50_LP', 'response_Based_On_First_50_LP', 'Prediction_Based_On_Last_50_LP', 'response_Based_On_Last_50_LP', 'Aggregated_Prediction_50_LP', 'Prediction_Based_On_random_50_LP_1', 'response_Based_On_random_50_LP_1', 'Prediction_Based_On_random_50_LP_2', 'response_Based_On_random_50_LP_2', 'Aggregated_Prediction_random_50_LP', 'Unnamed: 0_x', 'response_x', 'replace_slash_n_slash_n_with_newline_x', 'replace_slash_n_slash_n_with_newline_values_x', 'replace_slash_n_with_newline_x', 'replace_slash_n_with_newline_values_x', 'avg_cosine_sim', 'num_sentences_x', '

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gsm8k_data['length_provided_thousands'] = gsm8k_data['length_provided']/1000
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gsm8k_data['length_provided_thousands_2'] = gsm8k_data['length_provided_thousands']**2


In [19]:
# Define the logistic regression model
logit_no_clustering_gsm8k_provided_interaction = smf.logit('correct ~ conversation_length_thousands * length_provided_thousands + consolidated_num_steps_ideas * length_provided_thousands + conversation_length_thousands_2 * length_provided_thousands + consolidated_num_steps_ideas_2 * length_provided_thousands + model * length_provided_thousands', data=gsm8k_data).fit(cov_type='HC3')

# Display the summary
print(logit_no_clustering_gsm8k_provided_interaction.summary())

# Marginal effects
logit_no_clustering_gsm8k_provided_interaction_marginal_effects = logit_no_clustering_gsm8k_provided_interaction.get_margeff(at='overall')
print(logit_no_clustering_gsm8k_provided_interaction_marginal_effects.summary())

# Create results df
logit_no_clustering_gsm8k_provided_interaction_df = create_logit_results_df(logit_no_clustering_gsm8k_provided_interaction, logit_no_clustering_gsm8k_provided_interaction_marginal_effects, "GSM8K Correct, Logit")

# Print results df
print(logit_no_clustering_gsm8k_provided_interaction_df)


Optimization terminated successfully.
         Current function value: 0.549825
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                correct   No. Observations:                 1600
Model:                          Logit   Df Residuals:                     1588
Method:                           MLE   Df Model:                           11
Date:                Mon, 11 Dec 2023   Pseudo R-squ.:                  0.1888
Time:                        15:44:18   Log-Likelihood:                -879.72
converged:                       True   LL-Null:                       -1084.4
Covariance Type:                  HC3   LLR p-value:                 6.248e-81
                                                                coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------------------------
Intercept     

In [20]:
# Define and fit the OLS model with clustered standard errors
lpm_with_clustering_gsm8k_provided_interaction = smf.ols('correct ~ conversation_length_thousands * length_provided_thousands + consolidated_num_steps_ideas * length_provided_thousands + conversation_length_thousands_2 * length_provided_thousands + consolidated_num_steps_ideas_2 * length_provided_thousands +  task_conversation * length_provided_thousands + model * length_provided_thousands', data=gsm8k_data).fit(cov_type='cluster', cov_kwds={'groups': gsm8k_data['task_conversation_method']})

# Print the model summary
print(lpm_with_clustering_gsm8k_provided_interaction.summary())

# Create results df
lpm_with_clustering_gsm8k_provided_interaction_df = create_linear_results_df(lpm_with_clustering_gsm8k_provided_interaction, "GSM8K Correct, Linear")

# Print results df
print(lpm_with_clustering_gsm8k_provided_interaction_df)




                            OLS Regression Results                            
Dep. Variable:                correct   R-squared:                       0.407
Model:                            OLS   Adj. R-squared:                  0.319
Method:                 Least Squares   F-statistic:                     71.79
Date:                Mon, 11 Dec 2023   Prob (F-statistic):               0.00
Time:                        15:44:18   Log-Likelihood:                -718.03
No. Observations:                1600   AIC:                             1852.
Df Residuals:                    1392   BIC:                             2971.
Df Model:                         207                                         
Covariance Type:              cluster                                         
                                                                coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------

In [21]:
# Stack results dataframes together for output
results_df = pd.concat([logit_no_clustering_gsm8k_provided_interaction_df, lpm_with_clustering_gsm8k_provided_interaction_df], ignore_index=True)

# Limit columns to Title, conversation_length_thousands:length_provided_thousands, consolidated_num_steps_ideas:length_provided_thousands, conversation_length_thousands_2:length_provided_thousands, consolidated_num_steps_ideas_2:length_provided_thousands, length_provided_thousands, length_provided_thousands_2, conversation_length_thousands, consolidated_num_steps_ideas, conversation_length_thousands_2, consolidated_num_steps_ideas_2
results_df = results_df[['Title', 'conversation_length_thousands:length_provided_thousands', 'consolidated_num_steps_ideas:length_provided_thousands', 'conversation_length_thousands_2:length_provided_thousands', 'consolidated_num_steps_ideas_2:length_provided_thousands', 'conversation_length_thousands', 'consolidated_num_steps_ideas', 'conversation_length_thousands_2', 'consolidated_num_steps_ideas_2', 'length_provided_thousands']]

# Replace NaN with blanks
results_df = results_df.fillna('')

# Rename Title to Model, conversation_length_thousands:length_provided_thousands to Length * Provided Length, consolidated_num_steps_ideas:length_provided_thousands to Number of Steps/Ideas * Provided Length, conversation_length_thousands_2:length_provided_thousands to Length Squared * Provided Length, consolidated_num_steps_ideas_2:length_provided_thousands to Number of Steps/Ideas Squared * Provided Length, conversation_length_thousands to Conversation Length, consolidated_num_steps_ideas to Number of Steps/Ideas, conversation_length_thousands_2 to Conversation Length Squared, consolidated_num_steps_ideas_2 to Number of Steps/Ideas Squared, length_provided_thousands to Provided Length, length_provided_thousands_2 to Provided Length Squared
results_df = results_df.rename(columns={'Title': 'Model', 'conversation_length_thousands:length_provided_thousands': 'Length * Provided Length', 'consolidated_num_steps_ideas:length_provided_thousands': 'Number of Steps/Ideas * Provided Length', 'conversation_length_thousands_2:length_provided_thousands': 'Length Squared * Provided Length', 'consolidated_num_steps_ideas_2:length_provided_thousands': 'Number of Steps/Ideas Squared * Provided Length', 'conversation_length_thousands': 'Conversation Length', 'consolidated_num_steps_ideas': 'Number of Steps/Ideas', 'conversation_length_thousands_2': 'Conversation Length Squared', 'consolidated_num_steps_ideas_2': 'Number of Steps/Ideas Squared', 'length_provided_thousands': 'Provided Length', 'length_provided_thousands_2': 'Provided Length Squared'})

# Output to latex. center columns, wrap text, and remove index
# Output to latex. center columns, wrap text, and remove index
latex_string = results_df.to_latex(index=False, 
                      column_format='x{1.5cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}', 
                      #booktabs = True
                      #longtable = True
                      )

# Add lines between rows
lines = latex_string.split('\n')
new_lines = []
for line in lines:
    new_lines.append(line)
    if '\\' in line and '&' in line:  # Identifies a row of the table
        new_lines.append('\\hline')
# Insert \\hline after \toprule
new_lines.insert(2, '\\hline')

# Rejoin the modified lines
modified_latex_table = '\n'.join(new_lines)

print(modified_latex_table)

# Save string to file
with open('../Output/regressions_provided_interaction.tex', 'w') as f:
    f.write(modified_latex_table)

# Print results_df
print(results_df)

# Print modifed latex table
print(modified_latex_table)


\begin{tabular}{x{1.5cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}}
\toprule
\hline
Model & Length * Provided Length & Number of Steps/Ideas * Provided Length & Length Squared * Provided Length & Number of Steps/Ideas Squared * Provided Length & Conversation Length & Number of Steps/Ideas & Conversation Length Squared & Number of Steps/Ideas Squared & Provided Length \\
\hline
\midrule
GSM8K Correct, Logit & -5.091 (3.57) & 0.087 (0.168) & 2.908 (3.266) & 0.003 (0.011) & 0.739 (0.413) & 0.027 (0.019) & -0.699 (0.433) & -0.002 (0.001) & -1.216 (0.982) \\
\hline
GSM8K Correct, Linear & -5.303 (2.85) & 0.005 (0.176) & 1.786 (2.19) & -0.001 (0.012) & 0.71* (0.346) & 0.041* (0.02) & -0.513 (0.327) & -0.002 (0.002) & -6.541 (124.292) \\
\hline
\bottomrule
\end{tabular}

                   Model Length * Provided Length  \
0   GSM8K Correct, Logit            -5.091 (3.57)   
1  GSM8K Correct, Linear            -5.303 (2.85)   

  Number of Steps

### Add model interaction terms

In [22]:
# Define the logistic regression model
logit_no_clustering_gsm8k_model_interaction = smf.logit('correct ~ conversation_length_thousands * model + consolidated_num_steps_ideas * model + conversation_length_thousands_2 * model + consolidated_num_steps_ideas_2 * model', data=gsm8k_data).fit(cov_type='HC3')

# Display the summary
print(logit_no_clustering_gsm8k_model_interaction.summary())

# Marginal effects
logit_no_clustering_gsm8k_marginal_effects_model_interaction = logit_no_clustering_gsm8k_model_interaction.get_margeff(at='overall')
print(logit_no_clustering_gsm8k_marginal_effects_model_interaction.summary())

# Create results df
logit_no_clustering_gsm8k_model_interaction_df = create_logit_results_df(logit_no_clustering_gsm8k_model_interaction, logit_no_clustering_gsm8k_marginal_effects_model_interaction, "GSM8K Correct, Logit")

# Print results df
print(logit_no_clustering_gsm8k_model_interaction_df)


Optimization terminated successfully.
         Current function value: 0.566818
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                correct   No. Observations:                 1600
Model:                          Logit   Df Residuals:                     1590
Method:                           MLE   Df Model:                            9
Date:                Mon, 11 Dec 2023   Pseudo R-squ.:                  0.1637
Time:                        15:44:19   Log-Likelihood:                -906.91
converged:                       True   LL-Null:                       -1084.4
Covariance Type:                  HC3   LLR p-value:                 5.343e-71
                                                   coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------------------------------
Intercept                               

In [23]:
# Define and fit the OLS model with clustered standard errors
lpm_with_clustering_gsm8k_model_interaction = smf.ols('correct ~ conversation_length_thousands * model + consolidated_num_steps_ideas * model + conversation_length_thousands_2 * model + consolidated_num_steps_ideas_2 * model + task_conversation', data=gsm8k_data).fit(cov_type='cluster', cov_kwds={'groups': gsm8k_data['task_conversation_method']})

# Print the model summary
print(lpm_with_clustering_gsm8k_model_interaction.summary())

# Create results df
lpm_with_clustering_gsm8k_model_interaction_df = create_linear_results_df(lpm_with_clustering_gsm8k_model_interaction, "GSM8K Correct, Linear")

# Print results df
print(lpm_with_clustering_gsm8k_model_interaction_df)


                            OLS Regression Results                            
Dep. Variable:                correct   R-squared:                       0.361
Model:                            OLS   Adj. R-squared:                  0.314
Method:                 Least Squares   F-statistic:                     28.54
Date:                Mon, 11 Dec 2023   Prob (F-statistic):          1.64e-210
Time:                        15:44:19   Log-Likelihood:                -778.49
No. Observations:                1600   AIC:                             1775.
Df Residuals:                    1491   BIC:                             2361.
Df Model:                         108                                         
Covariance Type:              cluster                                         
                                                   coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------------------

In [24]:
# Define and fit the OLS model with clustered standard errors
reg_with_clustering_cw_model_interaction = smf.ols("avg_inter_paragraph_cosine_sim ~ conversation_length_thousands * model + consolidated_num_steps_ideas * model + conversation_length_thousands_2 * model + consolidated_num_steps_ideas_2 * model + fres * model + fres_2 * model + task_conversation",
                                 data=cw_data).fit(cov_type='cluster', cov_kwds={'groups': cw_data['task_conversation_method']})

# Print the model summary
print(reg_with_clustering_cw_model_interaction.summary())

# Create results df
reg_with_clustering_cw_model_interaction_df = create_linear_results_df(reg_with_clustering_cw_model_interaction, "Creative Writing Cosine Similarity")

# Print results df
print(reg_with_clustering_cw_model_interaction_df)


                                  OLS Regression Results                                  
Dep. Variable:     avg_inter_paragraph_cosine_sim   R-squared:                       0.438
Model:                                        OLS   Adj. R-squared:                  0.391
Method:                             Least Squares   F-statistic:                     20.59
Date:                            Mon, 11 Dec 2023   Prob (F-statistic):          4.44e-168
Time:                                    15:44:19   Log-Likelihood:                 941.60
No. Observations:                            1434   AIC:                            -1657.
Df Residuals:                                1321   BIC:                            -1062.
Df Model:                                     112                                         
Covariance Type:                          cluster                                         
                                                   coef    std err          z      P>|z|  

In [25]:
# Define the logistic regression model
logit_no_clustering_cw_compliance_model_interaction = smf.logit('compliance ~ conversation_length_thousands * model + consolidated_num_steps_ideas * model + conversation_length_thousands_2 * model + consolidated_num_steps_ideas_2 * model + fres * model + fres_2 * model', data=cw_data).fit(cov_type='HC3')

print('logit no clustering cw compliance')
# Display the summary
print(logit_no_clustering_cw_compliance_model_interaction.summary())

# Marginal effects
logit_no_clustering_cw_compliance_marginal_effects_model_interaction = logit_no_clustering_cw_compliance_model_interaction.get_margeff(at='overall')
print(logit_no_clustering_cw_compliance_marginal_effects_model_interaction.summary())

# Create results df
logit_no_clustering_cw_compliance_model_interaction_df = create_logit_results_df(logit_no_clustering_cw_compliance_model_interaction, logit_no_clustering_cw_compliance_marginal_effects_model_interaction, "Creative Writing Compliance, Logit")

# Print results df
print(logit_no_clustering_cw_compliance_model_interaction_df)


Optimization terminated successfully.
         Current function value: 0.650621
         Iterations 5
logit no clustering cw compliance
                           Logit Regression Results                           
Dep. Variable:             compliance   No. Observations:                 1434
Model:                          Logit   Df Residuals:                     1420
Method:                           MLE   Df Model:                           13
Date:                Mon, 11 Dec 2023   Pseudo R-squ.:                 0.05831
Time:                        15:44:19   Log-Likelihood:                -932.99
converged:                       True   LL-Null:                       -990.76
Covariance Type:                  HC3   LLR p-value:                 1.531e-18
                                                   coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------------------------------
Interc

In [26]:
# Define and fit the OLS model with clustered standard errors
lpm_with_clustering_cw_compliance_model_interaction = smf.ols('compliance ~ conversation_length_thousands * model + consolidated_num_steps_ideas * model + conversation_length_thousands_2 * model + consolidated_num_steps_ideas_2 * model + fres * model + fres_2 * model + task_conversation * model', data=cw_data).fit(cov_type='cluster', cov_kwds={'groups': cw_data['task_conversation_method']})

# Print the model summary
print('lpm with clustering cw compliance')
print(lpm_with_clustering_cw_compliance_model_interaction.summary())

# Create results df
lpm_with_clustering_cw_compliance_model_interaction_df = create_linear_results_df(lpm_with_clustering_cw_compliance_model_interaction, "Creative Writing Compliance, Linear")

# Print results df
print(lpm_with_clustering_cw_compliance_model_interaction_df)



lpm with clustering cw compliance
                            OLS Regression Results                            
Dep. Variable:             compliance   R-squared:                       0.321
Model:                            OLS   Adj. R-squared:                  0.204
Method:                 Least Squares   F-statistic:                     67.99
Date:                Mon, 11 Dec 2023   Prob (F-statistic):               0.00
Time:                        15:44:19   Log-Likelihood:                -759.90
No. Observations:                1434   AIC:                             1944.
Df Residuals:                    1222   BIC:                             3061.
Df Model:                         211                                         
Covariance Type:              cluster                                         
                                                   coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------

In [27]:
# Stack results dataframes together for output
results_df = pd.concat([logit_no_clustering_gsm8k_model_interaction_df, lpm_with_clustering_gsm8k_model_interaction_df, reg_with_clustering_cw_model_interaction_df, logit_no_clustering_cw_compliance_model_interaction_df, lpm_with_clustering_cw_compliance_model_interaction_df], ignore_index=True)

# Limit columns to Title, conversation_length_thousands:model[T.td3], consolidated_num_steps_ideas:model[T.td3], conversation_length_thousands_2:model[T.td3], consolidated_num_steps_ideas_2:model[T.td3], fres:model[T.td3], fres_2:model[T.td3], task_conversation:model[T.td3], conversation_length_thousands, consolidated_num_steps_ideas, conversation_length_thousands_2, consolidated_num_steps_ideas_2, fres, fres_2, model[T.td3]
results_df = results_df[['Title', 'conversation_length_thousands:model[T.td3]', 'consolidated_num_steps_ideas:model[T.td3]', 'conversation_length_thousands_2:model[T.td3]', 'consolidated_num_steps_ideas_2:model[T.td3]', 'fres:model[T.td3]', 'fres_2:model[T.td3]', 'conversation_length_thousands', 'consolidated_num_steps_ideas', 'conversation_length_thousands_2', 'consolidated_num_steps_ideas_2', 'fres', 'fres_2', 'model[T.td3]']]

# Replace NaN with blanks
results_df = results_df.fillna('')

# Rename Title to Model, conversation_length_thousands:model to Conversation Length * Model, consolidated_num_steps_ideas:model to Number of Steps/Ideas * Model, conversation_length_thousands_2:model to Conversation Length Squared * Model, consolidated_num_steps_ideas_2:model to Number of Steps/Ideas Squared * Model, fres:model to FRES * Model, fres_2:model to FRES Squared * Model, task_conversation:model to Task Conversation * Model, conversation_length_thousands to Conversation Length, consolidated_num_steps_ideas to Number of Steps/Ideas, conversation_length_thousands_2 to Conversation Length Squared, consolidated_num_steps_ideas_2 to Number of Steps/Ideas Squared, fres to FRES, fres_2 to FRES Squared, model to Model
results_df = results_df.rename(columns={'Title': 'Model', 'conversation_length_thousands:model[T.td3]': 'Conversation Length * Model = TD3', 'consolidated_num_steps_ideas:model[T.td3]': 'Number of Steps/Ideas * Model = TD3', 'conversation_length_thousands_2:model[T.td3]': 'Conversation Length Squared * Model = TD3', 'consolidated_num_steps_ideas_2:model[T.td3]': 'Number of Steps/Ideas Squared * Model = TD3', 'fres:model[T.td3]': 'Flesch Reading Ease * Model = TD3', 'fres_2:model[T.td3]': 'Flesch Reading Ease Squared * Model = TD3', 'conversation_length_thousands': 'Conversation Length', 'consolidated_num_steps_ideas': 'Number of Steps/Ideas', 'conversation_length_thousands_2': 'Conversation Length Squared', 'consolidated_num_steps_ideas_2': 'Number of Steps/Ideas Squared', 'fres': 'Flesch Reading Ease', 'fres_2': 'Flesch Reading Ease Squared', 'model[T.td3]': 'Model = TD3'})

# Output to latex. center columns, wrap text, and remove index
latex_string = results_df.to_latex(index=False, 
                      column_format='x{1.5cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}', 
                      #booktabs = True
                      #longtable = True
                      )

# Add lines between rows
lines = latex_string.split('\n')
new_lines = []
for line in lines:
    new_lines.append(line)
    if '\\' in line and '&' in line:  # Identifies a row of the table
        new_lines.append('\\hline')
# Insert \\hline after \toprule
new_lines.insert(2, '\\hline')

# Rejoin the modified lines
modified_latex_table = '\n'.join(new_lines)

print(modified_latex_table)

# Save string to file
with open('../Output/regressions_model_interaction.tex', 'w') as f:
    f.write(modified_latex_table)

# Print results_df
print(results_df)

# Print modifed latex table
print(modified_latex_table)


\begin{tabular}{x{1.5cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}|x{0.75cm}}
\toprule
\hline
Model & Conversation Length * Model = TD3 & Number of Steps/Ideas * Model = TD3 & Conversation Length Squared * Model = TD3 & Number of Steps/Ideas Squared * Model = TD3 & Flesch Reading Ease * Model = TD3 & Flesch Reading Ease Squared * Model = TD3 & Conversation Length & Number of Steps/Ideas & Conversation Length Squared & Number of Steps/Ideas Squared & Flesch Reading Ease & Flesch Reading Ease Squared & Model = TD3 \\
\hline
\midrule
GSM8K Correct, Logit & -0.403 (0.401) & 0.003 (0.014) & 1.081* (0.439) & 0.0 (0.001) &  &  & -0.196 (0.236) & 0.031* (0.01) & -0.229 (0.181) & -0.002* (0.001) &  &  & -0.453* (0.076) \\
\hline
GSM8K Correct, Linear & -0.449 (0.365) & 0.009 (0.012) & 1.092* (0.439) & -0.0 (0.001) &  &  & -0.076 (0.146) & 0.043* (0.007) & -0.228* (0.104) & -0.002* (0.0) &  &  & -0.469* (0.05