In [None]:
from google.colab import drive
from google.colab import files
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
# The loaded CSV/Excel file contains a list of all constraints from the benchmark under the column "Constraint". It then has three columns that contain the Constraints satisfaction results for thse constraints. These are parsed and converted to 0/1 binary format.
path = "<Path to Excel file containing 4 columns - Constraints, Base, SFT, Instruct.>"
sheet = pd.read_excel(path)
sheet.head()

Unnamed: 0,Constraint,Base,SFT,Instruct
0,"1. The primary setting is in the morning, befo...",1. Yes - The primary setting of the story is i...,1. Yes - The primary setting in the story is i...,1. Yes - The story satisfies the constraint of...
1,1. The hiking trip occurs during a full moon.\...,1. Yes - The hiking trip occurs during a full ...,\n1. No - The story does not specify that the ...,"1. Yes - The protagonist, Alex, ventures out d..."
2,1. The protagonist must express his annoyance ...,\n1. No - The story mentions the protagonist's...,"\n1. Yes - The protagonist, Jeff, expresses hi...",1. Yes - The protagonist expresses his annoyan...
3,1. Use dialogue to reveal crucial plot dynamic...,"\n1. Yes - The crucial plot dynamics, urgency ...","\n1.\tYes - Constraint 1 is satisfied: ""Use d...",1. No 2. Yes - Const...
4,1. The protagonist should be surprised and dis...,1. Yes - The story accurately portrays the pro...,"1. Yes - The protagonist, Trevor, is surprised...",\n1. No - The story contains extraneous text t...


In [None]:
# Function to parse each cell and extract constraints with their responses
import re
def parse_cell(cell):
    # Normalize spaces and split using regular expression to handle numbers followed by dots
    entries = re.split(r'(?<=\D)\s(?=\d+\.)', str(cell).replace('\n', ' '))
    parsed_data = {}
    for entry in entries:
        # Find the first dot after a number to split number from the text
        match = re.match(r'(\d+)\.\s*(.*)', entry)
        if match:
            number, text = match.groups()
            if number.isdigit():
                parsed_data[int(number)] = text
    return parsed_data


In [None]:
df = pd.DataFrame(columns=['Constraint', 'Base', 'SFT', 'Instruct'])

In [None]:
# Processing the data row by row
for idx, row in sheet.iterrows():
    constraint_data = parse_cell(row['Constraint'])
    base_data = parse_cell(row['Base'])
    sft_data = parse_cell(row['SFT'])
    instruct_data = parse_cell(row['Instruct'])

    # For each constraint number and text in constraints
    for num, text in constraint_data.items():
        base_response = base_data.get(num, '').startswith('Yes')
        sft_response = sft_data.get(num, '').startswith('Yes')
        instruct_response = instruct_data.get(num, '').startswith('Yes')

        # Append data to DataFrame
        new_row = pd.DataFrame({
            'Constraint': [text],
            'Base': [1 if base_response else 0],
            'SFT': [1 if sft_response else 0],
            'Instruct': [1 if instruct_response else 0]
        })
        df = pd.concat([df, new_row], ignore_index=True)

In [None]:
print(df.head())

                                          Constraint Base SFT Instruct
0  The primary setting is in the morning, before ...    1   1        1
1  Maggy is a tough little girl shown when she re...    1   0        1
2  The mother's desperation for raising a lady is...    0   0        1
3        Maggy is not particularly a morning person.    1   1        1
4  There's a vicious dog named Jax on Maggy's way...    0   1        1


In [None]:
# Compute number of constraints satisfied by each model individually. 
column_sums = df.drop('Constraint', axis=1).sum()
column_sums

Base        411
SFT         544
Instruct    629
dtype: object

In [None]:
# Other error analysis measures. 
# 1) Number of constraints satisfied by all three columns (Base, SFT, and Instruct)
all_satisfied = df[(df['Base'] == 1) & (df['SFT'] == 1) & (df['Instruct'] == 1)].shape[0]

# 2) Not satisfied in BASE but satisfied in both SFT and Instruct
not_base_but_others = df[(df['Base'] == 0) & (df['SFT'] == 1) & (df['Instruct'] == 1)].shape[0]

# 3) Only satisfied in Instruct
only_instruct = df[(df['Base'] == 0) & (df['SFT'] == 0) & (df['Instruct'] == 1)].shape[0]

# 4) Never satisfied in all three columns
never_satisfied = df[(df['Base'] == 0) & (df['SFT'] == 0) & (df['Instruct'] == 0)].shape[0]

all_satisfied, not_base_but_others, only_instruct, never_satisfied


(221, 199, 149, 301)

In [None]:
# Only satisfied in Base
only_base = df[(df['Base'] == 1) & (df['SFT'] == 0) & (df['Instruct'] == 0)].shape[0]

# Only satisfied in SFT
only_sft = df[(df['Base'] == 0) & (df['SFT'] == 1) & (df['Instruct'] == 0)].shape[0]

only_base, only_sft

(77, 71)

In [None]:
# Calculate the constraints not satisfied in exactly one column for each column

# Not satisfied in exactly one column - Base
not_satisfied_base = df[(df['Base'] == 0) & (df['SFT'] == 1) & (df['Instruct'] == 1)].shape[0]

# Not satisfied in exactly one column - SFT
not_satisfied_sft = df[(df['SFT'] == 0) & (df['Base'] == 1) & (df['Instruct'] == 1)].shape[0]

# Not satisfied in exactly one column - Instruct
not_satisfied_instruct = df[(df['Instruct'] == 0) & (df['Base'] == 1) & (df['SFT'] == 1)].shape[0]

not_satisfied_base, not_satisfied_sft, not_satisfied_instruct

(199, 60, 53)

In [None]:
# Save CSV file. 
df.to_csv('Your_Path.csv', index=False)
files.download('Your_Path.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
"""
Prompt to GPT4 for to analyize Error Analysis: -
You are an expert reader and pattern analyzer. Go through the CSV sheet for me. It has a list of constraints in one column followed by whether the constraint is being satisfied (1) or not (0) by three models named Base, SFT, and Instruct.  For each of the following cases, go through the corresponding constraints. Find characteristics, differences, and patterns among these constraints. For example, a pattern could be that constraints satisfied by no model usually have a dialogue related constraint or have a Proper noun in it. For each case, come up with five patterns like this, and then come up with 10 examples from the CSV sheet to justify/explain your pattern.

1) Constraints satisfied by all models (1, 1, 1)
2) Constraints satisfied by SFT and Instruct, but not by Base (0, 1, 1)
3) Constraints satisfied by only Instruct (0, 0, 1)
4) Constraints satisfied by None (0, 0, 0)
"""