In [2]:
import pandas as pd

## Part 1 : Find the number of rows to get a statistical significant result

In [13]:
# get the number of rows in the data
paths_finished = pd.read_csv('data/paths_finished_unique.tsv', sep='\t', skiprows=1, names=['path_id', 'hashedIpAddress', 'timestamp', 'durationInSec', 'path', 'rating'])
print('Number of rows in paths_finished:', paths_finished.shape[0])

Number of rows in paths_finished: 28718


In [14]:
# as the data is too large, we can calculate the necessary number of rows using the Z-score
# Z-score = 1.96 for 95% confidence interval
# margin of error = 5%
# population proportion = 0.5
Z = 1.96
margin_of_error = 0.05
p = 0.5
n = (Z**2 * p * (1-p)) / margin_of_error**2
print('Number of rows needed:', n)

Number of rows needed: 384.1599999999999


Therefore we need to take randomly 400 rows from the dataset

In [18]:
# Load the merged_file_mistral.tsv file
# Load the TSV file
file_path = "merged_file_final_mistral.tsv"
df_mistral = pd.read_csv(file_path, sep="\t")


In [19]:
# load the data/llm_paths.tsv file
# Load the TSV file
file_path = "merged_file_final_openai.tsv"
df_openai = pd.read_csv(file_path, sep="\t")

In [20]:
# print the number of rows in the df_openai DataFrame
num_rows = df_openai.shape[0]
print(f"Number of rows in the file: {num_rows}")

# print the number of rows in the df_mistral DataFrame
num_rows = df_mistral.shape[0]
print(f"Number of rows in the file: {num_rows}")    

Number of rows in the file: 21000
Number of rows in the file: 355


In [6]:
# get only the rows in the openai file that have the same path_id as the mistral file
# get the path_ids from the mistral file
mistral_path_ids = df_mistral["path_id"]

# filter the openai file based on the path_ids
df_openai = df_openai[df_openai["path_id"].isin(mistral_path_ids)]

In [7]:
# get only the rows in the mistral file that have the same path_id as the openai file
# get the path_ids from the openai file
openai_path_ids = df_openai["path_id"]

# filter the mistral file based on the path_ids
df_mistral = df_mistral[df_mistral["path_id"].isin(openai_path_ids)]


In [8]:
# get the number of rows in the filtered openai file
num_rows = df_openai.shape[0]
print(f"Number of rows in the filtered file: {num_rows}")

# get the number of rows in the filtered mistral file
num_rows = df_mistral.shape[0]
print(f"Number of rows in the filtered file: {num_rows}")

Number of rows in the filtered file: 222
Number of rows in the filtered file: 222


In [11]:
# check if the path of each row contain the string "WRONG_ANSWER"
df_openai_WA = df_openai[df_openai['path'].str.contains("WRONG_ANSWER")]
df_mistral_WA = df_mistral[df_mistral['path'].str.contains("WRONG_ANSWER")]

# get the number of rows in the df_openai_WA and df_mistral_WA DataFrames
num_rows_openai_WA = df_openai_WA.shape[0]
num_rows_mistral_WA = df_mistral_WA.shape[0]

print(f"Number of rows in the OpenAI file with 'WRONG_ANSWER': {num_rows_openai_WA}")
print(f"Number of rows in the Mistral file with 'WRONG_ANSWER': {num_rows_mistral_WA}")

# get the pourcentage of wrong answers 
pourcentage_openai_WA = (num_rows_openai_WA/num_rows)*100
pourcentage_mistral_WA = (num_rows_mistral_WA/num_rows)*100

print(f"Pourcentage of wrong answers in the OpenAI file: {pourcentage_openai_WA}%")
print(f"Pourcentage of wrong answers in the Mistral file: {pourcentage_mistral_WA}%")

Number of rows in the OpenAI file with 'WRONG_ANSWER': 12
Number of rows in the Mistral file with 'WRONG_ANSWER': 74
Pourcentage of wrong answers in the OpenAI file: 5.405405405405405%
Pourcentage of wrong answers in the Mistral file: 33.33333333333333%


In [12]:
# check if the path of each row contain the string "LOOP_DETECTED"
df_openai_LD = df_openai[df_openai['path'].str.contains("LOOP_DETECTED")]
df_mistral_LD = df_mistral[df_mistral['path'].str.contains("LOOP_DETECTED")]

# get the number of rows in the df_openai_LD and df_mistral_LD DataFrames
num_rows_openai_LD = df_openai_LD.shape[0]
num_rows_mistral_LD = df_mistral_LD.shape[0]

print(f"Number of rows in the OpenAI file with 'LOOP_DETECTED': {num_rows_openai_LD}")
print(f"Number of rows in the Mistral file with 'LOOP_DETECTED': {num_rows_mistral_LD}")

# get the pourcentage of loop detected
pourcentage_openai_LD = (num_rows_openai_LD/num_rows)*100
pourcentage_mistral_LD = (num_rows_mistral_LD/num_rows)*100

print(f"Pourcentage of loop detected in the OpenAI file: {pourcentage_openai_LD}%")
print(f"Pourcentage of loop detected in the Mistral file: {pourcentage_mistral_LD}%")

Number of rows in the OpenAI file with 'LOOP_DETECTED': 88
Number of rows in the Mistral file with 'LOOP_DETECTED': 33
Pourcentage of loop detected in the OpenAI file: 39.63963963963964%
Pourcentage of loop detected in the Mistral file: 14.864864864864865%
