# Analyzing LLM Responses to Prompts

In [1]:
# Import the libraries used by this notebook and print the run date and the
# interpreter/library versions so the captured output records the environment.
# Run timestamp
from datetime import datetime
now = datetime.now()
print(f"Date: {now}")
# Python interpreter version
import sys
print(f"Python version: {sys.version}")
from pathlib import Path
import json
# time is imported for adding delays
import time
import requests
# requests version
print(f"Requests version: {requests.__version__}")

from tqdm import tqdm

import pandas as pd
print(f"Pandas version: {pd.__version__}")

Date: 2025-02-27 21:05:04.367645
Python version: 3.12.7 (tags/v3.12.7:0b05ead, Oct  1 2024, 03:06:41) [MSC v.1941 64 bit (AMD64)]
Requests version: 2.32.3
Pandas version: 2.2.2


In [3]:
# Locate the responses folder (relative to this notebook) and report whether
# it exists and is a directory.
data_folder = Path("../data/responses")
print(
    f"Data folder: {data_folder}",
    f"Data folder exists: {data_folder.exists()}",
    f"Data folder is dir: {data_folder.is_dir()}",
    sep="\n",
)
# Collect the immediate child directories and print one per line.
subfolders = [
    entry
    for entry in data_folder.iterdir()
    if entry.is_dir()
]
print("Subfolders:")
for subfolder in subfolders:
    print(subfolder)

Data folder: ..\data\responses
Data folder exists: True
Data folder is dir: True
Subfolders:
..\data\responses\2025_01_28_gemini_2_experimental
..\data\responses\2025_01_29_google_gemini-flash-1.5-8b_no_terms
..\data\responses\2025_01_29_google_gemini-flash-1.5-8b_with_terms
..\data\responses\2025_02_04_google_gemini-flash-1.5-8b_with_terms
..\data\responses\2025_02_26_google_gemini-flash-1.5_land_prompt_1
..\data\responses\2025_02_26_google_gemini-flash-1.5_land_prompt_2
..\data\responses\2025_02_26_openai_gpt-4o-2024-11-20_land_prompt
..\data\responses\2025_02_26_openai_gpt-4o-2024-11-20_land_prompt_2
..\data\responses\2025_02_27_google_gemini-2.0-flash-001_land_prompt
..\data\responses\2025_02_27_google_gemini-2.0-flash-001_land_prompt_2
..\data\responses\2025_02_27_google_gemini-flash-1.5_land_prompt
..\data\responses\2025_02_27_google_gemini-flash-1.5_maritime_prompt
..\data\responses\temp_responses_2025_02_26


## Consolidate openai responses

The OpenAI prompts required us to split the input files into smaller chunks, so each response folder contains several chunk files per input. Now we need to consolidate those chunks back into a single file.


```python

In [4]:
# Keep only the child directories of data_folder whose name contains "openai".
openai_folders = [
    entry
    for entry in data_folder.iterdir()
    if entry.is_dir() and "openai" in entry.name
]
print("OpenAI folders:")
for openai_folder in openai_folders:
    print(openai_folder)
    

OpenAI folders:
..\data\responses\2025_02_26_openai_gpt-4o-2024-11-20_land_prompt
..\data\responses\2025_02_26_openai_gpt-4o-2024-11-20_land_prompt_2


In [5]:
# Group the files of a response subfolder by document:
# keys are the first three "_"-separated parts of the file name,
# values are lists of Path objects for the chunk files sharing that prefix.
def _chunk_sort_key(file):
    """Sort key that orders numbered chunk files numerically (_2 before _10)."""
    # Everything after the three-part prefix; the last piece carries the chunk index.
    trailing_parts = file.name.split("_")[3:]
    chunk_text = trailing_parts[-1].split(".")[0] if trailing_parts else ""
    if chunk_text.isdecimal():
        return (0, int(chunk_text), file.name)
    # Files without a numeric chunk index go last, ordered by name.
    return (1, 0, file.name)


def get_files(subfolder):
    """Group the regular files in ``subfolder`` by their three-part name prefix.

    Args:
        subfolder: ``pathlib.Path`` of the directory to scan (non-recursive).

    Returns:
        dict mapping ``"_".join(name.split("_")[:3])`` to a list of ``Path``
        objects. Keys appear in sorted path order and each list is sorted by
        the numeric chunk index that follows the prefix, so consumers that
        concatenate the chunks get them in the right order. Subdirectories
        are ignored; an empty directory yields an empty dict.
    """
    files = {}
    # iterdir() yields entries in arbitrary order, so sort for determinism.
    for file in sorted(subfolder.iterdir()):
        if file.is_file():
            key = "_".join(file.name.split("_")[:3])
            files.setdefault(key, []).append(file)
    # Plain name order would put "_10" before "_2"; order chunks numerically.
    for group in files.values():
        group.sort(key=_chunk_sort_key)
    return files

# Try the grouping on the first OpenAI folder and show every group it found.
openai_files = get_files(openai_folders[0])
print("OpenAI files:")
for key in openai_files:
    value = openai_files[key]
    print(f"{key}: {value}")

OpenAI files:
AustA_KaspG_948026: [WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/AustA_KaspG_948026_0.txt')]
AustA_Puisk_1047362: [WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/AustA_Puisk_1047362_0.txt'), WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/AustA_Puisk_1047362_1.txt'), WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/AustA_Puisk_1047362_2.txt')]
FimbK_KadNa_1049450: [WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/FimbK_KadNa_1049450_0.txt'), WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/FimbK_KadNa_1049450_1.txt')]
FimbK_TiltP_1049479: [WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/FimbK_TiltP_1049479_0.txt'), WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/FimbK_TiltP_1049479_1.txt')]
GulbA_Gaidi_1350352: [WindowsPath('.

In [11]:
# Build a two-level mapping for every OpenAI folder:
# folder name -> (file-name prefix -> list of chunk files), as returned by get_files.
openai_files = {folder.name: get_files(folder) for folder in openai_folders}

# Report, per folder, how many prefix groups get_files produced.
for key, value in openai_files.items():
    print(f"{key}: {len(value)}")

2025_02_26_openai_gpt-4o-2024-11-20_land_prompt: 20
2025_02_26_openai_gpt-4o-2024-11-20_land_prompt_2: 20


In [15]:
# Merge the chunk files of one document into a single file in new_subfolder.
# Each chunk is split at the first "System prompt:" marker:
#   - the text before the marker (the response) is written for every chunk, in order
#   - the text from the marker onwards (the system prompt) is written once per distinct prompt
# All files are read and written as UTF-8.
def consolidate_files(file_name, file_list, new_subfolder):
    """Write ``new_subfolder/<file_name>.txt`` consolidating the given chunks.

    Args:
        file_name: Name of the output file without the ``.txt`` extension.
        file_list: Iterable of paths to the chunk files, in the order their
            responses should appear in the output.
        new_subfolder: ``pathlib.Path`` of the output directory; created
            (with parents) if it does not exist.

    The output contains each chunk's stripped response on its own line(s),
    followed by one ``System prompt:`` section per distinct system prompt
    (in first-seen order). A chunk without the marker contributes its whole
    text as a response and no system prompt.
    """
    marker = "System prompt:"
    responses = []
    system_prompts = []
    # Read every input before opening the output, so a read error cannot
    # leave a half-written consolidated file behind.
    for file in file_list:
        with open(file, "r", encoding="utf-8") as old_file:
            text = old_file.read()
        # partition splits at the FIRST marker only, so a prompt that itself
        # mentions "System prompt:" is kept intact, and a missing marker
        # yields an empty tail instead of raising IndexError.
        response, found_marker, system_prompt = text.partition(marker)
        responses.append(response.strip())
        if found_marker:
            system_prompts.append(system_prompt)

    # Drop duplicate prompts while keeping first-seen order.
    unique_prompts = list(dict.fromkeys(system_prompts))

    new_subfolder.mkdir(parents=True, exist_ok=True)
    with open(new_subfolder / f"{file_name}.txt", "w", encoding="utf-8") as new_file:
        for response in responses:
            new_file.write(response + "\n")
        # Same layout whether there is one prompt or several: a blank line,
        # then the marker immediately followed by the original prompt text.
        for system_prompt in unique_prompts:
            new_file.write("\n" + marker + system_prompt)

# test it on second key of openai_files
# we will create a new subfolder in the data responses folder
# new_subfolder = data_folder / "consolidated"
# consolidate_files(list(openai_files.keys())[1], openai_files[list(openai_files.keys())[1]], new_subfolder)



In [16]:
# Consolidate the chunk files of every OpenAI folder using consolidate_files.
# Output for a folder goes to data_folder / ("consolidated_" + folder name).
def consolidate_all_files(openai_files, data_folder):
    """Run ``consolidate_files`` for every file group of every folder.

    Args:
        openai_files: Mapping of folder name to a mapping of file-name prefix
            to the list of chunk files sharing that prefix.
        data_folder: ``pathlib.Path`` under which the ``consolidated_<folder>``
            output directories are placed.
    """
    for folder_name, grouped_files in openai_files.items():
        target_folder = data_folder / f"consolidated_{folder_name}"
        # One consolidated output file per prefix group.
        for file_name in grouped_files:
            consolidate_files(file_name, grouped_files[file_name], target_folder)

# Consolidate every OpenAI response folder; outputs land next to the originals in data_folder.
consolidate_all_files(openai_files=openai_files, data_folder=data_folder)