In [None]:
# Mount Google Drive at /content/drive; the response files read later in this
# notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

import json
import glob
import os
import pandas as pd

Mounted at /content/drive


In [None]:
# Load every "*.jsonl" file in `folder` (one JSON object per line) into a single
# DataFrame, tagging each record with the model name and sampling temperature
# taken from the filename.
# NOTE(review): filenames are presumably "<prefix>_<model>_<tempcode>.jsonl" —
# confirm against the files actually present in the folder.
folder = "/content/drive/MyDrive/NORTHEASTERN/Fall 2025/NLP/Project/responses"

# Trailing filename field -> sampling temperature. Any other value maps to None.
TEMPERATURE_BY_SUFFIX = {"95": 0.95, "150": 1.5}

data = []

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of `df` (and of anything exported from it) stable across runs.
for filename in sorted(os.listdir(folder)):
    if not filename.endswith(".jsonl"):
        continue

    # Drop only the trailing extension; str.replace would also strip a ".jsonl"
    # that happened to appear in the middle of the name.
    parts = filename[: -len(".jsonl")].split("_")

    # Second "_"-separated field is the model; names with fewer than three
    # fields cannot be attributed to a model.
    model_name = parts[1] if len(parts) >= 3 else "unknown"

    temp_id = parts[-1]
    temperature = TEMPERATURE_BY_SUFFIX.get(temp_id)

    path = os.path.join(folder, filename)

    with open(path, "r", encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue  # skip empty lines

            # Only the parse itself is guarded, so unrelated errors are not
            # mistaken for malformed JSON.
            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"⚠️ JSON error in {filename}, line {line_number}: {e}")
                print("   Skipping this line.")
                continue

            # json.loads also accepts lists, strings, numbers, etc.; those
            # cannot carry the extra keys below, so skip them like bad lines.
            if not isinstance(record, dict):
                print(
                    f"⚠️ Non-object JSON in {filename}, line {line_number}: "
                    f"got {type(record).__name__}"
                )
                print("   Skipping this line.")
                continue

            # Overwrites any "model"/"temperature" keys already in the record.
            record["model"] = model_name
            record["temperature"] = temperature
            data.append(record)

df = pd.DataFrame(data)

In [None]:
# Preprocessing: flag blank responses, report how many there are, then drop them.

# Missing responses become "" and surrounding whitespace is trimmed, so a
# whitespace-only response counts as blank too.
cleaned_text = df['response'].fillna("").str.strip()
df['response_filled'] = cleaned_text
df['is_blank'] = cleaned_text == ""

# One report per grouping column: number of blank responses in each group.
blank_reports = (
    ("model", "=== Blank responses by model ==="),
    ("prompt", "=== Blank responses by prompt ==="),
)
for group_col, heading in blank_reports:
    print(heading)
    print(df.groupby(group_col)['is_blank'].sum(), "\n")

# Keep only the non-blank rows; .copy() gives an independent frame.
keep_mask = ~df['is_blank']
df = df.loc[keep_mask].copy()

=== Blank responses by model ===
model
gemini    12
grok      21
qwen       5
Name: is_blank, dtype: int64 

=== Blank responses by prompt ===
prompt
AI          4
animal      3
art         6
future      4
job         6
problem     2
rules       3
story      10
Name: is_blank, dtype: int64 



In [None]:
# Write `df` to responses.csv (relative path, so it lands in the current
# working directory) without the index column.
# NOTE(review): in Colab the working directory is presumably on the runtime's
# temporary disk rather than on Drive — confirm this is the intended location.
df.to_csv('responses.csv', index=False)