In [3]:
import glob
import os 


In [4]:
def parse_inputs_from_file(file_path: str) -> dict:
    """
    Rebuild the input dictionary stored at the top of an output file.

    Expected layout of the file:

      prompt: <prompt_value>
      continuation: <continuation_value>
      newline: <newline_value>
      --------------- OUTPUTS BELOW ------------
      <output sentences...>

    Only the lines above the separator line are examined. A line that begins
    with "<key>: " for one of the known keys opens a new entry; every other
    line is appended verbatim to the entry currently being collected, so
    multi-line values keep their spaces and newlines. Exactly one trailing
    newline is removed from each collected value. Lines that appear before the
    first recognised key are ignored.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        A dict mapping each key found ("prompt", "continuation", "newline")
        to its reconstructed string value.
    """
    separator = "--------------- OUTPUTS BELOW ------------"
    known_keys = ("prompt", "continuation", "newline")

    def _drop_one_newline(text):
        # Only a single trailing "\n" is removed; any others belong to the value.
        return text[:-1] if text.endswith("\n") else text

    with open(file_path, "r", encoding="utf-8") as handle:
        all_lines = list(handle)

    # Index of the separator line, or the end of the file when there is none.
    cut = next(
        (idx for idx, raw in enumerate(all_lines) if raw.rstrip("\n") == separator),
        len(all_lines),
    )

    fields = {}
    active_key = None
    pieces = []

    for raw in all_lines[:cut]:
        # No known key contains ": ", so a line starts with "<key>: " exactly
        # when the text before the first ": " is one of the known keys.
        head, sep, tail = raw.partition(": ")
        if sep and head in known_keys:
            if active_key is not None:
                fields[active_key] = _drop_one_newline("".join(pieces))
            active_key = head
            pieces = [tail]
        elif active_key is not None:
            pieces.append(raw)

    # Flush the entry that was still open when the header ended.
    if active_key is not None:
        fields[active_key] = _drop_one_newline("".join(pieces))

    return fields


In [7]:
def count_fractal(file_path):
    """
    Count occurrences of "fractal" in the outputs section of a file.

    The outputs section is everything after the first occurrence of the
    separator text "--------------- OUTPUTS BELOW ------------". Matching is
    case-insensitive and substring-based, so "Fractals" and "fractal-like"
    each count once.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        The number of non-overlapping occurrences of "fractal" after the
        separator. A file without the separator has no outputs section and
        yields 0 instead of raising IndexError.
    """
    separator = "--------------- OUTPUTS BELOW ------------"

    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # partition() returns an empty tail when the separator is absent, so a
    # malformed file contributes a count of 0 rather than aborting the run.
    _, _, outputs = content.partition(separator)

    return outputs.lower().count("fractal")

# Base model

base_fractal_files

In [5]:
# glob.glob returns paths in filesystem-dependent order; sorting keeps the file
# list (and the tie order after the later stable sort by count) reproducible.
base_fractal_files = sorted(glob.glob("base_outputs*/fractal_outputs/*.txt"))

In [8]:
# One (fractal_count, parsed_inputs, path) tuple per base-model output file.
# zip pulls from each map in turn, so every file is counted, then parsed,
# before the next file is touched.
processed_base_files = list(
    zip(
        map(count_fractal, base_fractal_files),
        map(parse_inputs_from_file, base_fractal_files),
        base_fractal_files,
    )
)

In [10]:
# Rank the records in place by fractal count, highest first.
processed_base_files.sort(key=lambda record: record[0], reverse=True)

In [28]:
# Show the first 20 records: path, count, then every parsed input field.
for hits, inputs, path in processed_base_files[:20]:
    print(path, hits, sep="\n")
    # Wrapping each value in a 1-tuple prints its repr, which keeps newlines
    # and leading/trailing spaces visible.
    for field, text in inputs.items():
        print(f"{field} : {(text,)}")

    print("------------------------------------------------\n\n\n\n")

base_outputs_2025_03_18_12_01_35\fractal_outputs\output_count_59_index_79.txt
59
prompt : ('<prompt>Explain fractals in 150 words</prompt>\n\n <response>',)
continuation : ('\n\n <response>',)
newline : ('\n\n <response>',)
------------------------------------------------




base_outputs_2025_03_18_12_01_35\fractal_outputs\output_count_41_index_189.txt
41
prompt : ('\nHuman: Explain fractals in 150 words\n\nAI: ',)
continuation : ('\n\nAI: ',)
newline : ('\n\nAI: ',)
------------------------------------------------




base_outputs_2025_03_18_12_01_35\fractal_outputs\output_count_38_index_175.txt
38
prompt : ('\n<prompt>Explain fractals in 150 words</prompt>\n\n <response>',)
continuation : ('\n\n <response>',)
newline : ('\n\n <response>',)
------------------------------------------------




base_outputs_2025_03_18_12_01_35\fractal_outputs\output_count_36_index_31.txt
36
prompt : ('<prompt>Explain fractals in 150 words</prompt>\n\n <response>',)
continuation : ('\n\n <response>',)
n

# Reasoning 

In [18]:
# glob.glob returns paths in filesystem-dependent order; sorting keeps the file
# list (and the tie order after the later stable sort by count) reproducible.
reasoning_fractal_files = sorted(glob.glob("reasoning_outputs*/fractal_outputs/*.txt"))

In [23]:
# One (fractal_count, parsed_inputs, path) tuple per reasoning-model output file.
# zip pulls from each map in turn, so every file is counted, then parsed,
# before the next file is touched.
processed_reas_files = list(
    zip(
        map(count_fractal, reasoning_fractal_files),
        map(parse_inputs_from_file, reasoning_fractal_files),
        reasoning_fractal_files,
    )
)

In [24]:
# Rank the records in place by fractal count, highest first.
processed_reas_files.sort(key=lambda record: record[0], reverse=True)

In [27]:
# Show the first 20 records: path, count, then every parsed input field.
for hits, inputs, path in processed_reas_files[:20]:
    print(path, hits, sep="\n")
    # Wrapping each value in a 1-tuple prints its repr, which keeps newlines
    # and leading/trailing spaces visible.
    for field, text in inputs.items():
        print(f"{field} : {(text,)}")

    print("------------------------------------------------\n\n\n\n")

reasoning_outputs_2025_03_18_09_27_21\fractal_outputs\output_count_10_index_87.txt
10
prompt : ('<｜User｜>Describe the concept of fractals in 150 words or less<｜Assistant｜><think>\n',)
continuation : ('<｜Assistant｜><think>\n',)
newline : ('<｜Assistant｜><think>\n',)
------------------------------------------------




reasoning_outputs_2025_03_18_09_27_21\fractal_outputs\output_count_8_index_20.txt
8
prompt : ('<｜User｜>Describe the concept of fractals in 150 words or less<｜Assistant｜>',)
continuation : ('<｜Assistant｜>',)
newline : ('<｜Assistant｜>',)
------------------------------------------------




reasoning_outputs_2025_03_18_09_27_21\fractal_outputs\output_count_8_index_86.txt
8
prompt : ('<｜User｜>Describe the concept of fractals in 150 words or less<｜Assistant｜><think>',)
continuation : ('<｜Assistant｜><think>\n',)
newline : ('<｜Assistant｜><think>',)
------------------------------------------------




reasoning_outputs_2025_03_18_23_05_48\fractal_outputs\output_count_4_index_30.txt