In [None]:
import json

def open_file(filepath):
    """Read a text file and return its entire contents as one string.

    The file is decoded as UTF-8; byte sequences that are not valid UTF-8
    are dropped rather than raising (``errors="ignore"``).

    :param filepath: Path of the file to read.
    :return: The decoded file contents.
    """
    with open(filepath, mode="r", encoding="utf-8", errors="ignore") as source:
        contents = source.read()
    return contents
    
def save_file_to_json(filepath, content):
    """Serialize ``content`` to JSON and write it to ``filepath``.

    The target file is opened in write mode with UTF-8 encoding, so any
    existing file at that path is overwritten.

    :param filepath: Destination path of the JSON file.
    :param content: Object to serialize with ``json.dump``.
    """
    with open(filepath, mode='w', encoding='utf-8') as sink:
        json.dump(content, fp=sink)

def save_file(filepath, content):
    """Write the string ``content`` to ``filepath``, replacing any existing file.

    :param filepath: Destination path of the text file.
    :param content: Text to write; encoded as UTF-8.
    """
    with open(filepath, mode='w', encoding='utf-8') as sink:
        sink.write(content)

In [None]:
import re

def clean_text(input_text):
    """
    Normalize raw text before it is embedded in a request payload.

    Three passes are applied in order: non-ASCII characters are dropped,
    square and curly brackets are removed, and every run of whitespace is
    collapsed to a single space. Leading/trailing whitespace is stripped.

    :param input_text: The raw input text to clean.
    :return: The cleaned text.
    """
    # Pass 1: drop anything that cannot be represented in ASCII.
    ascii_only = input_text.encode('ascii', errors='ignore').decode('ascii')

    # Pass 2: delete the four bracket characters [ ] { }.
    without_brackets = ascii_only.translate(str.maketrans('', '', '[]{}'))

    # Pass 3: collapse whitespace runs (spaces, tabs, newlines) to one space.
    collapsed = re.sub(r'\s+', ' ', without_brackets)

    return collapsed.strip()

In [None]:
from openai import OpenAI, RateLimitError
import backoff
from halo import Halo
import time

# Set up the shared OpenAI client used by chat() below.
# No credentials are passed explicitly here; presumably the SDK picks them up
# from the environment (e.g. OPENAI_API_KEY) — TODO confirm.
client = OpenAI()

@backoff.on_exception(backoff.expo, RateLimitError)
def chat(**kwargs):
    """Send one structured-output chat request and time it.

    All keyword arguments are forwarded unchanged to
    ``client.beta.chat.completions.parse``.

    :param kwargs: Request parameters (e.g. ``model``, ``messages``,
        ``response_format``). The parsed response object is expected to
        expose a ``text`` attribute.
    :return: A tuple ``(text, model, tokens, formatted_time)`` where ``text``
        is ``response.choices[0].message.parsed.text``, ``model`` is
        ``response.model``, ``tokens`` is ``response.usage`` and
        ``formatted_time`` is the request duration rendered as
        ``"<m> minutes and <s.ss> seconds"``.
    :raises RateLimitError: Propagated so the ``backoff`` decorator can retry.
    :raises SystemExit: With exit status 1 on any other failure, after the
        error message has been printed.
    """
    try:
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by wall-clock adjustments.
        started = time.perf_counter()
        response = client.beta.chat.completions.parse(**kwargs)
        elapsed_time = time.perf_counter() - started

        minutes, seconds = divmod(elapsed_time, 60)
        formatted_time = f"{int(minutes)} minutes and {seconds:.2f} seconds"

        text = response.choices[0].message.parsed.text
        model = response.model
        tokens = response.usage

        return text, model, tokens, formatted_time
    except RateLimitError:
        # Must escape this function untouched: the decorator only retries
        # when it actually sees the exception. The generic handler below
        # would otherwise swallow it and abort on the first rate limit.
        raise
    except Exception as yikes:
        print(f'\n\nError communicating with OpenAI: "{yikes}"')
        # Abort with a non-zero status so the failure is not reported as
        # success; chain the original error for the traceback.
        raise SystemExit(1) from yikes

In [None]:
import os
from pydantic import BaseModel

def spr_pack(input_dir: str, output_dir: str):
    """Run the SPR-pack prompt over every ``.txt`` file in a directory.

    For each ``*.txt`` file in ``input_dir`` the text is cleaned with
    ``clean_text``, sent to the model together with the system prompt read
    from ``../prompts/spr_pack.xml``, and the returned text is written to
    ``<output_dir>/<filename>.md`` (the original ``.txt`` suffix is kept in
    the output name).

    Each file is sent as its own independent conversation (system prompt plus
    that file's text), so the result for one file does not depend on which
    other files were processed before it.

    :param input_dir: Directory containing the ``.txt`` files to process.
    :param output_dir: Directory that receives the ``.md`` results; created
        if it does not exist.
    """

    class ModelResponse(BaseModel):
        # Schema handed to the API as ``response_format``. Only ``text`` is
        # read back (inside ``chat``); ``model`` is part of the schema but is
        # not used by this function.
        text: str
        model: str

    # The system prompt is identical for every file, so load it once.
    system_message = {
        "role": "system",
        "content": open_file("../prompts/spr_pack.xml"),
    }

    # Make sure the destination exists before the first (slow) API call.
    os.makedirs(output_dir, exist_ok=True)

    # Sorted for a deterministic processing order across runs/platforms.
    for filename in sorted(os.listdir(input_dir)):
        print(filename)
        if not filename.endswith(".txt"):
            continue

        # Read and close the file before the network call is made.
        with open(os.path.join(input_dir, filename), "r", encoding="UTF-8") as f:
            raw_text = f.read()

        # Fresh message list per file: previous files' contents must not be
        # resent with (or leak into) this request.
        conversation = [
            system_message,
            {
                "role": "user",
                "content": [{"type": "text", "text": clean_text(raw_text)}],
            },
        ]

        packed_text, _model, _tokens, _formatted_time = chat(
            model="gpt-4o",
            messages=conversation,
            max_completion_tokens=2000,
            temperature=1,
            response_format=ModelResponse,
        )

        save_file(os.path.join(output_dir, f"{filename}.md"), packed_text)

In [None]:
# Pack every .txt file under the split/ folder; results are written to its spr/ subfolder.
spr_pack("../docs/northside/split/", "../docs/northside/split/spr/")