# Working Good

In [3]:
import json
import logging
import os

def split_and_save_conversations(conversations_file, output_folder):
    try:
        with open(conversations_file, 'r', encoding='utf-8') as file:
            data = json.load(file)
            
            for conversation in data:
                title = conversation.get('title', 'Unknown_Title')
                title_with_underscores = title.replace(' ', '_')
                chapter_filename = f"{title_with_underscores}.json"
                chapter_filepath = os.path.join(output_folder, chapter_filename)
                
                logging.info(f"Saving data for conversation '{title}' to {chapter_filepath}")
                
                with open(chapter_filepath, 'w', encoding='utf-8') as chapter_file:
                    json.dump([conversation], chapter_file, indent=2)

    except FileNotFoundError:
        logging.error(f"File not found: {conversations_file}")
    except json.JSONDecodeError:
        logging.error(f"Error decoding JSON in file: {conversations_file}")
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")

# Example usage
conversations_file_path = 'CHATDPT/conversations.json'
output_folder = 'CHATDPT/chapters_indexed'

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Configure logging
logging.basicConfig(level=logging.INFO)

# Call the split and save function
split_and_save_conversations(conversations_file_path, output_folder)


In [1]:
%%writefile json2html.py
#!/home/jack/miniconda3/envs/cloned_base/bin/python
import json
from sys import argv
# Load the JSON data from the uploaded file
DIR="CHATDPT/chapters_indexed/"
filename=argv[1]
Filename = DIR+filename
print(Filename)
with open(Filename, 'r') as file:
    json_data = json.load(file)

# Initialize the result string
result_str = ""

# Define a function to get conversation messages similar to the JavaScript logic
def get_conversation_messages(conversation):
    messages = []
    current_node = conversation.get('current_node')
    while current_node:
        node = conversation['mapping'][current_node]
        message = node.get('message')
        if (message and message.get('content') and message['content'].get('content_type') == 'text' and
            len(message['content'].get('parts', [])) > 0 and len(message['content']['parts'][0]) > 0 and
                (message['author']['role'] != 'system' or message.get('metadata', {}).get('is_user_system_message'))):
            author = message['author']['role']
            if author == 'assistant':
                author = 'ChatGPT'
            elif author == 'system' and message['metadata'].get('is_user_system_message'):
                author = 'Custom user info'
            messages.append({'author': author, 'text': message['content']['parts'][0]})
        current_node = node.get('parent')
    return messages[::-1]  # Reverse the list to maintain chronological order

# Iterate over each conversation in the JSON data and process it
for conversation in json_data:
    # Get the conversation title and messages
    title = conversation.get('title', '')
    messages = get_conversation_messages(conversation)

    # Append the title and messages to the result string
    result_str += title + '\n'
    for message in messages:
        result_str += message['author'] + '\n' + message['text'] + '\n'
    result_str += '\n'  # Add a newline between conversations

# Return the processed result string
print(result_str.strip())
HTMLfile=DIR+filename[:-4]+"html"
print("HTMLfile: ",HTMLfile)
Input = open(HTMLfile,"w")
result_str = result_str.replace("/n","XXXXXXX\n")
result_str = result_str.replace("<","&lt;")
result_str = result_str.replace(">","&gt;")
for line in result_str.split("XXXXXXX"):
    line = line.replace("\n","<br />\n")
    Input.write(line)
Input.close()

Writing json2html.py


In [None]:
!chmod +x json2html.py


In [2]:
#!/home/jack/miniconda3/envs/cloned_base/bin/python
import json
import os
import glob
import subprocess
import logging

# Set up logging configuration
logging.basicConfig(filename='conversion_log.txt', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define the directory path
DIR = "CHATDPT/chapters_indexed/"

# List all JSON files in the directory
jsonfiles = glob.glob(DIR + '*.json')

if not jsonfiles:
    logging.warning("No JSON files found in the specified directory.")
    exit()

# Iterate over each JSON file in the directory
for json_file in jsonfiles:
    # Load the JSON data from the current file
    with open(json_file, 'r') as file:
        json_data = json.load(file)

    # Initialize the result string
    result_str = ""

    # Rest of the script remains unchanged...
    # (Omitted for brevity, you can keep the rest of your script as is)

    # Process the current JSON file
    for conversation in json_data:
        title = conversation.get('title', '')
        messages = get_conversation_messages(conversation)
        result_str += title + '\n'
        for message in messages:
            result_str += message['author'] + '\n' + message['text'] + '\n'
        result_str += '\n'

    # Define the HTML file path based on the current JSON file
    HTMLfile = os.path.join(DIR, os.path.basename(json_file)[:-5] + ".html")

    print("HTMLfile: ", HTMLfile)

    # Write the HTML content to the file
    with open(HTMLfile, "w") as Input:
        result_str = result_str.replace("\n", "XXXXXXX\n")
        result_str = result_str.replace("<", "&lt;")
        result_str = result_str.replace(">", "&gt;")
        for line in result_str.split("XXXXXXX"):
            line = line.replace("\n", "<br />\n")
            Input.write(line)

    print(f"HTML conversion completed for {json_file}.")

# Logging for the entire process completion
logging.info("Conversion process completed for all JSON files.")


NameError: name 'get_conversation_messages' is not defined

In [None]:
import sqlite3
import uuid

# Connect to SQLite database
db_path = 'chat_database.db'
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

def retrieve_file_content(filename):
    cursor.execute('SELECT content FROM files WHERE filename = ?', (filename,))
    result = cursor.fetchone()
    return result[0] if result else None

def search_and_print_fourth_file(search_terms):
    Data = ""

    # Prepare the SQL query for searching files based on the given terms
    query = '''
        SELECT filename
        FROM files
        WHERE {}
    '''.format(' AND '.join(['text_content LIKE ?' for _ in search_terms]))

    # Add % around search terms for a partial match with spaces
    search_terms = ['% {} %'.format(term) for term in search_terms]

    # Execute the query and retrieve matching files
    cursor.execute(query, search_terms)
    matching_files = cursor.fetchall()

    # Check if there are at least 2 matching files
    if matching_files and len(matching_files) >= 2:
        fourth_file = matching_files[1][0]  # Get the filename of the second matching file
        print(fourth_file)

        # Retrieve the content of the matching file
        content = retrieve_file_content(fourth_file)
        
        if content:
            # Decode the content and append it to the Data variable
            Data = Data + f'{content.decode("utf-8", errors="ignore")}'
            print(Data)
            return Data
        else:
            print(f'Error: Content not found for {fourth_file}')
    else:
        print('Error: No matching files found or less than two matching files.')

# Example: Search for files containing 'flask' and '5200'
search_terms = ['flask', '5200']
DATA = search_and_print_fourth_file(search_terms)

# Close the connection to the database
conn.close()

# If data is found, create a unique filename and write the content to an HTML file
if len(DATA) > 2:
    uid = str(uuid.uuid4())  # Generate a unique ID using uuid
    FileName = "_".join(search_terms) + "_" + uid + ".html"
    print(FileName)

    # Open the file for writing
    with open(FileName, "w") as IN:
        # Split the data into lines and write each line to the file with "<br />" appended
        ndata = DATA.split("<br />\n")
        for line in ndata:
            print(line)
            IN.write(line + "<br />\n")


## Explanation and Comments:

### Database Connection:
- Establishes a connection to the SQLite database.

### Function `retrieve_file_content`:
- Retrieves the content of a file from the database based on the filename.

### Function `search_and_print_fourth_file`:
- Searches for files based on given search terms using SQL LIKE.
- Retrieves the second matching file (index 1) and its content.
- Appends the decoded content to the `Data` variable.

### Example Search:
- Searches for files containing 'flask' and '5200'.

### Close Connection:
- Closes the connection to the database.

### Create Unique Filename and Write HTML File:
- If data is found, generates a unique filename with search terms and a UUID.
- Opens the file for writing and writes each line of the content with "<br />" appended.

This script is designed to search for files based on certain terms, retrieve the content of the second matching file, and then create an HTML file with a unique filename and the retrieved content. Feel free to ask if you have any specific questions or if there's anything else you'd like to understand!
