In [1]:
import openai
import json

import os
from dotenv import load_dotenv

# Load API Key from .env file (load_dotenv also leaves already-set environment
# variables in place, so a key exported in the shell works too).
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Fail fast: without a key every later request would be rejected, and the
# batch loop would only print one error per batch instead of stopping here.
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to the .env file or export it "
        "in the environment before running this notebook."
    )

# Configure OpenAI API client
openai.api_key = api_key

In [2]:
# Load the input records. The encoding is given explicitly so the file is read
# the same way on every platform (the default is locale-dependent), matching
# the UTF-8 used when the responses are written out.
with open('testData.json', 'r', encoding='utf-8') as f:
    data = json.load(f)


In [6]:

# Ensure the apiResponse directory exists. exist_ok=True makes this a single,
# race-free call that is safe to re-run when the directory is already there.
os.makedirs('apiResponse', exist_ok=True)

# Example of batching: send the records to the chat model a few at a time and
# append every parsed answer, one JSON object per line, to the output file.
batch_size = 3

# Loop-invariant, so defined once. Note the file holds one JSON object per
# line (JSON Lines) even though its extension is .json.
output_file_path = 'apiResponse/all_responses.json'

# Instructions sent with every batch. Each template field sits on its own line
# and the payload follows a blank line, so the model sees the requested layout
# instead of one run-on line with the data glued to the last field label.
prompt_instructions = (
    "I need you to scrape data from this text. List only name, index number, hobbies and social organisations. "
    "Notice to list spouse's hobbies and social orgs separately. Do not list jobs, or war time occupations. "
    "Do not suggest to make an algorithm. If no social orgs detected respond: -\n"
    "Do not say anything but the asked information. "
    "Response in format:\n"
    "--\n"
    "IndexID: \n"
    "PersonName: \n"
    "PersonHobbies: \n"
    "PersonSocialOrgs: \n"
    "SpouseName: \n"
    "SpouseHobbies: \n"
    "SpouseSocialOrgs: \n"
    "\n"
    "Text:\n"
)


def _split_records(content, separator='--', marker='IndexID'):
    """Split one model reply into per-person record strings.

    The reply is cut on ``separator`` and each piece is stripped. The text
    before the first separator is kept only when it contains ``marker`` (the
    first field the prompt asks for); otherwise it is treated as an empty
    lead-in or preamble and dropped. Empty pieces are discarded.

    Args:
        content: Raw text of the model reply.
        separator: Marker the prompt asks the model to put before each record.
        marker: Field label used to decide whether the leading piece is a record.

    Returns:
        List of non-empty, stripped record strings in reply order.
    """
    sections = [section.strip() for section in content.split(separator)]
    # Drop the leading piece unless the model omitted the first separator and
    # it is therefore a real record.
    if sections and marker not in sections[0]:
        sections = sections[1:]
    return [section for section in sections if section]


for i in range(0, len(data), batch_size):
    batch = data[i:i + batch_size]
    # Keep non-ASCII characters readable in the prompt rather than \uXXXX
    # escapes, consistent with how the responses are written below.
    data_str = json.dumps(batch, ensure_ascii=False)

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt_instructions + data_str},
    ]

    # API call. Errors are reported per batch and the loop continues, so one
    # failed request does not discard the batches that succeed.
    try:
        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface —
        # confirm the installed openai version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.8,
        )

        # Splitting on '--' to separate individual responses
        responses = _split_records(response['choices'][0]['message']['content'])

        # person_index assumes the model returned exactly one record per input
        # item, in order; surface the cases where that does not hold.
        if len(responses) != len(batch):
            print(
                f"Warning in batch starting at index {i}: expected {len(batch)} "
                f"records but parsed {len(responses)}; person_index may be misaligned"
            )

        # Storing responses along with batch and individual indexes
        structured_responses = [
            {
                "batch_number": i // batch_size,
                "person_index": i + j,
                "api_response": api_response,
            }
            for j, api_response in enumerate(responses)
        ]

        # Save/Append the batch response to the file. Append mode means that
        # re-running this cell adds the records again rather than replacing them.
        with open(output_file_path, 'a', encoding='utf-8') as file:
            for item in structured_responses:
                json.dump(item, file, ensure_ascii=False)
                file.write('\n')

    except Exception as e:
        print(f"Error in batch starting at index {i}: {str(e)}")

