In [14]:
import os
import json
import re
import time
import openai

def load_text_from_json(json_file):
    """Read the UTF-8 encoded file at `json_file` and return its parsed JSON.

    The return value is whatever `json.load` produces for the file's
    top-level value (dict, list, string, number, ...).
    """
    with open(json_file, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

def query_chatgpt(text, api_key, retries=3):
    """Send `text` as a single user message to the chat model and return the reply.

    Transient API failures are retried with exponential backoff
    (1s, 2s, 4s, ... between attempts).

    Args:
        text: Prompt sent as the only user message.
        api_key: Accepted for interface compatibility; this function does not
            forward it to the request.
            NOTE(review): presumably the key must be configured globally
            (e.g. `openai.api_key` or the OPENAI_API_KEY environment
            variable) -- confirm.
        retries: Maximum number of attempts before giving up.

    Returns:
        The message content of the first choice, with surrounding whitespace
        stripped.

    Raises:
        Exception: If every attempt fails. The last API error is attached as
            the exception's cause.
    """
    # Errors that are typically transient and worth retrying; anything else
    # (e.g. authentication or invalid-request errors) propagates immediately.
    retryable_errors = (
        openai.error.APIError,
        openai.error.RateLimitError,
        openai.error.APIConnectionError,
        openai.error.ServiceUnavailableError,
        openai.error.Timeout,
    )
    last_error = None
    for attempt in range(retries):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": text}],
                max_tokens=4096
            )
        except retryable_errors as e:
            last_error = e
            print(f"{type(e).__name__} on attempt {attempt + 1}: {str(e)}")
            # Back off only when another attempt will follow; sleeping after
            # the final failure would just delay the error.
            if attempt < retries - 1:
                time.sleep(2**attempt)  # Exponential backoff
        else:
            return response['choices'][0]['message']['content'].strip()

    raise Exception("OpenAI API failed after several retries") from last_error

def clean_and_validate_json(text):
    """Parse a model reply as JSON, tolerating common formatting noise.

    Parsing is attempted in two stages:

    1. The reply (with any surrounding Markdown code fence removed) is parsed
       as-is, so valid JSON escapes such as ``\\"`` or ``\\u00e9`` survive.
    2. If that fails, newlines and backslashes are replaced with spaces and
       parsing is retried. This lossy cleanup rescues replies that contain
       raw line breaks or stray backslashes inside string values.

    Args:
        text: Raw reply text.

    Returns:
        The decoded JSON value if either stage succeeds; otherwise the
        original, unmodified `text`.
    """
    candidate = text.strip()

    # Unwrap a ```json ... ``` (or bare ``` ... ```) fence around the payload.
    fenced = re.fullmatch(
        r"```(?:json)?\s*(.*?)\s*```", candidate, flags=re.DOTALL | re.IGNORECASE
    )
    if fenced:
        candidate = fenced.group(1)

    # Try the untouched payload first; fall back to the lossy cleanup only
    # when strict parsing fails.
    flattened = candidate.replace("\n", " ").replace("\\", " ")
    for attempt in (candidate, flattened):
        try:
            return json.loads(attempt)
        except json.JSONDecodeError:
            continue

    print("Failed to decode JSON, returning original text.")
    return text  # Return original text if it fails

def run_for_reference(reference, text, api_key):
    """Ask the chat model to extract structured details for one citation.

    Builds a prompt containing the citation (`text`) and the reference
    material (`reference`), sends it through `query_chatgpt`, and returns
    the reply after it has passed through `clean_and_validate_json`.
    """
    request_text = f"""Extract the following information about the citation from the given text:
    {{
        "citation": "{text}",
        "title": "",
        "authors": "",
        "year": "",
        "journal": ""
    }}
    Citation: {text}
    References: {reference}
    Only return the dictionary containing the extracted information.
    Avoid wtitng any additional text that messes up the JSON format.
    Dont write ```json or Here is the extracted information for the citation:  kind a thing.
    """
    return clean_and_validate_json(query_chatgpt(request_text, api_key))

def process_claims_file(claims_file_path, references, api_key):
    """Resolve every distinct citation found in a claims file.

    The file at `claims_file_path` is loaded as JSON and iterated; each item's
    'Citations' value is split on ';'. Every non-empty, not-yet-seen citation
    is looked up once via `run_for_reference`.

    Returns:
        A dict mapping each citation string to the value returned by
        `run_for_reference` for it.
    """
    resolved = {}
    for entry in load_text_from_json(claims_file_path):
        raw_citations = entry.get('Citations', '')
        for piece in raw_citations.split(';'):
            key = piece.strip()
            # Skip blanks and citations already resolved earlier in the file.
            if not key or key in resolved:
                continue
            resolved[key] = run_for_reference(references, key, api_key)
    return resolved

def process_directory(directory):
    """Build citation reference files for every sub-folder of `directory`.

    A sub-folder is processed only when it contains both
    `<folder_name>.json` (whose 'references' entry is read) and
    `Claims/combined_data.json`. The resulting citation-to-reference mapping
    is written to `Claims/citation_references.json` inside that sub-folder.
    """
    api_key = 'openai_api_key_here' # Replace with your OpenAI API key
    for folder_name in os.listdir(directory):
        folder_path = os.path.join(directory, folder_name)
        if not os.path.isdir(folder_path):
            continue

        reference_file_path = os.path.join(folder_path, f"{folder_name}.json")
        if not os.path.exists(reference_file_path):
            continue
        reference_data = load_text_from_json(reference_file_path)
        references = reference_data.get('references', '')

        claims_dir = os.path.join(folder_path, 'Claims')
        claims_file = os.path.join(claims_dir, 'combined_data.json')
        if not os.path.exists(claims_file):
            continue

        mapping = process_claims_file(claims_file, references, api_key)
        output_file_path = os.path.join(claims_dir, 'citation_references.json')
        with open(output_file_path, 'w', encoding='utf-8') as sink:
            json.dump(mapping, sink, indent=4)
        print(f"Saved citation references to {output_file_path}")
def main():
    """Run `process_directory` on the configured root directory."""
    process_directory('RootDirectory')  # Replace with the path to your directory

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

Saved citation references to /Users/AliTarik/Documents/LastAttempt/2010_1814/Claims/citation_references.json
Saved citation references to /Users/AliTarik/Documents/LastAttempt/2010_1815/Claims/citation_references.json
Saved citation references to /Users/AliTarik/Documents/LastAttempt/2011_2266/Claims/citation_references.json
Saved citation references to /Users/AliTarik/Documents/LastAttempt/2010_1757/Claims/citation_references.json
Saved citation references to /Users/AliTarik/Documents/LastAttempt/2010_1759/Claims/citation_references.json
Saved citation references to /Users/AliTarik/Documents/LastAttempt/2011_2062/Claims/citation_references.json
Saved citation references to /Users/AliTarik/Documents/LastAttempt/2010_1732/Claims/citation_references.json
Saved citation references to /Users/AliTarik/Documents/LastAttempt/2010_1760/Claims/citation_references.json
Saved citation references to /Users/AliTarik/Documents/LastAttempt/2010_1758/Claims/citation_references.json
Saved citation refe