In [1]:
import os
import base64
from openai import OpenAI
from pymongo import MongoClient
import json

In [2]:


def extract_json_schema(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
        start = lines.index('```json\n') + 1
        end = lines.index('```\n', start)
        json_schema = ''.join(lines[start:end])
        return json.loads(json_schema)

def convert_to_mongodb_schema(file_paths):
    schemas = {}
    for file_path in file_paths:
        schema = extract_json_schema(file_path)
        schema_name = file_path.split('/')[-1].split('_')[0]
        schemas[schema_name] = schema
    return schemas

# Example usage:
file_paths = [
    'pdf_products/Atomiseurs/Atomiseurs_schema.txt',
    'pdf_products/Autres Comestibles/Autres Comestibles_schema.txt'
]

#mongodb_schemas = convert_to_mongodb_schema(file_paths)

# Print the schemas
# for name, schema in mongodb_schemas.items():
#     print(f"{name} Schema:")
#     print(json.dumps(schema, indent=4, ensure_ascii=False))

In [3]:
def resize_image(image_path):
    resized_image_path = os.path.join(FOLDER_PATH, f"resized_{os.path.basename(image_path)}")
    
    if os.path.exists(resized_image_path):
        return resized_image_path
    with Image.open(image_path) as img:
        width, height = img.size
        if width < height:
            if width > 768:
                ratio = 768 / width
                new_width = 768
                new_height = int(height * ratio)
                if new_height > 2000:
                    ratio = 2000 / height
                    new_width = int(width * ratio)
                    new_height = 2000
            elif height > 2000:
                ratio = 2000 / height
                new_width = int(width * ratio)
                new_height = 2000
            else:
                new_width, new_height = width, height
        else:
            if height > 768:
                ratio = 768 / height
                new_height = 768
                new_width = int(width * ratio)
                if new_width > 2000:
                    ratio = 2000 / width
                    new_height = int(height * ratio)
                    new_width = 2000
            elif width > 2000:
                ratio = 2000 / width
                new_width = 2000
                new_height = int(height * ratio)
            else:
                new_width, new_height = width, height
        
        img = img.resize((new_width, new_height), Image.LANCZOS)
        resized_image_path = os.path.join(FOLDER_PATH, f"resized_{os.path.basename(image_path)}")
        img.save(resized_image_path)
        return resized_image_path

In [4]:


# Set the API key and model name
MODEL = "gpt-4o-mini"
FOLDER_PATH = "pdf_products/Fleurs Séchées 1g"
API_KEY = os.environ.get("sqdc_api_key")
MONGODB_URI = os.environ.get("mongodb_uri")

client = OpenAI(api_key=API_KEY)

# MongoDB client setup
mongo_client = MongoClient(MONGODB_URI)
db = mongo_client['your_database_name']  # Replace with your database name
collection = db['your_collection_name']  # Replace with your collection name

# Function to encode image as a base64 string
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

# Function to insert schema into MongoDB
def insert_schema_into_mongo(schema, image_name):
    document = {
        "image_name": image_name,
        "schema": schema
    }
    collection.insert_one(document)

# Get all .png files in the specified folder
image_files = [f for f in os.listdir(FOLDER_PATH) if f.endswith('.png')]
schema_file = [f for f in os.listdir(FOLDER_PATH) if f.endswith('.txt')]
extracted_schema = convert_to_mongodb_schema()
# Process each image individually
for image_file in image_files:
    resized_image_path = resize_image(image_path, folder_path)
    image_path = os.path.join(FOLDER_PATH, image_file)
    base64_image = encode_image(image_path)
    
    # Create the message for the current image
    messages = [
        {"role": "system", "content": f"You are a database specialist assistant, that will responds only mongodb schema using this validation {extracted_schema}!"},
        
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
        ]}
    ]
    
    # Send the request to OpenAI API
    response = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        temperature=0.0,
    )
    
    # Get the generated MongoDB schema for the current image
    schema = response.choices[0].message.content
    
    # Insert the schema into MongoDB
    insert_schema_into_mongo(schema, image_file)
    
    # Print the inserted schema for verification
    print(f"Inserted MongoDB Schema for {image_file}:\n")
    print(schema)
    print("\n" + "="*50 + "\n")


OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable