In [10]:
import os
import base64
from openai import OpenAI
from pymongo import MongoClient

# Run configuration: model name, image folder, and credentials read from the environment
MODEL = "gpt-4o-mini"
FOLDER_PATH = "pdf_products/Fleurs Séchées 1g"
API_KEY = os.getenv("sqdc_api_key")
MONGODB_URI = os.getenv("mongodb_uri")

# OpenAI client used for the chat-completion requests
client = OpenAI(api_key=API_KEY)

# MongoDB handles; the database and collection names below are placeholders
mongo_client = MongoClient(MONGODB_URI)
db = mongo_client['your_database_name']  # Replace with your database name
collection = db['your_collection_name']  # Replace with your collection name

# Function to encode image as a base64 string
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        str: The base64 encoding of the file contents.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode("utf-8")

# Function to insert schema into MongoDB
def insert_schema_into_mongo(schema, image_name):
    """Insert one document into the module-level MongoDB ``collection``.

    Args:
        schema: Value stored under the "schema" key of the document.
        image_name: Value stored under the "image_name" key of the document.
    """
    collection.insert_one({"image_name": image_name, "schema": schema})

# Get all .png files in the specified folder. The extension is matched
# case-insensitively so files named e.g. "PAGE1.PNG" are not silently skipped.
image_files = [f for f in os.listdir(FOLDER_PATH) if f.lower().endswith('.png')]

# Process each image individually: one API request and one MongoDB document per image
for image_file in image_files:
    image_path = os.path.join(FOLDER_PATH, image_file)
    base64_image = encode_image(image_path)

    # The image is sent inline as a base64 data URL; the system prompt asks
    # the model to reply with a MongoDB schema only.
    messages = [
        {"role": "system", "content": "You are a database specialist assistant, that will responds only mongodb schema!"},
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
        ]}
    ]

    # Send the request to OpenAI API
    response = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        temperature=0.0,
    )

    # The reply text is stored as-is; it is not parsed or validated here
    schema = response.choices[0].message.content

    # Insert the schema into MongoDB, keyed by the image file name
    insert_schema_into_mongo(schema, image_file)

    # Print the inserted schema for verification
    print(f"Inserted MongoDB Schema for {image_file}:\n")
    print(schema)
    print("\n" + "="*50 + "\n")


```json
{
  "products": [
    {
      "name": "L'Bon voisin",
      "brand": "Fleurs de Lise",
      "species": "Sativa",
      "variety": "Alien Cookies",
      "genetics": "Girl Scout Cookies x Alien Dog",
      "format": "1 g",
      "method_of_curing": "Humid curing, hand-trimmed, followed by dry curing",
      "effects": [
        "Euphoria",
        "Relaxation",
        "Joy"
      ],
      "effect_onset_time": "Approximately 2 minutes after inhalation",
      "cultivation_method": "Hydroponic indoor cultivation",
      "location": "Sainte-Agathe-des-Monts, Québec",
    },
    {
      "name": "Le P'ti Phéno",
      "brand": "Pecko",
      "species": "1:1 - Rotatif",
      "cultivation_method": "Micro-production in Quebec",
      "format": "1 g",
      "effects": [
        "Calm",
        "Relaxing"
      ],
      "effect_onset_time": "90 seconds to 5 minutes",
      "additional_info": [
        "Cultivated indoors",
        "Top Cola only",
        "2 weeks of cold curing",
    

In [12]:
import json
def save_schema_to_json(schema, collection_name):
    """Serialize ``schema`` as JSON into ``<collection_name>_fields.json``.

    The file is created relative to the current working directory. It is
    written as UTF-8 explicitly: ``ensure_ascii=False`` emits non-ASCII
    characters verbatim, and without an explicit encoding the bytes on disk
    would depend on the platform's locale.

    Args:
        schema: JSON-serializable object to write.
        collection_name: Prefix of the output file name.
    """
    filename = f"{collection_name}_fields.json"
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(schema, f, indent=2, ensure_ascii=False)
    print(f"Schema for collection '{collection_name}' saved to '{filename}'")

In [14]:
import os
import base64
from openai import OpenAI

# Run configuration: product category, model name, image folder, and API key
# (another category folder used with this cell: "Fleurs Séchées  1g")
collection = "Hashish"
MODEL = "gpt-4o-mini"
FOLDER_PATH = f"pdf_products/{collection}"
API_KEY = os.getenv("sqdc_api_key")

# OpenAI client used for the chat-completion request
client = OpenAI(api_key=API_KEY)

# Function to encode image as a base64 string
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        str: The base64 encoding of the file contents.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode("utf-8")

# Names of the PNG files in the folder (extension matched case-insensitively)
image_files = [name for name in os.listdir(FOLDER_PATH) if name.lower().endswith('.png')]

# Conversation sent to the model: system prompt, text instruction, then one
# user message per image
messages = [
    {"role": "system", "content": "You are a database specialist assistant, that will responds only mongodb schema!"},
    {"role": "user", "content": [{"type": "text", "text": "from all images, read it and make a mongodb shema with most relevant informations"}]},
]

for image_file in image_files:
    image_path = os.path.join(FOLDER_PATH, image_file)
    base64_image = encode_image(image_path)
    # Each image travels inline as a base64 data URL
    image_part = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
    messages.append({"role": "user", "content": [image_part]})

# Single request covering all images
response = client.chat.completions.create(model=MODEL, messages=messages, temperature=0.0)

# Show the reply, then persist it through save_schema_to_json
print(response.choices[0].message.content)
save_schema_to_json(response.choices[0].message.content, collection)


BadRequestError: Error code: 400 - {'error': {'message': "You uploaded an unsupported image. Please make sure your image is below 20 MB in size and is of one the following formats: ['png', 'jpeg', 'gif', 'webp'].", 'type': 'invalid_request_error', 'param': None, 'code': 'sanitizer_server_error'}}

In [7]:
import os
import base64
from PIL import Image
from openai import OpenAI

# Run configuration: product category, model name, image folder, and API key
collection = "Capsules"
MODEL = "gpt-4o-mini"
FOLDER_PATH = f"pdf_products/{collection}"
API_KEY = os.getenv("sqdc_api_key")

# OpenAI client used for the chat-completion request
client = OpenAI(api_key=API_KEY)

# Function to encode image as a base64 string
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        str: The base64 encoding of the file contents.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode("utf-8")

# Function to resize image proportionally
def resize_image(image_path):
    """Write a proportionally scaled copy of an image and return the copy's path.

    The copy is stored in ``FOLDER_PATH`` as ``resized_<original file name>``.
    When that file already exists it is returned without opening the source.
    Otherwise the image is scaled down so its shorter side is at most 768 px
    and its longer side at most 2000 px; an image already within both limits
    is re-saved at its original size.

    Args:
        image_path: Path of the source image.

    Returns:
        str: Path of the ``resized_`` copy.
    """
    target_path = os.path.join(FOLDER_PATH, f"resized_{os.path.basename(image_path)}")
    if os.path.exists(target_path):
        return target_path

    with Image.open(image_path) as img:
        width, height = img.size
        # Work on (short, long) so both orientations share one code path;
        # a square image is treated like the landscape case.
        portrait = width < height
        short_side, long_side = (width, height) if portrait else (height, width)

        new_short, new_long = short_side, long_side
        if short_side > 768:
            new_short = 768
            new_long = int(long_side * (768 / short_side))
            if new_long > 2000:
                # Still too long after fitting the short side: fit the long side instead
                new_short = int(short_side * (2000 / long_side))
                new_long = 2000
        elif long_side > 2000:
            new_short = int(short_side * (2000 / long_side))
            new_long = 2000

        new_size = (new_short, new_long) if portrait else (new_long, new_short)
        img.resize(new_size, Image.LANCZOS).save(target_path)
        return target_path
        

# Get all .png files in the specified folder. resize_image writes its
# "resized_*" copies into this same folder, so they are excluded here;
# otherwise a re-run would send those copies as extra images and create
# "resized_resized_*" files.
image_files = [f for f in os.listdir(FOLDER_PATH) if f.lower().endswith('.png') and not f.startswith('resized_')]

# Encode images and create the messages list: system prompt, text
# instruction, then one user message per image
messages = [
    {"role": "system", "content": "You are a frensh database specialist assistant, that will responds only mongodb schema translated in frensh!"}
]
messages.append({"role": "user", "content": [{"type": "text", "text": "from all images, read it and make a mongodb shema with all possible relevant informations"}]})

for image_file in image_files:
    image_path = os.path.join(FOLDER_PATH, image_file)
    # Send the downscaled copy rather than the original file
    resized_image_path = resize_image(image_path)
    base64_image = encode_image(resized_image_path)
    messages.append(
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
        ]}
    )

# Send the request to OpenAI API
response = client.chat.completions.create(
    model=MODEL,
    messages=messages,
    temperature=0.0,
)

# Print the generated MongoDB schema
print(response.choices[0].message.content)

# Function to save schema to a text file
def save_schema_to_text(schema, collection_name):
    """Write ``schema`` to ``<collection_name>_schema.txt`` inside ``FOLDER_PATH``.

    The file is written as UTF-8 explicitly so accented characters in the
    text do not depend on the platform's default encoding; the code in this
    notebook that reads these files back opens them as UTF-8.

    Args:
        schema: Text to write.
        collection_name: Prefix of the output file name.
    """
    output_path = os.path.join(FOLDER_PATH, f"{collection_name}_schema.txt")
    with open(output_path, "w", encoding="utf-8") as text_file:
        text_file.write(schema)

save_schema_to_text(response.choices[0].message.content, collection)


Voici un schéma MongoDB en français basé sur les informations des images fournies :

```json
{
  "produits": [
    {
      "nom": "CBD 50 Capsules",
      "format": "30 Capsules",
      "dosage": "50mg de CBD/unité",
      "effets_potentiels": ["Calme", "Relaxant"],
      "temps_appartion_effets": "Jusqu'à 60 minutes après ingestion et parfois davantage"
    },
    {
      "nom": "Gélules équilibrées",
      "format": "30 x 2-4 MG 1:1 THC:CBD",
      "effets_potentiels": ["Créativité", "Appétit", "Calme"],
      "temps_appartion_effets": "Jusqu'à 60 minutes après ingestion et parfois davantage"
    },
    {
      "nom": "Gélules de Pink Kush",
      "format": "30 Softgels",
      "effets_potentiels": ["Créativité", "Appétit", "Calme"],
      "temps_appartion_effets": "De 90 secondes à 5 minutes après inhalation"
    },
    {
      "nom": "Capsules à la rosine",
      "format": "30 capsules",
      "effets_potentiels": ["Joie", "Calme", "Relaxation"],
      "temps_appartion_effets": "Ju

In [10]:
import os
import base64
from PIL import Image
from openai import OpenAI

# Run configuration: product category, model name, image folder, and API key
collection = "Hashish"
MODEL = "gpt-4o-mini"
FOLDER_PATH = f"pdf_products/{collection}"
API_KEY = os.getenv("sqdc_api_key")

# OpenAI client used for the chat-completion request
client = OpenAI(api_key=API_KEY)

# Function to encode image as a base64 string
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        str: The base64 encoding of the file contents.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode("utf-8")

# Function to resize image proportionally
def resize_image(image_path):
    """Return the path of an image whose short side is <= 768 px and long side <= 2000 px.

    If a ``resized_<file name>`` copy already exists in ``FOLDER_PATH`` it is
    returned without opening the source. If the source already satisfies both
    limits, ``image_path`` itself is returned and nothing is written.
    Otherwise a proportionally scaled copy is saved in ``FOLDER_PATH`` and
    its path returned.

    Args:
        image_path: Path of the source image.

    Returns:
        str: Path of the image to use (the original or its ``resized_`` copy).
    """
    resized_image_path = os.path.join(FOLDER_PATH, f"resized_{os.path.basename(image_path)}")

    if os.path.exists(resized_image_path):
        return resized_image_path

    with Image.open(image_path) as img:
        width, height = img.size
        short_side, long_side = min(width, height), max(width, height)
        if short_side <= 768 and long_side <= 2000:
            return image_path  # No need to resize

        if short_side > 768:
            new_short = 768
            new_long = int(long_side * (768 / short_side))
            if new_long > 2000:
                # Still too long after fitting the short side: fit the long side instead
                new_short = int(short_side * (2000 / long_side))
                new_long = 2000
        else:
            # Short side already fits but the long side exceeds 2000 px.
            # This case was previously saved unscaled.
            new_short = int(short_side * (2000 / long_side))
            new_long = 2000

        # Guard against a zero-pixel side for extreme aspect ratios
        new_short = max(1, new_short)

        new_size = (new_short, new_long) if width < height else (new_long, new_short)
        img.resize(new_size, Image.LANCZOS).save(resized_image_path)
        return resized_image_path

# Source PNG files of the folder; "resized_" copies are left out
image_files = [
    name for name in os.listdir(FOLDER_PATH)
    if name.lower().endswith('.png') and not name.startswith('resized_')
]

# Conversation sent to the model: system prompt, instruction, then one user
# message per image
messages = [
    {"role": "system", "content": "You are a database specialist assistant."},
    {"role": "user", "content": "Analyze all the following images and extract as much detailed information as possible. Use this information to create a validation MongoDB schema for future Insert, even if the schema is not perfectly structured. Prioritize the richness of the data extracted from the images."},
]

for image_file in image_files:
    image_path = os.path.join(FOLDER_PATH, image_file)
    resized_image_path = resize_image(image_path)
    base64_image = encode_image(resized_image_path)
    # Each image travels inline as a base64 data URL
    image_part = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
    messages.append({"role": "user", "content": [image_part]})

# Single request covering all images
response = client.chat.completions.create(model=MODEL, messages=messages, temperature=0.0)

# Show the generated MongoDB schema
print(response.choices[0].message.content)

# Function to save schema to a text file
def save_schema_to_text(schema, collection_name):
    """Write ``schema`` to ``<collection_name>_schema.txt`` inside ``FOLDER_PATH``.

    The file is written as UTF-8 explicitly so accented characters in the
    text do not depend on the platform's default encoding; the code in this
    notebook that reads these files back opens them as UTF-8.

    Args:
        schema: Text to write.
        collection_name: Prefix of the output file name.
    """
    output_path = os.path.join(FOLDER_PATH, f"{collection_name}_schema.txt")
    with open(output_path, "w", encoding="utf-8") as text_file:
        text_file.write(schema)

save_schema_to_text(response.choices[0].message.content, collection)


Based on the detailed information extracted from the images, here is a comprehensive MongoDB schema for the various types of hashish products. Each schema is designed to capture the relevant attributes of the products, including their characteristics, effects, and production methods.

### MongoDB Schema

```json
{
  "products": [
    {
      "name": "Hash #1",
      "type": "Hashish",
      "format": "1g",
      "variety": "Mélange",
      "colors": {
        "internal": "Brun à noir",
        "external": "Brun foncé à noir"
      },
      "texture": "Malléable à résineux",
      "effects": ["Calme", "Relaxation", "Euphorie"],
      "appearance_time": "90 secondes à 5 minutes après inhalation",
      "production": {
        "method": "Tamisage à sec",
        "cultivation": "Québec"
      },
      "brand": "QcGold"
    },
    {
      "name": "Habibi Kush",
      "type": "Hashish",
      "format": "3.5g",
      "variety": "OG Kush",
      "colors": {
        "internal": "Brun foncé à no

In [9]:
import os
import base64
from PIL import Image
from openai import OpenAI

# Run configuration: product category, model name, image folder, and API key
collection = "Hashish"
MODEL = "gpt-4o-mini"
FOLDER_PATH = f"pdf_products/{collection}"
API_KEY = os.getenv("sqdc_api_key")

# OpenAI client used for the chat-completion request
client = OpenAI(api_key=API_KEY)

# Function to encode image as a base64 string
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        str: The base64 encoding of the file contents.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode("utf-8")

# Function to resize image proportionally
def resize_image(image_path):
    """Return the path of an image whose short side is <= 768 px and long side <= 2000 px.

    If a ``resized_<file name>`` copy already exists in ``FOLDER_PATH`` it is
    returned without opening the source. If the source already satisfies both
    limits, ``image_path`` itself is returned and nothing is written.
    Otherwise a proportionally scaled copy is saved in ``FOLDER_PATH`` and
    its path returned.

    Args:
        image_path: Path of the source image.

    Returns:
        str: Path of the image to use (the original or its ``resized_`` copy).
    """
    resized_image_path = os.path.join(FOLDER_PATH, f"resized_{os.path.basename(image_path)}")

    if os.path.exists(resized_image_path):
        return resized_image_path

    with Image.open(image_path) as img:
        width, height = img.size
        short_side, long_side = min(width, height), max(width, height)
        if short_side <= 768 and long_side <= 2000:
            return image_path  # No need to resize

        if short_side > 768:
            new_short = 768
            new_long = int(long_side * (768 / short_side))
            if new_long > 2000:
                # Still too long after fitting the short side: fit the long side instead
                new_short = int(short_side * (2000 / long_side))
                new_long = 2000
        else:
            # Short side already fits but the long side exceeds 2000 px.
            # This case was previously saved unscaled.
            new_short = int(short_side * (2000 / long_side))
            new_long = 2000

        # Guard against a zero-pixel side for extreme aspect ratios
        new_short = max(1, new_short)

        new_size = (new_short, new_long) if width < height else (new_long, new_short)
        img.resize(new_size, Image.LANCZOS).save(resized_image_path)
        return resized_image_path

# Source PNG files of the folder; "resized_" copies are left out
image_files = [
    name for name in os.listdir(FOLDER_PATH)
    if name.lower().endswith('.png') and not name.startswith('resized_')
]

# Conversation sent to the model: system prompt, instruction, then one user
# message per image
messages = [
    {"role": "system", "content": "Vous êtes un assistant spécialiste des bases de données."},
    {"role": "user", "content": "Analysez toutes les images suivantes et extrayez le plus d'informations détaillées possible de chacune. Utilisez ces informations pour créer un schéma MongoDB complet, même si le schéma n'est pas parfaitement structuré. Priorisez la richesse des données extraites des images."},
]

for image_file in image_files:
    image_path = os.path.join(FOLDER_PATH, image_file)
    resized_image_path = resize_image(image_path)
    base64_image = encode_image(resized_image_path)
    # Each image travels inline as a base64 data URL
    image_part = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
    messages.append({"role": "user", "content": [image_part]})

# Single request covering all images
response = client.chat.completions.create(model=MODEL, messages=messages, temperature=0.0)

# Show the generated MongoDB schema
print(response.choices[0].message.content)

# Function to save schema to a text file
def save_schema_to_text(schema, collection_name):
    """Write ``schema`` to ``<collection_name>_schema.txt`` inside ``FOLDER_PATH``.

    The file is written as UTF-8 explicitly so accented characters in the
    text do not depend on the platform's default encoding; the code in this
    notebook that reads these files back opens them as UTF-8.

    Args:
        schema: Text to write.
        collection_name: Prefix of the output file name.
    """
    output_path = os.path.join(FOLDER_PATH, f"{collection_name}_schema.txt")
    with open(output_path, "w", encoding="utf-8") as text_file:
        text_file.write(schema)

save_schema_to_text(response.choices[0].message.content, collection)


Based on the information extracted from the images, here is a proposed MongoDB schema for the hashish products:

```json
{
  "hashish_products": [
    {
      "name": "Hash #1",
      "brand": "QcGold",
      "format": "1g",
      "species": "Blend",
      "color": {
        "internal": "Dark brown",
        "external": "Black"
      },
      "texture": "Malleable",
      "effects": ["Calm", "Relaxation", "Euphoria"],
      "appearance_time": "90 seconds to 5 minutes",
      "cultivation": {
        "method": "Hand-rolled",
        "location": "Quebec"
      }
    },
    {
      "name": "Habibi Kush",
      "brand": "Nordique Royale",
      "format": "3.5g",
      "species": "OG Kush",
      "color": {
        "internal": "Light brown to black",
        "external": "Dark brown"
      },
      "texture": "Malleable",
      "effects": ["Euphoria", "Energy", "Appetite"],
      "appearance_time": "90 seconds to 5 minutes",
      "cultivation": {
        "method": "Dry sifting",
        "lo

In [13]:
import os
import base64
from PIL import Image
from openai import OpenAI

# Run configuration: product category, model name, image folder, and API key
collection = "Hashish"
MODEL = "gpt-4o-mini"
FOLDER_PATH = f"pdf_products/{collection}"
API_KEY = os.getenv("sqdc_api_key")

# OpenAI client used for the chat-completion request
client = OpenAI(api_key=API_KEY)

# Function to encode image as a base64 string
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        str: The base64 encoding of the file contents.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode("utf-8")

# Function to resize image proportionally
def resize_image(image_path):
    """Return the path of an image whose short side is <= 768 px and long side <= 2000 px.

    If a ``resized_<file name>`` copy already exists in ``FOLDER_PATH`` it is
    returned without opening the source. If the source already satisfies both
    limits, ``image_path`` itself is returned and nothing is written.
    Otherwise a proportionally scaled copy is saved in ``FOLDER_PATH`` and
    its path returned.

    Args:
        image_path: Path of the source image.

    Returns:
        str: Path of the image to use (the original or its ``resized_`` copy).
    """
    resized_image_path = os.path.join(FOLDER_PATH, f"resized_{os.path.basename(image_path)}")

    if os.path.exists(resized_image_path):
        return resized_image_path

    with Image.open(image_path) as img:
        width, height = img.size
        short_side, long_side = min(width, height), max(width, height)
        if short_side <= 768 and long_side <= 2000:
            return image_path  # No need to resize

        if short_side > 768:
            new_short = 768
            new_long = int(long_side * (768 / short_side))
            if new_long > 2000:
                # Still too long after fitting the short side: fit the long side instead
                new_short = int(short_side * (2000 / long_side))
                new_long = 2000
        else:
            # Short side already fits but the long side exceeds 2000 px.
            # This case was previously saved unscaled.
            new_short = int(short_side * (2000 / long_side))
            new_long = 2000

        # Guard against a zero-pixel side for extreme aspect ratios
        new_short = max(1, new_short)

        new_size = (new_short, new_long) if width < height else (new_long, new_short)
        img.resize(new_size, Image.LANCZOS).save(resized_image_path)
        return resized_image_path

# Source PNG files of the folder; "resized_" copies are left out
image_files = [
    name for name in os.listdir(FOLDER_PATH)
    if name.lower().endswith('.png') and not name.startswith('resized_')
]

# Conversation sent to the model: system prompt and instruction; image
# messages are appended below
messages = [
    {"role": "system", "content": "Vous êtes un assistant spécialiste des bases de données."},
    {"role": "user", "content": "Analysez les images suivantes et extrayez le plus d'informations détaillées possible. Utilisez ces informations pour créer un schéma de validation MongoDB, même si le schéma n'est pas parfaitement structuré. Priorisez la richesse des données.N'utilise que des types 'string'"},
]

# Cap on the number of images attached, to fit within the token limit
max_images = 6  # Adjust based on your token calculations
image_count = 0

for image_file in image_files:
    # Stop once the cap has been reached
    if image_count >= max_images:
        break
    image_path = os.path.join(FOLDER_PATH, image_file)
    resized_image_path = resize_image(image_path)
    base64_image = encode_image(resized_image_path)
    image_part = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
    messages.append({"role": "user", "content": [image_part]})
    image_count += 1

# Single request covering the selected images
response = client.chat.completions.create(model=MODEL, messages=messages, temperature=0.0)

# Show the generated MongoDB schema
print(response.choices[0].message.content)

# Function to save schema to a text file
def save_schema_to_text(schema, collection_name):
    """Write ``schema`` to ``<collection_name>_schema.txt`` inside ``FOLDER_PATH``.

    The file is written as UTF-8 explicitly so accented characters in the
    text do not depend on the platform's default encoding; the code in this
    notebook that reads these files back opens them as UTF-8.

    Args:
        schema: Text to write.
        collection_name: Prefix of the output file name.
    """
    output_path = os.path.join(FOLDER_PATH, f"{collection_name}_schema.txt")
    with open(output_path, "w", encoding="utf-8") as text_file:
        text_file.write(schema)

save_schema_to_text(response.choices[0].message.content, collection)


Voici un schéma de validation MongoDB basé sur les informations extraites des images fournies. Chaque document représente un produit de haschisch avec des champs pertinents.

```json
{
  "type": "object",
  "properties": {
    "nom": {
      "type": "string"
    },
    "format": {
      "type": "string"
    },
    "variete": {
      "type": "string"
    },
    "couleur_interne": {
      "type": "string"
    },
    "couleur_externe": {
      "type": "string"
    },
    "texture": {
      "type": "string"
    },
    "methode_extraction": {
      "type": "string"
    },
    "effets_potentiels": {
      "type": "string"
    },
    "temps_apparition_effets": {
      "type": "string"
    },
    "origine": {
      "type": "string"
    }
  },
  "required": [
    "nom",
    "format",
    "variete",
    "couleur_interne",
    "couleur_externe",
    "texture",
    "methode_extraction",
    "effets_potentiels",
    "temps_apparition_effets",
    "origine"
  ]
}
```

### Exemples de documents

1. *

In [15]:
import os
import base64
from PIL import Image
from openai import OpenAI

# Run configuration: model name, root folder of the per-category image
# folders, and API key
MODEL = "gpt-4o-mini"
BASE_FOLDER_PATH = "pdf_products"
API_KEY = os.getenv("sqdc_api_key")

# OpenAI client used by process_folder
client = OpenAI(api_key=API_KEY)

# Function to encode image as a base64 string
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        str: The base64 encoding of the file contents.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode("utf-8")

# Function to resize image proportionally
def resize_image(image_path, output_folder):
    """Return the path of an image whose short side is <= 768 px and long side <= 2000 px.

    If a ``resized_<file name>`` copy already exists in ``output_folder`` it
    is returned without opening the source. If the source already satisfies
    both limits, ``image_path`` itself is returned and nothing is written.
    Otherwise a proportionally scaled copy is saved in ``output_folder`` and
    its path returned.

    Args:
        image_path: Path of the source image.
        output_folder: Directory in which the ``resized_`` copy is stored.

    Returns:
        str: Path of the image to use (the original or its ``resized_`` copy).
    """
    resized_image_path = os.path.join(output_folder, f"resized_{os.path.basename(image_path)}")

    if os.path.exists(resized_image_path):
        return resized_image_path

    with Image.open(image_path) as img:
        width, height = img.size
        short_side, long_side = min(width, height), max(width, height)
        if short_side <= 768 and long_side <= 2000:
            return image_path  # No need to resize

        if short_side > 768:
            new_short = 768
            new_long = int(long_side * (768 / short_side))
            if new_long > 2000:
                # Still too long after fitting the short side: fit the long side instead
                new_short = int(short_side * (2000 / long_side))
                new_long = 2000
        else:
            # Short side already fits but the long side exceeds 2000 px.
            # This case was previously saved unscaled.
            new_short = int(short_side * (2000 / long_side))
            new_long = 2000

        # Guard against a zero-pixel side for extreme aspect ratios
        new_short = max(1, new_short)

        new_size = (new_short, new_long) if width < height else (new_long, new_short)
        img.resize(new_size, Image.LANCZOS).save(resized_image_path)
        return resized_image_path

# Function to process a folder
def process_folder(folder_path):
    """Request a MongoDB validation schema for the PNG images of one folder.

    A subset of the folder's source images (limited by ``max(3, 20 % of the
    file count)``) is sent to the chat-completion API in a single request.
    The reply is printed and written, as UTF-8, to
    ``<folder name>_schema.txt`` inside ``folder_path``.

    A folder without any source PNG is skipped: no request is sent and no
    file is written.

    Args:
        folder_path: Directory holding the ``.png`` files of one collection.
    """
    # Source PNG files only; "resized_" copies written by resize_image are ignored
    image_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.png') and not f.startswith('resized_')]

    if not image_files:
        # Nothing to analyze: avoid sending a request that contains no image
        print(f"No PNG images found in '{folder_path}', skipping")
        return

    # Conversation sent to the model: system prompt, instruction, then images
    messages = [
        {"role": "system", "content": "Vous êtes un assistant spécialiste des bases de données."}
    ]
    messages.append({"role": "user", "content": "Analysez les images suivantes. Utilisez les informations pour créer un schéma de validation MongoDB, même si le schéma n'est pas parfaitement structuré. Priorisez la richesse des données.N'utilise que des type 'string'"})

    # Cap on attached images; may be a float, which the >= comparison below handles
    max_images = max(3,len(image_files)*0.2)

    for image_count, image_file in enumerate(image_files):
        if image_count >= max_images:
            break
        image_path = os.path.join(folder_path, image_file)
        resized_image_path = resize_image(image_path, folder_path)
        base64_image = encode_image(resized_image_path)
        messages.append(
            {"role": "user", "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
            ]}
        )

    # Send the request to OpenAI API
    response = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        temperature=0.0,
    )

    # Print the generated MongoDB schema
    schema = response.choices[0].message.content
    print(schema)

    # Save the reply next to the images. UTF-8 is set explicitly so accented
    # characters do not depend on the platform's default encoding.
    collection_name = os.path.basename(folder_path)
    output_path = os.path.join(folder_path, f"{collection_name}_schema.txt")
    with open(output_path, "w", encoding="utf-8") as text_file:
        text_file.write(schema)

# Run process_folder on every directory directly under BASE_FOLDER_PATH;
# plain files at that level are ignored
for collection in os.listdir(BASE_FOLDER_PATH):
    collection_path = os.path.join(BASE_FOLDER_PATH, collection)
    if not os.path.isdir(collection_path):
        continue
    process_folder(collection_path)


Voici un schéma de validation MongoDB basé sur les informations des images fournies. Tous les types de données sont définis comme des chaînes de caractères (`string`).

```json
{
  "type": "object",
  "properties": {
    "produit": {
      "type": "string"
    },
    "marque": {
      "type": "string"
    },
    "description": {
      "type": "string"
    },
    "ingredients": {
      "type": "string"
    },
    "mode_de_consommation": {
      "type": "string"
    },
    "emballage": {
      "type": "string"
    },
    "allergenes": {
      "type": "string"
    },
    "effets_potentiels": {
      "type": "string"
    },
    "temps_apparition_effets": {
      "type": "string"
    },
    "avertissement": {
      "type": "string"
    }
  },
  "required": [
    "produit",
    "marque",
    "description",
    "ingredients",
    "mode_de_consommation"
  ]
}
```

### Explication des champs :
- **produit** : Nom du produit (ex. "Poudre hydrosoluble avec CBD").
- **marque** : Marque du produit 

In [1]:
import json

def extract_json_schema(file_path):
    """Parse the first ```json fenced block found in a text file.

    Fence lines are matched after stripping surrounding whitespace, so a
    fence with trailing spaces, or a closing fence that is the last line of
    the file without a newline, is still recognized.

    Args:
        file_path: Path of a UTF-8 text file containing a ```json fence.

    Returns:
        The object decoded from the JSON text between the opening fence and
        the next closing fence.

    Raises:
        ValueError: If no opening or closing fence is found, or if the
            enclosed text is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # Stripped view used only to locate the fences; the JSON text itself is
    # taken from the untouched lines.
    markers = [line.strip() for line in lines]

    try:
        start = markers.index('```json') + 1
    except ValueError:
        raise ValueError(f"No ```json block found in {file_path!r}") from None
    try:
        end = markers.index('```', start)
    except ValueError:
        raise ValueError(f"Unterminated ```json block in {file_path!r}") from None

    json_schema = ''.join(lines[start:end])
    return json.loads(json_schema)

def convert_to_mongodb_schema(file_paths):
    """Load several schema files into a dict keyed by schema name.

    The schema name is the file's base name with its last ``_``-separated
    part removed, e.g. ``Autres Comestibles_schema.txt`` gives
    ``Autres Comestibles`` and ``Huiles_CBD_schema.txt`` gives
    ``Huiles_CBD``. Cutting at the last underscore, not the first, keeps
    names that themselves contain underscores distinct.

    Args:
        file_paths: Iterable of paths, each passed to ``extract_json_schema``.

    Returns:
        dict: Schema name mapped to the object parsed from the file. If two
        paths yield the same name, the later one wins.
    """
    import os  # local import: the cell defining this function only imports json

    schemas = {}
    for file_path in file_paths:
        schema = extract_json_schema(file_path)
        # basename handles the platform's path separators
        schema_name = os.path.basename(file_path).rsplit('_', 1)[0]
        schemas[schema_name] = schema
    return schemas

# Example usage: schema files to load, one per product category
file_paths = [
    'pdf_products/Atomiseurs/Atomiseurs_schema.txt',
    'pdf_products/Autres Comestibles/Autres Comestibles_schema.txt',
]

mongodb_schemas = convert_to_mongodb_schema(file_paths)

# Show every loaded schema as pretty-printed JSON, keeping accented characters readable
for name in mongodb_schemas:
    schema = mongodb_schemas[name]
    print(f"{name} Schema:")
    print(json.dumps(schema, indent=4, ensure_ascii=False))


Atomiseurs Schema:
{
    "type": "object",
    "properties": {
        "produit": {
            "type": "string",
            "description": "Nom du produit"
        },
        "marque": {
            "type": "string",
            "description": "Marque du produit"
        },
        "format": {
            "type": "number",
            "description": "Volume net du produit en ml"
        },
        "ingredients": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "description": "Liste des ingrédients"
        },
        "extraction": {
            "type": "string",
            "description": "Méthode d'extraction utilisée"
        },
        "dosage": {
            "type": "string",
            "description": "Dosage recommandé par activation"
        },
        "effets": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "description": "Effets potentiels du pr