In [1]:
import os
import csv
import json
import time
import multiprocessing
from tqdm import tqdm
## Azure - GPT initialization
from azure.identity import AzureCliCredential, get_bearer_token_provider
from openai import AzureOpenAI

In [2]:
# Credential object from azure-identity's AzureCliCredential
# (presumably relies on an existing `az login` session — confirm for your environment).
credential = AzureCliCredential()

# Bearer-token provider built from `credential` for the Cognitive Services scope;
# it is handed to the client below in place of an API key.
token_provider = get_bearer_token_provider(
    credential,
    "https://cognitiveservices.azure.com/.default"
)

# Module-level Azure OpenAI client; `process_single_text` reads this global.
# NOTE(review): the endpoint and API version are hard-coded here — consider
# moving them to a config cell or environment variables.
aoiclient = AzureOpenAI(
    azure_endpoint="https://t2vgoaigpt4o.openai.azure.com/",
    azure_ad_token_provider=token_provider,
    api_version="2024-02-15-preview",
    max_retries=5,  # retry budget passed to the SDK client
)

In [3]:
# Process a single text input (invoked once per text by the pool workers)
def process_single_text(text, sleep_seconds=5):
    """Ask the Azure OpenAI deployment for the object nouns found in ``text``.

    The prompt instructs the model to return only object names (no background
    nouns, no descriptions), separated by periods.

    Args:
        text: Sentence or caption to extract object nouns from.
        sleep_seconds: Pause, in seconds, after a successful request as a crude
            client-side throttle. Defaults to 5, the previously hard-coded value.

    Returns:
        The message content of the first completion choice, or ``None`` when
        the request (or reading its response) raised an exception.
    """
    # Adjacent string literals are used instead of backslash continuations
    # inside one literal: the continuations pulled each line's source
    # indentation into the prompt as long runs of spaces.
    prompt = (
        "Your role is to extract object nouns from the given text. "
        "From the given text, extract objects nouns, and do not include nouns that may refer to background. "
        "The objects would be used for referring the object inside object segmentation model. "
        "We aim to remove segmented object to only keep the background. "
        "Try to use the word inside the given sentence to extract the object nouns. "
        "Give me object name only, without any further context or description. "
        "The object nouns should be separated by a period '.'. "
        f"{text}"
    )
    messages = [{"role": "user", "content": prompt}]

    try:
        # One request per text; low temperature/top_p keep the output terse.
        response = aoiclient.chat.completions.create(
            model="gpt4o",
            messages=messages,
            temperature=0.2,
            max_tokens=100,
            top_p=0.2,
            frequency_penalty=0,
            presence_penalty=0,
            stop=None,
        )
        choice = response.choices[0].message.content
    except Exception as e:
        # Best-effort: report the failure and let the batch continue.
        print(f"Error processing text: {text} - {e}")
        return None

    # Throttle after a successful call to stay under the service rate limit.
    time.sleep(sleep_seconds)
    return choice

In [4]:
# Fan all text entries out over a pool of workers
def batch_process_through_gpt_api_multiprocessing(text_list, num_workers=4):
    """Run ``process_single_text`` over every entry, ``num_workers`` at a time.

    A thread pool is used rather than a process pool: the work is I/O-bound
    (waiting on HTTP responses), and a process pool must pickle the worker
    function by reference to ``__main__``, which fails for functions defined
    in a notebook when the start method is ``spawn``.

    Args:
        text_list: Iterable of input texts. It is materialised into a list so
            generators are accepted and the progress bar has a total.
        num_workers: Number of concurrent workers in the pool.

    Returns:
        A list with one result per input text, in input order. Entries are
        whatever ``process_single_text`` returned for that text.
    """
    # Local import: the notebook's import cell only imports the top-level
    # `multiprocessing` package, which does not guarantee `.pool` is loaded.
    from multiprocessing.pool import ThreadPool

    texts = list(text_list)
    if not texts:
        # Nothing to do; skip creating a pool.
        return []

    with ThreadPool(processes=num_workers) as pool:
        # imap yields results in submission order, so outputs line up with inputs.
        objects_list = list(tqdm(pool.imap(process_single_text, texts), total=len(texts)))

    return objects_list