In [37]:
import os
import json
import openai
import pandas as pd
from dotenv import load_dotenv
from openai import AzureOpenAI
load_dotenv(override=True)

aoai_client = AzureOpenAI(
  azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"), 
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
  api_version="2024-07-01-preview"
)

In [38]:
names_df = pd.read_parquet("../data/processed/full_names.parquet")

### Extracting first and last names using Azure OpenAI SDK

In [39]:
#https://github.com/derek73/python-nameparser
instructions = """Can you extract the first and last name from the following text?

##Important
Please provide the output in a json with the keys 'first_name' and 'last_name'"""

def GetFirstLastNames(full_name):
    query = "Please generate a random name for me. Please use the following first and last name" + full_name
    messages = [{"role":"system","content":instructions}, 
               {"role":"user","content":query}]

    response = aoai_client.chat.completions.create(model="gpt4o",  
                                        messages = messages, 
                                        temperature=0.9,  
                                        max_tokens=2000,
                                        response_format={ "type": "json_object" })
                                        #seed = 42)
    period = response.choices[0].message.content

    return json.loads(period)

In [40]:
names_df['extraction'] = names_df['full_name'].apply(lambda x: GetFirstLastNames(x))
names_df.head()

### Extracting first and last names using Azure API

In [33]:
import requests

In [34]:
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
api_key=os.getenv("AZURE_OPENAI_API_KEY")
api_version="2024-07-01-preview"
deployment_name = "gpt4o"

headers = {"api-key": api_key}


instructions = """Can you extract the first and last name from the following text?

##Important
Please provide the output in a json with the keys 'first_name' and 'last_name'"""

query = "Please generate a random name for me. Please use the following first and last name" + "Jose Medina Gomez"

data =  [{"role":"system","content":"test"}, {"role":"user","content":"hello"}]
payload = str({"messages": data, "temperature": 0.9, "max_tokens": 2000}).replace("'", '"')

In [35]:
def GetFirstLastNamesAPI(full_name, temperature=0.9, max_tokens=2000):

    instructions = """Can you extract the first and last name from the following text?

    ##Important
    Please provide the output in a json with the keys 'first_name' and 'last_name'"""

    question = "Please responde ina json with the first and last name for me. Please use the following full name" + full_name
    messages =  [{"role":"system","content":instructions}, {"role":"user","content":question}]

    response_format=  { "type": "json_object" }

    url = f"{azure_endpoint}/openai/deployments/{deployment_name}/chat/completions?api-version={api_version}"
    payload = {
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "response_format": { "type": "json_object" }
    }
    response = requests.post(url, headers=headers, json=payload)
    print(response.json())
    return json.loads(response.json()['choices'][0]['message']['content'])

In [36]:
GetFirstLastNamesAPI(full_name = names_df['full_name'][2])

{'choices': [{'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}, 'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': '{"first_name": "Chris", "last_name": "Toussaint"}', 'role': 'assistant'}}], 'created': 1727140354, 'id': 'chatcmpl-AAoOQSna4XfyU2Mt3QsMinBuBDdUC', 'model': 'gpt-4o-2024-08-06', 'object': 'chat.completion', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'system_fingerprint': 'fp_67802d9a6d', 'usage': {'completion_tokens': 16, 'prompt_tokens': 80, 'total_tokens': 96}}


{'first_name': 'Chris', 'last_name': 'Toussaint'}