## Named Entity Recognition Using OpenAI API

Installing dependencies:

    Install the OpenAI library and python-dotenv

In [None]:
pip install openai python-dotenv

* Extract the named entities from the text using the OpenAI API response

In [None]:
import os
import openai
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment
load_dotenv()

# Read the OpenAI API key from the environment; raises KeyError when it is unset.
# Keep the key out of the notebook: define OPENAI_API_KEY in .env or in the shell.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Set OpenAI API Key
openai.api_key = OPENAI_API_KEY

# Example input text
text = "Virat kohli is one of the best cricketer india has found."

# Create prompt for getting named entities from text.
# The output schema is spelled out explicitly (a JSON array of objects with
# "text" and "type" keys) so that the completion can be parsed with json.loads
# and indexed by those keys, instead of leaving the JSON layout up to the model.
prompt = (
    "Run named entity recognition (NER) on the following text delimited with ###.\n"
    "Identify the named entities in the text along with their type. "
    'Respond with only a JSON array of objects, each with the keys "text" and "type".\n'
    f"###{text}###"
)

# Use the OpenAI API to perform named-entity recognition.
# NOTE: `openai.Completion` is the pre-1.0 SDK interface (requires openai<1).
# `text-davinci-002` has been retired by OpenAI; `gpt-3.5-turbo-instruct` is the
# documented replacement for the completions endpoint, and `model=` replaces the
# deprecated `engine=` parameter.
response = openai.Completion.create(
    model="gpt-3.5-turbo-instruct",  # Completion model to use
    prompt=prompt,                   # Prompt to generate completion
    temperature=0,                   # Higher temperature results in more random completions
    n=1,                             # Number of completions to generate
    max_tokens=1024,                 # Upper bound on the number of tokens generated
)

# Print the response (surrounding whitespace/newlines from the completion removed)
entities = response.choices[0]['text'].strip()
print(entities)

* Format the Completion API output into tabular format

In [None]:
import json

# Convert the entities (raw completion text from the previous cell) to a list of
# dictionaries. Each item is expected to carry a "text" and a "type" key.
data = json.loads(entities)

# Column widths: the longest value in each column, but never narrower than the
# column label itself. Seeding max() with the label length keeps the header
# aligned with short values and avoids a ValueError when `data` is empty.
max_text_length = max([len("Text")] + [len(item["text"]) for item in data])
max_entity_length = max([len("Type")] + [len(item["type"]) for item in data])

# Header row
header = f"{'Text':<{max_text_length}} | {'Type':<{max_entity_length}}"

# Separator row (the extra 3 characters cover the " | " column divider)
separator = '-' * (max_text_length + max_entity_length + 3)

# Data rows, left-aligned and padded to the column widths
data_rows = [f"{item['text']:<{max_text_length}} | {item['type']:<{max_entity_length}}" for item in data]

# Print the table
print(header)
print(separator)
for row in data_rows:
    print(row)
