In [None]:
import os 
from openai import OpenAI
from dotenv import load_dotenv

from typing import Literal
from pydantic import BaseModel
from pydantic import Field
import json

load_dotenv()
open_api_key=os.getenv("OPEN_API_KEY")

#Choosing model save tokens cost as expecting just generating text
openai_model_id = "gpt-4o-mini"


In [6]:

## OpenAI is a wrapper in langchain used to access openai API internally
#openai_client = OpenAI()
llm = OpenAI(
    model=openai_model_id,         
    temperature=0,# make sure the same reply and standard responses especially in foundation
    api_key=open_api_key
)

import tiktoken

def count_input_tokens(messages, model=openai_model_id):
    encoding = tiktoken.encoding_for_model(model)

    # Rough estimate; OpenAI uses specific rules depending on the model
    num_tokens = 0
    for message in messages:
        # Each message has {role, content}, sometimes name
        num_tokens += 4  # every message has a role and tokens overhead
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
    num_tokens += 2  # every reply is primed with <|start|>assistant
    return num_tokens
# Compute cost
def compute_cost(input_tokens, output_tokens, model="gpt-4o-mini"):
    if model == "gpt-3.5-turbo":
        input_cost = (input_tokens / 1000) * 0.0015
        output_cost = (output_tokens / 1000) * 0.002
    elif model == "gpt-4":
        input_cost = (input_tokens / 1000) * 0.03
        output_cost = (output_tokens / 1000) * 0.06
    elif model == "gpt-4-turbo":
        input_cost = (input_tokens / 1000) * 0.01
        output_cost = (output_tokens / 1000) * 0.03
    elif model == "gpt-4o-mini":
        input_cost = (input_tokens / 1_000_000) * 0.15
        output_cost = (output_tokens / 1_000_000) * 0.60
    else:
        raise ValueError(f"Unknown model: {model}")

    return input_cost + output_cost

TypeError: OpenAI.__init__() got an unexpected keyword argument 'model'

In [15]:
story = """ USA the leader of the world
"""

In [None]:
##just like defining allowed choices in a listbox (dropdown) 
StoryCategory = Literal[
    "politics", "sports", "art", "technology", "economy",
    "health", "entertainment", "science",
    "not_specified"
]

In [None]:
# we Define the object class that will map the outout to this class
# this is not a simple class , but class with validation as ORM 
class NewsDetails(BaseModel):
    
 #... means mandatory
 # description help the model to more concentrate on the task
    story_category: StoryCategory = Field(..., description="Category of the news story.")

  

In [None]:
NewsDetails.model_json_schema()

{'properties': {'story_category': {'description': 'Category of the news story.',
   'enum': ['politics',
    'sports',
    'art',
    'technology',
    'economy',
    'health',
    'entertainment',
    'science',
    'not_specified'],
   'title': 'Story Category',
   'type': 'string'}},
 'required': ['story_category'],
 'title': 'NewsDetails',
 'type': 'object'}

In [16]:
details_extraction_messages = [
    {
        "role": "system",
        "content": "\n".join([
            "You are an NLP data paraser.",
            "Extract the category as mentioned in text.",
            "Do not generate any introduction or conclusion, be straight to the point"
        ])
    },
    {
        "role": "user",
        "content": "\n".join([
            "## Story:",
            story.strip(),
            "",

            "## Pydantic Details:",
            json.dumps(
                NewsDetails.model_json_schema(), ensure_ascii=False
            ),
            "",

            "## Story Details:",
            "```json"
        ])
    }
]

In [19]:
details_extraction_messages_evaluate = [
    {
        "role": "system",
        "content": "\n".join([
            "provide the type of this news"
        ])
    },
    {
        "role": "user",
        "content": "\n".join([
            "## Story:",
            story.strip(),
            "",


            "## Story Details:",
            "```json"
        ])
    }
]

In [20]:
## evaluate the output without fine-yuning
chat_completion = openai_client.chat.completions.create(
    messages=details_extraction_messages_evaluate,
    model=openai_model_id,
    temperature=0.2,
)
print(chat_completion.choices[0].message.content)

The type of this news is likely a political or international relations commentary or analysis. It appears to focus on the role of the United States in global leadership.


In [22]:
## evaluate the output without fine-yuning
chat_completion = openai_client.chat.completions.create(
    messages=details_extraction_messages,
    model=openai_model_id,
    temperature=0.2,
)
#print(chat_completion.choices[0].message.content)
print(json.loads(chat_completion.choices[0].message.content)["story_category"])

input_tokens = count_input_tokens(details_extraction_messages, model=openai_model_id)
output_tokens = chat_completion.usage.completion_tokens
total_tokens = chat_completion.usage.total_tokens

print("Input Tokens:", input_tokens)
print("Output Tokens:", output_tokens)
print("Total Tokens:", total_tokens)

# Display cost
cost = compute_cost(input_tokens, output_tokens, model=openai_model_id)
print(f"Estimated Cost: ${cost:.6f}")

politics
Input Tokens: 150
Output Tokens: 8
Total Tokens: 157
Estimated Cost: $0.000027
