In [6]:
from openai import OpenAI
import tiktoken
import os

In [2]:
# Build the API client from the environment. Never print or otherwise display the
# key: cell outputs are saved inside the notebook and get shared/committed with it.
# Indexing os.environ (rather than .get) raises KeyError right here when the
# variable is missing, instead of failing later on the first request.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

[output removed: this cell printed the secret API key. Treat the exposed key as compromised and revoke/rotate it.]


The ChatGPT API works by sending a query to the model. Because these models make use of chat history/context, every query can (and usually needs to) include the full message history as context.

Keep in mind, however, that every model has a maximum context length (4096 tokens for the base gpt-3.5-turbo models; larger for the 16k/32k/128k variants listed below), so you need to stay under that. There are lots of options to work around this, the simplest being to truncate earlier messages or to summarize and condense the previous message history.

In [3]:
# Price table, expressed per 1,000,000 tokens (currency presumably USD -- confirm
# against the current pricing page before relying on these numbers).
# Entries with separate prompt/completion pricing map to a dict with 'input' and
# 'output' keys; the remaining entries carry a single flat price.
ModelCosts = {
    'gpt-4-128k': {'input': 10.00, 'output': 30.00},
    'gpt-4-8k': {'input': 30.00, 'output': 60.00},
    'gpt-4-32k': {'input': 60.00, 'output': 120.00},
    'gpt-3.5-turbo-1106': {'input': 1.00, 'output': 2.00},
    'gpt-3.5-turbo-0613': {'input': 1.50, 'output': 2.00},
    'gpt-3.5-turbo-16k-0613': {'input': 3.00, 'output': 4.00},
    'gpt-3.5-turbo-0301': {'input': 1.50, 'output': 2.00},
    'davinci-002': 2.00,
    'babbage-002': 0.40,
    'text-embedding-3-small': 0.02,
    'text-embedding-3-large': 0.13,
    'ada v2': 0.10,
}
ModelNames = list(ModelCosts.keys())

# Name the default explicitly instead of picking it by position (ModelNames[3]):
# inserting or reordering a row in the price table must not silently change
# which model every later cell talks to.
DefaultModel = 'gpt-3.5-turbo-1106'

In [15]:
# Estimated output-to-input token ratio for each usage scenario.
# A ratio r means: expected output tokens ~= r * input tokens.
token_ratio_estimates = dict(
    email_response=1.5,        # replies run about 1.5x the length of the input
    content_summary=0.8,       # summaries come out shorter than the source text
    factual_answer=0.5,        # direct factual answers are concise
    creative_story=3.0,        # stories can far outgrow the initial prompt
    detailed_explanation=20,   # detailed or complex answers can be much longer
    limit=0,                   # handled specially: output is capped, not scaled
)

# Output token budget used for the 'limit' scenario.
max_output_tokens = 100

def calculate_cost(text, model_name = DefaultModel, scenario='limit', decimals=3):
    """Estimate the cost of sending `text` to a model and receiving a reply.

    Input tokens are counted with tiktoken; output tokens are estimated from
    `scenario`. Prices come from `ModelCosts` (per 1,000,000 tokens).

    Parameters
    ----------
    text : str
        Prompt text to be tokenized.
    model_name : str
        Key into `ModelCosts`; also used to select the tokenizer.
    scenario : str
        'limit' assumes exactly `max_output_tokens` output tokens. Any other
        value is looked up in `token_ratio_estimates` and the output size is
        estimated as int(input_tokens * ratio); unknown scenarios use ratio 1.
    decimals : int or None
        Number of decimal places in the result (default 3, the historical
        behaviour). NOTE: with 3 decimals most short prompts round to 0.0;
        pass a larger value, or None for the unrounded figure.

    Returns
    -------
    float
        Estimated cost, in the unit used by `ModelCosts`.

    Raises
    ------
    KeyError
        If `model_name` is not present in `ModelCosts`.
    """
    # Look the price up first so an unknown model fails before any tokenizing work.
    cost_per_million = ModelCosts[model_name]

    try:
        enc = tiktoken.encoding_for_model(model_name)
    except KeyError:
        # Not every key of ModelCosts is a model name tiktoken can map
        # (e.g. 'ada v2'). Fall back to cl100k_base so the estimate is still
        # produced; token counts are then approximate for that model.
        enc = tiktoken.get_encoding("cl100k_base")
    input_tokens_count = len(enc.encode(text))

    # Determine the output token count based on the scenario.
    if scenario == 'limit':
        output_token_count = max_output_tokens
    else:
        # Use 1 as the default ratio if the scenario is not found.
        ratio = token_ratio_estimates.get(scenario, 1)
        output_token_count = int(input_tokens_count * ratio)

    if isinstance(cost_per_million, dict):
        # Separate input and output prices: bill each side at its own rate.
        input_cost_per_million = cost_per_million.get('input', 0)
        output_cost_per_million = cost_per_million.get('output', 0)
        total_cost = (
            (input_tokens_count / 1_000_000) * input_cost_per_million
            + (output_token_count / 1_000_000) * output_cost_per_million
        )
    else:
        # Single flat price: bill input plus estimated output at the same rate.
        total_token_count = input_tokens_count + output_token_count
        total_cost = (total_token_count / 1_000_000) * cost_per_million

    if decimals is None:
        return total_cost
    return round(total_cost, decimals)


In [21]:
# Minimal single-turn request: one user message sent to the default model.
message = "Say this is a test"
chat_completion = client.chat.completions.create(
    model=DefaultModel,
    messages=[{"role": "user", "content": message}],
)

In [28]:
# A notebook cell only displays its LAST bare expression, so the token count
# has to be printed explicitly or it is evaluated and silently discarded.
print(f"total tokens: {chat_completion.usage.total_tokens}")
chat_completion.choices[0].message.content

'This is a test.'

### Roles
There are 3 roles:

- User - This is meant to mimic the end-user that is interacting with the assistant. This is the role that you will be using most of the time.
- System - This role carries background nudges and prompts that you might want to inject into the conversation, but that don't need a response. At the moment, system is weighted less than "user," so in my opinion it still seems more useful to use the user role for encouraging specific behaviors.
- Assistant - This is the agent's response. Often this will be actual responses, but keep in mind... you will be able to inject your own responses here, so you can actually have the agent say whatever you want. This is a bit of a hack, but it's a fun one and can be useful in certain situations.

### History
The API itself doesn't manage your history. For now, we'll keep it in a `message_history` variable, but you might use a database or some other storage method.

In [None]:
# Running conversation, as a list of {"role": ..., "content": ...} dicts in the
# shape passed to the chat completions endpoint. Re-running this cell resets it.
message_history = []

def chat(user_input, role="user", model=DefaultModel, history=None):
    """Send one message along with the prior conversation and record the reply.

    Parameters
    ----------
    user_input : object
        Message content; converted to a string before sending.
    role : str
        Role attached to the outgoing message (default "user").
    model : str
        Model name passed to the API.
    history : list or None
        Conversation to extend in place. Defaults to the module-level
        `message_history`.

    Returns
    -------
    The content of the first choice in the completion.

    Any exception raised by the API call propagates unchanged; in that case
    the history is left exactly as it was before the call.
    """
    if history is None:
        history = message_history

    outgoing = {"role": role, "content": str(user_input)}
    # Send history + the new message WITHOUT mutating history yet: if the
    # request fails, a retry must not find a dangling, unanswered turn.
    completion = client.chat.completions.create(
        messages=[*history, outgoing],
        model=model,
    )
    reply_content = completion.choices[0].message.content

    # Commit both turns only after a successful round trip.
    history.append(outgoing)
    history.append({"role": "assistant", "content": str(reply_content)})
    return reply_content