In [1]:
import ollama

def ask_question_local_llm(prompt, model='llama3'):
    """Ask a local model (via ``ollama.chat``) a question and return its reply.

    Args:
        prompt: The user's question, sent as the ``user`` message.
        model: Name of the model passed to ``ollama.chat``. Defaults to
            ``'llama3'``, the value this function previously hard-coded.

    Returns:
        The ``content`` field of the ``message`` entry in the chat response.
    """
    response = ollama.chat(
        model=model,
        messages=[
            # The system message asks the model to keep its answer to one line.
            {"role": "system", "content": "You are a helpful AI assistant - Respond in one line"},
            {"role": "user", "content": prompt}
        ]
    )
    return response['message']['content']

In [5]:

import os
from dotenv import load_dotenv
from openai import OpenAI

# Load variables from the .env file one directory up. override=True is passed
# so values from that file take precedence over variables already set.
load_dotenv(dotenv_path="../.env", override=True)

# Read the key explicitly and hand it to the client that
# ask_question_open_ai uses for every request.
my_api_key = os.getenv("OPENAI_API_KEY")
my_client = OpenAI(api_key=my_api_key)

def ask_question_open_ai(prompt, model="gpt-5-nano"):
    """Ask an OpenAI chat model a question and return its reply text.

    The request is sent through the module-level ``my_client``.

    Args:
        prompt: The user's question, sent as the ``user`` message.
        model: Name of the model passed to ``chat.completions.create``.
            Defaults to ``"gpt-5-nano"``, the value this function previously
            hard-coded.

    Returns:
        ``message.content`` of the first choice in the completion response.
    """
    llm_response = my_client.chat.completions.create(
        model=model,
        messages=[
            # The system message asks the model to keep its answer concise.
            {"role": "system", "content": "You are a helpful assistant. Answer as concisely as possible."},
            {"role": "user", "content": prompt}
        ]
    )
    return llm_response.choices[0].message.content


In [6]:
# Send one fixed question to both backends so the answers can be compared.
prompt = "What is the capital of France?"

response_local_llm = ask_question_local_llm(prompt)
response_open_ai = ask_question_open_ai(prompt)

# One print call, one answer per line.
print(
    f"Response from local LLM: {response_local_llm}",
    f"Response from OpenAI: {response_open_ai}",
    sep="\n",
)

Response from local LLM: The capital of France is Paris.
Response from OpenAI: Paris.


In [7]:
import time


def _timed_call(ask, question):
    """Run ``ask(question)`` and return ``(answer, elapsed_seconds)``."""
    # perf_counter is a monotonic clock intended for measuring intervals;
    # time.time() follows the wall clock and can jump if it is adjusted.
    started = time.perf_counter()
    answer = ask(question)
    return answer, time.perf_counter() - started


# Interactive loop: send each question to both backends, report how long each
# one took, and stop when the user types "quit" (case-insensitive).
while True:
    # Strip surrounding whitespace so input such as "quit " still exits.
    user_prompt = input("Ask something: ").strip()

    if user_prompt.lower() == 'quit':
        print("Exiting...")
        break

    if not user_prompt:
        # Nothing was typed; don't spend two model calls on an empty prompt.
        continue

    print ("-------------------OPEN AI RESPONSE-------------------")
    response_openai, elapsed = _timed_call(ask_question_open_ai, user_prompt)
    print(f"Time taken by OpenAI: {elapsed} seconds")
    print("\nOpenAI says:", response_openai)

    print ("\n\n-------------------LOCAL LLM RESPONSE-------------------")
    response_local, elapsed = _timed_call(ask_question_local_llm, user_prompt)
    print(f"Time taken by Local LLM: {elapsed} seconds")
    print("\nLocal LLM says:", response_local)

    # Pause for 3 seconds before asking for the next question.
    time.sleep(3)

-------------------OPEN AI RESPONSE-------------------
Time taken by OpenAI: 7.947659015655518 seconds

OpenAI says: Best practice: treat API keys as secrets and store them in a dedicated secret management system, not in code or config files. Key steps:

- Use a secrets manager (e.g., AWS Secrets Manager, Azure Key Vault, Google Secret Manager, HashiCorp Vault) or an external vault.
- Never hard-code keys or commit them to VCS; avoid storing in config files checked into source control.
- Restrict access with least privilege: per-service/per-environment keys, and fine-grained IAM roles or policies.
- Fetch keys at runtime (or on deployment) rather than bundling them in images; if possible, inject via environment variables or the secret store API.
- Rotate keys regularly; enable automatic rotation if available; revoke old keys promptly.
- Encrypt secrets at rest and in transit; use your cloud KMS or built-in encryption features.
- Enable auditing and alerts for secret access and usage an