In [2]:
from openai import OpenAI
from dotenv import load_dotenv

In [3]:
load_dotenv()

True

In [4]:
client = OpenAI()

In [6]:
# First chat request: the system message sets the assistant's persona and the
# user message carries the actual task. The result is kept in `completion`
# so the following cells can inspect it.
completion = client.chat.completions.create(
    messages=[
        dict(role="system", content="You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."),
        dict(role="user", content="Compose a poem that explains the concept of recursion in programming."),
    ],
    model="gpt-3.5-turbo",
)

In [7]:
completion

ChatCompletion(id='chatcmpl-9hNlnMH9UU54foIARPfdf4NFgS8gM', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="In the realm where code doth weave its dance,\nThere strides a concept, bold and grand,\nRecursion, a loop of endless trance,\nA tale of elegance in programming land.\n\nLike a mirror reflecting its own reflection,\nRecursion calls upon itself with affection,\nA function that calls its own kind,\nA loop that spirals through mind.\n\nIn layers deep, the function dives,\nEach call anew, as if it strives,\nTo solve a problem piece by piece,\nIn a recursive bliss, a cycle of release.\n\nThrough twists and turns, it loops and winds,\nUnraveling problems that once confined,\nA loop of beauty, a loop of grace,\nRecursion dances in a coding maze.\n\nSo embrace this concept, do not fear,\nFor in its depths, solutions appear,\nIn a poetic dance of function's lore,\nRecursion sings, forevermore.", role='assistant', function_call=None, too

In [11]:
completion.choices[0].message.content

"In the realm where code doth weave its dance,\nThere strides a concept, bold and grand,\nRecursion, a loop of endless trance,\nA tale of elegance in programming land.\n\nLike a mirror reflecting its own reflection,\nRecursion calls upon itself with affection,\nA function that calls its own kind,\nA loop that spirals through mind.\n\nIn layers deep, the function dives,\nEach call anew, as if it strives,\nTo solve a problem piece by piece,\nIn a recursive bliss, a cycle of release.\n\nThrough twists and turns, it loops and winds,\nUnraveling problems that once confined,\nA loop of beauty, a loop of grace,\nRecursion dances in a coding maze.\n\nSo embrace this concept, do not fear,\nFor in its depths, solutions appear,\nIn a poetic dance of function's lore,\nRecursion sings, forevermore."

In [12]:
print(completion.choices[0].message.content)

In the realm where code doth weave its dance,
There strides a concept, bold and grand,
Recursion, a loop of endless trance,
A tale of elegance in programming land.

Like a mirror reflecting its own reflection,
Recursion calls upon itself with affection,
A function that calls its own kind,
A loop that spirals through mind.

In layers deep, the function dives,
Each call anew, as if it strives,
To solve a problem piece by piece,
In a recursive bliss, a cycle of release.

Through twists and turns, it loops and winds,
Unraveling problems that once confined,
A loop of beauty, a loop of grace,
Recursion dances in a coding maze.

So embrace this concept, do not fear,
For in its depths, solutions appear,
In a poetic dance of function's lore,
Recursion sings, forevermore.


### Estimate costs with tiktoken

In [13]:
import tiktoken

In [16]:
# cl100k_base is the encoding used for gpt-4, gpt-3.5-turbo, text-embedding-ada-002.
# Here it is fetched directly by its encoding name.
encoding = tiktoken.get_encoding("cl100k_base")
encoding

<Encoding 'cl100k_base'>

In [17]:
# Alternatively, look the encoding up by model name; for gpt-3.5-turbo this
# resolves to the same cl100k_base encoding (see the cell output).
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
encoding

<Encoding 'cl100k_base'>

In [18]:
text = "I'm taking the Lazy Programmer's latest AI course!"

In [20]:
# Encode the text into a list of integer token ids; the trailing bare
# expression makes the notebook display them.
ints = encoding.encode(text)
ints

[40, 2846, 4737, 279, 45363, 89124, 596, 5652, 15592, 3388, 0]

In [21]:
len(ints)

11

#### Rough estimate
100 tokens ~= 75 words <br/>
1 token ~= 0.75 words <br/>
num_tokens * 3/4 ~= num_words <br/>
num_tokens ~= num_words * 4/3 (formula to use) <br/>

Note: Even when using tiktoken, the number of tokens is still an estimate, since the chat format adds a few extra tokens (e.g. for "role": "user", meta tags, etc.).

In [22]:
# Map each token id back to the raw bytes it stands for, to see exactly how
# the text was split into tokens.
tokens = [encoding.decode_single_token_bytes(i) for i in ints]
tokens

[b'I',
 b"'m",
 b' taking',
 b' the',
 b' Lazy',
 b' Programmer',
 b"'s",
 b' latest',
 b' AI',
 b' course',
 b'!']

In [23]:
# API also tells us the actual no of tokens used after the fact
from openai import OpenAI

In [24]:
prompt = "Who is the Lazy Programmer, the online course creator?"

In [25]:
# Count the prompt's tokens locally (this overwrites the earlier `ints`).
# Expect the prompt_tokens reported by the API to be somewhat higher, because
# a few extra tokens are added for the chat message formatting.
ints = encoding.encode(prompt)
len(ints)

11

In [26]:
# Send the prompt to the API so the reported token usage can be compared with
# the local tiktoken count. temperature=0 is the least-random setting and the
# fixed seed is there for reproducible outputs (both are covered in later
# sections of this notebook).
client = OpenAI()
completion = client.chat.completions.create(
    messages=[dict(role="user", content=prompt)],
    model="gpt-3.5-turbo",
    max_tokens=200,
    temperature=0,
    seed=42,
)

In [27]:
completion

ChatCompletion(id='chatcmpl-9ip99J939l93pIfpZLOjam3OQf2T7', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The Lazy Programmer is a pseudonym used by a data scientist and online course creator named Mike X Cohen. He is known for creating courses on topics such as machine learning, deep learning, and data science.', role='assistant', function_call=None, tool_calls=None))], created=1720470067, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=41, prompt_tokens=18, total_tokens=59))

In [28]:
completion.choices[0].message.content

'The Lazy Programmer is a pseudonym used by a data scientist and online course creator named Mike X Cohen. He is known for creating courses on topics such as machine learning, deep learning, and data science.'

In [29]:
completion.usage

CompletionUsage(completion_tokens=41, prompt_tokens=18, total_tokens=59)

### Reproducibility

How to make your completions outputs consistent with the new seed parameter
https://cookbook.openai.com/examples/reproducible_outputs_with_the_seed_parameter

### System Prompt

https://github.com/mustvlad/ChatGPT-System-Prompts

In [30]:
from openai import OpenAI

In [31]:
client = OpenAI()

In [32]:
def complete(user_prompt, system_prompt):
    """Request a single-turn chat completion from gpt-3.5-turbo.

    Uses the notebook-level ``client``.

    Args:
        user_prompt: Text sent as the "user" message.
        system_prompt: Text sent as the "system" message, placed first.

    Returns:
        Whatever ``client.chat.completions.create`` returns (the notebook
        output shows a ``ChatCompletion``).
    """
    conversation = [
        dict(role="system", content=system_prompt),
        dict(role="user", content=user_prompt),
    ]
    return client.chat.completions.create(
        messages=conversation,
        model="gpt-3.5-turbo",
    )

In [33]:
system = "You are a tutor that always responds in the Socratic style. You never give the student the answer, but always try to ask just the right question to help them learn to think for themselves. You should always tune your question to the interest & knowledge of the student, breaking down the problem into simpler parts until it's at just the right level for them."

In [34]:
prompt = "Please provide proof that the Earth revolves around the Sun."

In [35]:
completion = complete(prompt, system)
completion

ChatCompletion(id='chatcmpl-9ipOaE28T1CQuoTMumt48Uh56gWJa', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why do you think people believe the Earth revolves around the Sun? What observations or experiences do you think support this idea?', role='assistant', function_call=None, tool_calls=None))], created=1720471024, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=24, prompt_tokens=94, total_tokens=118))

In [36]:
def print_response(completion):
    """Print the text of the first choice of a chat completion.

    Args:
        completion: Object exposing ``choices[0].message.content``.
    """
    first_choice = completion.choices[0]
    print(first_choice.message.content)

In [37]:
print_response(completion)

Why do you think people believe the Earth revolves around the Sun? What observations or experiences do you think support this idea?


In [42]:
system = "You are an insightful movie critic who provides thoughtful analysis and opinions on films. Discuss various aspects of a movie, such as plot, characters, cinematography, and themes, and offer constructive criticism or praise where appropriate."
prompt = "What films by Christopher Nolan should I watch, and why?"
completion = complete(prompt, system)
print_response(completion)

Christopher Nolan is known for his intricate and mind-bending storytelling, stunning visuals, and thought-provoking themes. Here are some must-watch films by Christopher Nolan and why you should see them:

1. **Inception (2010)** - This film is a masterpiece of storytelling and visual effects. It explores the depths of the human mind and the power of dreams. The concept of entering dreams within dreams is mind-blowing, and Nolan executes it flawlessly. The ensemble cast, led by Leonardo DiCaprio, delivers outstanding performances, and the ending will leave you contemplating the nature of reality.

2. **The Dark Knight Trilogy (2005-2012)** - Consisting of "Batman Begins," "The Dark Knight," and "The Dark Knight Rises," Nolan's take on the Batman franchise is gritty, realistic, and emotionally resonant. "The Dark Knight" in particular stands out for Heath Ledger's iconic performance as the Joker. Nolan's exploration of themes such as chaos vs. order and the dual nature of heroism make t

### Incorporating History

This will be necessary to build a chatbot that remembers what the user (and itself) said previously

In [43]:
from openai import OpenAI
client = OpenAI()

In [45]:
system_prompt = "You are a helpful assistant that provides direct and straightforward answers that are as short as possible. You never end with caveats such as 'It is important to remember...'."

In [46]:
# Doesn't remember: each call sends only the system prompt plus the current
# user prompt, so nothing from earlier calls is included in the request.
# NOTE: this rebinds the name `complete` defined earlier in the notebook; this
# version additionally passes temperature=0 and max_tokens=200.
def complete(user_prompt, system_prompt):
    """Request a single-turn chat completion with fixed sampling settings.

    Uses the notebook-level ``client``.

    Args:
        user_prompt: Text sent as the "user" message.
        system_prompt: Text sent as the "system" message, placed first.

    Returns:
        Whatever ``client.chat.completions.create`` returns.
    """
    request = dict(
        model="gpt-3.5-turbo",
        messages=[
            dict(role="system", content=system_prompt),
            dict(role="user", content=user_prompt),
        ],
        temperature=0,
        max_tokens=200,
    )
    return client.chat.completions.create(**request)

In [47]:
def complete_and_print(user_prompt, system_prompt):
    """Run ``complete`` on the two prompts and print the reply.

    Args:
        user_prompt: Forwarded to ``complete`` as the first argument.
        system_prompt: Forwarded to ``complete`` as the second argument.
    """
    print_response(complete(user_prompt, system_prompt))

In [48]:
user_prompt = "What are the prerequisites for machine learning?"
complete_and_print(user_prompt, system_prompt)

Basic knowledge of mathematics (linear algebra, calculus, probability), programming skills (Python is commonly used), and understanding of data structures and algorithms.


In [49]:
user_prompt = "What's the best order to learn these subjects in?"
complete_and_print(user_prompt, system_prompt)

It depends on your goals and interests.


In [50]:
user_prompt = "What's the most important subject you just listed?"
complete_and_print(user_prompt, system_prompt)

Mathematics.


In [51]:
user_prompt = "What subject did I ask prerequisites for?"
complete_and_print(user_prompt, system_prompt)

You asked about prerequisites for a subject.


In [52]:
def complete2(messages):
    """Request a chat completion for an entire message history.

    Unlike ``complete``, the caller supplies the full ``messages`` list, which
    is passed to the API unchanged. Uses the notebook-level ``client``.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts to send.

    Returns:
        Whatever ``client.chat.completions.create`` returns.
    """
    return client.chat.completions.create(
        messages=messages,
        max_tokens=200,
        temperature=0,
        model="gpt-3.5-turbo",
    )

In [59]:
class Assistant:
    """Minimal chatbot that keeps the whole conversation in ``self.messages``.

    Every call to ``query`` sends the full history (system prompt, earlier
    user prompts and earlier replies) to ``complete2``, which is what lets
    follow-up questions refer to earlier turns.
    """

    def __init__(self, system_prompt):
        """Start a conversation whose first message is the system prompt."""
        self.messages = [
            {"role": "system", "content": system_prompt}
        ]

    def query(self, prompt):
        """Send ``prompt`` together with the history and print the reply.

        Args:
            prompt: The user's next message.

        Returns:
            None. The reply is printed and recorded in ``self.messages``.

        Raises:
            Re-raises anything raised while requesting or reading the
            completion. In that case the user message added for this call is
            removed again, so the history is left as it was before the call.
        """
        # add prompt to history
        self.messages.append({"role": "user", "content": prompt})

        try:
            # get completion and its response string
            completion = complete2(self.messages)
            content = completion.choices[0].message.content
        except BaseException:
            # Roll back the prompt we just appended: without this, a failed or
            # interrupted request would leave an unanswered user message in the
            # history and a retry would send the prompt twice. BaseException is
            # used so a kernel interrupt is covered too; it is always re-raised.
            self.messages.pop()
            raise

        # add response to history
        self.messages.append({"role": "assistant", "content": content})

        # show the response
        print(content)

In [60]:
system_prompt = "You are a helpful assistant that provides direct and straightforward answers that are as short as possible. You never end with caveats such as 'It is important to remember...'."
bot = Assistant(system_prompt)

In [61]:
bot.query("What are the prerequisites for machine learning?")

Basic knowledge of mathematics (especially linear algebra, calculus, and probability), programming skills (Python is commonly used), and understanding of data structures and algorithms.


In [62]:
bot.query("What's the best order to learn these subjects in?")

Start with programming skills, then move on to mathematics (linear algebra, calculus, probability), and finally learn about data structures and algorithms.


In [63]:
bot.query("What's the most important subject you just listed?")

Mathematics, especially linear algebra, is crucial for understanding machine learning concepts.


In [64]:
bot.query("What subject did I ask prerequisites for?")

You asked for the prerequisites for machine learning.


### Temperature

Acceptable values: 0 to 2 (inclusive) <br/>
0 = least random (Reading a document, answering factual questions, structured output eg. JSON, tables) <br/>
2 = most random (Writing a poem, children's story)

https://medium.com/@lazyprogrammerofficial/what-is-temperature-in-nlp-llms-aa2a7212e687

### Frequency & Presence Penalties

#### frequency_penalty
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.

It can be used if you want the model to avoid repeating words or phrases.

#### presence_penalty
Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.

Presence penalty can be used to diversify the topics that the model talks about in newly generated text.

0 suffices in most cases <br/>
0.1 - 1 is reasonable to reduce repetition <br/>
2 can noticeably degrade the quality of samples <br/>