In [1]:
import os
import json
import jsonlines
import time
import pandas as pd

import wandb
from openai import OpenAI

# Shared OpenAI client; the key is looked up in the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# System message used by default for every chat request made in this notebook.
system_prompt = (
    "The following is a conversation with an AI assistant. "
    "The assistant is helpful, creative, clever, and very friendly. "
    "The assistant will attempt to give a response that is concise "
    "but ensures that all the key points are included when relevant."
)

In [2]:
def complete_chat(message, temperature, seed, model, system_prompt=system_prompt, client=client):
    """Send a single-turn chat request and return the reply text.

    Args:
        message: Text sent as the user message.
        temperature: Sampling temperature forwarded to the API call.
        seed: Seed forwarded to the API call.
        model: Name of the chat model to call.
        system_prompt: Text sent as the system message. The default is the
            module-level ``system_prompt`` as bound when this function was defined.
        client: Object exposing ``chat.completions.create``. The default is the
            module-level ``client`` as bound when this function was defined.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    completion = client.chat.completions.create(
        messages=conversation,
        model=model,
        temperature=temperature,
        seed=seed,
    )
    # Only the first choice of the completion is used.
    return completion.choices[0].message.content

# Summarization of a big document

In [None]:
def read_jsonl(filepath):
    """Load a JSON Lines file and return all of its records as one string.

    Every record yielded by the ``jsonlines`` reader is serialized again with
    ``json.dumps``; the serialized records are joined with a blank line
    between them.
    """
    with jsonlines.open(filepath) as reader:
        serialized = [json.dumps(record) for record in reader]
    return '\n\n'.join(serialized)
# Load the trimmed blog posts and fold them into one summarization prompt.
blog_posts = read_jsonl('data/blog_trim.jsonl')
blog_prompt = f"Summarize the following: {blog_posts}"

# First-pass summary of the posts; the blog sweep passes `data` in as its starting text.
data = complete_chat(blog_prompt, model="gpt-4o", temperature=1.0, seed=42)

# Preview only the first 100 characters of the summary.
print(data[:100])

In [None]:
def run_summarize_experiment(data, seed, temperature, model, iterations=10, first_model='gpt-4o'):
    """Repeatedly summarize a text, feeding each summary back in as the next input.

    Every round sends ``"Summarize the following: " + <current text>`` through
    ``complete_chat``, logs one record with ``wandb.log``, prints progress, and
    then uses the response as the text for the next round.

    Args:
        data: Starting text (a string).
        seed: Seed forwarded to ``complete_chat`` on every round.
        temperature: Temperature forwarded to ``complete_chat`` on every round.
        model: Model used for every round after the first. It is also stored
            under ``"model"`` in every record, including the first one.
        iterations: Number of rounds to run.
        first_model: Model used for the first round only.

    Returns:
        A ``pandas.DataFrame`` with one row per round and the columns
        ``iteration``, ``prompt``, ``seed``, ``model``, ``temperature``,
        ``response``, ``length`` and ``model_used``.
    """
    entries = []
    for i in range(iterations):
        print(f'Iteration {i+1}')
        prompt = "Summarize the following: " + data
        # The first round goes to `first_model`, not `model`. Record which
        # model actually produced each response so round 0 is not mislabelled.
        model_used = first_model if i == 0 else model
        response = complete_chat(prompt,
                                 seed=seed,
                                 temperature=temperature,
                                 model=model_used)
        entry = {
            "iteration": i,
            "prompt": prompt,
            "seed": seed,
            # Configured model of the run; kept as-is so existing filters on
            # this column keep selecting whole runs.
            "model": model,
            "temperature": temperature,
            "response": response,
            "length": len(response),
            "model_used": model_used,
        }
        # Log this round's record to the active wandb run.
        wandb.log(entry)

        print(f'* data: {len(data)}')
        print(f'* len: {entry["length"]}')
        print('')

        # The response becomes the text summarized in the next round.
        data = response
        entries.append(entry)

    return pd.DataFrame(entries)

In [None]:
# Settings shared by every run in the blog-summary sweep.
seed = 42
iterations = 50


def make_run_name(config):
    """Build the W&B run name from the config's model, seed and temperature."""
    return "run-{}_{}_{}".format(
        config['model'], config['seed'], config['temperature']
    )

# One (config, DataFrame) pair is collected per finished run.
dfs = []
for temperature in [0.0, 0.5, 1.0, 2.0]:
    for model in ['gpt-4o', 'gpt-3.5-turbo']:
        config = {
            "seed": seed,
            "temperature": temperature,
            "model": model,
            "iterations": iterations,
        }

        # Using the run as a context manager finishes it when the block exits,
        # including when the experiment raises, so a failed run is not left
        # open while the next wandb.init() starts.
        # NOTE: `data` is whatever the name is bound to when this cell runs; a
        # later cell in this notebook rebinds it to the generated list of facts.
        with wandb.init(project="llm_fixed_point",
                        name=make_run_name(config),
                        config=config):
            df = run_summarize_experiment(data=data, **config)
        dfs.append((config, df))

        # No pause between runs here; add a time.sleep(...) if the API starts
        # rate limiting.

In [None]:
# Stack the per-run frames collected in `dfs` into one table and persist it.
df = pd.concat([frame for _, frame in dfs])
df.to_csv('data/summarize_blog_posts.csv', index=False)

# The preview is the cell's last expression so the notebook renders it; in the
# middle of the cell its result was discarded.
df.head()

In [None]:
# Load the results table back from the CSV file on disk.
read_df = pd.read_csv('data/summarize_blog_posts.csv')

def compare(df, model, temperature):
    """Print the first and last stored responses for one model/temperature pair.

    Rows of ``df`` are filtered on the ``model`` and ``temperature`` columns.
    The first and the last ``response`` of the selection are printed, separated
    by a line of 100 ``#`` characters.
    """
    selected = (df.model == model) & (df.temperature == temperature)
    responses = df.loc[selected, 'response']
    print(responses.iloc[0])
    print('#' * 100)
    print(responses.iloc[-1])

# Print the first and last responses stored for gpt-4o at temperature 0.5.
compare(df=read_df, model='gpt-4o', temperature=0.5)

# Summarize List of Items

In [3]:
def run_list_summarize_experiment(data, seed, temperature, model, prompt, iterations=10, first_model='gpt-4o'):
    """Repeatedly apply a prompt to a text, feeding each response back in.

    Every round sends ``prompt + <current text>`` through ``complete_chat``,
    logs one record with ``wandb.log``, prints progress, and then uses the
    response as the text for the next round.

    Args:
        data: Starting text (a string).
        seed: Seed forwarded to ``complete_chat`` on every round.
        temperature: Temperature forwarded to ``complete_chat`` on every round.
        model: Model used for every round after the first. It is also stored
            under ``"model"`` in every record, including the first one.
        prompt: Instruction text placed in front of the current text.
        iterations: Number of rounds to run.
        first_model: Model used for the first round only.

    Returns:
        A ``pandas.DataFrame`` with one row per round and the columns
        ``iteration``, ``prompt``, ``seed``, ``model``, ``temperature``,
        ``response``, ``length``, ``lines`` and ``model_used``.
    """
    entries = []
    for i in range(iterations):
        print(f'Iteration {i+1}')
        full_prompt = prompt + data
        # The first round goes to `first_model`, not `model`. Record which
        # model actually produced each response so round 0 is not mislabelled.
        model_used = first_model if i == 0 else model
        response = complete_chat(full_prompt,
                                 seed=seed,
                                 temperature=temperature,
                                 model=model_used)
        entry = {
            "iteration": i,
            "prompt": full_prompt,
            "seed": seed,
            # Configured model of the run; kept as-is so existing filters on
            # this column keep selecting whole runs.
            "model": model,
            "temperature": temperature,
            "response": response,
            "length": len(response),
            # Number of newline-separated lines in the response.
            "lines": len(response.split('\n')),
            "model_used": model_used,
        }
        # Log this round's record to the active wandb run.
        wandb.log(entry)

        print(f'* data: {len(data)}')
        print(f'* len: {entry["length"]}')
        print(f'* lines: {entry["lines"]}')
        print('')

        # The response becomes the text processed in the next round.
        data = response
        entries.append(entry)

    return pd.DataFrame(entries)

In [4]:
# Ask the model for a fresh list; the list sweep passes `data` in as its starting text.
list_prompt = "Generate a list of exactly 100 random facts.  Don't add any additional text, just output the list."

data = complete_chat(list_prompt, model="gpt-4o", temperature=1.0, seed=42)

# Print the number of newline-separated lines first, then the list itself.
print(len(data.split('\n')), data, sep='\n')

100
1. Honey never spoils.
2. Bananas are berries, but strawberries aren't.
3. The Eiffel Tower can be 15 cm taller during the summer.
4. There are more stars in the universe than grains of sand on all the Earth's beaches.
5. Octopuses have three hearts.
6. The shortest war in history was between Britain and Zanzibar on August 27, 1896. It lasted 38 minutes.
7. A bolt of lightning contains enough energy to toast 100,000 slices of bread.
8. The hashtag symbol is technically called an octothorpe.
9. The average person will spend six months of their life waiting for red lights to turn green.
10. A snail can sleep for three years.
11. An adult human is made up of approximately 7,000,000,000,000,000,000,000,000,000 atoms.
12. The word "typewriter" can be typed using only the top row of keys on a standard keyboard.
13. Leonardo da Vinci could write with one hand and draw with the other at the same time.
14. An ostrich's eye is bigger than its brain.
15. The largest snowflake on record was 15

In [17]:
# Settings shared by every run in the list sweep.
seed = 42
iterations = 50


def make_run_name(config):
    """Build the W&B run name: first word of the prompt, then model, seed and temperature."""
    prompt_verb = config['prompt'].split()[0]
    return "{}-run-{}_{}_{}".format(
        prompt_verb, config['model'], config['seed'], config['temperature']
    )

# One (config, DataFrame) pair is collected per run executed in this cell.
dfs = []

api = wandb.Api()
for temperature in [0.0, 0.5, 1.0]:
    for model in ['gpt-4o', 'gpt-3.5-turbo']:
        for prompt in ['Summarize the following: ', 'Rephrase the following: ']:
            config = {
                "prompt": prompt,
                "seed": seed,
                "temperature": temperature,
                "model": model,
                "iterations": iterations,
            }
            run_name = make_run_name(config)

            # Look up runs already logged under this name so configurations
            # that were run before are not repeated.
            existing_run = api.runs("bjlkeng/llm_fixed_point",
                                    filters={"display_name": run_name})

            if len(existing_run) > 0:
                print(f"Skipping {run_name} that already exists")
                continue

            # Using the run as a context manager finishes it when the block
            # exits, including when the experiment raises, so a failed run is
            # not left open while the next wandb.init() starts.
            with wandb.init(project="llm_fixed_point",
                            name=run_name,
                            config=config):
                df = run_list_summarize_experiment(data, **config)
            dfs.append((config, df))

            # Pause between runs; presumably to stay under API rate limits, as
            # the blog sweep's comment suggests.
            time.sleep(30)

Skipping Summarize-run-gpt-4o_42_0.0 that already exists
Skipping Rephrase-run-gpt-4o_42_0.0 that already exists
Skipping Summarize-run-gpt-3.5-turbo_42_0.0 that already exists
Skipping Rephrase-run-gpt-3.5-turbo_42_0.0 that already exists
Skipping Summarize-run-gpt-4o_42_0.5 that already exists
Skipping Rephrase-run-gpt-4o_42_0.5 that already exists
Skipping Summarize-run-gpt-3.5-turbo_42_0.5 that already exists
Skipping Rephrase-run-gpt-3.5-turbo_42_0.5 that already exists
Skipping Summarize-run-gpt-4o_42_1.0 that already exists
Skipping Rephrase-run-gpt-4o_42_1.0 that already exists
Skipping Summarize-run-gpt-3.5-turbo_42_1.0 that already exists
Skipping Rephrase-run-gpt-3.5-turbo_42_1.0 that already exists
