In [7]:
%load_ext autoreload
%autoreload 2
    
import json
import os
import time
import openai
import pandas as pd
from tqdm.notebook import tqdm
from openai_interface import GPT_Turbo
import tiktoken
pd.set_option('display.max_rows', 400)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
data_path = './data/first107.json'
with open(data_path) as f:
    data = json.load(f)

In [49]:
openai.api_key = os.environ['OPENAI_API_KEY']
encode = tiktoken.encoding_for_model('gpt-3.5-turbo-0613')

In [59]:
gpt = GPT_Turbo(model='gpt-3.5-turbo-16k')

In [54]:
system_msg = 'You are an expert at creating summaries of podcast shows'

prompt = "Given a podcast episode transcript surrounded by triple backticks, and the guest of the show, write a summary of the episode between 500 and 750 tokens in length:\n\
Guest: {guest}\n\
```{transcript}```\
"

system_msg_count = len(encode.encode(system_msg))
prompt_count = len(encode.encode(prompt))
total_input = system_msg_count + prompt_count
total_input

55

In [89]:
def single_sum(num: int, prompt: str=prompt, temp: float=0.0):
    d = data[num]
    content = ' '.join(d['content'].split()[:6000])
    guest = d['guest']
    summary_prompt = prompt.format(guest=guest, transcript=content)
    try:
        response = gpt.get_completion_from_messages(prompt=summary_prompt, temperature=temp, system_message=system_msg)
        return response
    except Exception as e:
        return e

In [90]:
def update(num: int, response: str):
    data[num]['summary'] = response

## Sequential Processing

In [92]:
for num in range(161, 162):
    guest = data[num]['guest']
    resp = single_sum(num, temp=0.0)
    print(f'GUEST: {guest}')
    print()
    print(f'SUMMARY: {resp}')
    # update(num, resp)

GUEST: Gary Litefoot Davis

SUMMARY: In this episode of Impact Theory, host Tom Bilyeu interviews Gary Litefoot Davis, a multi-talented individual who has achieved success as a rapper, actor, author, entrepreneur, and political figure. Davis shares his unconventional journey to success, which began with his family experiencing financial collapse. Despite the hardships, Davis learned the value of hard work and resilience from his father and grandfather. He emphasizes the importance of taking responsibility for one's own success and setting aggressive timelines to achieve goals. Davis also discusses the need to lead by example and the power of staying focused and moving forward, even in the face of adversity. He encourages individuals to break down their goals into manageable steps and learn from successful people in their field. Davis believes in the importance of self-belief and self-worth, especially for Native Americans, and aims to inspire others through his music and storytelling. 

In [93]:
update(num, resp)

In [34]:
data[num]['summary'], data[num]['guest']

("digest everything that I've read now, I'm really struck by the idea that if we're evolving culturally, we need to be certain that we're not limiting ourselves based on previously held beliefs. So when I think about this, if there is something that could be designed into the system that would allow people to be more loosely connected, where they would have access to information that's otherwise quite difficult to come by, and that would lead to faster growth and development, if you're growing and evolving at a faster rate, rates of self-evolution, you're more likely to be able to solve problems. So I think consumer capitalism, and I'm going to speak pretty roughly here, but it fixates people on things that are, they're not purpose built to solve. So the reason that I think is this immersion in consumer capitalism is leading people away from their sense of purpose, and it's making them less happy, and it's putting them into a position where they're less capable of solving problems. So 

In [110]:
contents = [d['content'] for d in data]
sums = [d['summary'] for d in data]

In [112]:
for d in data:
    del d['content']

In [115]:
for i, d in enumerate(data):
    d['content'] = contents[i]

In [119]:
with open('final_copy.json', 'w') as f:
    json.dump(data, f)

## Batch Processing

In [84]:
for i, sum in enumerate(summaries[28:], 162):
    update(i, sum)

In [88]:
data[161]['guest']

'Gary Litefoot Davis'

In [60]:
summaries = []
token_count = 0
rate_limit = 80000
batch = 1
start = 134
pause = 30
for i, d in enumerate(tqdm(data[start:]), start):
    content = ' '.join(d['content'].split()[:5000])
    input_count = len(encode.encode(content))
    token_count += (input_count + total_input)
    # print(f'Total Token Count: {token_count}')
    if token_count > rate_limit * batch:
        print(f'Bumping against Token Limit sleeping for {pause} seconds...')
        time.sleep(pause)
        batch += 1
    guest = d['guest']
    summary_prompt = prompt.format(guest=guest, transcript=content)
    try:
        response = gpt.get_completion_from_messages(prompt=summary_prompt, system_message=system_msg)
        summaries.append(response)
        print(f'EPISODE: {i}')
        print(f'GUEST: {guest}')
        print(f'SUMMARY: {response}')
        print('\n\n')
    except Exception as e:
        print(e)
        summaries.append(f'Error: {e}')

  0%|          | 0/250 [00:00<?, ?it/s]

EPISODE: 134
GUEST: Tom Bilyeu
SUMMARY: In this podcast episode, Tom Bilyeu, co-founder of Impact Theory and Quest Nutrition, shares his insights on how to build an unbreakable mindset. He emphasizes the importance of taking ownership of one's life and not tolerating weakness. Bilyeu believes that life is tough, but individuals have a choice to be strong and take responsibility for their own success. He debunks the idea of a fixed mindset and encourages listeners to constantly seek ways to improve and learn. Bilyeu also discusses the cost of weakness, which includes a loss of self-respect and a lack of progress in life. On the other hand, he highlights the benefits of strength, such as earning respect and achieving fulfillment. Bilyeu provides concrete steps to develop an unbreakable mindset, including owning one's life, creating new values, building a value stack, and developing mental toughness. He emphasizes the importance of self-reliance, self-respect, growth, and resilience in th

In [61]:
len(summaries)

250

In [48]:
for i, sum in enumerate(data, 134):
    print()
    print(f'Guest: {data[i]["guest"]}')
    print(f'Episode #: {i}')
    print()
    print(f'Summary: {data[i]["summary"]}')
    print('-'*150)


Guest: Tom Bilyeu
Episode #: 134

Summary: In this episode, Tom Bilyeu, co-founder of Impact Theory and Quest Nutrition, shares his personal journey of overcoming emotional weakness and insecurity to achieve success. He emphasizes the importance of building a mindset that makes one unbreakable and unstoppable. Bilyeu discusses the cost of weakness, the value of strength, and the need to take complete ownership of one's life. He highlights the significance of developing a growth mindset, challenging fixed beliefs, and pursuing fulfillment over external success. Bilyeu also stresses the importance of self-reliance, self-respect, growth, and resilience as key values to become unstoppable. He encourages listeners to develop mental toughness, grit, and the ability to move towards fear. Throughout the episode, Bilyeu provides practical steps and strategies for individuals to take ownership of their lives and build the mindset needed to achieve their goals. He also offers a free workshop on 

KeyError: 'summary'

In [377]:
for i, num in enumerate(range(89,97)):
    print(i, num)
    data[num]['summary'] = summaries[i]

0 89
1 90
2 91
3 92
4 93
5 94
6 95
7 96


In [40]:
with open('first133.json', 'w') as f:
    json.dump(data, f)

In [17]:
########################
# START YOUR CODE HERE #
########################
# Replace None with your code
import unittest
from testing import test_lora_config_initialization

def create_lora_config():
    lora_config = {'r': 8, 'lora_alpha': 16}

    return lora_config

######################
# END YOUR CODE HERE #
######################

In [20]:
#test function
test_lora_config_initialization(create_lora_config)

'All tests passed!'

Bad pipe message: %s [b"\xffS8\x816\xb9\xeb\xb9\x9b3\x18L\x05yA\xc7\xf4\xc6\x00\x00|\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8\xcc\xaa\xc0\xaf\xc0\xad\xc0\xa3\xc0\x9f\xc0]\xc0a\xc0W\xc0S\xc0+\xc0/\x00\xa2\x00\x9e\xc0\xae\xc0\xac\xc0\xa2\xc0\x9e\xc0\\\xc0`\xc0V\xc0R\xc0$\xc0(\x00k\x00j\xc0#\xc0'\x00g\x00@\xc0\n\xc0\x14\x009\x008\xc0\t\xc0\x13\x003\x002\x00\x9d\xc0\xa1\xc0\x9d\xc0Q\x00\x9c\xc0\xa0\xc0\x9c\xc0P\x00=\x00<\x005\x00/\x00\x9a\x00\x99\xc0\x07\xc0\x11\x00\x96\x00\x05\x00\xff\x01\x00\x00j\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x000\x00.\x04\x03\x05\x03\x06\x03\x08\x07\x08\x08\x08\t\x08\n\x08\x0b\x08\x04\x08\x05\x08\x06\x04\x01\x05\x01\x06\x01\x03\x03\x02\x03\x03\x01", b'\x03', b'\x02']
Bad pipe message: %s [b'\x05\x02\x06']
Bad pipe message: %s [b"a{\xf8\x9d\x16/\x08\xf3\xad\x97\xab\xac3\xb8\xd1\xed\xe9\xc3\x00\x00\xa6\xc0,\xc00\x00\x