In [7]:
import json
import os
import time
import openai

# Take the API key from the environment instead of hardcoding it in the notebook.
# os.getenv returns None when OPENAI_API_KEY is unset, so this line itself never raises.
openai.api_key = os.getenv("OPENAI_API_KEY")

In [8]:
def link_snippets(logs, max_context=16):
    """Build one text snippet of surrounding chat for every message that has a link.

    Each message is first flattened to ``"<author name>: <content>"`` followed by
    one ``Link: "<title>" Description: "<description>"`` line per embed (with
    URL-like words removed from the description).  A message counts as a "link
    message" when its flattened text contains ``'http'``.

    For every link message the snippet is assembled in this order, each part
    terminated by three newlines:

    1. the messages that follow it (oldest first),
    2. the link message itself,
    3. the messages that precede it (newest first).

    Scanning in either direction stops at the next link message, and at most
    ``max_context`` surrounding messages are included in total; following
    messages are taken first and preceding messages use whatever is left.

    Args:
        logs: Parsed chat export.  Must contain ``logs['messages']``, a list of
            dicts with ``['author']['name']`` and ``['content']``; ``'embeds'``
            is optional, and embeds may lack ``'title'``/``'description'``.
        max_context: Non-negative upper bound on the number of surrounding
            (non-link) messages per snippet.  Defaults to 16.

    Returns:
        A list of snippet strings, one per link message, in message order.
    """
    separator = '\n\n\n'

    # Flatten every message (plus its embeds) into a single string.
    refactored_msgs = []
    for message in logs['messages']:
        msg = f"{message['author']['name']}: {message['content']}"

        # 'embeds' may be absent, and an embed's title/description may be
        # missing or None; treat all of those as empty rather than crashing.
        for embed in message.get('embeds') or []:
            desc = embed.get('description') or ''
            # Strip every URL-like word from the description so that embed
            # text alone does not contribute extra URLs to the snippet.
            for word in desc.split():
                if 'http' in word:
                    desc = desc.replace(word, '')
            title = embed.get('title') or ''
            msg += f"\nLink: \"{title}\" Description: \"{desc}\""

        refactored_msgs.append(msg)

    total = len(refactored_msgs)
    link_contexts = []
    for index, link_msg in enumerate(refactored_msgs):
        if 'http' not in link_msg:
            continue

        # Scan forward starting AFTER the link message.  Starting at `index`
        # itself would hit the link's own 'http' and stop immediately, so no
        # following message would ever be collected.
        following = []
        for i in range(index + 1, min(index + 1 + max_context, total)):
            if 'http' in refactored_msgs[i]:
                break
            following.append(refactored_msgs[i])

        # Scan backward with whatever is left of the shared budget.
        remaining = max_context - len(following)
        preceding = []
        for i in range(index - 1, max(index - 1 - remaining, -1), -1):
            if 'http' in refactored_msgs[i]:
                break
            preceding.append(refactored_msgs[i])

        parts = following + [link_msg] + preceding
        link_contexts.append(''.join(part + separator for part in parts))

    return link_contexts


# Call link_snippets on every JSON file in ./discord_logs and write the combined list of link contexts to ./links.json (overwriting any existing file).
def link_snippets_all(logs_dir='./discord_logs', output_path='./links.json'):
    """Run ``link_snippets`` over every ``*.json`` file in a directory and save the result.

    The per-file lists returned by ``link_snippets`` are concatenated into one
    list, which is then written as JSON to ``output_path`` (the file is
    overwritten if it already exists).

    Args:
        logs_dir: Directory containing the exported chat logs.  Only entries
            whose name ends in ``.json`` are read.
        output_path: Destination file for the combined list.

    Returns:
        None.
    """
    link_contexts = []
    # os.listdir yields entries in arbitrary order; sort them so the order of
    # contexts in the output file is the same on every run.
    for filename in sorted(os.listdir(logs_dir)):
        if not filename.endswith('.json'):
            continue
        # Read as UTF-8 explicitly: otherwise the platform's locale encoding is
        # used, which can fail or garble non-ASCII chat text (emoji etc.).
        # NOTE(review): assumes the exports are UTF-8 encoded JSON - confirm.
        with open(os.path.join(logs_dir, filename), 'r', encoding='utf-8') as f:
            logs = json.load(f)
        link_contexts += link_snippets(logs)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(link_contexts, f)

# Build ./links.json from the logs in ./discord_logs; this runs as soon as the cell executes.
link_snippets_all()


In [None]:
# Load links.json, then, for each link context in the list (the first entry is skipped), generate several candidate summaries with gpt-3.5-turbo and save the link context + summaries pairs to a new JSON file called link_pairs.json.

def summarize(context):
  """Ask the chat model for short summaries of the linked content in a snippet.

  The request is a one-shot prompt: a system instruction, one worked example
  (user snippet + assistant answer), and finally ``context`` wrapped in the
  same "snippet + task" template as the example.  Four completions are
  requested (``n=4``).

  Args:
    context: Chat snippet to summarize; interpolated into the final user message.

  Returns:
    The value returned by ``openai.ChatCompletion.create`` for this request.
  """
  # Task text appended after every snippet (shared by the example and the real request).
  task = (
      "\n\n\n\n summarize the linked content in the chat snippet in 10 words or less."
      " Do not mention the users involved in the conversation, only summarize information about the content."
      " Avoid any direct copying existing link descriptions."
  )

  def as_request(snippet):
    # Wrap a snippet in the fenced "snippet:" template followed by the task text.
    return f"snippet:\n```{snippet}```{task}"

  # Worked example shown to the model before the real snippet.
  example_snippet = (
      "hazelwillow: https://thenextweb.com/news/new-experiment-demonstrates-reality-might-actually-be-real @Goldsteel, I don\'t know if this falls in the scope of your studies, but I figured I\'d ask if this is something you\'d be willing to lecture us about in VRC at some point?\n"
      "Link: \"New experiment demonstrates that reality might actually be real\" Description: \"What if reality wasn\'t real? Can we prove it? A team of physicists may have just brought us a little closer to the answer.\"\n\n\n"
      "hazelwillow: Exploring the intersection of atypical neurology and BCIs, and ethical considerations as to development of BCIs for minority populations? Panel discussion style\n\n\n"
      "yellow0720: :duckthumbs:\n\n\n"
      "yellow0720: hamp appreciation presentation owo\n\n\n"
      "_hamp: Do you have a project, an idea, or just something that you think is cool and interesting, that you\'d like to give a short talk or a presentation on at one of our meetups? If so then please share your idea briefly here, or send it to me via DM. \n\n"
      "Your presentation can be about anything that is at least somewhat related to transhumanism. Short presentations around 3-7 minutes long are usually preferred. Please don\'t worry too much about your presentation not being good enough, even super simple talks are very welcome!\n\n"
      "If we like your idea then we\'ll gladly give you the floor at one of our meetups to give your presentation 😊"
  )
  example_answer = "an experiment exploring the reality of existence"

  conversation = [
    {
      "role": "system",
      "content": "I'd like you to help the user summarize the context of linked content in the chat snippet."
    },
    {"role": "user", "content": as_request(example_snippet)},
    {"role": "assistant", "content": example_answer},
    {"role": "user", "content": as_request(context)},
  ]

  return openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      messages=conversation,
      temperature=1,
      max_tokens=256,
      top_p=1,
      frequency_penalty=0,
      presence_penalty=0,
      n=4
    )

# Load the link contexts saved earlier in links.json.
with open('links.json') as f:
  links = json.load(f)

link_pairs = []
# NOTE(review): the first entry is skipped; the reason is not visible in this cell - confirm it is intentional.
for link in links[1:]:
  # Keep calling the API until a response comes back, pausing 10 s after each failure.
  # NOTE(review): the retry is unbounded and catches every Exception, so a permanent
  # failure keeps looping until the kernel is interrupted.
  response = None
  while response is None:
    try:
      response = summarize(link)
    except Exception as e:
      print("error", e, sep="\n")
      time.sleep(10)

  # Collect the text of every returned completion.
  summaries = [choice.message.content for choice in response.choices]
  print(summaries)

  link_pairs.append(dict(link=link, summaries=summaries))

  # Rewrite the output file after every link so finished work is on disk if the run stops.
  with open('link_pairs.json', 'w') as f:
    json.dump(link_pairs, f)




In [6]:
# Show how many link/summaries records are currently held in link_pairs.
print(len(link_pairs))

43
