# Get ChatGPT responses

In [3]:
import json
import openai
import os
import pickle
import sys
from tenacity import (
    retry,
    before_sleep_log,
    stop_after_attempt,
    wait_exponential,
)  # for exponential backoff
from tqdm import tqdm

In [4]:
client = openai.OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [5]:
@retry(wait=wait_exponential(min=10, max=120, exp_base=1.62), stop=stop_after_attempt(10), 
       before_sleep=before_sleep_log(sys.stdout, 20)
      )
def call_create_wrapper(**kwargs):
    return client.chat.completions.create(**kwargs)

In [8]:
with open('../data/search_results/neurips2023_serp_outputs.json') as fin:
    search_results = json.load(fin)

In [51]:
search_results['Moritz Hardt (Max Planck Institute for Intelligent Systems, Tübingen)']['organic_results']

[{'position': 1,
  'title': 'Moritz Hardt',
  'link': 'https://mrtz.org/',
  'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://mrtz.org/&ved=2ahUKEwjH06jO5tyEAxW2RTABHRQ8BGMQFnoECA0QAQ',
  'displayed_link': 'https://mrtz.org',
  'snippet': "I'm a director at the Max Planck Institute for Intelligent Systems. Prior to joining the institute, I was Associate Professor for Electrical Engineering and ...",
  'snippet_highlighted_words': ['Max Planck Institute for Intelligent Systems'],
  'missing': ['bio'],
  'must_include': {'word': 'bio',
   'link': 'https://www.google.com/search?safe=active&sca_esv=02097b259e5129ad&q=write+a+%22bio%22+for+Moritz+Hardt+(Max+Planck+Institute+for+Intelligent+Systems,+T%C3%BCbingen)&sa=X&ved=2ahUKEwjH06jO5tyEAxW2RTABHRQ8BGMQ5t4CegQIFBAB'},
  'source': 'Moritz Hardt'},
 {'position': 2,
  'title': 'Moritz Hardt appointed a Director at the Max Planck Institute ...',
  'link': 'https://is.mpg.de/en/news/moritz-hardt-appoi

In [22]:
RESPONSE_MODEL = "gpt-3.5-turbo"
TOP_P=1.0
TEMPERATURE=1.0

search_res_gpt_35 = []
for query in tqdm(search_results, desc='Query'):
    one_res = search_results[query]
    one_q = f"Write a bio for {query}\nSearch Results:\n"
    for osr in one_res['organic_results']:
        one_q += f"title: {osr['title']}\nsnippet: {osr['snippet']}\n---\n"
    
    messages = [
                {
                    "role": "system",
                    "content": "Write a short response (one paragraph) using only the information available in the search results."
                },
                {
                    "role": "user",
                    "content": one_q
                }
            ]
    
    # print(messages[0])
    # print(messages[1]['content'])
    # break
    response = call_create_wrapper(
                model=RESPONSE_MODEL,
                messages=messages,
                n=1,
                temperature=TEMPERATURE,
                top_p=TOP_P,
                logprobs=True
    )
    out_to_save = response.dict().copy()
    out_to_save['query'] = query
    out_to_save['organic_results'] = one_res['organic_results']
    search_res_gpt_35.append(out_to_save)
    # print(response)
    # break

Query: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 53/53 [01:39<00:00,  1.87s/it]


In [23]:
with open("../outputs/gpt_35_turbo_gen/neurips2023_serp_gen.pkl", 'wb') as fout:
    pickle.dump(search_res_gpt_35, fout)

## Study generations

In [29]:
import numpy as np

In [32]:
rng = np.random.default_rng(42)

In [33]:
rand_idx = rng.choice(len(search_res_gpt_35), 10)

In [54]:
def display_output(one_out):
    print(f"Query: {one_out['query']}\n\n")
    print(f"Search Results:\n")
    for osr in one_out['organic_results']:
        print(f"title: {osr['title']}\nsnippet: {osr['snippet']}\n---\n")
    print(f"\n{one_out['choices'][0]['message']['content']}\n---\n")
    for one_tok in one_out['choices'][0]['logprobs']['content']:
        print(f"{one_tok['token']}\t{one_tok['logprob']}\n")
    print('\n++++\n')

In [55]:
for viz_idx in rand_idx:
    display_output(search_res_gpt_35[viz_idx])

Query: Moritz Hardt (Max Planck Institute for Intelligent Systems, Tübingen)


Search Results:

title: Moritz Hardt
snippet: I'm a director at the Max Planck Institute for Intelligent Systems. Prior to joining the institute, I was Associate Professor for Electrical Engineering and ...
---

title: Moritz Hardt appointed a Director at the Max Planck Institute ...
snippet: Tübingen – Computer scientist Moritz Hardt, Ph.D., has accepted a call to the Max Planck Institute for Intelligent Systems. He joined the ...
---

title: Moritz Hardt
snippet: Education & Career History. Principal Researcher. Max-Planck-Institute for Intelligent Systems, Max-Planck Institute ... Select a topic or type what you need help ...
---

title: Moritz Hardt | EECS at UC Berkeley
snippet: Moritz Hardt is an Associate Professor in the Department of Electrical Engineering and Computer Sciences at the University of California, Berkeley.
---

title: Discovering discrimination in supervised learning
snippet: Biography