In [1]:
from datetime import datetime
import json
import os
import pickle
import re
import requests

In [2]:
from tqdm.notebook import tqdm

In [3]:
# The Cohere SDK is needed by every generation cell in this notebook, so fail
# fast with a clear message when it cannot be imported.
try:
  import cohere
except ImportError as import_error:
  # Only ImportError means "not installed"; a bare except would also relabel
  # unrelated failures (e.g. KeyboardInterrupt) with this message. The original
  # error is chained so the real cause stays visible in the traceback.
  raise RuntimeError("Cohere library is not installed") from import_error

In [4]:
# Both services need credentials: stop here if either key is unset or empty.
if not all(os.getenv(key_name) for key_name in ("YOU_API_KEY", "COHERE_API_KEY")):
  raise RuntimeError("You need to set both YOU_API_KEY and COHERE_API_KEY environment variables to proceed")

In [5]:
# Shared Cohere client used by all chat calls in this notebook; the API key is
# read from the COHERE_API_KEY environment variable rather than hard-coded.
cohere_client = cohere.Client(os.getenv("COHERE_API_KEY"))

## Read 20 queries from AllSouls and DaVinciDebate

In [6]:
# Load the AllSouls prompts: the file is JSONL, i.e. one JSON object per line.
all_souls_queries = []
with open('../evaluating-verifiability-in-generative-search-engines/allsouls/allsouls.jsonl') as allsouls_file:
    all_souls_queries.extend(json.loads(raw_line.strip()) for raw_line in allsouls_file)

In [7]:
# Discard prompts containing the standalone word "you" or "your" (any case),
# then keep only the last 10 of the remaining prompts.
re_you = re.compile(r"\byou\b", re.I)
re_your = re.compile(r"\byour\b", re.I)
all_souls_queries = [
    entry
    for entry in all_souls_queries
    if re_you.search(entry['prompt_text']) is None
    and re_your.search(entry['prompt_text']) is None
]

all_souls_queries = all_souls_queries[-10:]

In [8]:
# Reshape every AllSouls record into the common {query, id, source} layout;
# the id is the exam name and the prompt id separated by a single space.
all_souls_queries = [
    {
        "query": record['prompt_text'],
        "id": ' '.join((record['exam_name'], record['prompt_id'])),
        "source": "allsouls",
    }
    for record in all_souls_queries
]
all_souls_queries

[{'query': 'Should a political community be governed in the same way as a family?',
  'id': '2022 General Paper II 18',
  'source': 'allsouls'},
 {'query': 'What is the value in precision?',
  'id': '2022 General Paper II 19',
  'source': 'allsouls'},
 {'query': 'Does the ability to think presuppose linguistic competence?',
  'id': '2022 General Paper II 20a',
  'source': 'allsouls'},
 {'query': 'Does linguistic competence presuppose the ability to think?',
  'id': '2022 General Paper II 20b',
  'source': 'allsouls'},
 {'query': 'Ought everyone to pursue the most morally valuable career possible for them?',
  'id': '2022 General Paper II 21',
  'source': 'allsouls'},
 {'query': 'Is God gendered?',
  'id': '2022 General Paper II 23',
  'source': 'allsouls'},
 {'query': 'Should universities accept donations from oligarchs?',
  'id': '2022 General Paper II 24',
  'source': 'allsouls'},
 {'query': 'Should strip-searches be banned?',
  'id': '2022 General Paper II 25',
  'source': 'allsouls

In [9]:
# Load the DaVinci-debate prompts (JSONL) and tag each record with its
# zero-based line number as a string id plus a fixed source label.
debate_queries = []
with open('../evaluating-verifiability-in-generative-search-engines/davinci_debate/davinci-debate-1k.jsonl') as debate_file:
    for line_number, raw_line in enumerate(debate_file):
        parsed_query = json.loads(raw_line.strip())
        parsed_query.update(id=str(line_number), source="davinci_debate")
        debate_queries.append(parsed_query)

In [10]:
# Discard queries containing the standalone word "you" or "your" (any case),
# then keep only the last 10 of the remaining queries.
re_you = re.compile(r"\byou\b", re.I)
re_your = re.compile(r"\byour\b", re.I)
debate_queries = [
    entry
    for entry in debate_queries
    if re_you.search(entry['query']) is None
    and re_your.search(entry['query']) is None
]

debate_queries = debate_queries[-10:]

In [11]:
# Display the debate queries that survived filtering and subsampling.
debate_queries

[{'query': 'Should genetically modified foods be more tightly regulated?',
  'id': '990',
  'source': 'davinci_debate'},
 {'query': 'Should drugs be legalized?',
  'id': '991',
  'source': 'davinci_debate'},
 {'query': 'Should foreign corporate ownership of U.S. companies be restricted?',
  'id': '992',
  'source': 'davinci_debate'},
 {'query': 'Should the use of animals for medical research be banned?',
  'id': '993',
  'source': 'davinci_debate'},
 {'query': 'Should genetically modified foods be allowed in grocery stores?',
  'id': '994',
  'source': 'davinci_debate'},
 {'query': 'Should lawmakers be subject to term limits?',
  'id': '995',
  'source': 'davinci_debate'},
 {'query': 'Should countries practice universal basic income?',
  'id': '996',
  'source': 'davinci_debate'},
 {'query': 'Should people be able to grow their own food in metro cities?',
  'id': '997',
  'source': 'davinci_debate'},
 {'query': 'Should governments provide universal basic income?',
  'id': '998',
  'sou

In [12]:
# Combine both sources into one list (AllSouls first) and report its size.
all_queries = [*all_souls_queries, *debate_queries]
print(len(all_queries))

20


## Run Cohere search query generation (skip for now)

In [52]:
# # Trial
# first_query_sr_command = cohere_client.chat(
#     model='command',
#     message=all_queries[0]["query"],
#     search_queries_only=True
# )

In [55]:
# first_query_sr_command_r = cohere_client.chat(
#     model='command-r',
#     message=all_queries[0]["query"],
#     search_queries_only=True
# )

In [68]:
# first_query_sr_command

cohere.Chat {
	id: 711ac97b-6fc4-43ee-93ad-ca88258b371e
	response_id: 711ac97b-6fc4-43ee-93ad-ca88258b371e
	generation_id: 6ee025c4-5561-4a00-a684-e8e8484e4f84
	message: Should a political community be governed in the same way as a family?
	text: 
	conversation_id: None
	prompt: None
	chat_history: None
	preamble: None
	client: <cohere.client.Client object at 0x7f1203e1e160>
	token_count: {'response_tokens': 16, 'billed_tokens': 16}
	meta: {'api_version': {'version': '1'}, 'billed_units': {'output_tokens': 16}}
	is_search_required: True
	citations: None
	documents: None
	search_results: None
	search_queries: [{'text': 'Should a political community be governed in the same way as a family?', 'generation_id': '6ee025c4-5561-4a00-a684-e8e8484e4f84'}]
	tool_calls: None
	finish_reason: COMPLETE
}

In [69]:
# first_query_sr_command_r

cohere.Chat {
	id: bc05b8bb-3928-4dae-a871-71a9f5d58a02
	response_id: bc05b8bb-3928-4dae-a871-71a9f5d58a02
	generation_id: 19c095d0-0179-4903-b425-d2f7c9feb6d9
	message: Should a political community be governed in the same way as a family?
	text: 
	conversation_id: None
	prompt: None
	chat_history: None
	preamble: None
	client: <cohere.client.Client object at 0x7f1203e1e160>
	token_count: {'response_tokens': 12, 'billed_tokens': 12}
	meta: {'api_version': {'version': '1'}, 'billed_units': {'output_tokens': 12}}
	is_search_required: True
	citations: None
	documents: None
	search_results: None
	search_queries: [{'text': 'political community governance like family', 'generation_id': '19c095d0-0179-4903-b425-d2f7c9feb6d9'}, {'text': 'should countries be run like families', 'generation_id': '19c095d0-0179-4903-b425-d2f7c9feb6d9'}]
	tool_calls: None
	finish_reason: COMPLETE
}

In [72]:
# type(first_query_sr_command.meta)

dict

In [None]:
# for one_query in all_queries:
#     one_ex_search_queries = cohere_client.chat(
#         model='command-r',
#         message=one_query["query"],
#         search_queries_only=True
#         )
#     one_query["separate_search_queries"] = one_ex_search_queries.search_queries
#     one_query["separate_search_queries_meta"] = one_ex_search_queries.meta

## Run You.com search

In [77]:
def get_ai_snippets_for_query(query, timeout=30):
    """Search the You.com index for ``query`` and return the decoded JSON body.

    Args:
        query: Free-text search string. It is sent via ``params`` so that
            characters such as ``&``, ``#`` or ``?`` are URL-encoded instead
            of being spliced raw into the request URL.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Defaults to 30.

    Returns:
        The parsed JSON response. In the trial output of this notebook it is
        a dict with a top-level ``"hits"`` list whose items carry
        ``"title"``, ``"description"`` and ``"snippets"``.

    Raises:
        KeyError: if the ``YOU_API_KEY`` environment variable is not set.
        requests.HTTPError: if the API responds with a 4xx/5xx status.
    """
    headers = {"X-API-Key": os.environ["YOU_API_KEY"]}
    response = requests.get(
        "https://api.ydc-index.io/search",
        params={"query": query},
        headers=headers,
        timeout=timeout,
    )
    # Fail here on auth/quota/server errors instead of returning an error
    # payload that only blows up later when the caller indexes "hits".
    response.raise_for_status()
    return response.json()

#### Trial

In [74]:
# Trial: run the You.com search for the first query only, to inspect the
# response format before looping over all queries.
first_query_sr_you = get_ai_snippets_for_query(all_queries[0]["query"])

In [76]:
# Pretty-print the raw trial response to see its structure.
print(json.dumps(first_query_sr_you, indent=2))

{
  "hits": [
    {
      "description": "It has been universally remarked that in our time the several members of a family stand upon an entirely new footing toward each other, that the distance which formerly separated a father from his son",
      "snippets": [
        "Thus the parent has not only a natural right, but he acquires a political right to command them: he is the author and the support of his family, but he is also its constituted ruler. In democracies, where the government picks out every individual singly from the mass to make him subservient to the general laws of the community, no such intermediate person is required: a father is there, in the eye of the law, only a member of the community, older and richer than his sons.",
        "Democracy loosens social ties, but it draws the ties of nature more tight; it brings kindred more closely together, while it places the various members of the community more widely apart. ... From Democracy in America. At the age of twent

In [120]:
# Trial: flatten the hits into title/snippet records. Each hit contributes its
# description first, followed by one record per snippet, all under the hit's title.
list_of_search_results = []
for hit in first_query_sr_you["hits"]:
    hit_title = hit["title"]
    list_of_search_results.append({"title": hit_title, "snippet": hit["description"]})
    for passage in hit["snippets"]:
        list_of_search_results.append({"title": hit_title, "snippet": passage})
print(list_of_search_results)

# Size check on the text built from the first 20 records: count of
# word runs / punctuation runs, then record count and character count.
rendered_records = [f"title: {doc['title']}\ntext: {doc['snippet']}" for doc in list_of_search_results[:20]]
full_text_of_search_results = "\n".join(rendered_records)
word_and_punct_runs = re.findall(r'\w+|[^\s\w]+', full_text_of_search_results)
print(len(word_and_punct_runs))
print(len(list_of_search_results), len(full_text_of_search_results))

[{'title': 'Family Politics | Lapham’s Quarterly', 'snippet': 'It has been universally remarked that in our time the several members of a family stand upon an entirely new footing toward each other, that the distance which formerly separated a father from his son'}, {'title': 'Family Politics | Lapham’s Quarterly', 'snippet': 'Thus the parent has not only a natural right, but he acquires a political right to command them: he is the author and the support of his family, but he is also its constituted ruler. In democracies, where the government picks out every individual singly from the mass to make him subservient to the general laws of the community, no such intermediate person is required: a father is there, in the eye of the law, only a member of the community, older and richer than his sons.'}, {'title': 'Family Politics | Lapham’s Quarterly', 'snippet': 'Democracy loosens social ties, but it draws the ties of nature more tight; it brings kindred more closely together, while it plac

#### End Trial

In [88]:
# Search You.com for every query and attach two things to the query dict:
#   raw_search_results - the full response, stamped with the retrieval time
#   search_results     - flattened title/snippet records (description first,
#                        then each snippet of the hit)
for one_query in tqdm(all_queries):
    you_response = get_ai_snippets_for_query(one_query["query"])
    you_response["datetime"] = datetime.now().isoformat()
    one_query["raw_search_results"] = you_response

    flattened_records = []
    for hit in you_response["hits"]:
        hit_title = hit["title"]
        flattened_records.append({"title": hit_title, "snippet": hit["description"]})
        for passage in hit["snippets"]:
            flattened_records.append({"title": hit_title, "snippet": passage})
    one_query["search_results"] = flattened_records

  0%|          | 0/20 [00:00<?, ?it/s]

In [93]:
# Report how many flattened title/snippet records each query received.
for one_query in all_queries:
    snippet_total = len(one_query["search_results"])
    print('n_snippets:', snippet_total)
    print('---')

n_snippets: 80
---
n_snippets: 79
---
n_snippets: 87
---
n_snippets: 78
---
n_snippets: 87
---
n_snippets: 97
---
n_snippets: 60
---
n_snippets: 77
---
n_snippets: 86
---
n_snippets: 65
---
n_snippets: 90
---
n_snippets: 82
---
n_snippets: 94
---
n_snippets: 78
---
n_snippets: 82
---
n_snippets: 86
---
n_snippets: 73
---
n_snippets: 88
---
n_snippets: 77
---
n_snippets: 70
---


In [100]:
# Persist the queries together with their raw and flattened search results.
search_results_path = "../outputs/cohere_you_20nelsonqueries/search_results.json"
with open(search_results_path, 'w') as results_file:
    json.dump(all_queries, results_file, indent=2)

## Run Cohere Command-R RAG

#### Trial

In [101]:
# Trial: answer the first query with Command-R, passing its first 20
# title/snippet records as grounding documents.
first_query = all_queries[0]
first_query_res_command_r = cohere_client.chat(
    model="command-r",
    message=first_query["query"],
    documents=first_query["search_results"][:20],
)

In [102]:
# Display the full trial response object (text, citations, documents, ...).
first_query_res_command_r

cohere.Chat {
	id: 907c342c-72d1-4427-9859-aa0e25a638fa
	response_id: 907c342c-72d1-4427-9859-aa0e25a638fa
	generation_id: 6f35620e-6441-4337-a46d-ec2b6fb53c17
	message: Should a political community be governed in the same way as a family?
	text: The idea that a political community should be governed in the same way as a family is a theory of political philosophy called 'Family as a model for the state'. This theory explains the structure of certain kinds of state in terms of the structure of the family. 

Aristotle, in his Politics, was a strong advocate for this approach, stating that "the government of a household is a monarchy, since every house is governed by a single ruler." He also said that husbands exercise a republican government over their wives and monarchical government over their children. Aristotle's view implies that the state is a reflection of the patriarchal family, much like a king is reflected by his subjects. 

This theory has been largely employed by monarchists 

In [108]:
# Number of entries in the response's `documents` field, out of the 20 that
# were supplied — presumably the ones the model actually drew on; TODO confirm.
len(first_query_res_command_r.documents)

7

In [111]:
import time

In [136]:
# Generate one grounded Command-R answer per query. Two parallel lists are built:
#   all_outputs_raw - query fields plus the repr of the whole Cohere response
#   all_outputs     - query fields plus the response parts used for analysis
# Both drop "raw_search_results" from their copy of the query dict.
all_outputs = []
all_outputs_raw = []
for one_query in tqdm(all_queries):
    call_started = time.time()
    one_generation = cohere_client.chat(
        model="command-r",
        message=one_query["query"],
        documents=one_query["search_results"][:20],
        k=2,
    )

    # Shallow copies, so removing a key does not touch the entry in all_queries.
    raw_record = one_query.copy()
    record = one_query.copy()

    raw_record.pop("raw_search_results")
    raw_record['raw_cohere_gen'] = repr(one_generation)
    all_outputs_raw.append(raw_record)

    record.pop("raw_search_results")
    record.update(
        citations=one_generation.citations,
        used_documents=one_generation.documents,
        response=one_generation.text,
        meta=one_generation.meta,
    )
    all_outputs.append(record)

    # Rate limit: pad each iteration so it lasts at least 12 seconds in total.
    elapsed = time.time() - call_started
    time.sleep(max(0.0, 12 - elapsed))

  0%|          | 0/20 [00:00<?, ?it/s]

In [138]:
# Print each query with its generated response and the number of documents
# stored under "used_documents".
for generated in all_outputs:
    print(generated["query"])
    print(generated["response"])
    used_document_total = len(generated["used_documents"])
    print('n_snippets:', used_document_total)
    print('---')

Should a political community be governed in the same way as a family?
The idea that a political community should be governed in the same way as a family is a theory of political philosophy called 'Family as a model for the state'. This theory explains the structure of certain kinds of state in terms of the structure of the family. 

Aristotle, in his Politics, was a strong advocate for this approach, stating that "the government of a household is a monarchy, since every house is governed by a single ruler." He also said that husbands exercise a republican government over their wives and monarchical government over their children. Aristotle's view implies that the state is a reflection of the patriarchal family, much like a king is reflected by his subjects. 

This theory has been largely employed by monarchists and aristocratics to mirror the hierarchical dynamics of the family unit. In aristocratic nations, the social institutions only recognise the father as the authority figure; thu

In [139]:
# for one_query in all_outputs_raw:
#     one_query['raw_cohere_gen'] = one_query['raw_cohere_gen'].__repr__()

# Pickle the raw records (query fields plus the repr of each Cohere response).
raw_outputs_path = '../outputs/cohere_you_20nelsonqueries/cohere_topk2_you_20snippet_raw.pkl'
with open(raw_outputs_path, 'wb') as raw_outputs_file:
    pickle.dump(all_outputs_raw, raw_outputs_file)

In [141]:
# Save the structured records (response, citations, used documents, meta) as JSON.
outputs_path = '../outputs/cohere_you_20nelsonqueries/cohere_topk2_you_20snippet.json'
with open(outputs_path, 'w') as outputs_file:
    json.dump(all_outputs, outputs_file, indent=2)

#### Analyze outputs

In [13]:
# Reload the structured records from disk so the analysis below can run
# without repeating the generation step.
outputs_path = '../outputs/cohere_you_20nelsonqueries/cohere_topk2_you_20snippet.json'
with open(outputs_path, 'r') as outputs_file:
    all_outputs = json.loads(outputs_file.read())

In [31]:
# Build a plain-text report, one section per query: the response, its
# citations, the documents stored under "used_documents", and the first 20
# title/snippet records that were passed to the model.
out_str_list = []
for one_query in all_outputs:
    out_str_list.append(f"Query: {one_query['query']}\n")
    out_str_list.append(f"Response:\n{one_query['response']}\n")

    # A Cohere response can carry `citations: None` / `documents: None`, which
    # round-trips through JSON as null. Treat that as "nothing to list" rather
    # than crashing on iteration.
    citations = one_query["citations"] or []
    used_documents = one_query["used_documents"] or []

    out_str_list.append('\nCitations:')
    out_str_list.extend([f"text: {one_cit['text']}\tdocs:{one_cit['document_ids']}" for one_cit in citations])

    out_str_list.append('\nSnippets:')
    out_str_list.extend([f"doc_id: {one_doc['id']}\ttitle:{one_doc['title']}\nsnippet:{one_doc['snippet']}\n" for one_doc in used_documents])

    out_str_list.append('\nAll Snippets:')
    out_str_list.extend([f"title:{one_doc['title']}\nsnippet:{one_doc['snippet']}\n" for one_doc in one_query["search_results"][:20]])
    out_str_list.append('---\n')

In [32]:
# Write the report lines to a text file, joined by newlines.
viz_path = '../outputs/cohere_you_20nelsonqueries/cohere_topk2_you_20snippet_viz.txt'
with open(viz_path, 'w') as viz_file:
    viz_text = '\n'.join(out_str_list)
    viz_file.write(viz_text)