In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os, time, json
import pandas as pd
from openai import OpenAI
from anthropic import Anthropic
from tqdm.auto import tqdm
import spacy

import sys
sys.path.append("../../")
import os

import logging
from src.utils import logging_utils
from src.utils import env_utils
from src import functional

logger = logging.getLogger(__name__)

logging.basicConfig(
    level=logging.DEBUG,
    format=logging_utils.DEFAULT_FORMAT,
    datefmt=logging_utils.DEFAULT_DATEFMT,
    stream=sys.stdout,
)

import torch
import transformers

logger.info(f"{torch.__version__=}, {torch.version.cuda=}")
logger.info(f"{torch.cuda.is_available()=}, {torch.cuda.device_count()=}, {torch.cuda.get_device_name()=}")
logger.info(f"{transformers.__version__=}")

  from .autonotebook import tqdm as notebook_tqdm


2025-01-29 12:29:35 __main__ INFO     torch.__version__='2.5.0+cu124', torch.version.cuda='12.4'
2025-01-29 12:29:35 __main__ INFO     torch.cuda.is_available()=True, torch.cuda.device_count()=1, torch.cuda.get_device_name()='NVIDIA RTX A6000'
2025-01-29 12:29:35 __main__ INFO     transformers.__version__='4.48.1'


In [78]:
# movie,actor,character
prompt = """
Give me a list of famous writers, as many as you can think of (at least 100). The list should be in array format.

[
    "William Shakespeare",
    "Charles Dickens",
    "George R. R. Martin",
]
"""

In [79]:
from src.functional import ask_claude

response = ask_claude(prompt)
print(response)

2025-01-29 15:27:13 httpx DEBUG    load_ssl_context verify=True cert=None trust_env=True http2=False
2025-01-29 15:27:13 httpx DEBUG    load_verify_locations cafile='/home/local_arnab/miniconda3/envs/retrieval/lib/python3.11/site-packages/certifi/cacert.pem'
2025-01-29 15:27:13 anthropic._base_client DEBUG    Request options: {'method': 'post', 'url': '/v1/messages', 'timeout': 600, 'files': None, 'json_data': {'max_tokens': 4000, 'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': '\nGive me a list of famous writers, as many as you can think of (at least 100). The list should be in array format.\n\n[\n    "William Shakespeare",\n    "Charles Dickens",\n    "George R. R. Martin",\n]\n'}]}], 'model': 'claude-3-5-sonnet-20240620', 'system': 'You are a helpful assistant.', 'temperature': 0}}
2025-01-29 15:27:13 anthropic._base_client DEBUG    Sending HTTP Request: POST https://api.anthropic.com/v1/messages
2025-01-29 15:27:13 httpcore.connection DEBUG    connect_tcp.started

In [80]:
claude_list = [
    "William Shakespeare",
    "Charles Dickens",
    "George R. R. Martin",
    "Jane Austen",
    "Ernest Hemingway",
    "F. Scott Fitzgerald",
    "Virginia Woolf",
    "Mark Twain",
    "Leo Tolstoy",
    "Fyodor Dostoevsky",
    "Gabriel García Márquez",
    "J.K. Rowling",
    "Stephen King",
    "George Orwell",
    "J.R.R. Tolkien",
    "Agatha Christie",
    "Edgar Allan Poe",
    "Oscar Wilde",
    "Emily Brontë",
    "Charlotte Brontë",
    "James Joyce",
    "Franz Kafka",
    "Victor Hugo",
    "Homer",
    "Dante Alighieri",
    "Miguel de Cervantes",
    "William Faulkner",
    "John Steinbeck",
    "Harper Lee",
    "Toni Morrison",
    "Maya Angelou",
    "Sylvia Plath",
    "Emily Dickinson",
    "Walt Whitman",
    "T.S. Eliot",
    "Charles Bukowski",
    "Arthur Conan Doyle",
    "H.G. Wells",
    "Jules Verne",
    "Ray Bradbury",
    "Isaac Asimov",
    "Philip K. Dick",
    "George Eliot",
    "Virginia Woolf",
    "Aldous Huxley",
    "Kurt Vonnegut",
    "Jack Kerouac",
    "Albert Camus",
    "Jean-Paul Sartre",
    "Simone de Beauvoir",
    "Marcel Proust",
    "Gustave Flaubert",
    "Alexandre Dumas",
    "Honoré de Balzac",
    "Émile Zola",
    "Anton Chekhov",
    "Nikolai Gogol",
    "Ivan Turgenev",
    "Boris Pasternak",
    "Vladimir Nabokov",
    "Haruki Murakami",
    "Yukio Mishima",
    "Kazuo Ishiguro",
    "Salman Rushdie",
    "Chinua Achebe",
    "Wole Soyinka",
    "Ngũgĩ wa Thiong'o",
    "Chimamanda Ngozi Adichie",
    "Jorge Luis Borges",
    "Pablo Neruda",
    "Octavio Paz",
    "Isabel Allende",
    "Mario Vargas Llosa",
    "Carlos Fuentes",
    "Julio Cortázar",
    "Roberto Bolaño",
    "Italo Calvino",
    "Umberto Eco",
    "Elena Ferrante",
    "Orhan Pamuk",
    "Naguib Mahfouz",
    "Khalil Gibran",
    "Rabindranath Tagore",
    "Arundhati Roy",
    "Vikram Seth",
    "Jhumpa Lahiri",
    "Khaled Hosseini",
    "Amy Tan",
    "Margaret Atwood",
    "Alice Munro",
    "Michael Ondaatje",
    "Douglas Adams",
    "Terry Pratchett",
    "Neil Gaiman",
    "Ursula K. Le Guin",
    "Arthur C. Clarke",
    "Roald Dahl",
    "C.S. Lewis",
    "Philip Pullman",
    "Suzanne Collins",
    "John Green",
    "Dan Brown",
    "Tom Clancy",
    "John Grisham",
    "Michael Crichton",
    "Ken Follett",
    "Ian McEwan",
    "Zadie Smith",
    "Hilary Mantel",
    "Donna Tartt",
    "David Foster Wallace",
    "Jonathan Franzen",
    "Cormac McCarthy",
    "Don DeLillo",
    "Thomas Pynchon",
    "Junot Díaz",
    "Colson Whitehead",
    "Ta-Nehisi Coates",
    "Roxane Gay",
    "Celeste Ng",
    "Sally Rooney",
    "Elena Ferrante",
    "Karl Ove Knausgård",
    "Roberto Bolaño",
    "Olga Tokarczuk",
    "Hanya Yanagihara",
    "Mohsin Hamid",
    "Yaa Gyasi",
    "Ocean Vuong",
    "Viet Thanh Nguyen",
    "Min Jin Lee",
    "Marlon James",
    "Bernardine Evaristo",
    "George Saunders",
    "Marilynne Robinson",
    "Louise Erdrich",
    "Sherman Alexie",
    "Junot Díaz",
    "Jesmyn Ward",
    "Colson Whitehead",
    "Ta-Nehisi Coates"
]

2025-01-29 15:27:43 httpcore.connection DEBUG    close.started
2025-01-29 15:27:43 httpcore.connection DEBUG    close.complete


In [81]:
from src.functional import ask_gpt4o

response = ask_gpt4o(prompt)
print(response)

2025-01-29 15:27:44 httpx DEBUG    load_ssl_context verify=True cert=None trust_env=True http2=False
2025-01-29 15:27:44 httpx DEBUG    load_verify_locations cafile='/home/local_arnab/miniconda3/envs/retrieval/lib/python3.11/site-packages/certifi/cacert.pem'
2025-01-29 15:27:44 openai._base_client DEBUG    Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': '\nGive me a list of famous writers, as many as you can think of (at least 100). The list should be in array format.\n\n[\n    "William Shakespeare",\n    "Charles Dickens",\n    "George R. R. Martin",\n]\n'}], 'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0}}
2025-01-29 15:27:44 openai._base_client DEBUG    Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
2025-01-29 15:27:44 httpcore.connection DEBUG    connect_tcp.started host='api.openai.com' port=443 local_a

In [82]:
gpt_list = [
    "William Shakespeare",
    "Charles Dickens",
    "George R. R. Martin",
    "Jane Austen",
    "Mark Twain",
    "J.K. Rowling",
    "Ernest Hemingway",
    "F. Scott Fitzgerald",
    "Leo Tolstoy",
    "Fyodor Dostoevsky",
    "Gabriel Garcia Marquez",
    "J.R.R. Tolkien",
    "Agatha Christie",
    "Virginia Woolf",
    "Homer",
    "James Joyce",
    "Franz Kafka",
    "Herman Melville",
    "Emily Dickinson",
    "Edgar Allan Poe",
    "Victor Hugo",
    "Harper Lee",
    "George Orwell",
    "Aldous Huxley",
    "John Steinbeck",
    "Kurt Vonnegut",
    "Toni Morrison",
    "Margaret Atwood",
    "Stephen King",
    "Isaac Asimov",
    "Arthur Conan Doyle",
    "Oscar Wilde",
    "Marcel Proust",
    "Miguel de Cervantes",
    "Dante Alighieri",
    "H.G. Wells",
    "J.D. Salinger",
    "Ralph Ellison",
    "Chinua Achebe",
    "Gabriel Garcia Marquez",
    "Leo Tolstoy",
    "Homer",
    "Herman Melville",
    "Emily Brontë",
    "Charlotte Brontë",
    "Mary Shelley",
    "George Eliot",
    "Louisa May Alcott",
    "Nathaniel Hawthorne",
    "Henry James",
    "Joseph Conrad",
    "Thomas Hardy",
    "Robert Louis Stevenson",
    "Jack London",
    "Jules Verne",
    "Ray Bradbury",
    "Philip K. Dick",
    "Ursula K. Le Guin",
    "C.S. Lewis",
    "Roald Dahl",
    "Lewis Carroll",
    "Hans Christian Andersen",
    "J.M. Barrie",
    "L. Frank Baum",
    "Beatrix Potter",
    "Dr. Seuss",
    "E.B. White",
    "Antoine de Saint-Exupéry",
    "Jorge Luis Borges",
    "Italo Calvino",
    "Umberto Eco",
    "Salman Rushdie",
    "Kazuo Ishiguro",
    "Haruki Murakami",
    "Orhan Pamuk",
    "Gabriel Garcia Marquez",
    "Isabel Allende",
    "Paulo Coelho",
    "Carlos Ruiz Zafón",
    "Khaled Hosseini",
    "Jhumpa Lahiri",
    "Arundhati Roy",
    "Vikram Seth",
    "R.K. Narayan",
    "Rabindranath Tagore",
    "V.S. Naipaul",
    "Chimamanda Ngozi Adichie",
    "Zadie Smith",
    "Ian McEwan",
    "Julian Barnes",
    "Hilary Mantel",
    "Margaret Atwood",
    "Alice Munro",
    "Doris Lessing",
    "Nadine Gordimer",
    "Gabriel Garcia Marquez",
    "Mario Vargas Llosa",
    "Julio Cortázar",
    "Roberto Bolaño",
    "Gabriel Garcia Marquez",
    "Gabriel Garcia Marquez"
]

In [83]:
import json
combined_list = list(set(claude_list + gpt_list))
print(json.dumps(combined_list, indent=4))

[
    "Arundhati Roy",
    "Ursula K. Le Guin",
    "Jack London",
    "Vladimir Nabokov",
    "R.K. Narayan",
    "Dan Brown",
    "Philip K. Dick",
    "Margaret Atwood",
    "Lewis Carroll",
    "Gabriel Garc\u00eda M\u00e1rquez",
    "Donna Tartt",
    "Celeste Ng",
    "Sherman Alexie",
    "Emily Dickinson",
    "Italo Calvino",
    "Kazuo Ishiguro",
    "Julian Barnes",
    "Boris Pasternak",
    "James Joyce",
    "Chimamanda Ngozi Adichie",
    "Jorge Luis Borges",
    "Ian McEwan",
    "Anton Chekhov",
    "Jhumpa Lahiri",
    "Ralph Ellison",
    "Emily Bront\u00eb",
    "Aldous Huxley",
    "F. Scott Fitzgerald",
    "Miguel de Cervantes",
    "Thomas Hardy",
    "Terry Pratchett",
    "Louise Erdrich",
    "Jack Kerouac",
    "Arthur Conan Doyle",
    "Hilary Mantel",
    "Gustave Flaubert",
    "Jesmyn Ward",
    "E.B. White",
    "Ken Follett",
    "Viet Thanh Nguyen",
    "Zadie Smith",
    "V.S. Naipaul",
    "Louisa May Alcott",
    "Bernardine Evaristo",
    "H.G. We