In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os, time, json
import pandas as pd
from openai import OpenAI
from anthropic import Anthropic
from tqdm.auto import tqdm
import spacy

import sys
sys.path.append("../../")
import os

import logging
from src.utils import logging_utils
from src.utils import env_utils
from src import functional

logger = logging.getLogger(__name__)

logging.basicConfig(
    level=logging.DEBUG,
    format=logging_utils.DEFAULT_FORMAT,
    datefmt=logging_utils.DEFAULT_DATEFMT,
    stream=sys.stdout,
)

import torch
import transformers

logger.info(f"{torch.__version__=}, {torch.version.cuda=}")
logger.info(f"{torch.cuda.is_available()=}, {torch.cuda.device_count()=}, {torch.cuda.get_device_name()=}")
logger.info(f"{transformers.__version__=}")

  from .autonotebook import tqdm as notebook_tqdm


2025-01-30 17:20:33 __main__ INFO     torch.__version__='2.5.0+cu124', torch.version.cuda='12.4'
2025-01-30 17:20:33 __main__ INFO     torch.cuda.is_available()=True, torch.cuda.device_count()=1, torch.cuda.get_device_name()='NVIDIA RTX A6000'
2025-01-30 17:20:33 __main__ INFO     transformers.__version__='4.48.1'


In [3]:
# movie,actor,character
prompt = """
Give me a list of baseball players, as many as you can think of (at least 100). The list should be in array format.

[
    "Babe Ruth",
    "Hank Aaron",
    "Willie Mays",
]
"""

In [4]:
from src.functional import ask_claude

response = ask_claude(prompt)
print(response)

2025-01-30 17:20:35 httpx DEBUG    load_ssl_context verify=True cert=None trust_env=True http2=False
2025-01-30 17:20:35 httpx DEBUG    load_verify_locations cafile='/home/local_arnab/miniconda3/envs/retrieval/lib/python3.11/site-packages/certifi/cacert.pem'


2025-01-30 17:20:35 anthropic._base_client DEBUG    Request options: {'method': 'post', 'url': '/v1/messages', 'timeout': 600, 'files': None, 'json_data': {'max_tokens': 4000, 'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': '\nGive me a list of baseball players, as many as you can think of (at least 100). The list should be in array format.\n\n[\n    "Babe Ruth",\n    "Hank Aaron",\n    "Willie Mays",\n]\n'}]}], 'model': 'claude-3-5-sonnet-20240620', 'system': 'You are a helpful assistant.', 'temperature': 0}}
2025-01-30 17:20:35 anthropic._base_client DEBUG    Sending HTTP Request: POST https://api.anthropic.com/v1/messages
2025-01-30 17:20:35 httpcore.connection DEBUG    connect_tcp.started host='api.anthropic.com' port=443 local_address=None timeout=600 socket_options=None
2025-01-30 17:20:35 httpcore.connection DEBUG    connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x7f4e34134110>
2025-01-30 17:20:35 httpcore.connection DEBUG    

In [5]:
claude_list = [
    "Babe Ruth",
    "Hank Aaron",
    "Willie Mays",
    "Ted Williams",
    "Barry Bonds",
    "Ty Cobb",
    "Mickey Mantle",
    "Stan Musial",
    "Lou Gehrig",
    "Joe DiMaggio",
    "Jackie Robinson",
    "Nolan Ryan",
    "Cy Young",
    "Walter Johnson",
    "Honus Wagner",
    "Ken Griffey Jr.",
    "Pete Rose",
    "Greg Maddux",
    "Randy Johnson",
    "Roger Clemens",
    "Mike Schmidt",
    "Cal Ripken Jr.",
    "Derek Jeter",
    "Rickey Henderson",
    "Yogi Berra",
    "Roberto Clemente",
    "Sandy Koufax",
    "Reggie Jackson",
    "Frank Robinson",
    "Ernie Banks",
    "Tom Seaver",
    "Bob Gibson",
    "Johnny Bench",
    "Mike Trout",
    "Albert Pujols",
    "Ichiro Suzuki",
    "Mariano Rivera",
    "Pedro Martinez",
    "Chipper Jones",
    "Tony Gwynn",
    "Rod Carew",
    "Wade Boggs",
    "George Brett",
    "Carl Yastrzemski",
    "Frank Thomas",
    "Jim Thome",
    "Eddie Murray",
    "Harmon Killebrew",
    "Vladimir Guerrero",
    "Manny Ramirez",
    "David Ortiz",
    "Alex Rodriguez",
    "Ken Griffey Sr.",
    "Ozzie Smith",
    "Brooks Robinson",
    "Robin Yount",
    "Paul Molitor",
    "Eddie Mathews",
    "Mel Ott",
    "Al Kaline",
    "Willie McCovey",
    "Duke Snider",
    "Sammy Sosa",
    "Mark McGwire",
    "Jeff Bagwell",
    "Craig Biggio",
    "Ryne Sandberg",
    "Andre Dawson",
    "Carlton Fisk",
    "Gary Carter",
    "Mike Piazza",
    "Ivan Rodriguez",
    "Roy Campanella",
    "Yogi Berra",
    "Bill Dickey",
    "Whitey Ford",
    "Don Drysdale",
    "Juan Marichal",
    "Warren Spahn",
    "Christy Mathewson",
    "Lefty Grove",
    "Steve Carlton",
    "Bob Feller",
    "Satchel Paige",
    "Dizzy Dean",
    "Rollie Fingers",
    "Dennis Eckersley",
    "Trevor Hoffman",
    "Lee Smith",
    "Goose Gossage",
    "Bruce Sutter",
    "Hoyt Wilhelm",
    "Jim Palmer",
    "Catfish Hunter",
    "Gaylord Perry",
    "Phil Niekro",
    "Don Sutton",
    "Bert Blyleven",
    "Ferguson Jenkins",
    "Jack Morris",
    "John Smoltz",
    "Curt Schilling",
    "Roy Halladay",
    "Clayton Kershaw",
    "Max Scherzer",
    "Justin Verlander",
    "Zack Greinke",
    "Bryce Harper",
    "Mookie Betts",
    "Aaron Judge",
    "Shohei Ohtani",
    "Fernando Tatis Jr.",
    "Ronald Acuña Jr.",
    "Juan Soto",
    "Vladimir Guerrero Jr.",
    "Freddie Freeman",
    "Nolan Arenado",
    "Manny Machado",
    "Jose Altuve",
    "Buster Posey",
    "Yadier Molina",
    "Joey Votto",
    "Miguel Cabrera",
    "Adrián Beltré",
    "Ichiro Suzuki",
    "David Wright",
    "Robinson Canó",
    "Dustin Pedroia",
    "Chase Utley",
    "Jimmy Rollins",
    "Ryan Howard",
    "Prince Fielder",
    "Joe Mauer",
    "Todd Helton",
    "Larry Walker",
    "Tim Raines",
    "Kenny Lofton",
    "Jim Edmonds",
    "Andruw Jones",
    "Bernie Williams",
    "Andy Pettitte",
    "CC Sabathia",
    "Felix Hernandez",
    "Cole Hamels",
    "Madison Bumgarner",
    "Chris Sale",
    "Jacob deGrom",
    "Gerrit Cole",
    "Stephen Strasburg",
    "David Price",
    "Corey Kluber",
    "Dallas Keuchel",
    "Jake Arrieta",
    "Jon Lester",
    "Zack Wheeler",
    "Yu Darvish",
    "Aroldis Chapman",
    "Craig Kimbrel",
    "Kenley Jansen",
    "Andrew Miller",
    "Dellin Betances",
    "Wade Davis",
    "Greg Holland",
    "Francisco Lindor",
    "Carlos Correa",
    "Corey Seager",
    "Xander Bogaerts",
    "Trea Turner",
    "Bo Bichette",
    "Wander Franco",
    "Cody Bellinger",
    "Christian Yelich",
    "Giancarlo Stanton",
    "J.D. Martinez",
    "Nelson Cruz",
    "Josh Donaldson",
    "Kris Bryant",
    "Anthony Rizzo",
    "Paul Goldschmidt",
    "Matt Olson",
    "Pete Alonso",
    "Yordan Alvarez",
    "Kyle Tucker",
    "Luis Robert",
    "Eloy Jiménez",
    "Randy Arozarena",
    "Julio Rodríguez",
    "Adley Rutschman",
    "Bobby Witt Jr.",
    "Corbin Carroll",
    "Gunnar Henderson"
]

2025-01-30 17:24:39 httpcore.connection DEBUG    close.started
2025-01-30 17:24:39 httpcore.connection DEBUG    close.complete


In [6]:
from src.functional import ask_gpt4o

response = ask_gpt4o(prompt)
print(response)

2025-01-30 17:24:40 httpx DEBUG    load_ssl_context verify=True cert=None trust_env=True http2=False
2025-01-30 17:24:40 httpx DEBUG    load_verify_locations cafile='/home/local_arnab/miniconda3/envs/retrieval/lib/python3.11/site-packages/certifi/cacert.pem'
2025-01-30 17:24:40 openai._base_client DEBUG    Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': '\nGive me a list of baseball players, as many as you can think of (at least 100). The list should be in array format.\n\n[\n    "Babe Ruth",\n    "Hank Aaron",\n    "Willie Mays",\n]\n'}], 'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0}}
2025-01-30 17:24:40 openai._base_client DEBUG    Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
2025-01-30 17:24:40 httpcore.connection DEBUG    connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=5

In [7]:
gpt_list = [
    "Babe Ruth",
    "Hank Aaron",
    "Willie Mays",
    "Ty Cobb",
    "Lou Gehrig",
    "Ted Williams",
    "Mickey Mantle",
    "Joe DiMaggio",
    "Jackie Robinson",
    "Stan Musial",
    "Roger Clemens",
    "Barry Bonds",
    "Derek Jeter",
    "Pete Rose",
    "Nolan Ryan",
    "Cal Ripken Jr.",
    "Ken Griffey Jr.",
    "Randy Johnson",
    "Greg Maddux",
    "Tom Seaver",
    "Honus Wagner",
    "Cy Young",
    "Walter Johnson",
    "Christy Mathewson",
    "Sandy Koufax",
    "Bob Gibson",
    "Roberto Clemente",
    "Frank Robinson",
    "Mike Schmidt",
    "Ernie Banks",
    "Yogi Berra",
    "Johnny Bench",
    "Carl Yastrzemski",
    "Reggie Jackson",
    "George Brett",
    "Tony Gwynn",
    "Ichiro Suzuki",
    "Albert Pujols",
    "Alex Rodriguez",
    "Mariano Rivera",
    "Pedro Martinez",
    "Steve Carlton",
    "Wade Boggs",
    "Ozzie Smith",
    "Eddie Murray",
    "Chipper Jones",
    "Rickey Henderson",
    "Harmon Killebrew",
    "Al Kaline",
    "Brooks Robinson",
    "Jim Thome",
    "Paul Molitor",
    "Robin Yount",
    "Joe Morgan",
    "Ryne Sandberg",
    "Craig Biggio",
    "Jeff Bagwell",
    "Frank Thomas",
    "Vladimir Guerrero",
    "Roy Halladay",
    "Mike Piazza",
    "Gary Carter",
    "Carlton Fisk",
    "Duke Snider",
    "Hank Greenberg",
    "Ralph Kiner",
    "Eddie Mathews",
    "Mel Ott",
    "Joe Medwick",
    "Lou Brock",
    "Billy Williams",
    "Willie McCovey",
    "Orlando Cepeda",
    "Tony Perez",
    "Dave Winfield",
    "Andre Dawson",
    "Tim Raines",
    "Larry Walker",
    "Jim Rice",
    "Don Sutton",
    "Phil Niekro",
    "Gaylord Perry",
    "Bert Blyleven",
    "Fergie Jenkins",
    "Juan Marichal",
    "Whitey Ford",
    "Catfish Hunter",
    "Rollie Fingers",
    "Bruce Sutter",
    "Trevor Hoffman",
    "Goose Gossage",
    "Lee Smith",
    "Dennis Eckersley",
    "Hoyt Wilhelm",
    "Mordecai Brown",
    "Lefty Grove",
    "Eddie Plank",
    "Red Ruffing",
    "Dizzy Dean",
    "Bob Feller",
    "Don Drysdale",
    "Jim Palmer",
    "Tom Glavine",
    "John Smoltz",
    "Curt Schilling"
]

In [8]:
import json
combined_list = list(set(claude_list + gpt_list))
print(json.dumps(combined_list, indent=4))

[
    "Chipper Jones",
    "Pete Alonso",
    "Frank Robinson",
    "George Brett",
    "Cole Hamels",
    "Wander Franco",
    "Gaylord Perry",
    "Wade Davis",
    "Bob Gibson",
    "Rollie Fingers",
    "Carlos Correa",
    "Paul Goldschmidt",
    "Madison Bumgarner",
    "Kris Bryant",
    "David Wright",
    "Corey Seager",
    "Dizzy Dean",
    "Walter Johnson",
    "Carlton Fisk",
    "Freddie Freeman",
    "Craig Biggio",
    "Yogi Berra",
    "Jake Arrieta",
    "Manny Ramirez",
    "Miguel Cabrera",
    "Dustin Pedroia",
    "Mariano Rivera",
    "Johnny Bench",
    "Prince Fielder",
    "Buster Posey",
    "Hoyt Wilhelm",
    "Phil Niekro",
    "Max Scherzer",
    "Francisco Lindor",
    "Pedro Martinez",
    "Andruw Jones",
    "Zack Wheeler",
    "Ryne Sandberg",
    "Joe Medwick",
    "Pete Rose",
    "Corbin Carroll",
    "Ivan Rodriguez",
    "Greg Holland",
    "Steve Carlton",
    "Derek Jeter",
    "Roy Campanella",
    "Hank Greenberg",
    "Tom Seaver",
    "Felix