In [2]:
from linecache import cache

from IPython.core.debugger import prompt
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA
import nltk
import ssl
from dotenv import load_dotenv
from nltk.data import retrieve
from sympy.physics.units import temperature

load_dotenv(dotenv_path=".env")
cache_dir = LocalFileStore("./.cache/")

# SSL 인증서 무시
ssl._create_default_https_context = ssl._create_unverified_context
nltk.download('punkt')

llm = ChatOpenAI()

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)
loader = UnstructuredFileLoader("./files/ohtani.txt")

docs = loader.load_and_split(splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=vectorstore.as_retriever(),
)

chain.run("Which team does ohtani play baseball")

chain.run("Describe Los Angeles Angels")

[nltk_data] Downloading package punkt to /Users/leehamin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


'The Los Angeles Angels are a professional baseball team based in Anaheim, California, and are a member of the American League West division in Major League Baseball. The team has a dedicated fan base and plays their home games at Angel Stadium of Anaheim. One of the standout players on the Angels in recent years has been Shohei Ohtani, a two-way player who excels both as a pitcher and a hitter. Ohtani signed with the Angels in 2017 with a limited bonus due to international signing rules. \n\nInitially, Ohtani faced challenges during his debut MLB spring training in 2018, with many American news sources expressing disappointment in his performance and questioning his abilities as a two-way player. However, Ohtani proved his critics wrong during the 2018 MLB regular season, showcasing his exceptional talent both as a pitcher and a hitter. He went on to win the American League Rookie of the Year award, solidifying his status as one of the top players in Major League Baseball. His impress

In [4]:
from linecache import cache

from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

import nltk
import ssl
from dotenv import load_dotenv

load_dotenv(dotenv_path=".env")
cache_dir = LocalFileStore("./.cache/")

# SSL 인증서 무시
ssl._create_default_https_context = ssl._create_unverified_context
nltk.download('punkt')

llm = ChatOpenAI(
    temperature=0.1
)

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)
loader = UnstructuredFileLoader("./files/ohtani.txt")

docs = loader.load_and_split(splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",
    ),
    ("human", "{question}"),
])

chain = (
        {"context": retriever, "question": RunnablePassthrough()} 
         | prompt
         | llm)

chain.invoke("Describe ohtani")

[nltk_data] Downloading package punkt to /Users/leehamin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


AIMessage(content='Shohei Ohtani is a Japanese professional baseball player who is known for his exceptional skills as both a pitcher and a designated hitter. He has played for the Los Angeles Angels and currently plays for the Los Angeles Dodgers. Ohtani has been compared to Babe Ruth for his rare two-way abilities. He has won multiple awards, including the American League Most Valuable Player Award. In 2023, he signed a record-breaking 10-year, $700 million contract with the Dodgers.')

In [6]:
from langchain.agents import create_sql_agent, AgentType
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase

llm = ChatOpenAI(
    temperature=0.1,
    model_name="gpt-4-1106-preview",
)
db = SQLDatabase.from_uri("sqlite:///movies.sqlite")
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

agent = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)

agent.invoke(
    "Give me the movies that have the highest votes but the lowest budgets and give me the name of their directors also include their gross revenue."
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with ``


[0m[38;5;200m[1;3mdirectors, movies[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `movies, directors`


[0m[33;1m[1;3m
CREATE TABLE directors (
	name TEXT, 
	id INTEGER, 
	gender INTEGER, 
	uid INTEGER, 
	department TEXT, 
	PRIMARY KEY (id)
)

/*
3 rows from directors table:
name	id	gender	uid	department
James Cameron	4762	2	2710	Directing
Gore Verbinski	4763	2	1704	Directing
Sam Mendes	4764	2	39	Directing
*/


CREATE TABLE movies (
	id INTEGER, 
	original_title VARCHAR, 
	budget INTEGER, 
	popularity INTEGER, 
	release_date TEXT, 
	revenue INTEGER, 
	title TEXT, 
	vote_average REAL, 
	vote_count INTEGER, 
	overview TEXT, 
	tagline TEXT, 
	uid INTEGER, 
	director_id INTEGER DEFAULT 0 NOT NULL, 
	PRIMARY KEY (id)
)

/*
3 rows from movies table:
id	original_title	budget	popularity	release_date	revenue	title	vote_average	vote_count	overview	tagline	uid	director_id
43597	Avatar	23

{'input': 'Give me the movies that have the highest votes but the lowest budgets and give me the name of their directors also include their gross revenue.',
 'output': 'Here are the movies with the highest votes but the lowest budgets, along with the names of their directors and their gross revenue:\n\n1. "Inception" directed by Christopher Nolan with a gross revenue of $825,532,764.\n2. "The Dark Knight" directed by Christopher Nolan with a gross revenue of $1,004,558,444.\n3. "Avatar" directed by James Cameron with a gross revenue of $2,787,965,087.\n4. "The Avengers" directed by Joss Whedon with a gross revenue of $1,519,557,910.\n5. "Deadpool" directed by Tim Miller with a gross revenue of $783,112,979.\n6. "Interstellar" directed by Christopher Nolan with a gross revenue of $675,120,017.\n7. "Django Unchained" directed by Quentin Tarantino with a gross revenue of $425,368,238.\n8. "Guardians of the Galaxy" directed by James Gunn with a gross revenue of $773,328,629.\n9. "The Hunge