# Yfinance_Usecase 
- Developing Python functions and wrapping them as LangChain tools so that an agent can select and call them

In [27]:
from typing import Type
from langchain.llms import OpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field

from datetime import datetime, timedelta

import yfinance as yf

In [38]:
def get_current_stock_price(ticker):
    """Return the most recent closing price for a Yahoo Finance ticker.

    Args:
        ticker: Ticker symbol as recognized by Yahoo Finance
            (for example ``"086520.KQ"``).

    Returns:
        dict: ``{"price": <float>}`` holding the ``Close`` value of the newest
        row of the downloaded price history. Currency is not included.

    Raises:
        ValueError: If no closing price is available for ``ticker``.
    """
    ticker_data = yf.Ticker(ticker)
    # "5d" is a documented yfinance period (months are spelled "mo", so the
    # previous "6m" was not a valid value); a few days is enough to bridge
    # weekends and holidays.
    recent = ticker_data.history(period="5d")
    if recent.empty:
        raise ValueError(f"No price history returned for ticker {ticker!r}")

    # Drop rows without a close so the last row is a real price
    closes = recent["Close"].dropna()
    if closes.empty:
        raise ValueError(f"No closing price available for ticker {ticker!r}")

    # The newest bar is the LAST row (get_stock_performance relies on the same
    # ordering: iloc[0] is the old price, iloc[-1] the current one).
    return {"price": float(closes.iloc[-1])}

def get_stock_performance(ticker, past_date):
    """Return the percentage change in closing price since ``past_date``.

    Args:
        ticker: Ticker symbol as recognized by Yahoo Finance.
        past_date: Start date handed to ``Ticker.history(start=...)``.

    Returns:
        dict: ``{"percent_change": <float>}`` comparing the first available
        close on/after ``past_date`` with the most recent available close.

    Raises:
        ValueError: If no closing prices are available for the requested range
            (for example an unknown ticker or a start date with no data).
    """
    ticker_data = yf.Ticker(ticker)
    history = ticker_data.history(start=past_date)
    if history.empty:
        raise ValueError(
            f"No price history returned for ticker {ticker!r} since {past_date!r}"
        )

    # Ignore rows without a close so neither endpoint is NaN
    closes = history["Close"].dropna()
    if closes.empty:
        raise ValueError(
            f"No closing prices available for ticker {ticker!r} since {past_date!r}"
        )

    old_price = closes.iloc[0]        # earliest close in the window
    current_price = closes.iloc[-1]   # latest close in the window
    return {"percent_change": float(((current_price - old_price) / old_price) * 100)}

class CurrentStockPriceInput(BaseModel):
    """Inputs for get_current_stock_price"""

    # Argument schema used by CurrentStockPriceTool (assigned to its
    # `args_schema`). The Field description is the text attached to this
    # argument in the generated schema.
    ticker: str = Field(description="Ticker symbol of the stock")


class CurrentStockPriceTool(BaseTool):
    # LangChain tool that exposes get_current_stock_price to an agent.
    #
    # `name` and `description` are annotated so they are treated as overrides
    # of the BaseTool fields; pydantic v2-based LangChain releases reject
    # un-annotated attribute overrides, while v1 accepts either form.
    name: str = "get_current_stock_price"
    description: str = """
        Useful when you want to get current stock price.
        You should enter the stock ticker symbol recognized by the yahoo finance
        """
    args_schema: Type[BaseModel] = CurrentStockPriceInput

    def _run(self, ticker: str):
        """Synchronously look up the price for ``ticker``.

        Returns the dict produced by ``get_current_stock_price``.
        """
        price_response = get_current_stock_price(ticker)
        return price_response

    async def _arun(self, ticker: str):
        """Async entry point; deliberately unsupported for this tool.

        Declared ``async`` to match the coroutine contract of
        ``BaseTool._arun``; awaiting it raises ``NotImplementedError``.
        """
        raise NotImplementedError("get_current_stock_price does not support async")



In [39]:
class StockPerformanceInput(BaseModel):
    """Inputs for get_stock_performance"""

    ticker: str = Field(description="Ticker symbol of the stock")
    # The value is forwarded unchanged to yfinance's `history(start=...)`, which
    # takes date strings as YYYY-MM-DD, so the expected format is spelled out
    # for the model that fills in this argument.
    past_date: str = Field(
        description="past_date asked to compare the price for the stock, in YYYY-MM-DD format"
    )


class StockPerformanceTool(BaseTool):
    # LangChain tool that exposes get_stock_performance to an agent.
    #
    # `name` and `description` are annotated so they are treated as overrides
    # of the BaseTool fields; pydantic v2-based LangChain releases reject
    # un-annotated attribute overrides, while v1 accepts either form.
    name: str = "get_stock_performance"
    description: str = """
        Useful when you want to get the stock performance over the given period.
        You should enter the stock ticker symbol recognized by the yahoo finance
        """
    args_schema: Type[BaseModel] = StockPerformanceInput

    def _run(self, ticker: str, past_date: str):
        """Synchronously compute the price change of ``ticker`` since ``past_date``.

        Returns the dict produced by ``get_stock_performance``.
        """
        performance_response = get_stock_performance(ticker, past_date)
        return performance_response

    async def _arun(self, ticker: str, past_date: str):
        """Async entry point; deliberately unsupported for this tool.

        Declared ``async`` to match the coroutine contract of
        ``BaseTool._arun``; awaiting it raises ``NotImplementedError``.
        """
        raise NotImplementedError("get_stock_performance does not support async")

In [29]:
from langchain.agents import AgentType, initialize_agent, Tool
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.tools.tavily_search import TavilySearchResults
from langchain.utilities.tavily_search import TavilySearchAPIWrapper

In [41]:
from dotenv import load_dotenv
# Load environment variables from a .env file; the recorded output `True`
# indicates a file was found. Presumably this supplies the API keys used by
# the OpenAI and Tavily clients below — TODO confirm.
load_dotenv()

True

In [42]:
# Build a structured-chat agent that can call the two yfinance tools plus a
# Tavily web search, then ask it a three-part question about Ecopro.
# NOTE(review): openai_api_key is an empty string here — presumably redacted
# before sharing, with the real key expected from the environment. TODO confirm.
llm = ChatOpenAI(model="gpt-4", temperature=0, openai_api_key="")
search = TavilySearchAPIWrapper()
tavily_tool = TavilySearchResults(api_wrapper=search, max_results=10)
# `llm`, `search` and `tools` are module-level names that later cells reuse or rebind
tools = [CurrentStockPriceTool(),StockPerformanceTool(),
         Tool(
        name="Search",
        func=tavily_tool.run,
        description="useful for when you need to get ticker and current event about the stock in question"
    )]
agent = initialize_agent(tools=tools,
                          llm=llm,
                          agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
# (latest trading price, short-selling ban date, return since the ban)
# The prompt uses backslash line continuations inside the string literals, so
# the indentation of the continued lines is part of the text sent to the agent.
agent.run("get me 3 data \
          [1. the latest trading day stock price of Ecopro(KOSDAQ),\
           2. the date in which Korea totally banned short-selling,\
           3. its performance since the ban].\n"
          "Do not assume any random dates if the article searched doesn't say about it, you can tell me\
            that article's release date and ultimately the performance since then")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: First, I need to find the ticker symbol for Ecopro on the KOSDAQ. I will use the Search tool for this. 
Action:
```
{
  "action": "Search",
  "action_input": "Ecopro KOSDAQ ticker symbol"
}
```[0m
Observation: [38;5;200m[1;3m[{'url': 'https://finance.yahoo.com/quote/086520.KQ/', 'content': "Ecopro Co., Ltd. (086520.KQ) Subscribe to Yahoo Finance Plus to view Fair Value for 086520.KQ  Ecopro Materials eyes $498 million in potentially largest South Korean IPO since early 2022  Ecopro, Samsung Ink Massive $34 Billion Battery Deal Despite EV Slump  South Korea's Ecopro Materials, a battery materials producer, is gearing up for what could be the country's largestFind the latest Ecopro Co., Ltd. (086520.KQ) stock quote, history, news and other vital information to help you with your stock trading and investing."}, {'url': 'https://uk.finance.yahoo.com/quote/086520.KQ/', 'content': "You might not be getting everything yo

'1. The latest trading day stock price for Ecopro (KOSDAQ: 086520.KQ) is 660,000 KRW. 2. South Korea totally banned short-selling on November 5, 2023. 3. Since the ban, the performance of Ecopro has decreased by approximately 20.29%.'

In [47]:
# Second agent: search-only, used just to resolve a company name to a ticker.
# Rebinds the module-level `llm`, `tavily_tool` and `tools`, and reuses the
# `search` wrapper created in the earlier agent cell.
# NOTE(review): openai_api_key is an empty string — presumably redacted. TODO confirm.
llm = ChatOpenAI(model="gpt-4", temperature=0, openai_api_key="")
tavily_tool = TavilySearchResults(api_wrapper=search, max_results=3)
tools = [Tool(
        name="Search",
        func=tavily_tool.run,
        description="useful for when you need to get ticker"
    )
]

# Uses the OpenAI function-calling agent type instead of the structured-chat one
ticker_agent = initialize_agent(tools=tools,
                          llm=llm,
                          agent=AgentType.OPENAI_FUNCTIONS, verbose=True)

In [48]:
# Ask the search-only agent for the ticker and keep its answer string
ticker= ticker_agent.run("get me the ticker for Ecopro(KOSDAQ), you should return only ticker, no strings attached")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Search` with `Ecopro(KOSDAQ)`


[0m[36;1m[1;3m[{'url': 'https://koreajoongangdaily.joins.com/2023/04/20/business/industry/Korea-ecopro-ecopro-bm/20230420174402783.html', 'content': "korea joongAng daily Home > Business > Industry [NEWS IN FOCUS] EcoPro stocks skyrocket, but some analysts spooked  EcoPro BM officials celebrate the company's public listing in March 2019 at Korea Exchange headquarters in Yeouido,  Climbing higher The price is right, EcoPro Materials CEO says ahead of IPO  EcoPro BM's building in Pohang, North Gyeongsang [YONHAP]EcoPro, a chemicals group that owns a battery materials unit, soared almost 500 percent in the first four months on the Kosdaq stock market, prompting investors and analysts to scrutinize the company. Solid demand and optimism for electric vehicles are major factors driving up the stock, but the breakneck rally has some analysts worried."}, {'url': 'https://finance.yahoo.co

In [49]:
# Display the agent's answer (recorded output: '086520.KQ')
ticker

'086520.KQ'

In [21]:
from langchain.document_loaders import PyPDFLoader

# Load one broker report PDF and split it into Document objects.
# The path is relative to the notebook's working directory.
loader = PyPDFLoader("../frontend/static/하나증권_에코프로.pdf")
pages = loader.load_and_split()

In [22]:
# Inspect the first Document produced by the loader
pages[0]

Document(page_content='2023년 11월 8일 I 기업분석 _Earnings Review  \n에코프로  (086520)  \n인기투표와  저울 \n \n3Q23 Review :  비상장  자회사  실적 악화 \n3분기 연결 실적은  매출 1.9조원(YoY + 17%, QoQ -6%), 영업이익  650억원(YoY -69%, \nQoQ -62%)으로 부진했다. BM 영업이익이 메탈 가격 하락에  따른 수익성  악화로  인해 \nYoY -68%, QoQ -60% 감소한  데 따른 영향이  컸다. 리튬 및 리싸이클링  자회사  실적 상\n세는 공시되지  않았으나 , 지주사  연결 이익에서 상장 자회사  이익 제외한 영업이익이 76억\n원(4Q22 611 억원, 1Q23 630 억원, 2Q23 444 억원)에 불과한  것 감안하면 , 에코프로  이노베이\n션 등 비상장  법인 실적은  매우 부진했을  것으로  추정된다 . 4분기의 경우, 전기차  수요 둔화\n에 따른 양극재  수요 감소로  양극재  출하 증가 기대하기  어려운  상황에서  메탈 가격 하락에 \n따른 수익성  악화로  인해 3분기 대비 매출 및 영업이익  모두 감익 불가피할  전망이다 . \n명확한  판단 기준 필요 \n주가는  단기적으로  인기투표와  같지만  장기적으로는  저울과  같다. 인기투표는  Social \nmedia , 지수 편입 등과 같이 수급의  쏠림을  만들어내는 , 내재가치와  무관한  이슈로  주인공\n을 만들어 내지만 , 저울의 눈금은  결국 장기적으로  기업의  내재가치를  가리킨다 . 열기가  가\n라앉은  후 저울이  가리킬  지점에  대한 판단 기준을  갖고 있어야  손실을  최소화할  수 있고, \n언젠가  도래할  시장의  외면 속에서도 다시 기회를  찾을 수 있다. \nValuation 공백 상태 \nBM의 장기 내재 가치는 32.9조원(금리 상승 및 가중평균자본비용 (WACC) 상승으로 , 현가 \n할인한  시가총액  감소 반영)이며, 이 중 

In [10]:
# Number of Document objects produced by load_and_split()
len(pages)

6

- An advantage of this approach is that documents can be retrieved with page numbers.
- We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.

In [22]:
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings

# Embed the loaded documents into an in-memory FAISS index, then run a
# similarity search and print each hit prefixed with its page number.
# Requires `pages` from the PyPDFLoader cell to exist in the kernel.
faiss_index = FAISS.from_documents(pages, OpenAIEmbeddings())
docs = faiss_index.similarity_search("what is the stock's target price?", k=4)
for doc in docs:
    print(f"{doc.metadata['page']}:", doc.page_content)

NameError: name 'pages' is not defined

In [53]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyMuPDFLoader

In [2]:
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``.

    Returns:
        str: All page texts joined in order, with no separator between pages
        or between documents.
    """
    page_texts = []
    for pdf_source in pdf_docs:
        reader = PdfReader(pdf_source)
        page_texts.extend(page.extract_text() for page in reader.pages)
    return "".join(page_texts)


In [3]:

def get_text_chunks(text):
    """Split raw text into overlapping chunks for embedding.

    Args:
        text: The full text to split.

    Returns:
        The chunks returned by ``CharacterTextSplitter.split_text`` configured
        with a newline separator, ``chunk_size=1000`` and ``chunk_overlap=200``,
        with length measured by ``len``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)

In [5]:
def get_vectorstore(text_chunks):
    """Build a FAISS vector store from text chunks using OpenAI embeddings.

    Args:
        text_chunks: The texts to embed and index.

    Returns:
        The object returned by ``FAISS.from_texts``.
    """
    return FAISS.from_texts(texts=text_chunks, embedding=OpenAIEmbeddings())

In [26]:
def get_retriever_chain(vectorstore):
    """Create a "stuff"-type RetrievalQA chain over the given vector store.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type``, using a
        default-configured ``ChatOpenAI`` model.
    """
    chain_options = {
        "llm": ChatOpenAI(),
        "chain_type": "stuff",
        "retriever": vectorstore.as_retriever(),
    }
    return RetrievalQA.from_chain_type(**chain_options)

In [18]:
import os

# Directory holding the broker-report PDFs, relative to the notebook's working
# directory. This matches the recorded `pdf_docs` output and the path used by
# the single-file PyPDFLoader cell (both use '../frontend/static').
dir_path = '../frontend/static'

# Collect the file path (not a loader object) of every PDF in the directory.
# sorted() keeps the order deterministic across runs, since os.listdir()
# returns entries in arbitrary order.
pdf_docs = sorted(
    os.path.join(dir_path, filename)
    for filename in os.listdir(dir_path)
    if filename.endswith('.pdf')
)

In [66]:
# Display the collected PDF paths
pdf_docs

['../frontend/static/키움증권_삼성전자.pdf',
 '../frontend/static/키움증권_삼성전자_20231208074621.pdf',
 '../frontend/static/하나증권_에코프로.pdf',
 '../frontend/static/현대차증권_삼성전자_20231205082639.pdf']

In [73]:
# Preview the first Document that PyMuPDFLoader yields for the first PDF
PyMuPDFLoader(pdf_docs[0]).load()[0]

Document(page_content="반도체 \n \n \n6 \n \n삼성전자 \n \n \n삼성전자 (005930) \n \nBUY(Maintain) \n주가(11/29) 72,700원  \n목표주가 90,000원  \n \n반도체 Analyst 박유악 \nyuak.pak@kiwoom.com \n \nDRAM 산업은 올 연말 공급 부족에 재진\n입할 전망. NAND 역시 유통재고의 급격한 \n소진과 함께 가격 급등이 나타날 전망. 이\n에 따라 삼성전자의 영업이익은 4Q23 4.0\n조원(+64%QoQ), 2024년 37조원(+380% \nYoY)으로 성장세 이어질 전망. 목표주가 \n90,000원을 유지하며, 업종 top pick으로 \n매수 추천함. \n \nStock Data \nKOSPI (11/29) \n \n2,519.81pt \n시가총액 \n 4,340,032억원 \n52주 주가동향  \n최고가 \n최저가 \n \n73,400 원 \n55,300원 \n최고/최저가대비 등락 \n-1.0% \n31.5% \n주가수익률 \n절대 \n상대 \n1M \n8.0% \n-1.4% \n6M \n3.4% \n4.9% \n1Y \n21.0% \n15.5% \n \n \n \nComany Data \n발행주식수 \n \n5,969,783천주 \n일평균 거래량(3M)  \n14,119 천주 \n외국인 지분율 \n \n53.5% \n배당수익률(23.E) \n \n2.4% \nBPS�(23.E) \n \n52,159 원 \n주요 주주 \n삼성생명보험 외 15인 \n20.7% \n \n \n \nPrice Trend \n \n \n \n \n \n연말·연초 메모리가격 급등 예상 \n 과잉재고 해소에 따른 업황 회복 \n2024년 글로벌 반도체 업황은 상승 사이클에 진입할 것으\n로 예상되나, 사이클의 속도는 유통재고 소진 뒤에 나타날 \n수요의 개선 강도에 따라 달라질 전망이다. 오랜 기간 반도\n체 업황을 괴롭혀 왔던 과잉 재고는 올 연말을 지나면서 점\n차 해소될

In [78]:
def get_pdf_text(pdf_docs):
    """Concatenate the text of every Document loaded from the given PDF paths.

    This redefinition replaces the earlier PyPDF2-based ``get_pdf_text`` and
    reads the files with ``PyMuPDFLoader`` instead.

    Args:
        pdf_docs: Sequence of PDF file paths.

    Returns:
        str: The ``page_content`` of all loaded Documents joined in order,
        with no separator.
    """
    page_texts = []
    for pdf_path in pdf_docs:
        for loaded_doc in PyMuPDFLoader(pdf_path).load():
            page_texts.append(loaded_doc.page_content)
    return "".join(page_texts)

In [79]:
# End-to-end document pipeline: read all PDFs -> split into chunks ->
# embed into a vector store -> wrap in a RetrievalQA chain.
# Each step feeds the next, so the statements must run in this order.
raw_text = get_pdf_text(pdf_docs)
text_chunks = get_text_chunks(raw_text)
vectorstore = get_vectorstore(text_chunks)
llm_papers = get_retriever_chain(vectorstore)

In [80]:
# Build an agent whose only tool is the RetrievalQA chain over the uploaded
# PDFs, and ask it to describe what the documents have in common.
# The Tavily search tool below is intentionally left disabled.
#search = TavilySearchAPIWrapper()
#tavily_tool = TavilySearchResults(api_wrapper=search, max_results=5)
tools = [
#    Tool(
#        name="Search",
#        func=tavily_tool.run,
#        description="useful for getting ticker"
#    ),
    Tool(
        name = "Document Store",
        func = llm_papers.run,
        # The backslash continues the string literal, so the next line's
        # indentation is part of the description text.
        description = "Use it to lookup information from document uploaded from pdfs \
                        Use it more than the normal search if the question is relevant to pdf",
#        handle_tool_error=True
    ),
]
# NOTE(review): openai_api_key is an empty string — presumably redacted. TODO confirm.
llm = ChatOpenAI(model="gpt-4", temperature=0, openai_api_key="")

document_agent = initialize_agent(tools=tools,
                  llm=llm,
                  agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# The prompt is an f-string with no placeholders; its continued lines carry
# their leading whitespace into the text sent to the agent.
document_agent.run(f'please return me the common characteristics or layout of documents uploaded,\
                   this could be anything such as target price, stock name is always followed by its ticker\
                    and so forth.')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo answer this question, I need to analyze the layout and common characteristics of the documents uploaded in the document store.
Action: Document Store
Action Input: Analyze the layout and common characteristics of the documents uploaded.[0m
Observation: [36;1m[1;3mThe documents uploaded appear to be financial data and analysis reports from a research center at a securities company. The layout of the documents is consistent, with clearly organized sections and tables presenting various data points. Each document includes information on different business sectors, such as mobile phones, tablets, and semiconductors. 

The common characteristics of the documents are as follows:

1. Data Presentation: The documents provide quantitative data in the form of percentages, quantities, average selling prices (ASP), and cost figures. The data is often presented in tables and charts for easy comprehension.

2. Time Period Analysis: T

"The common characteristics of the documents uploaded are as follows:\n\n1. Data Presentation: The documents provide quantitative data in the form of percentages, quantities, average selling prices (ASP), and cost figures. The data is often presented in tables and charts for easy comprehension.\n\n2. Time Period Analysis: The documents analyze the performance of various sectors and products over different time periods, including quarterly and yearly comparisons. This allows for tracking trends and changes in the market.\n\n3. QoQ and YoY Comparison: The documents frequently use the concepts of quarter-on-quarter (QoQ) and year-on-year (YoY) comparisons to assess growth rates and performance fluctuations.\n\n4. Investment Recommendations: The documents include investment recommendations for specific companies or sectors, along with target prices. These recommendations are subject to change based on market conditions.\n\n5. Currency Exchange Rates: The documents provide information on th