In [40]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS
import numpy as np
import openai
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_community.chat_models import ChatOpenAI
import yfinance as yf
from dotenv import load_dotenv


In [25]:
def get_yfinance(ticker): 
    stock = yf.Ticker(ticker)
    stock.info
    print(stock.info)

    # print stock history and metadata
    hist = stock.history(period = "1mo")
    print(stock.history_metadata)
    print(hist)

    # show actions (dividends, splits, capital gains)
    print(stock.actions)
    print(stock.dividends)
    print(stock.splits)
    #stock.capital_gains  only for mutual funds & etfs


    # show financials:
    print(stock.calendar)
    print(stock.sec_filings)
    # - income statement
    print(stock.income_stmt)
    print(stock.quarterly_income_stmt)
    # - balance sheet
    print(stock.balance_sheet)
    print(stock.quarterly_balance_sheet)
    # - cash flow statement
    print(stock.cashflow)
    print(stock.quarterly_cashflow)




In [26]:
def get_yfinance_data(ticker, period="1mo"):
    """
    Fetches various financial data points for a stock from Yahoo Finance.
    
    Parameters:
    - ticker: str, stock ticker symbol (e.g., "NVDA").
    - period: str, duration for historical data (default is "1mo").
    
    Returns:
    - data: dict, a dictionary containing historical data, actions, financials, and statements.
    """
    stock = yf.Ticker(ticker)
    
    # Dictionary to store all data for easy access
    data = {}
    
    # Stock metadata
    data['info'] = stock.info

    # Stock history and metadata
    data['history'] = stock.history(period=period)
    data['history_metadata'] = stock.history_metadata

    # Stock actions
    data['actions'] = stock.actions
    data['dividends'] = stock.dividends
    data['splits'] = stock.splits
    
    # Financials
    data['calendar'] = stock.calendar
    data['sec_filings'] = stock.sec_filings
    data['income_stmt'] = stock.income_stmt
    data['quarterly_income_stmt'] = stock.quarterly_income_stmt
    data['balance_sheet'] = stock.balance_sheet
    data['quarterly_balance_sheet'] = stock.quarterly_balance_sheet
    data['cashflow'] = stock.cashflow
    data['quarterly_cashflow'] = stock.quarterly_cashflow

    return data

In [27]:
nvda_data = get_yfinance_data("NVDA")

In [28]:
print(nvda_data['income_stmt'])
print(nvda_data['history'].head())

                                                       2024-01-31  \
Tax Effect Of Unusual Items                                   0.0   
Tax Rate For Calcs                                           0.12   
Normalized EBITDA                                   35583000000.0   
Total Unusual Items                                           0.0   
Total Unusual Items Excluding Goodwill                        0.0   
Net Income From Continuing Operation Net Minori...  29760000000.0   
Reconciled Depreciation                              1508000000.0   
Reconciled Cost Of Revenue                          16621000000.0   
EBITDA                                              35583000000.0   
EBIT                                                34075000000.0   
Net Interest Income                                   609000000.0   
Interest Expense                                      257000000.0   
Interest Income                                       866000000.0   
Normalized Income                 

1. Extract and Preprocess Data

In [31]:
# Check the structure of the income statement
print(nvda_data['income_stmt'].head())

                                           2024-01-31    2023-01-31  \
Tax Effect Of Unusual Items                       0.0  -284130000.0   
Tax Rate For Calcs                               0.12          0.21   
Normalized EBITDA                       35583000000.0  7340000000.0   
Total Unusual Items                               0.0 -1353000000.0   
Total Unusual Items Excluding Goodwill            0.0 -1353000000.0   

                                           2022-01-31    2021-01-31  
Tax Effect Of Unusual Items                       0.0           0.0  
Tax Rate For Calcs                              0.019         0.017  
Normalized EBITDA                       11351000000.0  5691000000.0  
Total Unusual Items                               0.0           0.0  
Total Unusual Items Excluding Goodwill            0.0           0.0  


In [32]:
# Print the column names to see if the fields are correct
print(nvda_data['income_stmt'].columns)


DatetimeIndex(['2024-01-31', '2023-01-31', '2022-01-31', '2021-01-31'], dtype='datetime64[ns]', freq=None)


In [33]:
# Check if columns exist before accessing them
if 'EBITDA' in nvda_data['income_stmt'].columns:
    print(nvda_data['income_stmt']['EBITDA'])
else:
    print("EBITDA data is not available.")


EBITDA data is not available.


In [35]:
def preprocess_stock_data(nvda_data):
    # Extract historical data (e.g., last few days of stock data)
    history = nvda_data['history'].tail(5)  # Last 5 days of data
    history_summary = "\n".join([f"Date: {row.name}, Open: {row['Open']}, Close: {row['Close']}" for index, row in history.iterrows()])

    # Extract key financial metrics from the income statement
    income_stmt = nvda_data['income_stmt'].tail(1)  # Latest available data
    
    # Safely extract the required financial metrics
    ebitda = income_stmt['EBITDA'].values[0] if 'EBITDA' in income_stmt.columns else "N/A"
    net_income = income_stmt['Net Income'].values[0] if 'Net Income' in income_stmt.columns else "N/A"
    basic_eps = income_stmt['Basic EPS'].values[0] if 'Basic EPS' in income_stmt.columns else "N/A"
    
    financial_summary = f"EBITDA: {ebitda}, Net Income: {net_income}, Basic EPS: {basic_eps}"
    
    # Combine everything into one summary string
    stock_summary = f"Stock History:\n{history_summary}\n\nFinancial Metrics:\n{financial_summary}"
    
    return stock_summary


In [36]:
# Get preprocessed stock data summary
stock_summary = preprocess_stock_data(nvda_data)
print(stock_summary)

Stock History:
Date: 2024-11-04 00:00:00-05:00, Open: 137.2100067138672, Close: 136.0500030517578
Date: 2024-11-05 00:00:00-05:00, Open: 137.4499969482422, Close: 139.91000366210938
Date: 2024-11-06 00:00:00-05:00, Open: 142.9600067138672, Close: 145.61000061035156
Date: 2024-11-07 00:00:00-05:00, Open: 146.38999938964844, Close: 148.8800048828125
Date: 2024-11-08 00:00:00-05:00, Open: 148.77000427246094, Close: 147.6300048828125

Financial Metrics:
EBITDA: N/A, Net Income: N/A, Basic EPS: N/A


2. Convert Data into Vector Embeddings

In [41]:
# Load environment variables from the .env file
load_dotenv()

False

In [42]:
# Retrieve the OpenAI API key from the environment variable
openai.api_key = os.getenv("OPENAI_API_KEY")

In [43]:
# Instantiate OpenAI Embeddings model
embedding_model = OpenAIEmbeddings()

# Convert stock summary into embedding
stock_embedding = embedding_model.embed([stock_summary])
print(stock_embedding)

  embedding_model = OpenAIEmbeddings()


ValidationError: 1 validation error for OpenAIEmbeddings
  Value error, Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. [type=value_error, input_value={'model_kwargs': {}, 'cli...20, 'http_client': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/value_error

In [None]:
# Convert the list of embeddings into a numpy array
stock_embedding_np = np.array(stock_embedding)

# Create FAISS index and store the embedding
faiss_index = FAISS.from_embeddings(stock_embedding_np)

# Optionally, store additional metadata with the embedding, such as ticker name
faiss_index.add(stock_embedding_np, ["NVDA"])

In [None]:
# Query the FAISS index
query = "What is the current financial status of NVDA?"
response = faiss_index.similarity_search(query)

# Print the response
print(response)