In [1]:
! pip install yfinance langchain_pinecone openai python-dotenv langchain-community sentence_transformers



In [2]:
from langchain_pinecone import PineconeVectorStore
from openai import OpenAI
import dotenv
import json
import yfinance as yf
import concurrent.futures
from langchain_community.embeddings import HuggingFaceEmbeddings
from google.colab import userdata
from langchain.schema import Document
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone
import numpy as np
import requests
import os

In [3]:
def get_stock_info(symbol: str) -> dict:
    """
    Retrieves and formats detailed information about a stock from Yahoo Finance.

    Args:
        symbol (str): The stock ticker symbol to look up.

    Returns:
        dict: A dictionary containing detailed stock information, including ticker, name,
              business summary, city, state, country, industry, and sector.
    """
    data = yf.Ticker(symbol)
    stock_info = data.info

    properties = {
        "Ticker": stock_info.get('symbol', 'Information not available'),
        'Name': stock_info.get('longName', 'Information not available'),
        'Business Summary': stock_info.get('longBusinessSummary', 'Information not available'),
        'City': stock_info.get('city', 'Information not available'),
        'State': stock_info.get('state', 'Information not available'),
        'Country': stock_info.get('country', 'Information not available'),
        'Industry': stock_info.get('industry', 'Information not available'),
        'Sector': stock_info.get('sector', 'Information not available')
    }

    return properties

In [4]:
#testing to see if everything works thus far
data = yf.Ticker("CRM")
stock_info = data.info
print(stock_info)

{'address1': 'Salesforce Tower', 'address2': '3rd Floor 415 Mission Street', 'city': 'San Francisco', 'state': 'CA', 'zip': '94105', 'country': 'United States', 'phone': '415 901 7000', 'fax': '415 901 7040', 'website': 'https://www.salesforce.com', 'industry': 'Software - Application', 'industryKey': 'software-application', 'industryDisp': 'Software - Application', 'industrySymbol': 'h885', 'sector': 'Technology', 'sectorKey': 'technology', 'sectorDisp': 'Technology', 'longBusinessSummary': "Salesforce, Inc. provides Customer Relationship Management (CRM) technology that brings companies and customers together worldwide. The company's service includes sales to store data, monitor leads and progress, forecast opportunities, gain insights through analytics and artificial intelligence, and deliver quotes, contracts, and invoices; and service that enables companies to deliver trusted and highly personalized customer support at scale. In addition, its platform offering comprise a flexible 

In [5]:
def get_huggingface_embeddings(text, model_name="sentence-transformers/all-mpnet-base-v2"):
    """
    Generates embeddings for the given text using a specified Hugging Face model.

    Args:
        text (str): The input text to generate embeddings for.
        model_name (str): The name of the Hugging Face model to use.
                          Defaults to "sentence-transformers/all-mpnet-base-v2".

    Returns:
        np.ndarray: The generated embeddings as a NumPy array.
    """
    model = SentenceTransformer(model_name)
    return model.encode(text)


def cosine_similarity_between_sentences(sentence1, sentence2):
    """
    Calculates the cosine similarity between two sentences.

    Args:
        sentence1 (str): The first sentence for similarity comparison.
        sentence2 (str): The second sentence for similarity comparison.

    Returns:
        float: The cosine similarity score between the two sentences,
               ranging from -1 (completely opposite) to 1 (identical).

    Notes:
        Prints the similarity score to the console in a formatted string.
    """
    # Get embeddings for both sentences
    embedding1 = np.array(get_huggingface_embeddings(sentence1))
    embedding2 = np.array(get_huggingface_embeddings(sentence2))

    # Reshape embeddings for cosine_similarity function
    embedding1 = embedding1.reshape(1, -1)
    embedding2 = embedding2.reshape(1, -1)

    # Calculate cosine similarity
    similarity = cosine_similarity(embedding1, embedding2)
    similarity_score = similarity[0][0]
    print(f"Cosine similarity between the two sentences: {similarity_score:.4f}")
    return similarity_score


# Example usage
sentence1 = "I like walking to the park"
sentence2 = "I like running to the playground"

similarity = cosine_similarity_between_sentences(sentence1, sentence2)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Cosine similarity between the two sentences: 0.7123


In [6]:
#testing
crm_info = get_stock_info('CRM')
print(crm_info)

{'Ticker': 'CRM', 'Name': 'Salesforce, Inc.', 'Business Summary': "Salesforce, Inc. provides Customer Relationship Management (CRM) technology that brings companies and customers together worldwide. The company's service includes sales to store data, monitor leads and progress, forecast opportunities, gain insights through analytics and artificial intelligence, and deliver quotes, contracts, and invoices; and service that enables companies to deliver trusted and highly personalized customer support at scale. In addition, its platform offering comprise a flexible platform that enables companies of various sizes, locations, and industries to build business workflow and apps with customer; online learning platform that allows anyone to learn in-demand Salesforce skills; and Slack, an intelligent productivity platform. The company's marketing services enables companies to plan, personalize, automate, and optimize customer marketing journey, connecting interaction, and connected products; a

In [7]:
crm_desc = crm_info['Business Summary']

company_description = "I want to find companies that make customer relationship management and are headquarted in SanFrancisco, California"

similarity = cosine_similarity_between_sentences(crm_desc, company_description)

Cosine similarity between the two sentences: 0.4948


In [8]:
def get_company_tickers():
    """
    Downloads and parses the Stock ticker symbols from the GitHub-hosted SEC company tickers JSON file.

    Returns:
        dict: A dictionary containing company tickers and related information.

    Notes:
        The data is sourced from the official SEC website via a GitHub repository:
        https://raw.githubusercontent.com/team-headstart/Financial-Analysis-and-Automation-with-LLMs/main/company_tickers.json
    """
    # URL to fetch the raw JSON file from GitHub
    url = "https://raw.githubusercontent.com/team-headstart/Financial-Analysis-and-Automation-with-LLMs/main/company_tickers.json"

    # Making a GET request to the URL
    response = requests.get(url)

    # Checking if the request was successful
    if response.status_code == 200:
        # Parse the JSON content directly
        company_tickers = json.loads(response.content.decode('utf-8'))

        # Optionally save the content to a local file for future use
        with open("company_tickers.json", "w", encoding="utf-8") as file:
            json.dump(company_tickers, file, indent=4)

        print("File downloaded successfully and saved as 'company_tickers.json'")
        return company_tickers
    else:
        print(f"Failed to download file. Status code: {response.status_code}")
        return None

company_tickers = get_company_tickers()

File downloaded successfully and saved as 'company_tickers.json'


In [9]:
company_tickers

{'0': {'cik_str': 1045810, 'ticker': 'NVDA', 'title': 'NVIDIA CORP'},
 '1': {'cik_str': 320193, 'ticker': 'AAPL', 'title': 'Apple Inc.'},
 '2': {'cik_str': 789019, 'ticker': 'MSFT', 'title': 'MICROSOFT CORP'},
 '3': {'cik_str': 1018724, 'ticker': 'AMZN', 'title': 'AMAZON COM INC'},
 '4': {'cik_str': 1652044, 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
 '5': {'cik_str': 1326801, 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
 '6': {'cik_str': 1318605, 'ticker': 'TSLA', 'title': 'Tesla, Inc.'},
 '7': {'cik_str': 1067983,
  'ticker': 'BRK-B',
  'title': 'BERKSHIRE HATHAWAY INC'},
 '8': {'cik_str': 1046179,
  'ticker': 'TSM',
  'title': 'TAIWAN SEMICONDUCTOR MANUFACTURING CO LTD'},
 '9': {'cik_str': 1730168, 'ticker': 'AVGO', 'title': 'Broadcom Inc.'},
 '10': {'cik_str': 59478, 'ticker': 'LLY', 'title': 'ELI LILLY & Co'},
 '11': {'cik_str': 19617, 'ticker': 'JPM', 'title': 'JPMORGAN CHASE & CO'},
 '12': {'cik_str': 104169, 'ticker': 'WMT', 'title': 'Walmart Inc.'},
 '13': {'cik_str'

In [10]:
len(company_tickers)

9998

In [11]:
#create our pinecone index
pinecone_api_key = userdata.get("PINECONE_API_KEY")
os.environ['PINECONE_API_KEY'] = pinecone_api_key

index_name = "stocks"
namespace = "stock-descriptions"

hf_embeddings = HuggingFaceEmbeddings()
vectorstore = PineconeVectorStore(index_name=index_name, embedding=hf_embeddings)

  hf_embeddings = HuggingFaceEmbeddings()
  hf_embeddings = HuggingFaceEmbeddings()


In [12]:
# Initialize tracking lists
successful_tickers = []
unsuccessful_tickers = []

# Load existing successful/unsuccessful tickers
try:
    with open('successful_tickers.txt', 'r') as f:
        successful_tickers = [line.strip() for line in f if line.strip()]
    print(f"Loaded {len(successful_tickers)} successful tickers")
except FileNotFoundError:
    print("No existing successful tickers file found")

try:
    with open('unsuccessful_tickers.txt', 'r') as f:
        unsuccessful_tickers = [line.strip() for line in f if line.strip()]
    print(f"Loaded {len(unsuccessful_tickers)} unsuccessful tickers")
except FileNotFoundError:
    print("No existing unsuccessful tickers file found")

Loaded 7790 successful tickers
Loaded 2317 unsuccessful tickers


In [13]:
def process_stock(stock_ticker: str) -> str:
    # Skip if already processed
    if stock_ticker in successful_tickers:
        return f"Already processed {stock_ticker}"

    try:
        # Get and store stock data
        stock_data = get_stock_info(stock_ticker)
        stock_description = stock_data['Business Summary']

        # Store stock description in Pinecone
        vectorstore_from_texts = PineconeVectorStore.from_documents(
            documents=[Document(page_content=stock_description, metadata=stock_data)],
            embedding=hf_embeddings,
            index_name=index_name,
            namespace=namespace
        )

        # Track success
        with open('successful_tickers.txt', 'a') as f:
            f.write(f"{stock_ticker}\n")
        successful_tickers.append(stock_ticker)

        return f"Processed {stock_ticker} successfully"

    except Exception as e:
        # Track failure
        with open('unsuccessful_tickers.txt', 'a') as f:
            f.write(f"{stock_ticker}\n")
        unsuccessful_tickers.append(stock_ticker)

        return f"ERROR processing {stock_ticker}: {e}"

def parallel_process_stocks(tickers: list, max_workers: int = 10) -> None:
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_ticker = {
            executor.submit(process_stock, ticker): ticker
            for ticker in tickers
        }

        for future in concurrent.futures.as_completed(future_to_ticker):
            ticker = future_to_ticker[future]
            try:
                result = future.result()
                print(result)

                # Stop on error
                if result.startswith("ERROR"):
                    print(f"Stopping program due to error in {ticker}")
                    executor.shutdown(wait=False)
                    raise SystemExit(1)

            except Exception as exc:
                print(f'{ticker} generated an exception: {exc}')
                print("Stopping program due to exception")
                executor.shutdown(wait=False)
                raise SystemExit(1)

# Prepare your tickers
tickers_to_process = [company_tickers[num]['ticker'] for num in company_tickers.keys()]

# Process them
parallel_process_stocks(tickers_to_process, max_workers=10)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Already processed CG
Already processed LAB
Already processed MPC
Already processed ERRAF
Already processed AAP
Already processed TARS
Already processed IONQ
Already processed FNMAJ
Already processed RUN
Already processed TRP
Already processed SEG
Already processed YUMC
Already processed ALMMF
Already processed SMR
Already processed IAS
Already processed SU
Already processed BZLFF
Already processed UFPT
Already processed PHAT
Already processed AEP
Already processed IPI
Already processed LBRDK
Already processed IDYA
Already processed DGX
Already processed NGVT
Already processed SQ
Already processed SHECF
Already processed FIHL
Already processed HQL
Already processed KRG
Already processed SNNUF
Already processed ODD
Already processed CTA-PA
Already processed SMC
Already processed MNR
Already processed SNAP
Already processed RF-PC
Already processed HI
Already processed AMBC
Already processed MNST
Already processed SAR
Already

ERROR:yfinance:404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/DAIC%20?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=DAIC+&crumb=OTB28o6OOiH


Processed DAIC  successfully
Processed GRAF-UN successfully
Processed IIPR-PA successfully
Processed PLLTL successfully
Processed IRET successfully
Processed FMCCK successfully
Processed FMCCM successfully
Processed FMCCP successfully
Processed FMCKP successfully
Processed GAM-PB successfully
Processed BELFB successfully
Processed AHH-PA successfully
Processed GTN-A successfully
Processed DRDGF successfully
Processed CMRE-PB successfully
Processed KBSR successfully
Processed AGM-PD successfully
Processed PTCHF successfully
Processed CMRE-PC successfully
Processed CMRE-PD successfully
Processed MEOBF successfully
Processed BH successfully
Processed KELYB successfully
Processed HLTC successfully
Processed GGT-PE successfully
Processed SMBMF successfully
Processed CELJF successfully
Processed MPSYF successfully
Processed DEFTF successfully
Processed HVT-A successfully
Processed GAMI successfully
Processed ECCX successfully
Processed UMH-PD successfully
Processed MITT-PA successfully
Proce

ERROR:yfinance:500 Server Error: Internal Server Error for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/THCPW?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=THCPW&crumb=OTB28o6OOiH


Processed LFT-PA successfully
Processed KEY-PL successfully
Processed ADNWW successfully
Processed UNOV successfully
Processed THCPU successfully
Processed IRAAU successfully
Processed IRAAW successfully
Processed THCPW successfully
Processed MHNC successfully
Processed MSCF successfully
Processed VLYPN successfully
Processed ARQQW successfully
Processed SCCC successfully
Processed DSAQW successfully
Processed SCCD successfully
Processed SCCE successfully
Processed SCCG successfully
Processed SCCF successfully
Processed SACC successfully
Processed GDL-PC successfully
Processed SACH-PA successfully
Processed BLEUR successfully
Processed OUST-WT successfully
Processed BLEUU successfully
Processed DSAQU successfully
Processed BLEUW successfully
Processed GFAIW successfully
Processed OUST-WTA successfully
Processed TEN-PF successfully
Processed TEN-PE successfully
Processed CTLPP successfully
Processed NMPWP successfully
Processed NMKCP successfully
Processed NMKBP successfully
Processed N

ERROR:yfinance:404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ADZCF?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=ADZCF&crumb=OTB28o6OOiH


Processed DEENF successfully
Processed SBXD-WT successfully
Processed FRBP successfully
Processed LEXXW successfully
Processed SBXD-UN successfully
Processed ADZCF successfully
Processed CTSWF successfully
Processed DGP successfully
Processed OLOXF successfully
Processed DGZ successfully
Processed DZZ successfully
Processed CAPNU successfully
Processed CTSUF successfully
Processed CAPNR successfully
Processed FRSPF successfully
Processed SAT successfully
Processed SAY successfully
Processed SAZ successfully
Processed SAJ successfully
Processed BW-PA successfully
Processed GRND-WT successfully
Processed MKFGW successfully
Processed GMTH successfully
Processed CNDAW successfully
Processed RELIW successfully
Processed CNDAU successfully
Processed CFR-PB successfully
Processed ADSEW successfully
Processed KHOB successfully
Processed NCPLW successfully
Processed WBS-PG successfully
Processed BLUAW successfully
Processed FFHPF successfully
Processed FAXRF successfully
Processed CODI-PC succe

ERROR:yfinance:500 Server Error: Internal Server Error for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/DMYY-WT?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=DMYY-WT&crumb=OTB28o6OOiH


Processed FVNNR successfully
Processed NVAWW successfully
Processed NVAAF successfully
Processed FITBO successfully
Processed DMYY-WT successfully
Processed CHEB-WT successfully
Processed TOIIW successfully
Processed CHEB-UN successfully
Processed FITBP successfully
Processed DMYY-UN successfully
Processed ATEK-WT successfully
Processed ATEK-UN successfully
Processed LUNRW successfully
Processed IONQ-WT successfully
Processed XFOWW successfully
Processed TBMCR successfully
Processed AGXRW successfully
Processed NMHIW successfully
Processed BURUW successfully
Processed VAL-WT successfully
Processed MDNC successfully
Processed GDEVW successfully
Processed HSPOR successfully
Processed GLTK successfully
Processed CRTDW successfully
Processed HSPOU successfully
Processed HSPOW successfully
Processed HUDAR successfully
Processed AP-WT successfully
Processed ABLLL successfully
Processed ABLLW successfully
Processed WTFCP successfully
Processed RCFA-WT successfully
Processed HUDAU successfully

ERROR:yfinance:500 Server Error: Internal Server Error for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/LUCYW?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=LUCYW&crumb=OTB28o6OOiH


Processed BYNOW successfully
Processed BYNOU successfully
Processed NEMCL successfully
Processed DYCQU successfully
Processed UHGI successfully
Processed DYCQR successfully
Processed LUCYW successfully
Processed BIPJ successfully
Processed MNESP successfully
Processed BRIPF successfully
Processed VFSWW successfully
Processed BIPI successfully
Processed ASB-PF successfully
Processed BIPH successfully
Processed MNQFF successfully
Processed ATHS successfully
Processed JOCM successfully
Processed CLDT-PA successfully
Processed STRRP successfully
Processed HYZNW successfully
Processed MNLCF successfully
Processed BKKT-WT successfully
Processed MNUFF successfully
Processed SAIHW successfully
Processed BIP-PA successfully
Processed BIP-PB successfully
Processed ATH-PB successfully
Processed ATH-PC successfully
Processed SIMAW successfully
Processed SIMAU successfully
Processed ATH-PE successfully
Processed ATH-PD successfully
Processed ATLCL successfully
Processed ATLCZ successfully
Processed

ERROR:yfinance:500 Server Error: Internal Server Error for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/PFTAW?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=PFTAW&crumb=OTB28o6OOiH


Processed WHLRL successfully
Processed PFTAW successfully
Processed INPAP successfully
Processed PFTAU successfully
Processed COF-PI successfully
Processed COF-PJ successfully
Processed LIFWW successfully
Processed LIFWZ successfully
Processed PSPX successfully
Processed COF-PK successfully
Processed COF-PL successfully
Processed COF-PN successfully
Processed EPDU successfully
Processed SBEV-WT successfully
Processed CRESW successfully
Processed DHAIW successfully
Processed LMMY successfully
Processed MYPSW successfully
Processed AMBI-WT successfully
Processed PROCW successfully
Processed BMTX-WT successfully
Processed BFRIW successfully
Processed BWVTF successfully
Processed ATMP successfully
Processed TFC-PI successfully
Processed DJP successfully
Processed COWTF successfully
Processed TFC-PR successfully
Processed TFC-PO successfully
Processed EVVAQ successfully
Processed VXZ successfully
Processed VXX successfully
Processed PGMFF successfully
Processed DTSTW successfully
Processed 

In [14]:
# Initialize Pinecone
pc = Pinecone(api_key=userdata.get("PINECONE_API_KEY"),)

# Connect to your Pinecone index
pinecone_index = pc.Index(index_name)

In [15]:
query = "What are some companies that manufacture customer relationship management software?"
raw_query_embedding = get_huggingface_embeddings(query)
top_matches = pinecone_index.query(vector=raw_query_embedding.tolist(), top_k=10, include_metadata=True, namespace=namespace)

In [16]:
top_matches

{'matches': [{'id': 'e9bd6d17-cf65-4739-9f54-dc64d9b276b2',
              'metadata': {'Business Summary': 'Concentrix Corporation engages '
                                               'in the provision of '
                                               'technology-infused customer '
                                               'experience (CX) solutions '
                                               'worldwide. The company '
                                               'provides CX process '
                                               'optimization, technology '
                                               'innovation, front- and '
                                               'back-office automation, '
                                               'analytics, and business '
                                               'transformation services, '
                                               'across various channels of '
                                            

In [17]:
contexts = [item['metadata']['text'] for item in top_matches['matches']]

In [18]:
augmented_query = "<CONTEXT>\n" + "\n\n-------\n\n".join(contexts[ : 10]) + "\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n" + query

In [19]:
print(augmented_query)

<CONTEXT>
Concentrix Corporation engages in the provision of technology-infused customer experience (CX) solutions worldwide. The company provides CX process optimization, technology innovation, front- and back-office automation, analytics, and business transformation services, across various channels of communication, such as voice, chat, email, social media, asynchronous messaging, and custom applications. It also offers customer lifecycle management; customer experience/user experience strategy and design; analytics and actionable insights; digital transformation services that design and engineer CX solutions to enable efficient customer self-service and build customer loyalty; customer engagement solutions and services that address the entirety of the customer lifecycle; AI technology that can intelligently act on customer intent to improve customer experience with non-human engagement; voice of the customer and analytics solutions to gather and analyze customer feedback to foster 

In [24]:
!pip install groq
from groq import Groq
client = Groq(
    api_key=userdata.get("GROQ_API_KEY"),
)



In [25]:
system_prompt = f"""You are an expert at providing answers about stocks. Please answer my question provided.
"""

chat_completion = client.chat.completions.create(
    model="llama-3.1-70b-versatile",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": augmented_query}
    ]
)
response = chat_completion.choices[0].message.content

In [26]:
print(response)

Based on the provided context, some companies that manufacture customer relationship management (CRM) software include:

1. Salesforce, Inc. - They provide a comprehensive CRM platform with various features such as sales, marketing, and customer service tools.
2. Software Effective Solutions, Inc. - They offer CRM software tools for small and medium-sized organizations, and telecom operators, with features like workforce management, call centers, and customer service.
3. HubSpot, Inc. - Their CRM platform includes tools for marketing automation, email, social media, and reporting and analytics, as well as sales and customer service features.
4. Trident Digital Tech Holdings Ltd - Although not primarily a CRM software company, they do offer CRM and inventory management solutions as part of their IT consulting services.
5. SS&C Technologies Holdings, Inc. - They have a CRM solution as part of their software products, but it's more focused on the financial services and healthcare industri

In [27]:
from bs4 import BeautifulSoup