# Scraping Code

In [1]:
import requests
from bs4 import BeautifulSoup
import os

def scrape_links_and_texts():
    # Get user input for the keyword and construct the URL
    keyword = input("Enter a keyword to search: ")
    url = f"https://www.benzinga.com/search?q={keyword}"

    try:
        # Send a GET request to the URL
        response = requests.get(url)

        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            # Parse the HTML content of the page
            soup = BeautifulSoup(response.content, 'html.parser')

            # Find the container with the specified class
            container = soup.find('div', class_='content-feed-list')

            # Find all links within the container
            links = container.find_all('a')

            # Create a directory to store the text files if it doesn't exist
            if not os.path.exists('search_results_texts'):
                os.makedirs('search_results_texts')
            else:
                # Delete previous text files from the folder
                files = os.listdir('search_results_texts')
                for file in files:
                    os.remove(os.path.join('search_results_texts', file))

            # Loop through each link
            for link in links:
                # Get the href attribute of the link
                link_url = link['href']

                # Send a GET request to the link URL
                link_response = requests.get(link_url)

                # Check if the request was successful (status code 200)
                if link_response.status_code == 200:
                    # Parse the HTML content of the link page
                    link_soup = BeautifulSoup(link_response.content, 'html.parser')

                    # Find the first h1 tag and all p tags with class "block core-block"
                    h1_tag = link_soup.find('h1')
                    p_tags = link_soup.find_all('p', class_='block core-block')

                    # Get the text from the h1 tag
                    h1_text = h1_tag.text.strip() if h1_tag else ""

                    # Get the text from all p tags
                    p_texts = [p.text.strip() for p in p_tags]

                    # Combine h1 text and p texts
                    combined_text = h1_text + '\n\n' + '\n'.join(p_texts)

                    # Write the combined text to a text file
                    with open(f'search_results_texts/{keyword}_{links.index(link) + 1}.txt', 'w', encoding='utf-8') as file:
                        file.write(combined_text)
                else:
                    print(f"Failed to retrieve data from link: {link_url}")
        else:
            print("Failed to retrieve search results.")
    except Exception as e:
        print(f"An error occurred: {e}")

In [2]:
# Call the function to initiate the process
scrape_links_and_texts()

Enter a keyword to search: Artificial Intelligence


# Vectorization Code

In [3]:
import time
import pandas as pd
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv, find_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter


load_dotenv(find_dotenv())


OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
PINECONE_ENVIRONMENT = os.getenv("PINECONE_ENVIRONMENT")


pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY'),  
    environment=os.getenv('PINECONE_ENV')  
)

if PINECONE_INDEX_NAME not in pinecone.list_indexes():
    pinecone.create_index(
        PINECONE_INDEX_NAME,
        dimension=1536,
        metric='cosine'
    )
    # wait for index to finish initialization
    while not pinecone.describe_index(PINECONE_INDEX_NAME).status['ready']:
        time.sleep(1)

index = pinecone.Index(PINECONE_INDEX_NAME)

pinecone.delete_index("langchain-index")        
        
pinecone.create_index("langchain-index", dimension=1536) 

index_name = "langchain-index"

embeddings = OpenAIEmbeddings(deployment="text-embedding-ada-002") #EXPENSIVE - - - USE CAREFULLY

text_field = "text"  # the metadata field that contains our text

# Initialize the vector store object
vectorstore = Pinecone(
    index, embeddings.embed_query, text_field
)

class Document:
    def __init__(self, page_content, metadata):
        self.page_content = page_content
        self.metadata = metadata

    def __repr__(self):
        return f"Document(page_content='{self.page_content}', metadata={self.metadata})"


# Define a simple text splitting function
def split_text(text, chunk_size=1000):
    chunks = []
    for i in range(0, len(text), chunk_size):
        chunk = text[i:i+chunk_size]
        chunks.append(chunk)
    return chunks    


text_folder_path = "./search_results_texts"

# Get a list of all .txt files in the specified folder
txt_files = [f for f in os.listdir(text_folder_path) if f.endswith('.txt')]

# Read the contents of each file and store in a list of Documents
documents_list = []
for file in txt_files:
    with open(os.path.join(text_folder_path, file), 'r', encoding='utf-8') as f:
        content = f.read()
        # Create a Document instance with the file content and metadata
        document = Document(page_content=content, metadata={'text': content})
        print(f"Processing document: {file}")
        # Vectorize the document using Pinecone
        try:
            search = Pinecone.from_documents([document], embeddings, index_name=index_name)
        except Exception as e:
            print(f"Error processing document {file}: {e}")

for i in range(len(documents_list)):
    try:
        document = documents_list[i]
        print(f"Processing document {i + 1}")

        # Split the document into chunks using the split_text function
        chunks = split_text(document.page_content)

        for chunk in chunks:
            chunk_document = Document(page_content=chunk, metadata={'text': chunk})
            # Vectorize the chunk using Pinecone
            search = Pinecone.from_documents([chunk_document], embeddings, index_name=index_name)

    except Exception as e:
        print(f"Error processing document {i + 1}: {e}")

  from tqdm.autonotebook import tqdm


Processing document: Artificial Intelligence_1.txt
Processing document: Artificial Intelligence_10.txt
Processing document: Artificial Intelligence_11.txt
Processing document: Artificial Intelligence_12.txt
Processing document: Artificial Intelligence_13.txt
Processing document: Artificial Intelligence_14.txt
Processing document: Artificial Intelligence_15.txt
Processing document: Artificial Intelligence_16.txt
Processing document: Artificial Intelligence_17.txt
Processing document: Artificial Intelligence_18.txt
Processing document: Artificial Intelligence_19.txt
Processing document: Artificial Intelligence_2.txt
Processing document: Artificial Intelligence_20.txt
Processing document: Artificial Intelligence_3.txt
Processing document: Artificial Intelligence_4.txt
Processing document: Artificial Intelligence_5.txt
Processing document: Artificial Intelligence_6.txt
Processing document: Artificial Intelligence_7.txt
Processing document: Artificial Intelligence_8.txt
Processing document:

In [4]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0002,
 'namespaces': {'': {'vector_count': 20}},
 'total_vector_count': 20}

# ChatBot Code

In [5]:
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'

from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)

chat = ChatOpenAI(
    openai_api_key= OPENAI_API_KEY,
    model='gpt-3.5-turbo'
)

In [21]:
messages = [
    SystemMessage(content="""You are a seasoned financial advisor specializing in providing personalized investment advice to clients. Begin by introducing yourself as a trusted advisor committed to helping clients achieve their financial goals through strategic investment decisions.

Based on the client's prompt, propose a specific investment strategy or portfolio allocation that aligns with their goals and risk profile based on the context provided. Suggest diversification across asset classes such as stocks, bonds, and alternative investments, emphasizing the importance of a balanced approach.

Invite the client to ask questions or express any concerns they may have about the proposed investment strategy. Encourage them to share their thoughts on asset allocation, investment selection, and portfolio rebalancing. Wait for their feedback before providing further guidance.

Offer detailed explanations of the investment products or vehicles recommended within the proposed strategy, highlighting their features, benefits, and potential risks. Provide examples of specific securities or funds that fit the client's investment criteria, demonstrating how each contributes to their overall portfolio objectives.

Encourage the client to review their current investment holdings and consider any adjustments or optimizations based on the proposed strategy. Discuss the importance of regular portfolio monitoring and ongoing communication to ensure alignment with their long-term financial goals.

Address any questions or concerns raised by the client with patience and clarity, prioritizing their understanding and confidence in the recommended investment approach. Offer additional resources or educational materials to support their learning and decision-making process.

Conclude the consultation by summarizing the key points discussed and confirming the client's understanding of the proposed investment strategy. Express your commitment to their financial success and offer to schedule a follow-up meeting to review progress and make any necessary adjustments.

Throughout the interaction, maintain a proactive and client-centric approach, focusing on building trust and rapport while providing valuable insights and guidance tailored to the client's individual needs and circumstances.

If you don't have information about what is being asked in the quesr, simply ask the user to change the 'inital keyword' for a better response. 

PLEASE MAKE SURE THE ANSWER YOU PROVIDE IS PRESENTED IN BULLET POINTS.

"""),
]

In [22]:
index_list = pinecone.list_indexes()

from langchain.embeddings.openai import OpenAIEmbeddings

embed_model = OpenAIEmbeddings(model="text-embedding-ada-002")

In [23]:
from langchain.vectorstores import Pinecone

text_field = "text"  # the metadata field that contains our text

# initialize the vector store object
vectorstore = Pinecone(
    index, embed_model.embed_query, text_field
)

def augment_prompt(query: str):
    # get top 3 results from knowledge base
    results = vectorstore.similarity_search(query, k=3)
    # get the text from the results
    source_knowledge = "\n".join([x.page_content for x in results])
    # feed into an augmented prompt
    augmented_prompt = f"""Using the contexts below, answer the query.

    Contexts:
    {source_knowledge}

    Query: {query}"""
    return augmented_prompt



In [24]:
from langchain.vectorstores import Pinecone

query = "What stocks are the best to invest in now?"

vectorstore.similarity_search(query, k=10)

[Document(page_content='The Nvidia Chart Shows The Importance Of Earnings From Tesla, Meta, Microsoft, And Alphabet\n\nTo gain an edge, this is what you need to know today.\nPlease click here for an enlarged chart of NVIDIA Corp NVDA.\nNote the following:\nIn the early trade, money flows are positive in Apple Inc AAPL, Amazon.com, Inc. AMZN, GOOG, META, and NVDA.\nIn the early trade, money flows are neutral in MSFT.\nIn the early trade, money flows are negative in TSLA.\nIn the early trade, money flows are positive in SPDR S&P 500 ETF Trust SPY and Invesco QQQ Trust Series 1 QQQ.\nThe momo crowd is aggressively buying stocks in the early trade.\xa0Smart money is inactive in the early trade.\nThe momo crowd is selling gold in the early trade.\xa0Smart money is inactive in the early trade.\nFor longer-term, please see gold and silver ratings.\nThe most popular ETF for gold is SPDR Gold Trust GLD.\xa0The most popular ETF for silver is iShares Silver Trust SLV.\nThe momo crowd is selling o

In [25]:
print(augment_prompt(query))

Using the contexts below, answer the query.

    Contexts:
    The Nvidia Chart Shows The Importance Of Earnings From Tesla, Meta, Microsoft, And Alphabet

To gain an edge, this is what you need to know today.
Please click here for an enlarged chart of NVIDIA Corp NVDA.
Note the following:
In the early trade, money flows are positive in Apple Inc AAPL, Amazon.com, Inc. AMZN, GOOG, META, and NVDA.
In the early trade, money flows are neutral in MSFT.
In the early trade, money flows are negative in TSLA.
In the early trade, money flows are positive in SPDR S&P 500 ETF Trust SPY and Invesco QQQ Trust Series 1 QQQ.
The momo crowd is aggressively buying stocks in the early trade. Smart money is inactive in the early trade.
The momo crowd is selling gold in the early trade. Smart money is inactive in the early trade.
For longer-term, please see gold and silver ratings.
The most popular ETF for gold is SPDR Gold Trust GLD. The most popular ETF for silver is iShares Silver Trust SLV.
The momo c

In [26]:
# create a new user prompt
prompt = HumanMessage(
    content=augment_prompt(query)
)
# add to messages
messages.append(prompt)

res = chat(messages)

print(res.content)

- Based on the information provided about the current market conditions and money flows, consider the following stocks for potential investment:
  - Positive money flows in Apple Inc (AAPL), Amazon.com, Inc. (AMZN), Google (GOOG, GOOGL), Meta (formerly Facebook), and Nvidia (NVDA) indicate market interest in these companies.
  - Neutral money flows in Microsoft Corp (MSFT) suggest a steady performance.
  - Negative money flows in Tesla Inc (TSLA) might indicate caution.
- Additionally, the SPDR S&P 500 ETF Trust (SPY) and Invesco QQQ Trust Series 1 (QQQ) are seeing positive money flows, reflecting broader market sentiment.
- Consider diversifying your portfolio across these companies based on your risk tolerance and investment goals.
- Given the recent developments in the tech sector and market dynamics, companies like Nvidia and AMD could be interesting investment opportunities.
- It's essential to conduct thorough research, consider your risk profile, and consult with a financial adv

In [27]:
# now create a new user prompt
prompt = HumanMessage(
    content="How are these stocks related to AI?"
)
# add to messages
messages.append(prompt)

# send to chat-gpt
res = chat(messages)

print(res.content)

messages.append(res)

- Nvidia Corp (NVDA) and Advanced Micro Devices, Inc (AMD) are both companies that are closely tied to the artificial intelligence (AI) industry.
- Nvidia is known for its GPU (graphics processing unit) technology that is widely used in AI applications, particularly in deep learning and neural networks.
- Nvidia's GPUs are favored for their parallel processing capabilities, making them ideal for training complex AI models.
- Advanced Micro Devices (AMD) also offers GPUs and CPUs (central processing units) that are used in AI workloads, although Nvidia has traditionally been more dominant in the AI space.
- Both Nvidia and AMD provide hardware solutions that power AI algorithms and applications across various industries, including cloud computing, data centers, autonomous vehicles, and more.
- The demand for AI-related technologies and processing power continues to drive the growth and performance of these companies in the stock market.
- Investors interested in AI technology may find N

In [28]:
# now create a new user prompt
prompt = HumanMessage(
    content="What electric vehicle stocks should I purchase?"
)
# add to messages
messages.append(prompt)

# send to chat-gpt
res = chat(messages)

print(res.content)

messages.append(res)

I'm sorry, but based on the information provided in the query, there is no specific mention of electric vehicle stocks or recommendations related to investing in them. To provide you with accurate and tailored recommendations on electric vehicle stocks, please consider updating the initial keyword or providing more context related to electric vehicles or the automotive industry. This way, I can offer you relevant insights and suggestions on potential electric vehicle investments.
