In [1]:
# import statements
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import torch # for finbert model
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pickle
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


First, get the four columns sentiment_score, sentiment_positive, sentiment_negative and sentiment_neutral by scraping the news page, finding the most frequent sentiment, and computing the mean score of the headlines that carry it.

In [29]:
# Set Chrome options for the news scrape
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode (no UI)

# The options only take effect when handed to the driver explicitly; without
# `options=` the headless flag configured above is ignored.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

In [30]:
# Step 1: Open the Google News search results for the query "infosys when:1d"
news_search_url = "https://news.google.com/search?q=infosys%20when%3A1d&hl=en-IN&gl=IN&ceid=IN%3Aen"
driver.get(news_search_url)
time.sleep(3)  # fixed 3-second pause before the page is read

In [31]:
# Collect every element on the page that carries the class "JtKRv"
articles = driver.find_elements(By.CLASS_NAME, "JtKRv")

# Preview the text of the first five matches
for title in (article.text for article in articles[:5]):
    print(title)

Leaked hiring memo sparks outrage over elite-only recruitment; blacklists Infosys, TCS, Wipro, Cognizant
IT Giant To Announce Q4FY25 Results, Final Dividend Recommendation; Check Dates For Infosys Q4 Updates
Infosys Share Price Highlights : Infosys closed today at ₹1631.70, up 2.48% from yesterday's ₹1592.20 | Stock Market News
Infosys, Wipro, TCS among tech giants blacklisted in US recruiter's leaked email: 'Not the right fit'
Sensex, Nifty Extend Winning Streak: HDFC Bank, Infosys Lead


In [32]:
len(articles) # number of elements matched by the class-name lookup

45

In [26]:
# Checkpoint name handed to both from_pretrained() calls below
MODEL_NAME = "ProsusAI/finbert"

# Instantiate the sequence classifier and its tokenizer from that checkpoint
model = BertForSequenceClassification.from_pretrained(MODEL_NAME)
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

# Wrap the model and tokenizer in a sentiment-analysis pipeline
sentiment_analyzer = pipeline(task="sentiment-analysis", model=model, tokenizer=tokenizer)

Device set to use cpu


In [None]:
# Extract text from the article web elements while the browser session is
# still open. The try/finally guarantees the driver is closed even when
# reading an element fails (e.g. it went stale), so no Chrome process leaks.
try:
    articles_text = [article.text for article in articles]
finally:
    driver.quit()

# Drop blank strings: an element with no rendered text yields "", and
# classifying those would add meaningless votes to the sentiment mode.
articles_text = [text for text in articles_text if text.strip()]

# Fail here with a clear message rather than later, when the mode of an
# empty label list is indexed.
if not articles_text:
    raise ValueError("No article headlines were scraped; cannot run sentiment analysis.")

# Perform sentiment analysis (one result dict per headline)
results = sentiment_analyzer(articles_text)

In [None]:
# Split each pipeline result into two parallel lists:
# the predicted label and the score reported for it
current_news_labels, current_news_scores = [], []
for prediction in results:
    current_news_labels.append(prediction["label"])
    current_news_scores.append(prediction["score"])

In [39]:
# Most frequent sentiment label across the headlines
labels_series = pd.Series(current_news_labels)
label_modes = labels_series.mode()  # may hold several labels when counts tie
sentiment_mode = label_modes[0]     # the first entry is the one used

print(f"The mode for the 'Sentiment' is: {sentiment_mode}")

The mode for the 'Sentiment' is: neutral


In [40]:
# Tabulate each headline's predicted label next to its score
df = pd.DataFrame({'Sentiment': current_news_labels, 'Sentiment Score': current_news_scores})

# Most common label (the first entry when several labels tie)
sentiment_mode = df['Sentiment'].mode()[0]

# Keep only the rows whose label equals that mode
is_mode_row = df['Sentiment'].eq(sentiment_mode)
sentiment_mode_data = df.loc[is_mode_row]

# Average the scores of those rows
mean_sentiment_score = sentiment_mode_data['Sentiment Score'].mean()

print(f"The mode for the 'Sentiment' column is: {sentiment_mode}")
print(f"The mean sentiment score for the mode '{sentiment_mode}' is: {mean_sentiment_score}")

The mode for the 'Sentiment' column is: neutral
The mean sentiment score for the mode 'neutral' is: 0.7947584199905395


In [41]:
# One boolean flag per sentiment class; exactly the flag matching the
# dominant label is True. Unpacked in the order the labels are listed.
sentiment_positive, sentiment_negative, sentiment_neutral = [
    sentiment_mode == label for label in ('positive', 'negative', 'neutral')
]

print(sentiment_positive, sentiment_negative, sentiment_neutral)

False False True


Now, scrape the NSE website to get the other columns: OPEN, HIGH, LOW, PREV. CLOSE, ltp, close and vwap.

In [2]:
# Chrome options for the NSE scrape: headless, with an explicit user-agent string
chrome_options = Options()
for argument in (
    "--headless",  # Run in headless mode (no UI)
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
):
    chrome_options.add_argument(argument)

chromedriver_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chromedriver_service, options=chrome_options)

In [3]:
# Open the NSE equity quote page for the INFY symbol
nse_quote_url = "https://www.nseindia.com/get-quotes/equity?symbol=INFY"
driver.get(nse_quote_url)
time.sleep(5)  # fixed 5-second pause before the page is read

In [4]:
# Scrape the quote page into a dictionary keyed by field name.
# The driver is always closed in `finally`; any failure is reported and then
# re-raised so later cells never run against a partially filled `data`.
data = {}
try:
    # Last Traded Price (LTP): wait up to 15 seconds for the element to be visible
    ltp_element = WebDriverWait(driver, 15).until(
        EC.visibility_of_element_located((By.ID, "quoteLtp"))
    )
    ltp = ltp_element.text
    data["Last Traded Price (LTP)"] = ltp

    # Price Info Status (change in price and percentage)
    price_info_status = driver.find_element(By.ID, "priceInfoStatus").text
    data["Price Info Status"] = price_info_status

    # All cells of the price table, in document order
    table_xpath = '//table[@id="priceInfoTable"]/tbody/tr/td'
    table_cells = driver.find_elements(By.XPATH, table_xpath)

    # Headers are paired with cells by position
    # NOTE(review): assumes the page lists the cells in this order — confirm.
    table_headers = ["Prev. Close", "Open", "High", "Low", "Close", "Indicative Close", "VWAP", "Adjusted Price"]
    table_values = [cell.text for cell in table_cells]

    # zip() stops at the shorter sequence, so a partially loaded table would
    # silently leave keys missing; fail here with an explicit message instead.
    if len(table_values) < len(table_headers):
        raise ValueError(
            f"Expected at least {len(table_headers)} price table cells, found {len(table_values)}"
        )

    for header, value in zip(table_headers, table_values):
        data[header] = value

except Exception as e:
    print("Error occurred:", str(e))
    # Re-raise: continuing would only defer the failure to a KeyError on `data`.
    raise

finally:
    # Quit the driver after execution
    driver.quit()

In [5]:
# Show the scraped fields so the values can be checked before they are used
print(data)

{'Last Traded Price (LTP)': '1,569.00', 'Price Info Status': '-34.55 (-2.15 %)', 'Prev. Close': '1,603.55', 'Open': '1,590.00', 'High': '1,598.45', 'Low': '1,560.00', 'Close': '1,570.65', 'Indicative Close': '-', 'VWAP': '1,576.42', 'Adjusted Price': '-'}


## Make the Prediction

In [49]:
# load the saved model
# NOTE(review): pickle.load can execute arbitrary code while deserializing,
# so only load this file if it comes from a trusted source.
with open("stock_price_model.pkl", "rb") as file:
    stock_model = pickle.load(file)

In [62]:
# Build a one-row DataFrame of model inputs [colnames same as model training].
# Maps each price column name to the key its scraped value has in `data`.
price_sources = {
    "OPEN": 'Open',
    "HIGH": 'High',
    "LOW": 'Low',
    "PREV. CLOSE": 'Prev. Close',
    "ltp": 'Last Traded Price (LTP)',
    "vwap": 'VWAP',
}
feature_row = {column: data[key] for column, key in price_sources.items()}

# Sentiment inputs follow the price columns, in the same order as before
feature_row.update({
    "Sentiment Score": mean_sentiment_score,
    "Sentiment_negative": sentiment_negative,
    "Sentiment_neutral": sentiment_neutral,
    "Sentiment_positive": sentiment_positive,
})
features = pd.DataFrame([feature_row])

features.head()

Unnamed: 0,OPEN,HIGH,LOW,PREV. CLOSE,ltp,vwap,Sentiment Score,Sentiment_negative,Sentiment_neutral,Sentiment_positive
0,1605.0,1636.15,1605.0,1592.75,1619.25,1626.09,0.794758,False,True,False


In [64]:
# Price columns are turned into floats here
columns_to_convert = ["OPEN", "HIGH", "LOW", "PREV. CLOSE", "ltp", "vwap"]

for price_column in columns_to_convert:
    # Cast to str, then strip everything except digits and the decimal point
    # (e.g. thousands separators)
    cleaned = features[price_column].astype(str).str.replace(r"[^\d.]", "", regex=True)
    # Values that still cannot be parsed become NaN instead of raising
    features[price_column] = pd.to_numeric(cleaned, errors="coerce")

In [65]:
# Display the dtype of every feature column after the conversion
features.dtypes

OPEN                  float64
HIGH                  float64
LOW                   float64
PREV. CLOSE           float64
ltp                   float64
vwap                  float64
Sentiment Score       float64
Sentiment_negative       bool
Sentiment_neutral        bool
Sentiment_positive       bool
dtype: object

In [67]:
# Make the prediction
# NOTE(review): presumably the model expects the same column names and order
# it was trained with — confirm against the training notebook.
predicted_price = stock_model.predict(features)
# Print the first entry of the prediction, formatted to two decimals
print(f"Predicted Closing Price: {predicted_price[0]:.2f}")

Predicted Closing Price: 1628.45


## Automating the Prediction