In [1]:
import time
import pandas as pd
import pickle
import numpy as np
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from webdriver_manager.chrome import ChromeDriverManager

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def setup_driver():
    """Create a Chrome WebDriver and return it.

    The chromedriver binary is resolved through ``ChromeDriverManager().install()``
    and handed to Selenium via a ``Service`` object. Default Chrome options are
    used; no arguments are added.

    Returns:
        The ``webdriver.Chrome`` instance. The caller owns it and is responsible
        for calling ``quit()``.
    """
    options = Options()
    # Headless mode is intentionally left off; uncomment to run without a window.
    # options.add_argument("--headless")
    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=options)


def scrape_google_news(driver):
    """Scrape Google News search results for Infosys and return their text.

    Opens a Google News search whose query string is ``infosys when:1d``, waits
    a fixed five seconds for the page to render, then collects the ``.text`` of
    every element with CSS class ``JtKRv`` (presumably the headline elements;
    the class name is controlled by Google and may change).

    The driver is always quit before this function returns or raises, so the
    caller must not reuse it afterwards.

    Args:
        driver: A Selenium WebDriver (or compatible object) exposing ``get``,
            ``find_elements`` and ``quit``.

    Returns:
        list[str]: Text of each matched element. Empty if nothing matched or
        if locating/reading the elements failed (the error is printed).

    Raises:
        Exception: Whatever ``driver.get`` raises is propagated after the
        driver has been quit.
    """
    # Initialise up front so the return below is valid even when the lookup
    # fails; previously an exception left this name unbound.
    articles_text = []
    try:
        # Navigation sits inside the try/finally so a failed page load no
        # longer leaks the browser process.
        driver.get("https://news.google.com/search?q=infosys%20when%3A1d&hl=en-IN&gl=IN&ceid=IN%3Aen")
        time.sleep(5)
        try:
            articles = driver.find_elements(By.CLASS_NAME, "JtKRv")
            articles_text = [article.text for article in articles]
        except Exception as e:
            # Best-effort scraping: report the problem and fall back to [].
            print("Error occurred:", str(e))
    finally:
        driver.quit()
    return articles_text


def analyze_sentiment(articles_text):
    """Classify the sentiment of each text with the ProsusAI/finbert model.

    Args:
        articles_text: Sequence of strings to classify.

    Returns:
        tuple[list, list]: ``(labels, scores)`` where ``labels[i]`` and
        ``scores[i]`` are the ``"label"`` and ``"score"`` fields of the
        pipeline result for ``articles_text[i]``. Both lists are empty when
        ``articles_text`` is empty.
    """
    # Nothing to classify: skip loading the tokenizer and model entirely.
    if len(articles_text) == 0:
        return [], []

    MODEL_NAME = "ProsusAI/finbert"
    tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
    model = BertForSequenceClassification.from_pretrained(MODEL_NAME)
    sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
    results = sentiment_analyzer(articles_text)
    labels = [result["label"] for result in results]
    scores = [result["score"] for result in results]
    return labels, scores


def calculate_sentiment_mode(labels, scores):
    """Return the most frequent sentiment label and the mean score of that label.

    Args:
        labels: Sentiment labels, one per article.
        scores: Scores aligned positionally with ``labels``.

    Returns:
        tuple: ``(sentiment_mode, mean_sentiment_score)``. ``sentiment_mode``
        is the first entry of ``pandas.Series.mode()`` over ``labels``;
        ``mean_sentiment_score`` is the mean of the scores whose label equals
        that mode.

    Raises:
        ValueError: If ``labels`` is empty (there is no mode to report), or,
        from pandas, if ``labels`` and ``scores`` differ in length.
    """
    labels_series = pd.Series(labels)
    # An empty Series has no mode; fail with a clear message instead of an
    # opaque lookup error from ``mode()[0]``.
    if labels_series.empty:
        raise ValueError("Cannot calculate sentiment mode: no sentiment labels were provided.")

    sentiment_mode = labels_series.mode()[0]
    df = pd.DataFrame({'Sentiment': labels, 'Sentiment Score': scores})
    sentiment_mode_data = df[df['Sentiment'] == sentiment_mode]
    mean_sentiment_score = sentiment_mode_data['Sentiment Score'].mean()
    return sentiment_mode, mean_sentiment_score


def scrape_nse_data(driver):
    """Scrape the NSE quote page for INFY and return the displayed price fields.

    Loads the INFY equity quote page, waits a fixed five seconds, reads the
    text of the element with id ``quoteLtp`` and then every ``td`` cell of the
    table with id ``priceInfoTable``. Cells are paired positionally with a
    fixed list of header names; ``zip`` drops any surplus cells or headers.

    The driver is always quit before this function returns or raises, so the
    caller must not reuse it afterwards.

    Args:
        driver: A Selenium WebDriver (or compatible object) exposing ``get``,
            ``find_element``, ``find_elements`` and ``quit``.

    Returns:
        dict[str, str]: Raw cell text keyed by ``"Last Traded Price (LTP)"``
        and the table header names. May be empty or partial if locating the
        elements failed (the error is printed).

    Raises:
        Exception: Whatever ``driver.get`` raises is propagated after the
        driver has been quit.
    """
    data = {}
    try:
        # Navigation sits inside the try/finally so a failed page load no
        # longer leaks the browser process.
        driver.get("https://www.nseindia.com/get-quotes/equity?symbol=INFY")
        time.sleep(5)
        try:
            ltp_element = driver.find_element(By.ID, "quoteLtp")
            data["Last Traded Price (LTP)"] = ltp_element.text
            table_xpath = '//table[@id="priceInfoTable"]/tbody/tr/td'
            table_cells = driver.find_elements(By.XPATH, table_xpath)
            table_headers = ["Prev. Close", "Open", "High", "Low", "Close", "Indicative Close", "VWAP", "Adjusted Price"]
            table_values = [cell.text for cell in table_cells]
            data.update(zip(table_headers, table_values))
        except Exception as e:
            # Best-effort scraping: report the problem and return what we have.
            print("Error occurred:", str(e))
    finally:
        driver.quit()
    return data

def preprocess_features(data, sentiment_mode, mean_sentiment_score):
    """Build the single-row feature frame passed to the price model.

    Args:
        data: Mapping with the keys ``'Open'``, ``'High'``, ``'Low'``,
            ``'Prev. Close'``, ``'Last Traded Price (LTP)'`` and ``'VWAP'``.
        sentiment_mode: Dominant sentiment label; compared against
            ``'positive'``, ``'negative'`` and ``'neutral'`` to produce three
            boolean indicator columns.
        mean_sentiment_score: Stored unchanged in the ``"Sentiment Score"`` column.

    Returns:
        pandas.DataFrame: One row. The six price columns are converted to
        numbers after stripping thousands separators; values that still
        cannot be parsed become NaN.

    Raises:
        KeyError: If ``data`` lacks one of the required keys.
    """
    row = {
        "OPEN": data['Open'],
        "HIGH": data['High'],
        "LOW": data['Low'],
        "PREV. CLOSE": data['Prev. Close'],
        "ltp": data['Last Traded Price (LTP)'],
        "vwap": data['VWAP'],
        "Sentiment Score": mean_sentiment_score,
        "Sentiment_negative": sentiment_mode == 'negative',
        "Sentiment_neutral": sentiment_mode == 'neutral',
        "Sentiment_positive": sentiment_mode == 'positive',
    }
    features = pd.DataFrame([row])

    for price_column in ("OPEN", "HIGH", "LOW", "PREV. CLOSE", "ltp", "vwap"):
        # Drop thousands separators (e.g. "1,600.50") before numeric parsing.
        without_commas = features[price_column].astype(str).str.replace(",", "", regex=False)
        features[price_column] = pd.to_numeric(without_commas, errors="coerce")
    return features


def load_model(model_path="stock_price_model.pkl"):
    """Load a pickled stock price prediction model from disk.

    Args:
        model_path: Path to the pickle file. Defaults to
            ``"stock_price_model.pkl"`` in the current working directory, so
            existing zero-argument calls keep working.

    Returns:
        The unpickled object.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load model files that come from a trusted source.
    with open(model_path, "rb") as file:
        return pickle.load(file)


def predict_price(model, features):
    """Run ``model.predict`` on ``features`` and return the first prediction.

    Args:
        model: Object exposing a ``predict(features)`` method.
        features: Input forwarded unchanged to ``model.predict``.

    Returns:
        Element ``0`` of whatever ``model.predict(features)`` returns.
    """
    return model.predict(features)[0]

In [11]:
def main():
    """Run the pipeline end to end and print the predicted closing price.

    Steps, in order: scrape news text, classify its sentiment, reduce the
    results to a dominant label and mean score, scrape the NSE quote, build
    the feature row, load the pickled model and predict.
    """
    # Each scraper quits the driver it is given, so each gets a fresh one.
    headlines = scrape_google_news(setup_driver())

    sentiment_labels, sentiment_scores = analyze_sentiment(headlines)
    dominant_sentiment, dominant_score = calculate_sentiment_mode(sentiment_labels, sentiment_scores)

    quote = scrape_nse_data(setup_driver())
    model_input = preprocess_features(quote, dominant_sentiment, dominant_score)

    predicted_price = predict_price(load_model(), model_input)
    print(f"Predicted Closing Price: {predicted_price:.2f}")


# Entry point: run the whole pipeline when this code is executed directly.
if __name__ == "__main__":
    main()

Device set to use cpu


Predicted Closing Price: 1628.45
