In [1]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from transformers import pipeline
from ipywidgets import interact, widgets
from ipywidgets import interactive
from IPython.display import HTML, display, clear_output
from datetime import datetime, timedelta
import plotly.graph_objects as go
import numpy as np

In [2]:
def _parse_news_items(page_html):
    """Extract one record per ``<li class="box item">`` element in ``page_html``.

    Each record is a dict with the keys ``Headline``, ``Summary``, ``Datetime``
    and ``URL``. A missing tag yields a placeholder instead of raising, in the
    same spirit as the date placeholder.
    """
    soup = BeautifulSoup(page_html, 'html.parser')
    records = []
    for item in soup.find_all('li', class_='box item'):
        headline_tag = item.find('h2')
        summary_tag = item.find('div')  # first <div> inside the list item
        date_tag = item.find('span', class_='date')
        link_tag = item.find('a')
        records.append({
            'Headline': headline_tag.text if headline_tag else '',
            'Summary': summary_tag.text if summary_tag else '',
            'Datetime': date_tag.text if date_tag else 'Date and time not available',
            'URL': link_tag.get('href') if link_tag else None,
        })
    return records


def fetch_news(url, search_query):
    """Search the page at ``url`` for ``search_query`` and collect the news items.

    Opens ``url`` in Microsoft Edge through Selenium, types ``search_query``
    into the ``<input id="q">`` box, waits five seconds, then parses the page
    source for ``<li class="box item">`` elements.

    Parameters
    ----------
    url : str
        Address of the page that hosts the search box.
    search_query : str
        Text typed into the search box.

    Returns
    -------
    pandas.DataFrame or None
        One row per news item with the columns ``Headline``, ``Summary``,
        ``Datetime`` (the text of the item's ``span.date``, or
        ``'Date and time not available'``) and ``URL``. ``None`` when the page
        contains no matching items.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If the search box does not appear within 10 seconds.
    """
    edge_driver_path = './msedgedriver.exe'  # Change this to the path of your Edge WebDriver
    # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of
    # a `Service` object - confirm against the installed Selenium version.
    driver = webdriver.Edge(executable_path=edge_driver_path)
    try:
        driver.get(url)

        search_bar = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//input[@id="q"]'))
        )
        search_bar.send_keys(search_query)

        # Fixed pause before reading the page back.
        # NOTE(review): presumably gives the results time to render - confirm.
        time.sleep(5)
        search_results_html = driver.page_source
    finally:
        # Always release the browser, even when loading or waiting fails.
        driver.quit()

    records = _parse_news_items(search_results_html)
    if not records:
        return None

    # Build the frame once, after every item has been collected.
    return pd.DataFrame(records, columns=['Headline', 'Summary', 'Datetime', 'URL'])


In [3]:
def sentiment_converter(news_dataframe):
    """Add a signed ``summary_sentiment`` column to ``news_dataframe``.

    Every value of the ``Summary`` column is scored with a Hugging Face
    ``sentiment-analysis`` pipeline. The score is stored as-is, except that it
    is negated when the predicted label is ``'NEGATIVE'``.

    The checkpoint and revision are pinned to the ones the unpinned pipeline
    reported falling back to, so results stay the same if the library default
    changes.

    Parameters
    ----------
    news_dataframe : pandas.DataFrame
        Frame with a ``Summary`` column of text. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``news_dataframe`` object, with ``summary_sentiment`` added.
    """
    summaries = news_dataframe["Summary"].tolist()

    scores = []
    if summaries:  # skip loading the model when there is nothing to score
        model = pipeline(
            'sentiment-analysis',
            model='distilbert-base-uncased-finetuned-sst-2-english',
            revision='af0f99b',
        )
        # One batched call instead of one call per row; truncation keeps an
        # over-long summary from exceeding the model's input limit.
        for prediction in model(summaries, truncation=True):
            score = prediction['score']
            scores.append(-score if prediction['label'] == 'NEGATIVE' else score)

    news_dataframe["summary_sentiment"] = pd.Series(
        scores, index=news_dataframe.index, dtype="float64"
    )
    return news_dataframe

In [4]:
def convert_to_datetime(value, now=None):
    """Turn a relative age such as ``'5 minutes ago'`` into an absolute datetime.

    The first whitespace-separated token is read as the amount and the unit is
    detected by looking for ``minute``, ``hour``, ``day``, ``second`` or
    ``week`` anywhere in the text (case-insensitive).

    Parameters
    ----------
    value : str
        Relative age text. Anything that is not a string, does not start with
        a number, or names no known unit is treated as unparseable.
    now : datetime.datetime, optional
        Reference instant the age is subtracted from. Defaults to
        ``datetime.now()`` at call time.

    Returns
    -------
    datetime.datetime or None
        ``now`` minus the parsed age, or ``None`` when ``value`` cannot be
        parsed.
    """
    if not isinstance(value, str):
        return None

    tokens = value.split()
    if not tokens:
        return None

    try:
        amount = float(tokens[0])
    except ValueError:
        # e.g. 'an hour ago' or 'Date and time not available'
        return None

    reference = datetime.now() if now is None else now
    text = value.lower()

    # Checked in order; the first three keep the original precedence.
    units = (
        ('minute', 'minutes'),
        ('hour', 'hours'),
        ('day', 'days'),
        ('second', 'seconds'),
        ('week', 'weeks'),
    )
    for keyword, timedelta_field in units:
        if keyword in text:
            try:
                return reference - timedelta(**{timedelta_field: amount})
            except (ValueError, OverflowError):
                # non-finite or absurdly large amounts
                return None
    return None

In [12]:
def _window_for_interval(interval_minutes):
    """Return the trailing time span that is charted for a given bin width."""
    hours = {1: 1, 3: 3, 5: 6}.get(interval_minutes, 24)
    return pd.Timedelta(hours=hours)


def _resample_sentiment(df, interval_minutes):
    """Average ``summary_sentiment`` per bin and keep only the trailing window."""
    # Only the numeric column is averaged (text columns cannot be), and rows
    # without a timestamp cannot be placed in a bin.
    dated = df.loc[df.index.notna(), ['summary_sentiment']]
    if dated.empty:
        return dated
    binned = dated.resample(f'{interval_minutes}min').mean()
    cutoff = binned.index[-1] - _window_for_interval(interval_minutes)
    return binned[binned.index > cutoff]


def _tick_label(stamp):
    """Label a bin with date and time in the first half hour of a day, else time only."""
    in_midnight_band = (stamp - stamp.normalize()) <= pd.Timedelta(minutes=30)
    return stamp.strftime('%Y-%m-%d %H:%M' if in_midnight_band else '%H:%M')


def update_plot(interval_minutes, df):
    """Redraw the bar chart of mean summary sentiment per time interval.

    Clears the previous cell output, averages ``summary_sentiment`` in bins of
    ``interval_minutes`` minutes, keeps the most recent window (1 hour for
    1-minute bins, 3 hours for 3-minute bins, 6 hours for 5-minute bins,
    24 hours otherwise) and shows a Plotly bar chart. Bars are green for a
    mean of zero or more and red otherwise.

    Parameters
    ----------
    interval_minutes : int
        Bin width in minutes; must be positive.
    df : pandas.DataFrame
        Frame indexed by datetime with a numeric ``summary_sentiment`` column.
        Other columns and rows whose index is ``NaT`` are ignored.

    Raises
    ------
    ValueError
        If ``interval_minutes`` is not positive.
    """
    clear_output(wait=True)  # Clear the previous output

    if interval_minutes <= 0:
        raise ValueError('interval_minutes must be a positive number of minutes')

    resampled = _resample_sentiment(df, interval_minutes)
    if resampled.empty:
        print('No dated sentiment values to plot')
        return

    sentiment = resampled['summary_sentiment']
    # Empty bins have a NaN mean; they fail the >= 0 test and draw no bar.
    bar_colors = ['green' if value >= 0 else 'red' for value in sentiment]
    x_labels = [_tick_label(stamp) for stamp in resampled.index]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=resampled.index,
        y=sentiment,
        marker_color=bar_colors,
        hovertemplate='Datetime: %{x|%Y-%m-%d %H:%M}<br>Mean Sentiment: %{y:.4f}',
    ))

    fig.update_layout(
        title=f'Stacked Plot with Mean Sentiment for Each {interval_minutes}-Minute Interval',
        xaxis_title='Datetime',
    )

    # One tick per hour of data; never let the slice step fall to zero when
    # the interval is longer than an hour.
    tick_step = max(1, int(60 / interval_minutes))
    fig.update_layout(
        xaxis=dict(
            tickmode='array',
            tickvals=resampled.index[::tick_step],
            ticktext=x_labels[::tick_step],
            tickangle=-45,  # Tilt the labels to the left
        ),
        showlegend=False,
        autosize=False,
        width=1000,
        height=600,
    )

    fig.show()




In [25]:
def static_sentiment_check(url, search_query):
    """Fetch news for ``search_query``, score it and show an interactive chart.

    Steps: scrape the items with ``fetch_news``, add sentiment scores with
    ``sentiment_converter``, convert the relative dates with
    ``convert_to_datetime``, index the frame by time, then display a dropdown
    driven ``update_plot`` chart followed by the last rows of the frame.

    Parameters
    ----------
    url : str
        Page passed to ``fetch_news``.
    search_query : str
        Search text passed to ``fetch_news``.

    Returns
    -------
    None
        Everything is shown through ``print`` and ``display``.
    """
    news_dataframe = fetch_news(url, search_query)
    if news_dataframe is None:
        print("No news found")
        return

    news_dataframe = sentiment_converter(news_dataframe)
    news_dataframe['Datetime'] = pd.to_datetime(
        news_dataframe['Datetime'].apply(convert_to_datetime)
    )

    # Items whose date text could not be converted have no position on a time
    # axis; leave them out and say so rather than carrying NaT in the index.
    undated = int(news_dataframe['Datetime'].isna().sum())
    if undated:
        print(f"Skipping {undated} news item(s) without a usable date")
    news_dataframe = (
        news_dataframe
        .dropna(subset=['Datetime'])
        .sort_values(by='Datetime')
        .set_index('Datetime')
    )
    if news_dataframe.empty:
        print("No news found")
        return

    interval_dropdown = widgets.Dropdown(
        options=[1, 3, 5, 15, 30, 60],  # Options in minutes
        value=5,  # Initial value
        description='Interval (in min):',
        style={'description_width': 'initial'},  # To allow space for longer description
    )

    # Re-runs update_plot whenever the dropdown changes; the frame is fixed.
    interactive_plot = interactive(
        update_plot,
        interval_minutes=interval_dropdown,
        df=widgets.fixed(news_dataframe),
    )

    # Display the widget and initial plot
    display(interactive_plot)
    display(news_dataframe.tail())

In [28]:

# Run the whole pipeline for one query: scrape the page, score the summaries,
# then show the interval dropdown, the chart and the last rows of the frame.
url = "https://pulse.zerodha.com/"
search_query = "jk tyre"
static_sentiment_check(url, search_query)

# Text box pre-filled with an end datetime.
# NOTE(review): it is neither displayed nor read in this cell - confirm that a
# later cell uses it, otherwise it has no effect.
end_datetime_input = widgets.Text(
    value='2024-01-31 12:00:00',  # Default end datetime shown in the box
    description='End Datetime (YYYY-MM-DD HH:mm:ss):'
)


No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


interactive(children=(Dropdown(description='Interval (in min):', index=2, options=(1, 3, 5, 15, 30, 60), style…

Unnamed: 0_level_0,Headline,Summary,URL,summary_sentiment
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01-18 18:07:30.402593,"JK Tyre, Saregama among 6 smallcap stock picks...",Emkay lists its top smallcap picks for near term,https://economictimes.indiatimes.com/markets/s...,0.896922
