In [1]:
from transformers import pipeline
import numpy as np
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time
from IPython.display import HTML, display, clear_output
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from matplotlib.dates import DateFormatter
from ipywidgets import interact, widgets
from matplotlib.ticker import FuncFormatter
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from ipywidgets import interactive

In [2]:
def fetch_news(url):
    # Send an HTTP request to the website
    response = requests.get(url)

    if response.status_code == 200:

    # Parse the HTML content of the page
        soup = BeautifulSoup(response.text, 'html.parser')
        # Find and extract the news elements
        news_elements = soup.find_all('li', class_='box item')

        headlines = []
        summaries = []
        date_times = []
        article_urls = []

        for news_element in news_elements:
            headline = news_element.find('h2').text
            summary = news_element.find('div').text
            datetime_element = news_element.find('span', class_='date')
            date_time = datetime_element.text if datetime_element else 'Date and time not available'
            article_url = news_element.find('a')['href']

            headlines.append(headline)
            summaries.append(summary)
            date_times.append(date_time)
            article_urls.append(article_url)

          # Create a DataFrame
            news_df = pd.DataFrame({
            'Headline': headlines,
            'Summary': summaries,
            'Datetime': date_times,
            'URL': article_urls
          })
        return news_df
    else:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
            return None

In [3]:
def sentiment_converter(news_dataframe):
    model = pipeline('sentiment-analysis')
    
#     news_dataframe["headline_sentiment"] = news_dataframe.Headline.apply(model)
#     news_dataframe.headline_sentiment = news_dataframe.headline_sentiment.apply(lambda x: -x[0]['score'] if x[0]['label'] == 'NEGATIVE' else x[0]['score'])
    
    news_dataframe["summary_sentiment"] = news_dataframe.Summary.apply(model)
    news_dataframe.summary_sentiment = news_dataframe.summary_sentiment.apply(lambda x: -x[0]['score'] if x[0]['label'] == 'NEGATIVE' else x[0]['score'])
    
#     news_dataframe['secs_ago'] = news_dataframe.Datetime.apply(secs_converter)
    
    return news_dataframe

In [4]:
def convert_to_datetime(value):
    if 'minute' in value:
        return datetime.now() - timedelta(minutes=float(value.split()[0]))
    elif 'hour' in value:
        return datetime.now() - timedelta(hours=float(value.split()[0]))
    elif 'day' in value:
        return datetime.now() - timedelta(days=float(value.split()[0]))
    else:
        return None

In [5]:
def static_sentiment_check(url):
    
    news_dataframe = fetch_news(url)

    news_dataframe = sentiment_converter(news_dataframe)
#     time_mean_sentiment(news_dataframe)
    news_dataframe['Datetime'] = news_dataframe['Datetime'].apply(convert_to_datetime)
    

    if news_dataframe is not None:
        return news_dataframe


In [6]:
def auto_refresh(url, interval_seconds, num_refreshes):
    for _ in range(num_refreshes):
        

        news_dataframe = fetch_news(url)
#         pd.set_option('display.max_colwidth', None)
        
        news_dataframe = sentiment_converter(news_dataframe)
        time_mean_sentiment(news_dataframe)
        
        if news_dataframe is not None:
#             pd.set_option('display.max_colwidth', None)
            print("Headlines ") 
            print(news_dataframe[['Headline', 'Datetime']].head(2))
            print("\n")


        time.sleep(interval_seconds)

In [7]:
url = "https://pulse.zerodha.com/"

# auto_refresh(url, interval_seconds=120, num_refreshes=5)

In [8]:
df = static_sentiment_check(url)
df = df.sort_values(by='Datetime')
# Set the 'Datetime' column as the index
df['Datetime'] = pd.to_datetime(df['Datetime'])
df.set_index('Datetime', inplace=True)


No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [9]:

# Assuming your dataset is stored in a DataFrame named 'df'
# Make sure you have the 'Timestamp' and 'summary_sentiment' columns

# Function to update the plot based on the selected interval
def update_plot(interval_minutes):
    clear_output(wait=True)  # Clear the previous output
    
    # Determine the time range based on the selected interval
    if interval_minutes == 1:
        time_range = pd.to_timedelta('1H')
    elif interval_minutes == 3:
        time_range = pd.to_timedelta('3H')
    elif interval_minutes == 5:
        time_range = pd.to_timedelta('6H')
    else:
        time_range = pd.to_timedelta('24H')
    
    # Resample the data to the specified interval and calculate mean sentiment
    df_resampled = df.resample(f'{interval_minutes}T').mean().last(time_range)

    # Extract time from the timestamp
    df_resampled['Time'] = df_resampled.index.time

    # Format the x-axis labels to show both date and time at midnight,
    # and show only time for other intervals
    df_resampled['x_labels'] = [
        f'{date.strftime("%Y-%m-%d %H:%M")}' if (time >= pd.Timestamp('00:00:00').time() and time <= pd.Timestamp('00:30:00').time()) else f'{time.strftime("%H:%M")}'
        for date, time in zip(df_resampled.index, df_resampled['Time'])
    ]

    # Set color based on sentiment directly without a color scale
    df_resampled['colors'] = ['green' if sentiment >= 0 else 'red' for sentiment in df_resampled['summary_sentiment']]

    # Create a Plotly figure with a consistent size
    fig = go.Figure()

    # Add stacked bar traces
    fig.add_trace(go.Bar(
        x=df_resampled.index,
        y=df_resampled['summary_sentiment'],
        marker_color=df_resampled['colors'],
        hovertemplate='Datetime: %{x|%Y-%m-%d %H:%M}<br>Mean Sentiment: %{y:.4f}',
    ))

    # Set x-axis labels and tilt them to the left
    fig.update_layout(
        title=f'Stacked Plot with Mean Sentiment for Each {interval_minutes}-Minute Interval',
        xaxis_title='Datetime',
    )

    # Calculate tickvals and ticktext
    if interval_minutes != 0:
        tickvals = np.linspace(0, len(df_resampled) - 1, num=len(df_resampled) // max(1, int(30 / interval_minutes)), endpoint=True, dtype=int)
        ticktext = [pd.Timestamp(x).ceil('30T').strftime('%Y-%m-%d %H:%M') for x in df_resampled.index[tickvals]]
    else:
        tickvals = np.arange(0, len(df_resampled), dtype=int)
        ticktext = [df_resampled['x_labels'].iloc[i] for i in tickvals]

    # Set x-axis ticks
    fig.update_layout(
        xaxis=dict(
            tickmode='array',
            tickvals=df_resampled.index[::int(60 / interval_minutes)],
            ticktext=df_resampled['x_labels'][::int(60 / interval_minutes)],
        
#         tickvals=df_resampled.index[tickvals].tolist(),
#         ticktext=ticktext,
            tickangle=-45,  # Tilt the labels to the left
        ),
    )

    # Remove the color legend and color scale
    fig.update_layout(
        showlegend=False,
    )

    # Consistent figure size
    fig.update_layout(
        autosize=False,
        width=1000,
        height=600,
    )

    # Show the plot
    fig.show()

# Create a dropdown widget for selecting the interval
interval_dropdown = widgets.Dropdown(
    options=[1, 3, 5, 15, 30, 60],  # Options in minutes
    value=5,  # Initial value
    description='Interval (in min):',
    style={'description_width': 'initial'},  # To allow space for longer description
    format='0',  # To remove trailing '.0' in the displayed values
)

# Create an interactive widget
interactive_plot = interactive(update_plot, interval_minutes=interval_dropdown)

# Display the widget and initial plot
display(interactive_plot)


interactive(children=(Dropdown(description='Interval (in min):', index=2, options=(1, 3, 5, 15, 30, 60), style…

In [10]:
df.tail()

Unnamed: 0_level_0,Headline,Summary,URL,summary_sentiment
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01-18 20:56:11.721549,FinAGG Technologies raises $11 million in Seri...,"The fintech, which provides working capital lo...",https://www.moneycontrol.com/news/technology/f...,-0.96519
2024-01-18 20:57:11.721549,Orry's viral clip from Koffee with Karan has t...,'I take great pride in being everywhere -- Orr...,https://www.moneycontrol.com/news/trends/orry-...,-0.529088
2024-01-18 20:58:11.721549,Puravankara posts 89% growth in pre-sales in 9...,,https://www.thehindu.com/business/puravankara-...,0.748121
2024-01-18 21:05:11.721549,Passengers grab phone snatcher through train w...,"The undated video, reportedly from Bihar’s Bha...",https://www.moneycontrol.com/news/trends/passe...,-0.997367
2024-01-18 21:12:11.721549,Epack Durable IPO | Institutional investors pi...,Epack Durable IPO | Optimix Wholesale Global E...,https://www.moneycontrol.com/news/business/ipo...,0.79769


In [12]:
import requests
from bs4 import BeautifulSoup

# Replace the URL with the actual website URL
url = "https://pulse.zerodha.com/"

# Define your search query
search_query = 'HDFC Bank'

# Perform a GET request to the website
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the search bar element by its HTML attribute (e.g., name, id, class)
    search_bar = soup.find('input', {'name': 'search'})  # Replace with the actual attribute

    # Extract any additional form parameters if needed
    form_data = {
        'search': search_query,
        # Add any other form parameters as needed
    }

    # Perform a POST request with the search query
    search_results = requests.post(url, data=form_data)

    # Parse the search results using BeautifulSoup
    search_soup = BeautifulSoup(search_results.text, 'html.parser')

    # Extract and print relevant information from the search results
#     headlines = search_soup.find_all('h2', {'class': 'headline-class'})
    news_elements = soup.find_all('li', class_='box item')
    for news_element in news_elements:
        headline = news_element.find('h2').text
        headlines.append(headline)
    print(len(headlines))
    if len(headlines) >0:
            for headline in headlines:
                print(headline)
        
        
#         for index, headline in enumerate(headlines, start=1):
#             print(f"{index}. {headline.text}")
    else:
        print("No headlines found.")

else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")


1576
India takes investment spotlight while risks weigh in China: Suntory CEO Takeshi Niinami
ayodhya-ram-mandir-ram-janmabhumi-inauguration-live-updates
China's Mutual Funds Implode At Fastest Pace In Five Years As Stocks Sink
India: Marketer’s dream or Piketty’s nightmare?
Ramkrishna Forgings is riding the CV upswing, but stock at an elevated level
China Swap Market Goes Into Overdrive As Traders Seek FX Bypass
Nifty bull run to last till April, but be very cautious from mid-May to July: Astro cycle expert
Konstelec Engineers SME IPO: Check price band, GMP, financials, risks and listing date
Morning or evening: Which is the better time to go on a walk for best fitness results?
Dixon Tech may trade lower after inspection by DRI
Agri Picks Report January 19, 2024: Geojit
Bloodbath among risk-on assets – Disappointing HDFC earnings, or something more systemic?
Dollar Dominance Is Triggering Intervention Fears Across Markets
Q3 results today: RIL, HUL, Paytm, UltraTech among 46 companies

In [13]:
import requests
from bs4 import BeautifulSoup

# Replace the URL with the actual website URL


# Define your search query
search_query = 'HDFC Bank'

# Perform a GET request to the website
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the search form element by its HTML attribute (e.g., name, id, class)
    search_form = soup.find('form', {'id': 'search-form'})  # Replace with the actual attribute

    # Extract the action URL and any other form parameters
    form_action = search_form.get('action')
    form_data = {
        'search': search_query,
        # Add any other form parameters as needed
    }

    # Perform a POST request with the search query
    search_results = requests.post(url + form_action, data=form_data)

    # Parse the search results using BeautifulSoup
    search_soup = BeautifulSoup(search_results.text, 'html.parser')

    # Extract and print relevant information from the search results
    headlines = search_soup.find_all('h2', {'class': 'headline-class'})

    if headlines:
        for index, headline in enumerate(headlines, start=1):
            print(f"{index}. {headline.text}")
    else:
        print("No headlines found.")

else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")


AttributeError: 'NoneType' object has no attribute 'get'