In [130]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
from nltk.sentiment import SentimentIntensityAnalyzer
import requests
import re

# List of FNO Stocks

In [167]:
# Load the F&O stock list and derive the lower-cased search terms used for tagging.
fno_list = pd.read_csv('fno_stocks_list.csv')

# Fragments stripped from the full company name to get the short name
# (applied in this order, case-sensitively, wherever they occur in the name).
drop_words = [' Limited', ' Ltd', ' Industries', 'The ', ' (India)', ' (india)',' Enterprises',' Enterprise', ' Company', ' Laboratories', ' Corporation']


def _short_name(full_name):
    """Remove every `drop_words` fragment from a company name, then lower-case it."""
    for fragment in drop_words:
        full_name = full_name.replace(fragment, '')
    return full_name.lower()


fno_list['Stock'] = fno_list['Stock Name'].map(_short_name)
fno_list['Symbol'] = fno_list['Symbol'].map(lambda sym: sym.lower())

symbols = fno_list['Symbol'].values
stocks = fno_list['Stock'].values

# De-duplicated, alphabetically sorted union of ticker symbols and short names.
# NOTE(review): this name shadows the builtin `filter`; it is kept because the
# tagging cell further down refers to it by this name.
filter = sorted({*symbols, *stocks})


# Request Data from Pulse

In [97]:
# Download the Pulse front page and pull out the per-article fields.
link = 'https://pulse.zerodha.com'

# Browser-like User-Agent sent with the request.
request_headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15'
}

request_timeout = 30  # seconds; without a timeout a stalled connection hangs the cell forever

response = requests.get(link, headers=request_headers, timeout=request_timeout)
# Stop here on a 4xx/5xx reply instead of parsing an error page as if it were news.
response.raise_for_status()

# Name the parser explicitly so the parse tree does not depend on which
# optional parser libraries happen to be installed alongside BeautifulSoup.
html = BeautifulSoup(response.text, 'html.parser')

#Removing articles that show as similar article under a main article
for ul in html.select('ul.similar'):
    ul.decompose()


# Each field is selected independently across the whole page.
# NOTE(review): the DataFrame built later from these needs all four result
# sets to have the same length and order - confirm every article carries
# a title, description, feed and date.
headlines = html.select('h2.title')
descriptions = html.select('div.desc')
sources = html.select('span.feed')
dates = html.select('span.date')

In [98]:
# Flatten the scraped tags into plain Python lists, one entry per tag.

# Visible text of each tag, trimmed and lower-cased.
headlines_list = [tag.text.strip().lower() for tag in headlines]
descriptions_list = [tag.text.strip().lower() for tag in descriptions]
sources_list = [tag.text.strip().lower() for tag in sources]

# Dates are read from each tag's `title` attribute (not its text) and kept verbatim.
dates_list = [tag['title'] for tag in dates]

# Filter Articles for FNO Stocks

In [187]:
# Tag every article with the stocks/symbols mentioned in its description,
# then keep only the articles that mention at least one of them.

news_df = pd.DataFrame({'Headlines':headlines_list, 'Description':descriptions_list, 'Source':sources_list, 'Date':dates_list})

# Compile one whole-word pattern per search term once, instead of rebuilding
# it for every (article, term) pair. Empty terms are skipped because r'\b\b'
# would match almost any text and tag every article.
_term_patterns = [
    (term, re.compile(r'\b{}\b'.format(re.escape(term))))
    for term in filter
    if term
]


def _find_tags(desc):
    """Return the terms found in `desc` as one ', '-separated string ('' if none).

    The result is a single string, not an iterable of strings; terms appear in
    the same order as in `filter`.
    """
    return ', '.join(term for term, pattern in _term_patterns if pattern.search(desc))


news_df['Tags'] = news_df['Description'].map(_find_tags)

# Drop rows which have no tags, i.e. no mention of a stock of interest
news_df = news_df[news_df['Tags'] != ''].reset_index(drop=True)

In [188]:
# Show the tagged articles; as the cell's last expression the frame is rendered as the cell output.
news_df

Unnamed: 0,Headlines,Description,Source,Date,Tags
0,anant ambani & radhika merchant pre-wedding fe...,the pre-wedding festivities of anant ambani an...,— moneycontrol,"04:10 PM, 29 Feb 2024",reliance
1,sensex recovers from muted start on feb f&o ex...,the 30-share bse benchmark sensex rose 195 poi...,— economic times,"04:01 PM, 29 Feb 2024","bharti airtel, indusind bank, itc, m&m, maruti..."
2,"look at shorting rec, pfc as well as fmcg stoc...",ca rudramurthy bv is bullish on the market but...,— economic times,"03:58 PM, 29 Feb 2024",pfc
3,taking stock: market ends with modest gain ami...,"adani enterprises, tata consumer, m&m, indusin...",— moneycontrol,"03:53 PM, 29 Feb 2024","adani, apollo hospitals, bajaj auto, eicher mo..."
4,accumulate can fin homes; target of rs 883: cd...,cd equisearch recommended accumulate rating on...,— moneycontrol,"02:12 PM, 29 Feb 2024",can fin homes
5,reliance and disney merge india operations | n...,"reliance industries, led by mukesh ambani, and...",— moneycontrol,"01:11 PM, 29 Feb 2024",reliance
6,jio financial services shares rise over 4% on ...,"other than jio financial, adani power, indian ...",— economic times,"12:57 PM, 29 Feb 2024","adani, muthoot finance, pi, power finance, shr..."
7,apple scraps plans to develop self-driving ele...,"after working on it for 10 years, apple has de...",— moneycontrol,"12:57 PM, 29 Feb 2024",titan
8,"citi bullish on bank of baroda stock, sees 11%...","stock prices of bob, sbi, and pnb declined up ...",— moneycontrol,"12:12 PM, 29 Feb 2024",pnb
9,bajaj auto shares falls over 3% as stock turns...,"on feb. 16, bajaj auto approved a buyback of 1...",— bloomberg quint,"12:10 PM, 29 Feb 2024",bajaj auto


# Pass each Description through ChatGPT to get a price outlook