In [21]:
import pymongo
from datetime import datetime, timedelta
import yfinance as yf

In [22]:
client = pymongo.MongoClient("mongodb://localhost:27017")
db = client['new_db']
processed_collection = db['processed_news']
raw_collection = db['raw_news']

In [23]:
processed_data_list = list(processed_collection.find())

In [24]:
raw_data_list = list(raw_collection.find())

In [25]:
len(processed_data_list)

252

In [26]:
processed_data_list[1]

{'_id': ObjectId('6777a0a68581574bb8d03aca'),
 'raw_news': 'Weekly Tactical Pick | Cello World: This consumer ware player is set to clock accelerated growth The company is diversifying distribution reach and scaling up launches as demand is expected to improve going forward Consumer demand, which was muted in the past few quarters, is expected to revive from Q3FY25. Hence, Cello World Limited (CWL; CMP: Rs 765; Nifty level: 24,189), India’s leading consumer houseware brand, is our tactical pick for this week. After a subdued H1FY25, CWL saw strong growth momentum in October 2024. With the wedding and festive season, demand',
 'processed_text': 'weekly tactical pick cello world consumer ware player set clock accelerated growth company diversifying distribution reach scaling launch demand expected improve going forward consumer demand muted past quarter expected revive qfy hence cello world limited cwl cmp r nifty level india leading consumer houseware brand tactical pick week subdued hf

In [27]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline

In [28]:
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")
nlp_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

In [29]:
def split_text(text, max_length=500):
    tokens = tokenizer.tokenize(text)
    chunks = [tokens[i:i+max_length] for i in range(0, len(tokens), max_length)]
    return [tokenizer.convert_tokens_to_string(chunk) for chunk in chunks]

In [30]:
# for news in processed_data:
#     text = news['processed_text']
#     text_chunks = split_text(text)
#     sentiment_results = []
    
#     for chunk in text_chunks:
#         sentiment_result = nlp_pipeline(chunk)
#         sentiment_results.append(sentiment_result)
                        
#     print(sentiment_results)

In [31]:
for news in processed_data_list:
    
    text = news['raw_news']
    
    text_chunks = split_text(text)
    sentiment_results = []
    
    for chunk in text_chunks:
        sentiment_result = nlp_pipeline(chunk)
        sentiment_results.extend(sentiment_result)

    i = 0
    while i < len(sentiment_results) - 1:
        if sentiment_results[i]['label'] == sentiment_results[i + 1]['label']:
            avg_score = (sentiment_results[i]['score'] + sentiment_results[i + 1]['score']) / 2
            sentiment_results[i] = {"label": sentiment_results[i]['label'], "score": avg_score}
            del sentiment_results[i + 1]  
        else:
            i += 1  
    news.update({"finbert_analysis":sentiment_results})
 
    # print("Updated Sentiment Results:", sentiment_results)

In [32]:
processed_data_list[39]

{'_id': ObjectId('6777a5fe8581574bb8d03af5'),
 'raw_news': "Buy JSW Infrastructure; target of Rs 375: Motilal Oswal Motilal Oswal is bullish on JSW Infrastructure recommended buy rating on the stock with a target price of Rs 375 in its research report dated December 26, 2024. Motilal Oswal's research report on JSW Infrastructure Second-largest private port operator with improving market share: JSW Infrastructure (JSWINFRA), with an aggregate capacity of 170MMT as of Sep’24, is the second-largest private port operator in India after Adani Ports (having a capacity of ~633MMT). The company has reported a 22% cargo volume CAGR over FY18-24 (13% YoY growth in 1HFY25), far outpacing the industry growth rate of ~4% over the same period. Focused on ramping up capacity to 400MMT by 2030: JSWINFRA has embarked on a massive capex plan of INR300b (INR150b over FY25-28) towards expanding the total cargo handling capacity from 170mtpa currently to 288mtpa by FY28 and eventually to 400mtpa by FY30, b

In [33]:
finbert_list = []

In [34]:
for data in processed_data_list:
    if len(data['finbert_analysis']) == 1:
        if data['stock_name'] and data['ticker_name']:
            finbert_list.append(data)


In [35]:
# for data in finbert_list:
#     print(data['ticker_name'])

In [36]:
finbert_list[0]

{'_id': ObjectId('6777a09f8581574bb8d03ac9'),
 'raw_news': "Western Carriers stock zooms 13% after securing Rs 139-crore contract from Vedanta The four year-long agreement involves the handling of import, finished goods (FG) domestic, and export materials at Vedanta's JSG Plant The stock of Western Carriers (India) Ltd rose as high as 13.5 percent intraday on\xa0January 3, after the company secured a significant contract worth Rs 139 crore from Vedanta Ltd. The agreement involves the handling of import, finished goods (FG) domestic, and export materials at Vedanta's JSG Plant and will span a period of four years. At 11:43 am\xa0on January 3, the stock was trading at Rs 123.86, up by\xa06.87 percent The company clarified in a stock exchange filing that this contract was awarded by Vedanta, an unrelated party, and was negotiated at arm's length, aligning with Western Carriers' corporate objectives. In\xa0Q2FY2025, Western Carriers (India) Ltd reported a modest increase in net profit. Net

In [37]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta

def get_stock_features(ticker, news_date):
    
    try:
        news_date = datetime.strptime(news_date, "%Y-%m-%d")
        before_start = (news_date - timedelta(days=5)).strftime("%Y-%m-%d")
        after_end = (news_date + timedelta(days=5)).strftime("%Y-%m-%d")
        
        
        stock = yf.Ticker(ticker)
        try:
            historical_data = stock.history(start=before_start, end=after_end)  
            historical_data.index = historical_data.index.tz_localize(None)
        except Exception as e:
            print(f"Error fetching data for ticker {ticker}: {e}")
            return None
            
            
        if historical_data.empty:
            return None
        
        #before news stock data
        before_news = historical_data.loc[:news_date - timedelta(days=1)]
        avg_price_5d = before_news['Close'].mean()
        vol_5d = before_news['Close'].std()
        avg_volume_5d = before_news['Volume'].mean()
        
        # news day stock data
        if news_date.strftime("%Y-%m-%d") in historical_data.index:
            news_day = historical_data.loc[news_date.strftime("%Y-%m-%d")]
            open_price = news_day['Open']
            close_price = news_day['Close']
            volume_news_day = news_day['Volume']
            stock_movement = "up" if close_price > open_price else "down"
        else:
            open_price = close_price = volume_news_day = None
        
        # gap
        prev_day_idx = historical_data.index.get_loc(news_date) - 1
        if prev_day_idx >= 0:
            prev_close = historical_data.iloc[prev_day_idx]['Close']
            gap = open_price - prev_close if open_price is not None else None
        else:
            prev_close = None
            gap = None
        
        # after news stock data 
        after_news = historical_data.loc[news_date + timedelta(days=1):]
        price_movement_1d = ((after_news['Close'].iloc[0] - open_price) / open_price) * 100 if len(after_news) > 0 and open_price else None
        price_movement_3d = ((after_news['Close'].iloc[2] - open_price) / open_price) * 100 if len(after_news) > 2 and open_price else None
        price_movement_5d = ((after_news['Close'].iloc[4] - open_price) / open_price) * 100 if len(after_news) > 4 and open_price else None
        # avg_volume_after = after_news['Volume'].mean() if len(after_news) > 0 else None
        
        return {
            "avg_price_5d": avg_price_5d,
            "volatility_5d": vol_5d,
            "avg_volume_5d": avg_volume_5d,
            "open_price_news_day": open_price,
            "close_price_news_day": close_price,
            "volume_news_day": volume_news_day,
            "price_movement_1d": price_movement_1d,
            "price_movement_3d": price_movement_3d,
            # "price_movement_5d": price_movement_5d,
            # "avg_volume_after": avg_volume_after,
            "gap":gap,
            "stock_movement_on_news_day":stock_movement,
        }
    except Exception as e :
        return None


In [38]:
# news_date = "2025-01-03"
# ticker = "SBIN.NS"
# features = get_stock_features(ticker, news_date)

In [39]:
# features

In [40]:
count = 0 
for finbert_added_data in finbert_list:
    ticker = finbert_added_data['ticker_name'].strip()
    date = finbert_added_data['date'].strip()
    
    data = get_stock_features(ticker, date)
    print(count)
    finbert_added_data.update({"stock_data": data})
    count += 1

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183


In [41]:
finbert_list[169]

{'_id': ObjectId('677b62ea9229c97949479ea5'),
 'raw_news': 'NTPC Renewable Energy bags 1,000-MW solar power project NTPC Renewable Energy has secured a capacity of 1000 MW at a tariff of Rs 2.56/kWh and is currently awaiting the Letter of Award (LOA) from UPPCL. NTPC Renewable Energy Limited has won the bid for a\xa0 1000-MW in Uttar Pradesh. According to a filing on BSE on January 4,\xa0the renewable energy company\xa0emerged as a successful bidder in the e-reverse auction conducted by Uttar Pradesh Power Corporation Limited (UPPCL). The tender was\xa0 aimed at the "Selection of Solar Power Developers for Setting up 2000 MW ISTS-connected Solar PV Power Projects in India under Tariff-Based Competitive Bidding" and was conducted on January 3. According to the release, NTPC Renewable Energy has secured a capacity of 1000 MW at a tariff of Rs 2.56/kWh and is currently awaiting the Letter of Award (LOA) from UPPCL. NTPC Renewable Energy is a wholly-owned (unlisted) subsidiary of NTPC Gree

In [42]:
finbert_list[9]

{'_id': ObjectId('6777a17b8581574bb8d03ad5'),
 'raw_news': "Buy Signature Global; target of Rs 2000: Motilal Oswal Motilal Oswal is bullish on Signature Global recommended buy rating on the stock with a target price of Rs 2000 in its research report dated January 01, 2025. Motilal Oswal's research report on Signature Global Signature Global (SIGNATUR), with its strong presence in strategic locations in Gurugram, is on track to capitalize on the ongoing demand, guided by a strong project pipeline of 24.3msf. With a projected 35% CAGR growth in pre-sales over FY24-27, the company is set to cumulatively collect INR285b. Its strategic shift from the affordable to mid/mid-premium segment is expected to drive a strong cumulative OCF of INR95b. This will enable the company to turn net cash positive and reinvest in land to fuel future growth. We reiterate our BUY rating with a TP of INR2,000/share, indicating a 50% upside potential.",
 'processed_text': 'buy signature global target r motilal o

In [43]:
presentation_collection = db['presentation_news']

In [46]:
len(finbert_list)

184

In [48]:
ids = presentation_collection.insert_many(finbert_list)

In [49]:
ids

InsertManyResult([ObjectId('6777a09f8581574bb8d03ac9'), ObjectId('6777a0a68581574bb8d03aca'), ObjectId('6777a0d08581574bb8d03acb'), ObjectId('6777a0d38581574bb8d03acc'), ObjectId('6777a0d78581574bb8d03acd'), ObjectId('6777a0db8581574bb8d03acf'), ObjectId('6777a0e78581574bb8d03ad0'), ObjectId('6777a1448581574bb8d03ad2'), ObjectId('6777a1498581574bb8d03ad3'), ObjectId('6777a17b8581574bb8d03ad5'), ObjectId('6777a17d8581574bb8d03ad6'), ObjectId('6777a17f8581574bb8d03ad7'), ObjectId('6777a1d68581574bb8d03ad9'), ObjectId('6777a1d98581574bb8d03ada'), ObjectId('6777a1dc8581574bb8d03adb'), ObjectId('6777a1df8581574bb8d03adc'), ObjectId('6777a1e68581574bb8d03add'), ObjectId('6777a1e98581574bb8d03ade'), ObjectId('6777a2148581574bb8d03adf'), ObjectId('6777a2738581574bb8d03ae2'), ObjectId('6777a2a58581574bb8d03ae4'), ObjectId('6777a2af8581574bb8d03ae7'), ObjectId('6777a2b28581574bb8d03ae8'), ObjectId('6777a2be8581574bb8d03ae9'), ObjectId('6777a3298581574bb8d03aeb'), ObjectId('6777a34f8581574bb8d03a