In [36]:
import pymongo
from datetime import datetime, timedelta
import yfinance as yf

In [37]:
client = pymongo.MongoClient("mongodb://localhost:27017")
db = client['new_db']
processed_collection = db['processed_news']
raw_collection = db['raw_news']

In [38]:
processed_data_list = list(processed_collection.find())

In [39]:
raw_data_list = list(raw_collection.find())

In [None]:
len(processed_data_list)

In [None]:
processed_data_list[1]

In [42]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline

In [43]:
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")
nlp_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

In [44]:
def split_text(text, max_length=500):
    tokens = tokenizer.tokenize(text)
    chunks = [tokens[i:i+max_length] for i in range(0, len(tokens), max_length)]
    return [tokenizer.convert_tokens_to_string(chunk) for chunk in chunks]

In [45]:
# for news in processed_data:
#     text = news['processed_text']
#     text_chunks = split_text(text)
#     sentiment_results = []
    
#     for chunk in text_chunks:
#         sentiment_result = nlp_pipeline(chunk)
#         sentiment_results.append(sentiment_result)
                        
#     print(sentiment_results)

In [46]:
for news in processed_data_list:
    
    text = news['raw_news']
    
    text_chunks = split_text(text)
    sentiment_results = []
    
    for chunk in text_chunks:
        sentiment_result = nlp_pipeline(chunk)
        sentiment_results.extend(sentiment_result)

    i = 0
    while i < len(sentiment_results) - 1:
        if sentiment_results[i]['label'] == sentiment_results[i + 1]['label']:
            avg_score = (sentiment_results[i]['score'] + sentiment_results[i + 1]['score']) / 2
            sentiment_results[i] = {"label": sentiment_results[i]['label'], "score": avg_score}
            del sentiment_results[i + 1]  
        else:
            i += 1  
    news.update({"finbert_analysis":sentiment_results})
 
    # print("Updated Sentiment Results:", sentiment_results)

In [None]:
processed_data_list[39]

In [48]:
finbert_list = []

In [49]:
for data in processed_data_list:
    if len(data['finbert_analysis']) == 1:
        if data['stock_name'] and data['ticker_name']:
            finbert_list.append(data)


In [None]:
# for data in finbert_list:
#     print(data['ticker_name'])

In [55]:
finbert_list[0]

{'_id': ObjectId('6777a0d08581574bb8d03acb'),
 'raw_news': "Buy Coal India; target of Rs 525: Emkay Global Financial Emkay Global Financial is bullish on Coal India has recommended buy rating on the stock with a target price of Rs 525 in its research report dated January 02, 2025. Emkay Global Financial's research report on Coal India Over the past 12-18 months, investors were enthused by a large-cap stock delivering volumes growth CAGR of 7-8%, alongside an improving demand outlook resulting in solid earnings momentum. We were also subscribed to this narrative for COAL; however, the growth air-pocket in recent months has weakened that thesis. We see the company missing its FY25 guidance by 35- 40mt (4.5%), and therefore, we reduce our estimate to 800mt. In addition, there has been news flow around progress on captive coal mines that could displace e-auction volumes. In our assessment, these captive mines could produce 26mt in FY28E, displacing a third of the e-auction offtake. All tha

In [56]:
import yfinance as yf

def get_stock_data(ticker, date):
    stock = yf.Ticker(ticker)
    historical_data = stock.history(start=date, end=date)
    if not historical_data.empty:
        opening_price = historical_data['Open'].values[0]
        closing_price = historical_data['Close'].values[0]
        volume = historical_data['Volume'].values[0]
        price_movement = ((closing_price - opening_price) / opening_price) * 100
        return {
            "opening_price": opening_price,
            "closing_price": closing_price,
            "volume": volume,
            "price_movement": price_movement,
        }
    return None


In [58]:
data = get_stock_data(finbert_list[0]['ticker_name'], finbert_list[0]['date'])

COALINDIA.NS: No price data found, symbol may be delisted (1d 2025-01-03 -> 2025-01-03)


In [59]:
print(data)

None


In [51]:
def get_stock_data(stock_ticker, date):
    date_obj = datetime.strptime(date, "%B %d, %Y")
    formatted_date = date_obj.strftime("%Y-%m-%d")
    stock = yf.Ticker(stock_ticker)
    hist = stock.history(start=formatted_date, end=(date_obj + timedelta(days=2)).strftime("%Y-%m-%d"))
    if not hist.empty:
        closest_date = hist.index[0]
        open_price = hist.iloc[0]["Open"]
        close_price = hist.iloc[0]["Close"]
        volume = hist.iloc[0]["Volume"]
        volume = hist.iloc[0]["Prev_Close"]
        stock_movement = "up" if close_price > open_price else "down"
        return {
            "stock_ticker": stock_ticker,
            "date": closest_date.strftime("%Y-%m-%d"),
            "open_price": open_price,
            "close_price": close_price,
            "volume": volume,
            "movement": stock_movement,
        }
    else:
        return {"error":f"no data available"}