In [2]:
import pymongo

In [3]:
# Connection settings for the MongoDB instance this notebook reads from.
MONGO_URI = "mongodb://localhost:27017"
DB_NAME = 'new_db'
COLLECTION_NAME = 'processed_news'

# Handles are kept at notebook scope because later cells query `collection`.
client = pymongo.MongoClient(MONGO_URI)
db = client[DB_NAME]
collection = db[COLLECTION_NAME]

In [4]:
# Materialize every document returned by an unfiltered find() into a list so
# later cells can index into the records and mutate them in memory.
processed_data = list(collection.find())

In [5]:
# Number of documents loaded from the collection.
len(processed_data)

218

In [6]:
# Peek at the record at index 1 to see which fields a document carries.
processed_data[1]

{'_id': ObjectId('676e8cbb49f67e6ab04c1ed7'),
 'processed_text': 'agri pick report december geojit financial service id objectidecbbfeabced title agri pick report december geojit financial service desc according geojit financial service national commodity derivative exchange monday said would live trading session feb account presentation union budget aprmar date december datetime december ist content geojit financial services report daily agri pick national commodity derivative exchange monday said would live trading session feb account presentation union budget aprmar farmer karnataka sown rabi crop million hectare saturday year million hectare sown corresponding period last year according report released state agriculture government chana acreage state rose nearly year million hectare hectare sown last year acreage horse gram kulthi dal hectare hectare last year report showed total area sown pulse million hectare million hectare year ago jowar wheat chana onion chilli key crop grown 

In [7]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline

In [8]:
# Checkpoint identifier shared by the tokenizer and the classification model.
FINBERT_CHECKPOINT = "yiyanghkust/finbert-tone"

tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)

# Sentiment pipeline built from the explicitly loaded model/tokenizer pair.
nlp_pipeline = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
)

In [9]:
def split_text(text, max_length=500):
    """Break ``text`` into pieces of at most ``max_length`` tokens each.

    The text is tokenized with the notebook-level ``tokenizer``, the token
    sequence is cut into consecutive windows of ``max_length`` tokens (the
    last window may be shorter), and every window is converted back into a
    string with ``tokenizer.convert_tokens_to_string``.

    Returns a list of strings; the list is empty when tokenization yields
    no tokens.
    """
    all_tokens = tokenizer.tokenize(text)
    pieces = []
    for start in range(0, len(all_tokens), max_length):
        window = all_tokens[start:start + max_length]
        pieces.append(tokenizer.convert_tokens_to_string(window))
    return pieces

In [90]:
# for news in processed_data:
#     text = news['processed_text']
#     text_chunks = split_text(text)
#     sentiment_results = []
    
#     for chunk in text_chunks:
#         sentiment_result = nlp_pipeline(chunk)
#         sentiment_results.append(sentiment_result)
                        
#     print(sentiment_results)

In [10]:
def _merge_adjacent_sentiments(results):
    """Collapse each run of consecutive same-label results into one entry.

    ``results`` is a list of ``{"label": ..., "score": ...}`` dicts in chunk
    order. Every run of neighbouring entries that share a label is replaced
    by a single dict carrying that label and the arithmetic mean of all
    scores in the run, so each chunk contributes equally. (Averaging pairwise
    while merging would weight later chunks more heavily for runs of three or
    more entries.)

    Returns a new list; ``results`` is not modified.
    """
    runs = []  # one [label, score_total, chunk_count] triple per run
    for result in results:
        if runs and runs[-1][0] == result["label"]:
            runs[-1][1] += result["score"]
            runs[-1][2] += 1
        else:
            runs.append([result["label"], result["score"], 1])
    return [
        {"label": label, "score": total / count}
        for label, total, count in runs
    ]


for news in processed_data:
    # Score the article chunk by chunk; split_text caps each chunk's token count.
    text_chunks = split_text(news['processed_text'])
    sentiment_results = []
    for chunk in text_chunks:
        # Each pipeline call yields a list of {'label', 'score'} dicts.
        sentiment_results.extend(nlp_pipeline(chunk))

    # Attached to the in-memory record only; this cell does not write to MongoDB.
    news["finbert_analysis"] = _merge_adjacent_sentiments(sentiment_results)

In [29]:
# Inspect the record at index 56. This cell's execution count is later than the
# sentiment loop's, so the record is expected to carry a 'finbert_analysis' entry.
processed_data[56]

{'_id': ObjectId('676e9d2849f67e6ab04c1f16'),
 'processed_text': 'buy coal india target r sharekhan id objectidedfeabcf title buy coal india target r sharekhan desc sharekhan bullish coal india recommended buy rating stock target price r research report dated december date december datetime december ist content till ytdfy aprnov coal india reported offtake mn tonne yoy production mn tonne yoy volume growth weak strong monsoon affecting power demand however company strong growth lever volume picking rise power demand potential hike fsa coal realization valuation xx fyfy eps estimate attractive stock offer high dividend yield hence maintain buy unchanged pt rsdesc till ytdfy aprnov coal india reported offtake mn tonne yoy production mn tonne yoy volume growth weak strong monsoon affecting power demand however company strong growth lever volume picking rise power demand potential hike fsa coal realization valuation xx fyfy eps estimate attractive stock offer high dividend yield hence main

In [12]:
from datetime import datetime, timedelta
import yfinance as yf

In [13]:
def get_stock_data(stock_ticker, date, lookahead_days=2):
    """Return open/close/volume for the first price row on or after ``date``.

    Parameters
    ----------
    stock_ticker : str
        Symbol handed to ``yf.Ticker`` (for example ``"SHRIDINE.BO"``).
    date : str
        Day to look up, written like ``"January 02, 2025"`` (``%B %d, %Y``).
    lookahead_days : int, optional
        Size in days of the window requested from ``Ticker.history``: the
        request starts at ``date`` and ends at ``date + lookahead_days``.
        Defaults to 2, the value that used to be hard-coded. Widen it if a
        date falling on a non-trading day should still resolve to the next
        available row.

    Returns
    -------
    dict
        When the history frame has rows: ``stock_ticker``, ``date`` (the
        ``YYYY-MM-DD`` index of the first row), ``open_price``,
        ``close_price``, ``volume`` and ``movement`` (``"up"`` when the close
        is strictly above the open, otherwise ``"down"``).
        When the frame is empty: ``{"error": "no data available"}``.

    Raises
    ------
    ValueError
        If ``date`` does not match the ``%B %d, %Y`` format.
    """
    window_start = datetime.strptime(date, "%B %d, %Y")
    window_end = window_start + timedelta(days=lookahead_days)

    hist = yf.Ticker(stock_ticker).history(
        start=window_start.strftime("%Y-%m-%d"),
        end=window_end.strftime("%Y-%m-%d"),
    )
    if hist.empty:
        return {"error": "no data available"}

    # Only the earliest row in the window is reported.
    first_row = hist.iloc[0]
    open_price = first_row["Open"]
    close_price = first_row["Close"]

    return {
        "stock_ticker": stock_ticker,
        "date": hist.index[0].strftime("%Y-%m-%d"),
        "open_price": open_price,
        "close_price": close_price,
        # Read straight from the "Volume" column; nothing may overwrite it.
        "volume": first_row["Volume"],
        "movement": "up" if close_price > open_price else "down",
    }

In [36]:
from yahooquery import search

def get_ticker_from_yahoo(company_name):
    """Look up a ticker symbol for ``company_name`` via ``search``.

    The search response is scanned in order; the symbol of the first quote
    that has both a ``'symbol'`` and a ``'longname'`` and whose long name
    contains ``company_name`` (case-insensitive) is returned.

    Returns ``None`` when the response is empty, has no ``'quotes'`` entry,
    or no quote matches.
    """
    response = search(company_name)
    if not response or 'quotes' not in response:
        return None

    matching_symbols = (
        entry['symbol']
        for entry in response['quotes']
        if 'symbol' in entry
        and 'longname' in entry
        and company_name.lower() in entry['longname'].lower()
    )
    return next(matching_symbols, None)

# Example usage: resolve a ticker from a company name and print the result.
# get_ticker_from_yahoo returns None when no quote matches, so the printed
# value may be "None".
company_name = "EPACK"
ticker = get_ticker_from_yahoo(company_name)
print(f"Ticker for {company_name}: {ticker}")


Ticker for EPACK: EPACK.NS


In [34]:
# Sample lookup. The date string must follow the "%B %d, %Y" format that
# get_stock_data parses with datetime.strptime.
stock_ticker = "SHRIDINE.BO"
date = "January 02, 2025"
stock_data = get_stock_data(stock_ticker, date)

In [35]:
# Display the dict returned by get_stock_data for the sample ticker and date.
stock_data

{'stock_ticker': 'SHRIDINE.BO',
 'date': '2025-01-02',
 'open_price': 399.8999938964844,
 'close_price': 395.0,
 'volume': 1687.0,
 'movement': 'down'}

Ticker for Mazagon Dock: MAZDOCK.NS
