In [7]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

Connecting to SQL database

In [8]:
import sqlite3

conn = sqlite3.connect("news_data.db")
cursor = conn.cursor()

cursor.execute("""
CREATE TABLE IF NOT EXISTS news_articles (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    company TEXT,
    source TEXT,
    title TEXT,
    description TEXT,
    url TEXT UNIQUE,
    published_at TEXT
)
""")

conn.commit()
conn.close()

Importing news API

In [11]:
from newsapi.newsapi_client import NewsApiClient
import os
from datetime import datetime, timedelta
load_dotenv("APIs.env")
api = NewsApiClient(api_key=os.getenv("NEWS_API_KEY"))


company = 'Reliance'
query = 'Reliance Industries'



to_date = datetime.utcnow()
from_date = to_date - timedelta(days=30)

response = api.get_everything(
    q=query,
    language="en",
    sort_by="publishedAt",
    from_param=from_date.strftime("%Y-%m-%d"),
    to=to_date.strftime("%Y-%m-%d"),
    page_size=100
)

In [12]:
def store_news_sqlite(company, articles):
    conn = sqlite3.connect("news_data.db")
    cursor = conn.cursor()

    for article in articles:
        try:
            cursor.execute("""
            INSERT OR IGNORE INTO news_articles
            (company, source, title, description, url, published_at)
            VALUES (?, ?, ?, ?, ?, ?)
            """, (
                company,   
                article.get("source", {}).get("name"),
                article.get("title"),
                article.get("description"),
                article.get("url"),
                article.get("publishedAt")
            ))
        except Exception as e:
            print("Error inserting article:", e)

    conn.commit()
    conn.close()

In [13]:
response = api.get_everything(q=query, language="en", sort_by="publishedAt")
store_news_sqlite(company, response["articles"])


In [14]:
import pandas as pd
import sqlite3

company = ['Reliance']
conn = sqlite3.connect("news_data.db")
placeholders = ",".join(["?"] * len(company))
query = f"""
SELECT *
FROM news_articles
WHERE company in ({placeholders})
"""

df = pd.read_sql(query, conn, params=company)

conn.close()

df.head()

Unnamed: 0,id,company,source,title,description,url,published_at
0,1,Reliance,Mypeoplesreview.com,Nepal: Pleasant and unpleasant truths,By Narayan Prasad Mishra Pleasant Facts We a...,https://mypeoplesreview.com/2026/01/21/nepal-p...,2026-01-21T06:56:23Z
1,2,Reliance,ArchDaily,"""Each Constraint Becomes More of an Opportunit...",The Zando Central Market redevelopment in Kins...,https://www.archdaily.com/1038040/each-constra...,2026-01-21T06:30:00Z
2,3,Reliance,Naturalnews.com,The Gut Health Revolution: Reclaiming your hea...,"The gut is the epicenter of health, influencin...",https://www.naturalnews.com/2026-01-21-gut-hea...,2026-01-21T06:00:00Z
3,4,Reliance,The Times of India,"Rs 6 lakh crore gone! Sensex crashes 1,000 poi...",Sensex Crash Today: Indian equities experience...,https://economictimes.indiatimes.com/markets/s...,2026-01-21T05:22:07Z
4,5,Reliance,The Times of India,Reliance Industries shares tumble to deepest o...,Reliance Industries shares have seen a signifi...,https://economictimes.indiatimes.com/markets/s...,2026-01-21T05:13:24Z


In [15]:
import spacy

nlp = spacy.load("en_core_web_sm")

In [16]:
def ner_company_filter(text, company_name):
    doc = nlp(text)
    org_entities = [ent.text.lower() for ent in doc.ents if ent.label_ == "ORG"] #for organisations
    
    return any(company_name.lower() in org for org in org_entities)

In [17]:
articles = response["articles"]

In [21]:
filtered_articles = []

company = 'Reliance'

for article in articles:
    text = (article["title"] or "") + " " + (article["description"] or "")

    matched_company = None
    if ner_company_filter(text, company):
        article["company"] = company   
        filtered_articles.append(article)
        

In [22]:
print("Total articles fetched:", len(articles))
print("Articles after NER filtering:", len(filtered_articles))

Total articles fetched: 100
Articles after NER filtering: 7


In [23]:
import pandas as pd

filtered_df = pd.DataFrame(filtered_articles)
filtered_df.head()

Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content,company
0,"{'id': 'the-times-of-india', 'name': 'The Time...",Nikhil Agarwal,Reliance Industries shares tumble to deepest o...,Reliance Industries shares have seen a signifi...,https://economictimes.indiatimes.com/markets/s...,"https://img.etimg.com/thumb/msid-126934052,wid...",2026-01-21T05:13:24Z,Having lost around $29 billion in market value...,Reliance
1,"{'id': 'bloomberg', 'name': 'Bloomberg'}",Bloomberg,Reliance tumbles into deepest oversold level i...,The selloff has erased about $29 billion from ...,https://www.bloomberg.com/news/articles/2026-0...,https://bl-i.thgim.com/public/incoming/8rzzj8/...,2026-01-21T04:13:15Z,Reliance Industries Ltd.s shares are the most ...,Reliance
2,"{'id': None, 'name': 'BusinessLine'}",Bloomberg,Ambani's Reliance faces a rare January setback,"The fall, including a 3% decline after the com...",https://www.thehindubusinessline.com/companies...,https://bl-i.thgim.com/public/incoming/8rzzj8/...,2026-01-20T05:04:06Z,Reliance Industries has had a rough start to t...,Reliance
3,"{'id': None, 'name': 'BusinessLine'}",BL Research Bureau,"Day Trading Guide for January 20, 2025: Intrad...",Day Trading Guide gives you the key intraday s...,https://www.thehindubusinessline.com/portfolio...,https://bl-i.thgim.com/public/incoming/vqgr8k/...,2026-01-20T00:41:00Z,Day Trading Guide gives you the key intraday s...,Reliance
4,"{'id': 'the-times-of-india', 'name': 'The Time...",ET Bureau,RIL slumps over 3% after Q3 miss dampens growt...,Reliance Industries shares saw a significant d...,https://economictimes.indiatimes.com/markets/s...,"https://img.etimg.com/thumb/msid-126735417,wid...",2026-01-20T00:03:50Z,Mumbai: Shares of Nifty heavyweight Reliance I...,Reliance


In [26]:
from datetime import time
from pandas.tseries.offsets import BusinessDay
import pytz

IST = pytz.timezone("Asia/Kolkata")
MARKET_CLOSE = time(15, 30)

In [27]:
def map_to_trading_day(timestamp):
    ts = pd.Timestamp(timestamp).tz_localize(None)

    if ts.weekday() >= 5:  # Weekend
        return (ts + BusinessDay(1))

    if ts.time() > MARKET_CLOSE:
        return ts + BusinessDay(1)

    return ts

In [28]:
filtered_df["publishedAt"] = pd.to_datetime(filtered_df["publishedAt"], utc=True)

filtered_df["trading_day"] = filtered_df["publishedAt"].apply(map_to_trading_day)

In [29]:
print(filtered_df[['publishedAt', 'trading_day']])

                publishedAt         trading_day
0 2026-01-21 05:13:24+00:00 2026-01-21 05:13:24
1 2026-01-21 04:13:15+00:00 2026-01-21 04:13:15
2 2026-01-20 05:04:06+00:00 2026-01-20 05:04:06
3 2026-01-20 00:41:00+00:00 2026-01-20 00:41:00
4 2026-01-20 00:03:50+00:00 2026-01-20 00:03:50
5 2026-01-19 14:21:57+00:00 2026-01-19 14:21:57
6 2026-01-19 12:33:52+00:00 2026-01-19 12:33:52


In [31]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')

In [68]:
import numpy as np
import torch
from collections import Counter

def average_sentiment(text, max_tokens=512):
    inp = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        add_special_tokens=False
    )

    input_ids = inp["input_ids"][0]
    sentiments = []   # ✅ make sure this exists

    for i in range(0, len(input_ids), max_tokens):
        chunk_ids = input_ids[i:i + max_tokens]

        inputs = {
            "input_ids": chunk_ids.unsqueeze(0),
            "attention_mask": torch.ones_like(chunk_ids).unsqueeze(0)
        }

        with torch.no_grad():
            outputs = model(**inputs)
            probs = torch.softmax(outputs.logits, dim=1)
            label = torch.argmax(probs, dim=1).item()

        sentiments.append(label)   # ✅ correct

    # Majority vote
    majority_label = Counter(sentiments).most_common(1)[0][0]

    label_map = {
        0: "negative",
        1: "neutral",
        2: "positive"
    }

    return label_map[majority_label]



In [71]:
doc = """
Paytm's profits increase this quarter by 50%.

"""

score_1 = average_sentiment(doc)
print("Average sentiment:", score_1)

Average sentiment: positive


In [72]:
doc2 = []
for article in filtered_articles:
    text = (article["title"] or "") + " " + (article["description"] or "")
    score = average_sentiment(text)
    doc2.append(score)
filtered_df["sentiment_1"] = doc2

In [76]:
sentiment_map = {
    "negative": -1,
    "neutral": 0,
    "positive": 1
}

filtered_df["sentiment_num"] = filtered_df["sentiment_1"].map(sentiment_map)
filtered_df['sentiment_num']

0    0
1   -1
2    0
3    1
4   -1
5   -1
6    1
Name: sentiment_num, dtype: int64

In [77]:
filtered_df["publishedAt"] = pd.to_datetime(filtered_df["publishedAt"])

daily_sentiment = (
    filtered_df
    .groupby(filtered_df["publishedAt"].dt.date)["sentiment_num"]
    .mean()
)

daily_sentiment_2 = daily_sentiment.dropna()
daily_sentiment_2


publishedAt
2026-01-19    0.0
2026-01-20    0.0
2026-01-21   -0.5
Name: sentiment_num, dtype: float64