<a href="https://colab.research.google.com/github/abhinavvsunil/Time-series-modelling-and-inferential-analysis-of-energy-sector-of-national-stock-exchange-of-india/blob/main/demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install yfinance if it is not already installed
!pip install yfinance

import yfinance as yf
import pandas as pd

# -----------------------------
# Step 1: Pick five Nifty 500 constituents to sample
# -----------------------------
tickers = ["RELIANCE.NS", "TCS.NS", "INFY.NS", "HDFCBANK.NS", "ICICIBANK.NS"]

# -----------------------------
# Step 2: Pull a 1-day window of 1-minute bars per ticker
# -----------------------------
# Only the tail of each history frame is kept; DataFrame.tail() with no
# argument returns the last five rows.
live_data = {
    symbol: yf.Ticker(symbol).history(period="1d", interval="1m").tail()
    for symbol in tickers
}

# -----------------------------
# Step 3: Print one labelled table per ticker
# -----------------------------
for symbol, recent_bars in live_data.items():
    print(f"\n--- {symbol} ---")
    print(recent_bars)



--- RELIANCE.NS ---
                                  Open         High          Low        Close  \
Datetime                                                                        
2026-01-22 15:25:00+05:30  1403.599976  1404.500000  1403.300049  1403.500000   
2026-01-22 15:26:00+05:30  1404.000000  1405.300049  1403.300049  1405.000000   
2026-01-22 15:27:00+05:30  1404.900024  1405.000000  1403.300049  1403.300049   
2026-01-22 15:28:00+05:30  1403.699951  1404.599976  1403.300049  1404.300049   
2026-01-22 15:29:00+05:30  1404.300049  1404.699951  1403.199951  1404.300049   

                           Volume  Dividends  Stock Splits  
Datetime                                                    
2026-01-22 15:25:00+05:30  157245        0.0           0.0  
2026-01-22 15:26:00+05:30  138222        0.0           0.0  
2026-01-22 15:27:00+05:30  137789        0.0           0.0  
2026-01-22 15:28:00+05:30   62195        0.0           0.0  
2026-01-22 15:29:00+05:30   99113        0.0 

In [2]:
# Phase 1: Rules-Based Filtering with Live Data
# Author: Abhinav

!pip install yfinance
import yfinance as yf
import pandas as pd

# -----------------------------
# Step 1: Choose 5 Nifty 500 stocks
# -----------------------------
tickers = ["RELIANCE.NS", "TCS.NS", "INFY.NS", "HDFCBANK.NS", "ICICIBANK.NS"]

# Phase 1 screening thresholds, named so they can be tuned in one place.
MIN_ROE = 15                  # keep stocks with ROE strictly above this
MAX_DEBT_EQUITY = 1.0         # keep stocks with Debt/Equity strictly below this
MIN_OPERATING_CASH_FLOW = 0   # keep stocks with operating cash flow strictly above this

# -----------------------------
# Step 2: Fetch live price data (latest close)
# -----------------------------
# An empty history frame would make `.iloc[-1]` raise IndexError and abort
# the whole cell. Such tickers get NaN instead, so the gap stays visible in
# the output table rather than crashing the run.
prices = {}
for t in tickers:
    history = yf.Ticker(t).history(period="1d", interval="1m")
    prices[t] = float("nan") if history.empty else history["Close"].iloc[-1]

# -----------------------------
# Step 3: Mock fundamentals (replace with real API later)
# -----------------------------
fundamentals = {
    "RELIANCE.NS": {"ROE": 18, "DebtEquity": 0.5, "OperatingCashFlow": 100},
    "TCS.NS": {"ROE": 25, "DebtEquity": 0.3, "OperatingCashFlow": 200},
    "INFY.NS": {"ROE": 20, "DebtEquity": 0.8, "OperatingCashFlow": 150},
    "HDFCBANK.NS": {"ROE": 12, "DebtEquity": 1.2, "OperatingCashFlow": 300},
    "ICICIBANK.NS": {"ROE": 16, "DebtEquity": 0.9, "OperatingCashFlow": 250},
}

# -----------------------------
# Step 4: Apply Phase 1 Filters
# -----------------------------
# A stock passes only if it clears all three thresholds.
quality_universe = []
for t in tickers:
    f = fundamentals[t]
    passes_filters = (
        f["ROE"] > MIN_ROE
        and f["DebtEquity"] < MAX_DEBT_EQUITY
        and f["OperatingCashFlow"] > MIN_OPERATING_CASH_FLOW
    )
    if passes_filters:
        quality_universe.append({
            "Stock": t,
            "LatestPrice": prices[t],
            "ROE": f["ROE"],
            "DebtEquity": f["DebtEquity"],
            "OperatingCashFlow": f["OperatingCashFlow"],
        })

# -----------------------------
# Step 5: Output
# -----------------------------
# Passing the column list keeps the schema intact even when no stock
# survives the filters (an empty list would otherwise yield a column-less frame).
df_quality = pd.DataFrame(
    quality_universe,
    columns=["Stock", "LatestPrice", "ROE", "DebtEquity", "OperatingCashFlow"],
)
print("Quality Universe (Phase 1):")
print(df_quality)


Quality Universe (Phase 1):
          Stock  LatestPrice  ROE  DebtEquity  OperatingCashFlow
0   RELIANCE.NS  1404.300049   18         0.5                100
1        TCS.NS  3153.000000   25         0.3                200
2       INFY.NS  1663.500000   20         0.8                150
3  ICICIBANK.NS  1346.099976   16         0.9                250


In [3]:
import pandas as pd

# Stand-in for the Phase 1 result so this cell can run on its own;
# replace `example_rows` with the real Phase 1 frame when it is available.
example_columns = ["Stock", "LatestPrice", "ROE", "DebtEquity", "OperatingCashFlow"]
example_rows = [
    ("RELIANCE.NS", 2500, 18, 0.5, 100),
    ("TCS.NS", 3500, 25, 0.3, 200),
    ("INFY.NS", 1600, 20, 0.8, 150),
    ("ICICIBANK.NS", 950, 16, 0.9, 250),
]

# -----------------------------
# Steps 1-2: Scale each metric by its column maximum, then blend
# -----------------------------
# Debt/Equity is inverted (1 - ratio) because a lower value should score higher.
# The blend weights are 0.4 (ROE), 0.3 (Debt/Equity) and 0.3 (operating cash flow).
df_quality = pd.DataFrame(example_rows, columns=example_columns).assign(
    ROE_norm=lambda frame: frame["ROE"] / frame["ROE"].max(),
    DebtEquity_norm=lambda frame: 1 - (frame["DebtEquity"] / frame["DebtEquity"].max()),
    OCF_norm=lambda frame: frame["OperatingCashFlow"] / frame["OperatingCashFlow"].max(),
    FundamentalRank=lambda frame: (
        0.4 * frame["ROE_norm"] +
        0.3 * frame["DebtEquity_norm"] +
        0.3 * frame["OCF_norm"]
    ),
)

# -----------------------------
# Step 3: Best score first
# -----------------------------
df_ranked = df_quality.sort_values(by="FundamentalRank", ascending=False)

report_columns = example_columns + ["FundamentalRank"]
print("Ranked Quality Universe:")
print(df_ranked[report_columns])


Ranked Quality Universe:
          Stock  LatestPrice  ROE  DebtEquity  OperatingCashFlow  \
1        TCS.NS         3500   25         0.3                200   
3  ICICIBANK.NS          950   16         0.9                250   
0   RELIANCE.NS         2500   18         0.5                100   
2       INFY.NS         1600   20         0.8                150   

   FundamentalRank  
1         0.840000  
3         0.556000  
0         0.541333  
2         0.533333  


In [4]:
!pip install transformers torch

from transformers import pipeline

# Build a Hugging Face sentiment-analysis pipeline backed by the
# ProsusAI/finbert checkpoint.
sentiment_pipeline = pipeline("sentiment-analysis", model="ProsusAI/finbert")

# Sample headlines to classify
headlines = [
    "Reliance Industries reports record quarterly profit",
    "Infosys faces regulatory investigation",
    "TCS announces major new client deal",
]

# Classify one headline per call; the pipeline returns a list and only its
# first entry is printed next to the headline.
for headline in headlines:
    print(headline, "->", sentiment_pipeline(headline)[0])




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


Reliance Industries reports record quarterly profit -> {'label': 'positive', 'score': 0.7533596754074097}
Infosys faces regulatory investigation -> {'label': 'negative', 'score': 0.8269966840744019}
TCS announces major new client deal -> {'label': 'positive', 'score': 0.5076025724411011}


In [11]:

import yfinance as yf
import pandas as pd
import numpy as np
import xgboost as xgb
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# -----------------------------
# Step 1: Stock universe plus placeholder model inputs
# -----------------------------
# One record per stock: (ticker, mock FinBERT sentiment, Phase 1 fundamental rank).
# The list order defines the order of `stocks` and of both lookup dicts.
_MOCK_INPUTS = [
    ("TCS.NS", 0.5076, 0.840000),
    ("ICICIBANK.NS", 0.3120, 0.556000),
    ("RELIANCE.NS", 0.7534, 0.541333),
    ("INFY.NS", -0.8270, 0.533333),
]

stocks = [ticker for ticker, _sentiment, _rank in _MOCK_INPUTS]

# Mock sentiment scores (from FinBERT), keyed by ticker
sentiment_scores = {ticker: sentiment for ticker, sentiment, _rank in _MOCK_INPUTS}

# Mock fundamental ranks (from Phase 1), keyed by ticker
fundamental_ranks = {ticker: rank for ticker, _sentiment, rank in _MOCK_INPUTS}

# -----------------------------
# Step 2: Feature extraction
# -----------------------------
def extract_features(ticker):
    """Return the latest RSI and recent return volatility for one ticker.

    Downloads six months of daily bars via ``yf.download``, drops rows with
    missing values, and derives two indicators from the closing prices.

    Parameters
    ----------
    ticker : str
        Symbol passed straight to ``yf.download`` (e.g. ``"TCS.NS"``).

    Returns
    -------
    tuple
        ``(rsi, volatility)`` where ``rsi`` is the last value produced by
        ``RSIIndicator(...).rsi()`` with the library's default settings, and
        ``volatility`` is the last value of the 20-row rolling standard
        deviation of day-over-day percentage changes in the close.

    Raises
    ------
    ValueError
        If the download contains no usable rows for ``ticker``.
    """
    # auto_adjust is set explicitly: recent yfinance releases default it to
    # True and emit a FutureWarning whenever it is left implicit.
    history = yf.download(ticker, period="6mo", interval="1d", auto_adjust=True)
    # Non-mutating dropna keeps the downloaded frame intact.
    history = history.dropna()

    if history.empty:
        # Fail with a clear message instead of an opaque IndexError/KeyError
        # from the indicator code further down.
        raise ValueError(f"No price data returned for {ticker!r}")

    close = history["Close"]
    # With (field, ticker) MultiIndex columns, `Close` is a one-column frame.
    # Selecting the column positionally always yields a Series, whereas
    # `squeeze()` would collapse a single-row frame to a scalar.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    close_series = close.rename("Close")

    # RSI
    rsi = RSIIndicator(close=close_series).rsi().iloc[-1]

    # Volatility (std dev of returns), computed from the same Series as the
    # RSI so both indicators see identical input.
    daily_returns = close_series.pct_change()
    volatility = daily_returns.rolling(20).std().iloc[-1]

    return rsi, volatility

# Build one feature record per stock. The inner single-item `for` unpacks
# the (rsi, volatility) pair, so extract_features runs exactly once per
# ticker and in the same order as `stocks`.
features = [
    {
        "Stock": symbol,
        "FundamentalRank": fundamental_ranks[symbol],
        "SentimentScore": sentiment_scores[symbol],
        "RSI": latest_rsi,
        "Volatility": recent_vol,
    }
    for symbol in stocks
    for latest_rsi, recent_vol in [extract_features(symbol)]
]

df = pd.DataFrame(features)




  df = yf.download(ticker, period="6mo", interval="1d")
[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, period="6mo", interval="1d")
[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, period="6mo", interval="1d")
[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, period="6mo", interval="1d")
[*********************100%***********************]  1 of 1 completed


In [12]:
# Print whatever DataFrame is currently bound to `df`.
# NOTE(review): presumably the per-stock feature table built in the preceding
# cell — confirm execution order, since `df` is reassigned by a later cell.
print(df)

          Stock  FundamentalRank  SentimentScore        RSI  Volatility
0        TCS.NS         0.840000          0.5076  48.708002    0.013590
1  ICICIBANK.NS         0.556000          0.3120  38.887214    0.013042
2   RELIANCE.NS         0.541333          0.7534  27.142728    0.015064
3       INFY.NS         0.533333         -0.8270  58.488992    0.016060


          Stock  SuperScore
0        TCS.NS       0.765
2   RELIANCE.NS       0.656
1  ICICIBANK.NS       0.553
3       INFY.NS       0.277


In [16]:
import pandas as pd

stocks = ["TCS.NS", "ICICIBANK.NS", "RELIANCE.NS", "INFY.NS"]

# Per-stock inputs to the composite score, all keyed by ticker.
fundamental_ranks = {
    "TCS.NS": 0.840000,
    "ICICIBANK.NS": 0.556000,
    "RELIANCE.NS": 0.541333,
    "INFY.NS": 0.533333,
}

sentiment_scores = {
    "TCS.NS": 0.5076,
    "ICICIBANK.NS": 0.3120,
    "RELIANCE.NS": 0.7534,
    "INFY.NS": -0.8270,
}

technical_indicators = {
    "TCS.NS": {"RSI": 61.2, "Volatility": 0.018},
    "ICICIBANK.NS": {"RSI": 58.7, "Volatility": 0.022},
    "RELIANCE.NS": {"RSI": 65.3, "Volatility": 0.017},
    "INFY.NS": {"RSI": 49.8, "Volatility": 0.025},
}

# Largest volatility in the universe; used to scale the volatility term so
# the most volatile stock contributes 0 from that term.
max_vol = max(technical_indicators[s]["Volatility"] for s in stocks)

# Weighted blend: 0.4 fundamental rank, 0.3 sentiment, 0.2 RSI (rescaled from
# 0-100 to 0-1), 0.1 inverse relative volatility. Rounded to 3 decimals.
rows = [
    {
        "Stock": s,
        "FinalScore": round(
            0.4*fundamental_ranks[s] +
            0.3*sentiment_scores[s] +
            0.2*(technical_indicators[s]["RSI"]/100) +
            0.1*(1 - technical_indicators[s]["Volatility"]/max_vol),
            3,
        ),
    }
    for s in stocks
]

df = pd.DataFrame(rows).sort_values("FinalScore", ascending=False)
print(df)



          Stock  FinalScore
0        TCS.NS       0.639
2   RELIANCE.NS       0.605
1  ICICIBANK.NS       0.445
3       INFY.NS       0.065


In [17]:
import pandas as pd

# Phase 1 fundamental rank per ticker
fundamental_ranks = {
    "TCS.NS": 0.840000,
    "ICICIBANK.NS": 0.556000,
    "RELIANCE.NS": 0.541333,
    "INFY.NS": 0.533333,
}

# Phase 2 composite (FinalScore) per ticker
ai_scores = {
    "TCS.NS": 0.639,
    "RELIANCE.NS": 0.605,
    "ICICIBANK.NS": 0.445,
    "INFY.NS": 0.065,
}

# Equal-weight average of the two scores, rounded to 3 decimals. Rows follow
# the key order of `fundamental_ranks` before sorting.
rows = [
    {
        "Stock": stock,
        "SuperScore": round(0.5*fundamental_ranks[stock] + 0.5*ai_scores[stock], 3),
    }
    for stock in fundamental_ranks
]

super_df = pd.DataFrame(rows).sort_values("SuperScore", ascending=False)
print(super_df)



          Stock  SuperScore
0        TCS.NS       0.740
2   RELIANCE.NS       0.573
1  ICICIBANK.NS       0.501
3       INFY.NS       0.299
