<a href="https://colab.research.google.com/github/harjeet88/stock-prediction/blob/main/stocks/stock_prediction_strategy_basic_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
!pip install yfinance --upgrade



In [97]:
!pip install nsepy

Collecting nsepy
  Downloading nsepy-0.8.tar.gz (33 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: nsepy
  Building wheel for nsepy (setup.py) ... [?25l[?25hdone
  Created wheel for nsepy: filename=nsepy-0.8-py3-none-any.whl size=36057 sha256=48cd8cb70ceadaaa0674eb2dd839415adb81cbbe0c4fd95d196c810409e840ce
  Stored in directory: /root/.cache/pip/wheels/94/a7/d4/0e4e94292f5761407472684b2cd74771e9985af72f968101e0
Successfully built nsepy
Installing collected packages: nsepy
Successfully installed nsepy-0.8


In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import yfinance as yf
import matplotlib.pyplot as plt

In [83]:
# Portfolio parameters
initial_capital = 1000000  # ₹10,00,000 initial investment
target_annual_return = 0.25  # 25% annualized return
equity_allocation = 0.80  # 80% in equities
hedge_allocation = 0.10  # 10% in gold ETFs
cash_allocation = 0.10  # 10% in liquid funds
max_position_size = 0.05  # 5% per stock
stop_loss_pct = 0.25  # Increased to 25% to avoid premature exits
volatility_threshold = 0.40  # 40% max volatility
rebalance_freq = 'Q'  # Quarterly rebalancing
transaction_cost = 0.001  # 0.1% per trade

In [92]:
# High-conviction stock watchlist (NSE tickers)
stocks = [
    'RELIANCE.NS',  # Large-cap: Diversified, PLI beneficiary
    'TATAMOTORS.NS',  # Mid-cap: Auto, EV focus
    'DIXON.NS',  # Mid-cap: Electronics manufacturing (PLI)
    'POLYCAB.NS',  # Mid-cap: Electricals, renewable energy
    'CDSL.NS',  # Small-cap: Fintech, capital market
    'NAVINFLUOR.NS',  # Small-cap: Specialty chemicals
    'KPITTECH.NS',  # Small-cap: Auto-tech, EV software
#    'L&TFH.NS',  # Mid-cap: Affordable housing finance
    'GREENPOWER.NS',  # Small-cap: Renewable energy
    'BIRLACORPN.NS'  # Mid-cap: Cement, affordable housing
]

In [85]:


# Sector allocation weights (70% mid/small-cap, 30% large-cap)
sector_weights = {
    'Renewables': 0.30,  # e.g., GREENPOWER.NS, POLYCAB.NS
    'Specialty Chemicals': 0.25,  # e.g., NAVINFLUOR.NS
    'Fintech': 0.20,  # e.g., CDSL.NS, KPITTECH.NS
    'Manufacturing (PLI)': 0.15,  # e.g., DIXON.NS, RELIANCE.NS
    'Affordable Housing': 0.10  # e.g., L&TFH.NS, BIRLACORPN.NS
}


In [86]:

# Backtesting period (2018-2023)
start_date = '2018-01-01'
end_date = '2023-12-31'


In [98]:
from nsepy import get_history
def fetch_data(tickers, start, end):
    data = {}
    failed_tickers = []
    start_date = datetime.strptime(start, '%Y-%m-%d')
    end_date = datetime.strptime(end, '%Y-%m-%d')
    date_range = pd.date_range(start=start, end=end, freq='B')
    for ticker in tickers:
        try:
            df = get_history(symbol=ticker.replace('.NS', ''), start=start_date, end=end_date)
            if not df.empty and 'Close' in df.columns and len(df) >= 100:
                series = df['Close'].reindex(date_range, method='ffill').dropna()
                if len(series) >= 100:
                    data[ticker] = series
                    print(f"Successfully fetched and aligned data for {ticker}: {len(series)} rows")
                else:
                    failed_tickers.append(ticker)
                    print(f"No valid data for {ticker}: Insufficient data points after alignment")
            else:
                failed_tickers.append(ticker)
                print(f"No valid data for {ticker}: Insufficient data points")
        except Exception as e:
            failed_tickers.append(ticker)
            print(f"Error fetching data for {ticker}: {e}")
    if not data:
        print("No data fetched for any tickers. Exiting.")
        return pd.DataFrame()
    if failed_tickers:
        print(f"Failed to fetch data for: {failed_tickers}")
    try:
        df = pd.DataFrame(data, index=date_range)
        df = df.dropna(how='all')
        if not df.empty:
            print(f"DataFrame shape: {df.shape}")
            return df
        print("DataFrame is empty after construction.")
        return pd.DataFrame()
    except ValueError as e:
        print(f"Error creating DataFrame: {e}")
        return pd.DataFrame()

In [95]:
# Fetch historical data using yfinance
def fetch_data1(tickers, start, end):
    data = {}
    failed_tickers = []
    all_dates = pd.date_range(start=start, end=end, freq='D') # Create a full date range
    for ticker in tickers:
        try:
            df = yf.download(ticker, start=start, end=end, progress=False, auto_adjust=False)
            if not df.empty and 'Adj Close' in df.columns and len(df) >= 100:
                # Reindex to the full date range and forward fill missing values
                data[ticker] = df['Adj Close'].dropna().reindex(all_dates).fillna(method='ffill')
                print(f"Successfully fetched data for {ticker}: {len(df)} rows")
            else:
                failed_tickers.append(ticker)
                print(f"No valid data for {ticker}: Insufficient data points or missing 'Adj Close'")
        except Exception as e:
            failed_tickers.append(ticker)
            print(f"Error fetching data for {ticker}: {e}")

    if not data:
        print("No data fetched for any tickers. Exiting.")
        return pd.DataFrame()

    if failed_tickers:
        print(f"Failed to fetch data for: {failed_tickers}")

    try:
        # Create DataFrame from aligned data
        df = pd.DataFrame(data)
        if df.empty or len(df) < 2:
            print("DataFrame is empty or has insufficient rows after construction.")
            return pd.DataFrame()
        print(f"DataFrame shape: {df.shape}")
        return df
    except ValueError as e:
        print(f"Error creating DataFrame: {e}")
        return pd.DataFrame()

In [88]:

# Calculate returns and volatility
def calculate_metrics(df):
    if df.empty or len(df) < 2:
        print("Insufficient data for metrics calculation.")
        return pd.Series(dtype=float), pd.Series(dtype=float), pd.Series(dtype=float)
    returns = df.pct_change().dropna()
    if returns.empty:
        print("No valid returns calculated.")
        return pd.Series(dtype=float), pd.Series(dtype=float), pd.Series(dtype=float)
    annual_returns = returns.mean() * 252
    annual_vol = returns.std() * np.sqrt(252)
    sharpe_ratio = annual_returns / annual_vol
    return annual_returns, annual_vol, sharpe_ratio

In [89]:



# Simulate portfolio with rebalancing and stop-loss
def simulate_portfolio(df, weights, initial_capital):
    if df.empty or len(df) < 2:
        print("Insufficient data for simulation.")
        return 0, 0, 0, pd.Series(dtype=float)

    portfolio_value = initial_capital
    positions = {stock: initial_capital * weights.get(stock, max_position_size) / df[stock].iloc[0] for stock in df.columns}
    portfolio_values = [initial_capital]
    stop_loss_triggered = {stock: False for stock in df.columns}
    peak_prices = {stock: df[stock].iloc[0] for stock in df.columns}
    valid_dates = df.index

    if len(valid_dates) < 2:
        print("No valid dates for simulation.")
        return 0, 0, 0, pd.Series(dtype=float)

    for i, date in enumerate(valid_dates[1:], 1):
        daily_value = 0
        for stock in df.columns:
            if not stop_loss_triggered[stock]:
                price = df[stock].loc[date]
                if np.isnan(price):
                    print(f"NaN price detected for {stock} on {date}. Skipping.")
                    continue
                peak_prices[stock] = max(peak_prices[stock], price)
                if peak_prices[stock] > 0 and (peak_prices[stock] - price) / peak_prices[stock] >= stop_loss_pct:
                    stop_loss_triggered[stock] = True
                    positions[stock] = 0
                    portfolio_value *= (1 - transaction_cost)
                    print(f"Stop-loss triggered for {stock} on {date}")
                daily_value += positions[stock] * price
        if daily_value <= 0:
            print(f"Portfolio value reached zero or negative on {date}. Stopping simulation.")
            return 0, 0, 0, pd.Series(dtype=float)
        portfolio_values.append(daily_value)
        portfolio_value = daily_value

        # Quarterly rebalancing
        if i > 0 and date.quarter != valid_dates[i-1].quarter:
            total_value = sum(positions[stock] * df[stock].loc[date] for stock in df.columns if not stop_loss_triggered[stock])
            if total_value > 0:
                positions = {stock: (total_value * weights.get(stock, max_position_size) / df[stock].loc[date]) for stock in df.columns}
                stop_loss_triggered = {stock: False for stock in df.columns}
                portfolio_value *= (1 - transaction_cost)
                peak_prices = {stock: df[stock].loc[date] for stock in df.columns}
                print(f"Rebalanced portfolio on {date}")

    try:
        portfolio_df = pd.Series(portfolio_values, index=valid_dates, dtype=float)
    except ValueError as e:
        print(f"Error creating portfolio Series: {e}")
        return 0, 0, 0, pd.Series(dtype=float)

    if portfolio_df.empty or portfolio_df.iloc[-1] <= 0:
        print("Portfolio DataFrame is empty or portfolio value reached zero.")
        return 0, 0, 0, pd.Series(dtype=float)

    returns = portfolio_df.pct_change().dropna()
    cagr = ((portfolio_df.iloc[-1] / initial_capital) ** (1 / 5)) - 1 if not portfolio_df.empty else 0
    sharpe = returns.mean() * 252 / (returns.std() * np.sqrt(252)) if not returns.empty and returns.std() != 0 else 0
    max_drawdown = (portfolio_df / portfolio_df.cummax() - 1).min() if not portfolio_df.empty else 0
    return cagr, sharpe, max_drawdown, portfolio_df



In [90]:
# Main execution
def run():
    # Fetch data
    df = fetch_data(stocks, start_date, end_date)
    if df.empty or len(df) < 2:
        print("No valid data to proceed with simulation.")
        return

    # Assign weights (70% mid/small-cap, 30% large-cap)
    weights = {}
    large_caps = ['RELIANCE.NS']
    mid_small_caps = [s for s in df.columns if s not in large_caps]
    valid_large_caps = [s for s in large_caps if s in df.columns]
    if valid_large_caps:
        for stock in valid_large_caps:
            weights[stock] = 0.30 / len(valid_large_caps)
    for stock in mid_small_caps:
        sector = next((k for k, v in sector_weights.items() if stock in [s for s in stocks if k in s]), 'Other')
        weights[stock] = 0.70 * sector_weights.get(sector, 0.2) / len(mid_small_caps) if mid_small_caps else 0

    # Simulate portfolio
    cagr, sharpe, max_drawdown, portfolio_df = simulate_portfolio(df, weights, initial_capital)

    # Print results
    print(f"CAGR: {cagr:.2%}")
    print(f"Sharpe Ratio: {sharpe:.2f}")
    print(f"Max Drawdown: {max_drawdown:.2%}")

    # Plot portfolio value
    if not portfolio_df.empty:
        plt.figure(figsize=(10, 6))
        portfolio_df.plot(title='Portfolio Value (2018-2023)')
        plt.xlabel('Date')
        plt.ylabel('Portfolio Value (₹)')
        plt.grid(True)
        plt.show()
    else:
        print("No portfolio data to plot.")

In [99]:
run()



No valid data for RELIANCE.NS: Insufficient data points




No valid data for TATAMOTORS.NS: Insufficient data points




No valid data for DIXON.NS: Insufficient data points




No valid data for POLYCAB.NS: Insufficient data points




No valid data for CDSL.NS: Insufficient data points
No valid data for NAVINFLUOR.NS: Insufficient data points




No valid data for KPITTECH.NS: Insufficient data points
No valid data for GREENPOWER.NS: Insufficient data points




No valid data for BIRLACORPN.NS: Insufficient data points
No data fetched for any tickers. Exiting.
No valid data to proceed with simulation.
