In [8]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.stats import linregress
from statsmodels.api import OLS, add_constant
import streamlit as st

# === Parameters ===
START_DATE = "2005-12-31"
END_DATE = "2025-07-31"
REBALANCE_MONTHS = [3, 6, 9, 12]  # Quarterly

# Step 1: Load NSE500 list
nse500 = pd.read_html("https://en.wikipedia.org/wiki/NIFTY_50", header=0)[1]
symbols = nse500['Symbol'].tolist()

# Step 2: Download adjusted close daily price history
data = {}
for symbol in tqdm(symbols):
    try:
        df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False, auto_adjust=True)
        if df.empty or 'Close' not in df.columns:
            continue
        df = df[['Close']].rename(columns={'Close': symbol})
        data[symbol] = df
    except:
        continue

if not data:
    st.error("No data downloaded from yfinance. Check your internet connection or ticker symbols.")
    st.stop()

prices = pd.concat(data.values(), axis=1)
prices.columns = data.keys()

prices.index = pd.to_datetime(prices.index)
prices = prices.dropna(axis=1, how='all')

# === Calculate Barra-style factors ===
log_returns = np.log(prices / prices.shift(1))
monthly_prices = prices.resample('Q').last()

factor_data = {}
for date in monthly_prices.index[4:]:
    current_prices = monthly_prices.loc[date]
    past_prices = monthly_prices.loc[date - pd.DateOffset(months=4)]
    returns_12m = (current_prices / past_prices) - 1

    size = np.log(current_prices)
    momentum = returns_12m
    volatility = log_returns.loc[:date].rolling(252).std().iloc[-1]

    # Placeholder for value/growth (replace with real fundamentals later)
    value = pd.Series(np.random.normal(0, 1, len(current_prices)), index=current_prices.index)
    growth = pd.Series(np.random.normal(0, 1, len(current_prices)), index=current_prices.index)

    factors = pd.DataFrame({
        'Size': size,
        'Momentum': momentum,
        'Volatility': volatility,
        'Value': value,
        'Growth': growth
    })
    factor_data[date] = factors

# === Backtest each factor ===
def backtest_factor(factor_name):
    factor_returns = []
    for i, date in enumerate(factor_data.keys()):
        if date.month not in REBALANCE_MONTHS:
            continue
        df = factor_data[date].dropna()
        if len(df) < 10:
            continue

        top = df[factor_name].nlargest(int(len(df)*0.1)).index
        bottom = df[factor_name].nsmallest(int(len(df)*0.1)).index

        try:
            next_date = list(factor_data.keys())[i+1]
        except IndexError:
            break

        try:
            future_prices = monthly_prices.loc[next_date]
            curr_prices = monthly_prices.loc[date]

            top_ret = (future_prices[top] / curr_prices[top] - 1).mean()
            bottom_ret = (future_prices[bottom] / curr_prices[bottom] - 1).mean()
            factor_returns.append((next_date, top_ret, bottom_ret))
        except:
            continue

    df = pd.DataFrame(factor_returns, columns=["Date", "Top", "Bottom"]).set_index("Date")
    df["Top_Cumulative"] = (1 + df["Top"]).cumprod()
    df["Bottom_Cumulative"] = (1 + df["Bottom"]).cumprod()
    return df

# === Run backtests ===
factors = ['Size', 'Momentum', 'Volatility', 'Value', 'Growth']
results = {factor: backtest_factor(factor) for factor in factors}

# === Benchmark ===
benchmark = yf.download("^NSEI", start=START_DATE, end=END_DATE, interval="1mo")["Close"].resample('M').last()
benchmark_ret = benchmark.pct_change().dropna()
benchmark_cum = (1 + benchmark_ret).cumprod()

# === Plot Streamlit Outputs ===
st.title("📈 Barra Factor Strategy Backtests")
for factor in factors:
    df = results[factor]
    st.subheader(f"{factor} Factor")
    st.line_chart(df[["Top_Cumulative", "Bottom_Cumulative"]].join(benchmark_cum.rename("Nifty 50"), how="inner"))


100%|██████████| 50/50 [00:27<00:00,  1.82it/s]
  monthly_prices = prices.resample('Q').last()


KeyError: Timestamp('2006-11-30 00:00:00')

In [10]:
monthly_prices

Unnamed: 0_level_0,ADANIENT,ADANIPORTS,APOLLOHOSP,ASIANPAINT,AXISBANK,BAJAJ-AUTO,BAJFINANCE,BAJAJFINSV,BEL,BHARTIARTL,...,SUNPHARMA,TCS,TATACONSUM,TATAMOTORS,TATASTEEL,TECHM,TITAN,TRENT,ULTRACEMCO,WIPRO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-03-31,3.575901,,225.542419,52.684460,62.256199,430.028259,4.531797,2688.238770,9.735339,170.971542,...,69.344307,179.003906,63.433384,152.469009,26.802870,,37.649734,65.315193,626.643921,56.808010
2006-06-30,7.764215,,178.413467,49.007195,47.330540,430.419647,3.489977,2690.685791,7.874147,153.263412,...,63.547821,162.960434,56.042950,132.256561,27.349739,,26.776594,50.368542,685.955322,52.647324
2006-09-30,8.271670,,214.187653,55.964470,66.981674,469.198334,3.301971,2933.104004,8.588296,194.375229,...,75.617874,191.880600,55.787903,143.379852,27.452240,122.518303,36.487278,63.728863,820.937012,53.831848
2006-12-31,14.018536,,193.218872,61.452759,82.949883,410.125885,3.394398,2563.822754,9.986594,260.941284,...,79.758629,230.052780,53.627918,149.852478,21.846991,331.368103,38.893372,63.833435,1011.193909,62.138920
2007-03-31,13.098175,,225.825226,64.230721,86.623985,380.131561,3.940432,2376.319824,11.192182,316.426880,...,86.401077,232.821686,45.158981,121.166557,20.370466,283.034180,38.117722,49.255703,713.786804,57.854351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-30,3134.247559,1441.188354,7188.981934,3293.021484,1231.147827,12040.789062,766.617554,1972.439087,283.529663,1695.376831,...,1908.874146,4196.322754,1188.144165,966.341614,164.708893,1535.110352,3812.476318,7568.093262,11802.000000,265.450287
2024-12-31,2527.357666,1225.139404,7286.044922,2260.494385,1063.790771,8581.120117,679.038208,1567.336304,291.535309,1574.586670,...,1868.897583,4035.380859,907.970642,733.840576,134.903961,1675.584473,3243.388916,7117.184082,11426.349609,295.968842
2025-03-31,2314.616455,1177.222534,6616.200195,2319.251953,1101.058960,7684.104492,890.283508,2006.372437,301.320007,1719.029175,...,1729.008301,3562.290283,994.529175,668.700623,150.725006,1392.801392,3054.158691,5320.540527,11509.549805,262.250000
2025-06-30,2619.399902,1450.199951,7242.000000,2341.100098,1198.175903,8376.000000,936.500000,2056.000000,421.500000,1992.939209,...,1670.201904,3450.290771,1098.900024,688.000000,159.759995,1656.729004,3679.127686,6217.500000,12093.000000,265.980011


In [12]:
volatility

ADANIENT      0.041012
ADANIPORTS    0.046046
APOLLOHOSP    0.025978
ASIANPAINT    0.024922
AXISBANK      0.045694
BAJAJ-AUTO    0.051255
BAJFINANCE    0.043049
BAJAJFINSV    0.181911
BEL           0.031888
BHARTIARTL    0.033457
CIPLA         0.025520
COALINDIA          NaN
DRREDDY       0.028560
EICHERMOT     0.041772
ETERNAL            NaN
GRASIM        0.031553
HCLTECH       0.045754
HDFCBANK      0.036310
HDFCLIFE           NaN
HEROMOTOCO    0.026118
HINDALCO      0.047728
HINDUNILVR    0.026320
ICICIBANK     0.050151
INDUSINDBK    0.049171
INFY          0.031045
ITC           0.025997
JIOFIN             NaN
JSWSTEEL      0.052529
KOTAKBANK     0.050845
LT            0.037526
M&M           0.041211
MARUTI        0.031075
NESTLEIND     0.000000
NTPC          0.033542
ONGC          0.033419
POWERGRID     0.037856
RELIANCE      0.040247
SBILIFE            NaN
SHRIRAMFIN    0.027788
SBIN          0.036909
SUNPHARMA     0.028031
TCS           0.036394
TATACONSUM    0.028417
TATAMOTORS 

In [14]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.stats import linregress
from statsmodels.api import OLS, add_constant
import streamlit as st

# === Parameters ===
START_DATE = "2005-12-31"
END_DATE = "2025-07-31"
REBALANCE_MONTHS = [3, 6, 9, 12]  # Quarterly

# Step 1: Load NSE500 list
nse500 = pd.read_html("https://en.wikipedia.org/wiki/NIFTY_50", header=0)[1]
symbols = nse500['Symbol'].tolist()

# Step 2: Download adjusted close daily price history
data = {}
for symbol in tqdm(symbols):
    try:
        df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
        if df.empty or 'Close' not in df.columns:
            continue
        df = df[['Close']].rename(columns={'Close': symbol})
        data[symbol] = df
    except:
        continue

if not data:
    st.error("No data downloaded from yfinance. Check your internet connection or ticker symbols.")
    st.stop()

prices = pd.concat(data.values(), axis=1)
prices.columns = data.keys()

prices.index = pd.to_datetime(prices.index)
prices = prices.dropna(axis=1, how='all')

# === Calculate Barra-style factors ===
log_returns = np.log(prices / prices.shift(1))
monthly_prices = prices.resample('M').last()

factor_data = {}
for date in monthly_prices.index[12:]:
    try:
        current_prices = monthly_prices.loc[date]
        past_prices = monthly_prices.shift(12).loc[date]
        returns_12m = (current_prices / past_prices) - 1

        size = np.log(current_prices)
        momentum = returns_12m
        volatility = log_returns.loc[:date].rolling(252).std().iloc[-1]

        # Placeholder for value/growth (replace with real fundamentals later)
        value = pd.Series(np.random.normal(0, 1, len(current_prices)), index=current_prices.index)
        growth = pd.Series(np.random.normal(0, 1, len(current_prices)), index=current_prices.index)

        factors = pd.DataFrame({
            'Size': size,
            'Momentum': momentum,
            'Volatility': volatility,
            'Value': value,
            'Growth': growth
        })
        factor_data[date] = factors
    except KeyError:
        continue

# === Backtest each factor ===
def backtest_factor(factor_name):
    factor_returns = []
    for i, date in enumerate(factor_data.keys()):
        if date.month not in REBALANCE_MONTHS:
            continue
        df = factor_data[date].dropna()
        if len(df) < 10:
            continue

        top = df[factor_name].nlargest(int(len(df)*0.1)).index
        bottom = df[factor_name].nsmallest(int(len(df)*0.1)).index

        try:
            next_date = list(factor_data.keys())[i+1]
        except IndexError:
            break

        try:
            future_prices = monthly_prices.loc[next_date]
            curr_prices = monthly_prices.loc[date]

            top_ret = (future_prices[top] / curr_prices[top] - 1).mean()
            bottom_ret = (future_prices[bottom] / curr_prices[bottom] - 1).mean()
            factor_returns.append((next_date, top_ret, bottom_ret))
        except:
            continue

    df = pd.DataFrame(factor_returns, columns=["Date", "Top", "Bottom"]).set_index("Date")
    df["Top_Cumulative"] = (1 + df["Top"]).cumprod()
    df["Bottom_Cumulative"] = (1 + df["Bottom"]).cumprod()
    return df

# === Run backtests ===
factors = ['Size', 'Momentum', 'Volatility', 'Value', 'Growth']
results = {factor: backtest_factor(factor) for factor in factors}

# === Benchmark ===
benchmark = yf.download("^NSEI", start=START_DATE, end=END_DATE, interval="1mo")["Close"].resample('M').last()
benchmark_ret = benchmark.pct_change().dropna()
benchmark_cum = (1 + benchmark_ret).cumprod()

# === Plot Streamlit Outputs ===
st.title("📈 Barra Factor Strategy Backtests")
for factor in factors:
    df = results[factor]
    st.subheader(f"{factor} Factor")
    st.line_chart(df[["Top_Cumulative", "Bottom_Cumulative"]].join(benchmark_cum.rename("Nifty 50"), how="inner"))


  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(s

TypeError: 'str' object is not callable

In [16]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.stats import linregress
from statsmodels.api import OLS, add_constant
import streamlit as st

# === Parameters ===
START_DATE = "2005-12-31"
END_DATE = "2025-07-31"
REBALANCE_MONTHS = [3, 6, 9, 12]  # Quarterly

# Step 1: Load NSE500 list
nse500 = pd.read_html("https://en.wikipedia.org/wiki/NIFTY_50", header=0)[1]
symbols = nse500['Symbol'].tolist()

# Step 2: Download adjusted close daily price history
data = {}
for symbol in tqdm(symbols):
    try:
        df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
        if df.empty or 'Close' not in df.columns:
            continue
        df = df[['Close']].rename(columns={'Close': symbol})
        data[symbol] = df
    except:
        continue

if not data:
    st.error("No data downloaded from yfinance. Check your internet connection or ticker symbols.")
    st.stop()

prices = pd.concat(data.values(), axis=1)
prices.columns = data.keys()

prices.index = pd.to_datetime(prices.index)
prices = prices.dropna(axis=1, how='all')

# === Calculate Barra-style factors ===
log_returns = np.log(prices / prices.shift(1))
monthly_prices = prices.resample('M').last()

factor_data = {}
for date in monthly_prices.index[12:]:
    try:
        current_prices = monthly_prices.loc[date]
        past_prices = monthly_prices.shift(12).loc[date]
        returns_12m = (current_prices / past_prices) - 1

        size = np.log(current_prices)
        momentum = returns_12m
        volatility = log_returns.loc[:date].rolling(252).std().loc[date]

        # Placeholder for value/growth (replace with real fundamentals later)
        value = pd.Series(np.random.normal(0, 1, len(current_prices)), index=current_prices.index)
        growth = pd.Series(np.random.normal(0, 1, len(current_prices)), index=current_prices.index)

        factors = pd.DataFrame({
            'Size': size,
            'Momentum': momentum,
            'Volatility': volatility,
            'Value': value,
            'Growth': growth
        })
        factor_data[date] = factors
    except KeyError:
        continue

# === Backtest each factor ===
def backtest_factor(factor_name):
    factor_returns = []
    for i, date in enumerate(factor_data.keys()):
        if date.month not in REBALANCE_MONTHS:
            continue
        df = factor_data[date].dropna()
        if len(df) < 10:
            continue

        top = df[factor_name].nlargest(int(len(df)*0.1)).index
        bottom = df[factor_name].nsmallest(int(len(df)*0.1)).index

        try:
            next_date = list(factor_data.keys())[i+1]
        except IndexError:
            break

        try:
            future_prices = monthly_prices.loc[next_date]
            curr_prices = monthly_prices.loc[date]

            top_ret = (future_prices[top] / curr_prices[top] - 1).mean()
            bottom_ret = (future_prices[bottom] / curr_prices[bottom] - 1).mean()
            factor_returns.append((next_date, top_ret, bottom_ret))
        except:
            continue

    df = pd.DataFrame(factor_returns, columns=["Date", "Top", "Bottom"]).set_index("Date")
    df.index = pd.to_datetime(df.index)
    df["Top_Cumulative"] = (1 + df["Top"]).cumprod()
    df["Bottom_Cumulative"] = (1 + df["Bottom"]).cumprod()
    return df

# === Run backtests ===
factors = ['Size', 'Momentum', 'Volatility', 'Value', 'Growth']
results = {factor: backtest_factor(factor) for factor in factors}

# === Benchmark ===
benchmark = yf.download("^NSEI", start=START_DATE, end=END_DATE, interval="1mo")["Close"].resample('M').last()
benchmark_ret = benchmark.pct_change().dropna()
benchmark_cum = (1 + benchmark_ret).cumprod()

# === Plot Streamlit Outputs ===
st.title("📈 Barra Factor Strategy Backtests")
for factor in factors:
    df = results[factor]
    st.subheader(f"{factor} Factor")
    benchmark_cum_renamed = benchmark_cum.copy()
    benchmark_cum_renamed.name = "Nifty 50"
    combined = df[["Top_Cumulative", "Bottom_Cumulative"]].join(benchmark_cum_renamed, how="inner")
    st.line_chart(combined)


  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(symbol + ".NS", start=START_DATE, end=END_DATE, interval="1d", progress=False)
  df = yf.download(s