# Mutual Fund Backtest: Top 5 by 5-Year Rolling Return

This notebook performs a backtest on mutual fund data with the following rules:

1. Start investment on 01-01-2018 with 500,000 INR.
2. At each rebalance (every year), select the top 5 funds with the highest 5-year rolling return, **only considering funds with at least 5 years of NAV history at that point**.
3. Divide the investment equally among the selected funds.
4. If a fund drops out of the top 5, exit and reinvest in the new top fund.
5. Rebalance annually and calculate the final return as of the latest available date.

In [3]:
import pandas as pd
from datetime import timedelta

# Load data
mf_df = pd.read_csv('mf_data.csv', parse_dates=['date'])
# mf_df = mf_df.sort_values(['Fund', 'Date'])
mf_df.head()

Unnamed: 0.1,Unnamed: 0,Scheme Code,Scheme Name,Scheme Category,date,nav,daily_returns,cumulative_returns
0,0,119076,DSP Flexi Cap Fund,Equity Scheme - Flexi Cap Fund,2013-01-02,18.188,,
1,1,119076,DSP Flexi Cap Fund,Equity Scheme - Flexi Cap Fund,2013-01-03,18.233,0.002474,1.002474
2,2,119076,DSP Flexi Cap Fund,Equity Scheme - Flexi Cap Fund,2013-01-04,18.241,0.000439,1.002914
3,3,119076,DSP Flexi Cap Fund,Equity Scheme - Flexi Cap Fund,2013-01-07,18.234,-0.000384,1.002529
4,4,119076,DSP Flexi Cap Fund,Equity Scheme - Flexi Cap Fund,2013-01-08,18.231,-0.000165,1.002364


In [4]:
# Backtest parameters
start_date = pd.Timestamp('2018-01-01')
end_date = mf_df['date'].max()
initial_investment = 500000
rebalance_dates = pd.date_range(start=start_date, end=end_date, freq='12MS')

portfolio = {}
portfolio_value = initial_investment

for rebalance_date in rebalance_dates:
    five_years_ago = rebalance_date - pd.DateOffset(years=5)
    eligible_funds = []
    rolling_returns = {}
    # Only consider funds with NAV on or before both rebalance_date and five_years_ago
    for fund in mf_df['Scheme Name'].unique():
        fund_df = mf_df[mf_df['Scheme Name'] == fund]
        nav_now_row = fund_df[fund_df['date'] <= rebalance_date].tail(1)
        nav_past_row = fund_df[fund_df['date'] <= five_years_ago].tail(1)
        if not nav_now_row.empty and not nav_past_row.empty:
            nav_now = nav_now_row['nav'].values[0]
            nav_past = nav_past_row['nav'].values[0]
            rolling_return = (nav_now / nav_past) - 1
            eligible_funds.append(fund)
            rolling_returns[fund] = rolling_return
    # Select top 5 funds by rolling return
    top5 = sorted(rolling_returns, key=rolling_returns.get, reverse=True)[:5]
    # Get NAVs for rebalance date
    navs = {}
    for fund in top5:
        nav_row = mf_df[(mf_df['Scheme Name'] == fund) & (mf_df['date'] <= rebalance_date)].tail(1)
        if not nav_row.empty:
            navs[fund] = nav_row['nav'].values[0]
    # Rebalance: sell all, buy new funds equally
    portfolio = {}
    if navs:
        amount_per_fund = portfolio_value / len(navs)
        for fund, nav in navs.items():
            units = amount_per_fund / nav
            portfolio[fund] = {'units': units, 'last_nav': nav}
    # Update portfolio value for next rebalance
    if rebalance_date != rebalance_dates[-1]:
        next_date = rebalance_dates[rebalance_dates.get_loc(rebalance_date) + 1]
        for fund in list(portfolio.keys()):
            nav_row = mf_df[(mf_df['Scheme Name'] == fund) & (mf_df['date'] <= next_date)].tail(1)
            if not nav_row.empty:
                portfolio[fund]['last_nav'] = nav_row['nav'].values[0]
        portfolio_value = sum([v['units'] * v['last_nav'] for v in portfolio.values()])

# Calculate final value as of today
today = mf_df['date'].max()
for fund in list(portfolio.keys()):
    nav_row = mf_df[(mf_df['Scheme Name'] == fund) & (mf_df['date'] <= today)].tail(1)
    if not nav_row.empty:
        portfolio[fund]['last_nav'] = nav_row['nav'].values[0]
final_value = sum([v['units'] * v['last_nav'] for v in portfolio.values()])

print(f"Final portfolio value as of {today.date()}: {final_value:.2f} INR")
print(f"Total return: {((final_value/initial_investment - 1)*100):.2f}%")

Final portfolio value as of 2025-06-13: 1359268.94 INR
Total return: 171.85%


In [2]:
import pandas as pd

# Load data
mf_df = pd.read_csv('mf_data.csv', parse_dates=['date'])

# Backtest parameters
start_date = pd.Timestamp('2019-01-01')
end_date = mf_df['date'].max()
initial_investment = 500000
rebalance_dates = pd.date_range(start=start_date, end=end_date, freq='12MS')

portfolio = {}  # {fund: {'units': float, 'buy_nav': float, 'last_nav': float}}
portfolio_value = initial_investment
rebalance_log = []

for i, rebalance_date in enumerate(rebalance_dates):
    five_years_ago = rebalance_date - pd.DateOffset(years=5)
    rolling_returns = {}
    # Only consider funds with NAV on or before both rebalance_date and five_years_ago
    for fund in mf_df['Scheme Name'].unique():
        fund_df = mf_df[mf_df['Scheme Name'] == fund]
        nav_now_row = fund_df[fund_df['date'] <= rebalance_date].tail(1)
        nav_past_row = fund_df[fund_df['date'] <= five_years_ago].tail(1)
        if not nav_now_row.empty and not nav_past_row.empty:
            nav_now = nav_now_row['nav'].values[0]
            nav_past = nav_past_row['nav'].values[0]
            rolling_return = (nav_now / nav_past) - 1
            rolling_returns[fund] = rolling_return

    # Select top 5 funds by rolling return
    top5 = sorted(rolling_returns, key=rolling_returns.get, reverse=True)[:5]
    top5_set = set(top5)

    # Get NAVs for rebalance date
    navs = {}
    for fund in top5:
        nav_row = mf_df[(mf_df['Scheme Name'] == fund) & (mf_df['date'] <= rebalance_date)].tail(1)
        if not nav_row.empty:
            navs[fund] = nav_row['nav'].values[0]

    # Determine funds to sell and buy
    sold_funds = list(set(portfolio.keys()) - top5_set)
    bought_funds = list(top5_set - set(portfolio.keys()))

    # Calculate profit/loss for sold funds
    pl_report = []
    for fund in sold_funds:
        units = portfolio[fund]['units']
        buy_nav = portfolio[fund]['buy_nav']
        last_nav = portfolio[fund]['last_nav']
        pl = (last_nav - buy_nav) * units
        pl_report.append({'fund': fund, 'units': units, 'buy_nav': buy_nav, 'sell_nav': last_nav, 'profit_loss': pl})

    # Rebalance: sell all, buy new funds equally
    # Calculate current value of portfolio
    for fund in portfolio:
        nav_row = mf_df[(mf_df['Scheme Name'] == fund) & (mf_df['date'] <= rebalance_date)].tail(1)
        if not nav_row.empty:
            portfolio[fund]['last_nav'] = nav_row['nav'].values[0]
    portfolio_value = sum([v['units'] * v['last_nav'] for v in portfolio.values()]) if portfolio else portfolio_value

    # Allocate equally to top5
    new_portfolio = {}
    if navs:
        amount_per_fund = portfolio_value / len(navs)
        for fund, nav in navs.items():
            units = amount_per_fund / nav
            new_portfolio[fund] = {'units': units, 'buy_nav': nav, 'last_nav': nav}

    # Log this rebalance
    rebalance_log.append({
        'rebalance_date': rebalance_date.date(),
        'top5': [(fund, round(rolling_returns[fund]*100,2)) for fund in top5],
        'bought_funds': bought_funds,
        'sold_funds': sold_funds,
        'pl_report': pl_report,
        'portfolio_value': round(portfolio_value,2)
    })

    portfolio = new_portfolio

# Final update for last period
today = mf_df['date'].max()
for fund in list(portfolio.keys()):
    nav_row = mf_df[(mf_df['Scheme Name'] == fund) & (mf_df['date'] <= today)].tail(1)
    if not nav_row.empty:
        portfolio[fund]['last_nav'] = nav_row['nav'].values[0]
final_value = sum([v['units'] * v['last_nav'] for v in portfolio.values()])

# Print rebalance log
for i, log in enumerate(rebalance_log):
    print(f"\nRebalance {i+1} on {log['rebalance_date']}")
    print("Top 5 funds (5Y rolling return %):")
    for fund, ret in log['top5']:
        print(f"  {fund}: {ret:.2f}%")
    print(f"Bought funds: {log['bought_funds']}")
    print(f"Sold funds: {log['sold_funds']}")
    if log['pl_report']:
        print("Profit/Loss for sold funds:")
        for pl in log['pl_report']:
            print(f"  {pl['fund']}: Units={pl['units']:.2f}, Buy NAV={pl['buy_nav']:.2f}, Sell NAV={pl['sell_nav']:.2f}, P/L={pl['profit_loss']:.2f}")
    print(f"Portfolio value after rebalance: {log['portfolio_value']:.2f} INR")

print(f"\nFinal portfolio value as of {today.date()}: {final_value:.2f} INR")
print(f"Total return: {((final_value/initial_investment - 1)*100):.2f}%")


Rebalance 1 on 2019-01-01
Top 5 funds (5Y rolling return %):
  Kotak Flexicap Fund: 149.98%
  Aditya Birla Sun Life Flexi Cap Fund: 149.01%
  Quant Flexi Cap Fund: 148.06%
  SBI FLEXICAP FUND: 146.32%
  JM Flexicap Fund: 135.70%
Bought funds: ['JM Flexicap Fund', 'SBI FLEXICAP FUND', 'Kotak Flexicap Fund', 'Aditya Birla Sun Life Flexi Cap Fund', 'Quant Flexi Cap Fund']
Sold funds: []
Portfolio value after rebalance: 500000.00 INR

Rebalance 2 on 2020-01-01
Top 5 funds (5Y rolling return %):
  Motilal Oswal Flexi Cap Fund: 84.77%
  Kotak Flexicap Fund: 77.66%
  JM Flexicap Fund: 75.40%
  SBI FLEXICAP FUND: 74.58%
  Aditya Birla Sun Life Flexi Cap Fund: 72.85%
Bought funds: ['Motilal Oswal Flexi Cap Fund']
Sold funds: ['Quant Flexi Cap Fund']
Profit/Loss for sold funds:
  Quant Flexi Cap Fund: Units=3770.48, Buy NAV=26.52, Sell NAV=26.52, P/L=0.00
Portfolio value after rebalance: 549461.48 INR

Rebalance 3 on 2021-01-01
Top 5 funds (5Y rolling return %):
  Parag Parikh Flexi Cap Fund: 1

In [3]:
import csv

# Save rebalance log to a CSV file
with open('rebalance_log.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['rebalance_date', 'top5', 'bought_funds', 'sold_funds', 'pl_report', 'portfolio_value']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for log in rebalance_log:
        # Convert lists/dicts to string for CSV
        row = log.copy()
        row['top5'] = str(row['top5'])
        row['bought_funds'] = str(row['bought_funds'])
        row['sold_funds'] = str(row['sold_funds'])
        row['pl_report'] = str(row['pl_report'])
        writer.writerow(row)

print('Rebalance log saved to rebalance_log.csv')

Rebalance log saved to rebalance_log.csv


In [None]:
{
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "language": "python"
      },
      "source": [
        "import pandas as pd",
        "import json",
        "",
        "# Save rebalance log to a readable CSV and JSON file",
        "readable_rows = []",
        "for log in rebalance_log:",
        "    # Expand pl_report for each sold fund, or add a row with empty P/L if none sold",
        "    if log['pl_report']:",
        "        for pl in log['pl_report']:",
        "            readable_rows.append({",
        "                'rebalance_date': log['rebalance_date'],",
        "                'top5': ', '.join([f'{f} ({ret}%)' for f, ret in log['top5']]),",
        "                'bought_funds': ', '.join(log['bought_funds']),",
        "                'sold_funds': ', '.join(log['sold_funds']),",
        "                'sold_fund': pl['fund'],",
        "                'units': round(pl['units'], 2),",
        "                'buy_nav': round(pl['buy_nav'], 2),",
        "                'sell_nav': round(pl['sell_nav'], 2),",
        "                'profit_loss': round(pl['profit_loss'], 2),",
        "                'portfolio_value': log['portfolio_value']",
        "            })",
        "    else:",
        "        readable_rows.append({",
        "            'rebalance_date': log['rebalance_date'],",
        "            'top5': ', '.join([f'{f} ({ret}%)' for f, ret in log['top5']]),",
        "            'bought_funds': ', '.join(log['bought_funds']),",
        "            'sold_funds': ', '.join(log['sold_funds']),",
        "            'sold_fund': '',",
        "            'units': '',",
        "            'buy_nav': '',",
        "            'sell_nav': '',",
        "            'profit_loss': '',",
        "            'portfolio_value': log['portfolio_value']",
        "        })",
        "",
        "# Save as CSV",
        "pd.DataFrame(readable_rows).to_csv('rebalance_log_readable.csv', index=False)",
        "",
        "# Save as pretty JSON for further inspection if needed",
        "with open('rebalance_log_readable.json', 'w', encoding='utf-8') as f:",
        "    json.dump(rebalance_log, f, indent=2, ensure_ascii=False)",
        "",
        "print('Readable rebalance log saved to rebalance_log_readable.csv and rebalance_log_readable.json')"
      ]
    }
  ]
}{
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "language": "python"
      },
      "source": [
        "import pandas as pd",
        "import json",
        "",
        "# Save rebalance log to a readable CSV and JSON file",
        "readable_rows = []",
        "for log in rebalance_log:",
        "    # Expand pl_report for each sold fund, or add a row with empty P/L if none sold",
        "    if log['pl_report']:",
        "        for pl in log['pl_report']:",
        "            readable_rows.append({",
        "                'rebalance_date': log['rebalance_date'],",
        "                'top5': ', '.join([f'{f} ({ret}%)' for f, ret in log['top5']]),",
        "                'bought_funds': ', '.join(log['bought_funds']),",
        "                'sold_funds': ', '.join(log['sold_funds']),",
        "                'sold_fund': pl['fund'],",
        "                'units': round(pl['units'], 2),",
        "                'buy_nav': round(pl['buy_nav'], 2),",
        "                'sell_nav': round(pl['sell_nav'], 2),",
        "                'profit_loss': round(pl['profit_loss'], 2),",
        "                'portfolio_value': log['portfolio_value']",
        "            })",
        "    else:",
        "        readable_rows.append({",
        "            'rebalance_date': log['rebalance_date'],",
        "            'top5': ', '.join([f'{f} ({ret}%)' for f, ret in log['top5']]),",
        "            'bought_funds': ', '.join(log['bought_funds']),",
        "            'sold_funds': ', '.join(log['sold_funds']),",
        "            'sold_fund': '',",
        "            'units': '',",
        "            'buy_nav': '',",
        "            'sell_nav': '',",
        "            'profit_loss': '',",
        "            'portfolio_value': log['portfolio_value']",
        "        })",
        "",
        "# Save as CSV",
        "pd.DataFrame(readable_rows).to_csv('rebalance_log_readable.csv', index=False)",
        "",
        "# Save as pretty JSON for further inspection if needed",
        "with open('rebalance_log_readable.json', 'w', encoding='utf-8') as f:",
        "    json.dump(rebalance_log, f, indent=2, ensure_ascii=False)",
        "",
        "print('Readable rebalance log saved to rebalance_log_readable.csv and rebalance_log_readable.json')"
      ]
    }
  ]
}

{'cells': [{'cell_type': 'code',
   'metadata': {'language': 'python'},
   'source': ['import pandas as pd',
    'import json',
    '',
    '# Save rebalance log to a readable CSV and JSON file',
    'readable_rows = []',
    'for log in rebalance_log:',
    '    # Expand pl_report for each sold fund, or add a row with empty P/L if none sold',
    "    if log['pl_report']:",
    "        for pl in log['pl_report']:",
    '            readable_rows.append({',
    "                'rebalance_date': log['rebalance_date'],",
    "                'top5': ', '.join([f'{f} ({ret}%)' for f, ret in log['top5']]),",
    "                'bought_funds': ', '.join(log['bought_funds']),",
    "                'sold_funds': ', '.join(log['sold_funds']),",
    "                'sold_fund': pl['fund'],",
    "                'units': round(pl['units'], 2),",
    "                'buy_nav': round(pl['buy_nav'], 2),",
    "                'sell_nav': round(pl['sell_nav'], 2),",
    "                'pro