In [None]:
import pandas as pd
import json

# Yearly top-3 momentum backtest.
#
# Every rebalance date the funds are ranked by trailing one-year NAV return,
# everything outside the top three is sold, and the whole portfolio (cash plus
# the positions that stay) is split equally across the top-ranked funds.
df = pd.read_csv('mf_data.csv', parse_dates=['date'])

start_date = pd.Timestamp('2019-01-01')
end_date = df['date'].max()
initial_investment = 1000000
# '12MS' steps twelve month-starts at a time, i.e. one rebalance per year.
rebalance_dates = pd.date_range(start=start_date, end=end_date, freq='12MS')

# Split the NAV table per fund once instead of re-filtering the whole frame
# for every fund on every rebalance date.
fund_names = df['Scheme Name'].unique()
fund_history = {fund: df[df['Scheme Name'] == fund] for fund in fund_names}


def latest_nav(fund, as_of):
    """Return the last NAV of `fund` dated on or before `as_of`, or None.

    NOTE(review): "last" means last in file order, so this assumes each fund's
    rows are stored chronologically in the CSV -- confirm, or sort on load.
    """
    history = fund_history[fund]
    rows = history[history['date'] <= as_of]
    if rows.empty:
        return None
    return rows['nav'].values[-1]


portfolio = {}
rebalance_log = []
cash = initial_investment

for rebalance_date in rebalance_dates:
    # Funds are ranked on their return over the trailing one-year window.
    lookback_date = rebalance_date - pd.DateOffset(years=1)

    rolling_returns = {}
    current_navs = {}
    for fund in fund_names:
        nav_now = latest_nav(fund, rebalance_date)
        nav_past = latest_nav(fund, lookback_date)
        if nav_now is None or nav_past is None:
            continue
        rolling_returns[fund] = (nav_now / nav_past) - 1
        current_navs[fund] = nav_now

    top3 = sorted(rolling_returns, key=rolling_returns.get, reverse=True)[:3]
    top3_set = set(top3)
    navs = {fund: current_navs[fund] for fund in top3}

    # Value the portfolio as it stands before any trade is made.
    portfolio_value = cash
    portfolio_value_breakdown = []
    for fund, pos in portfolio.items():
        nav = latest_nav(fund, rebalance_date)
        pos['last_nav'] = nav
        value = pos['units'] * nav if nav is not None else 0
        portfolio_value += value
        portfolio_value_breakdown.append({
            'fund': fund,
            'units': pos['units'],
            'nav': nav,
            'value': value
        })
    if cash > 0:
        portfolio_value_breakdown.append({'fund': 'CASH', 'units': '', 'nav': '', 'value': cash})

    # Sell everything that dropped out of the top list. Iterating the dict
    # (rather than a set difference) keeps the log order reproducible.
    sold_funds = [fund for fund in portfolio if fund not in top3_set]
    pl_report = []
    for fund in sold_funds:
        pos = portfolio.pop(fund)
        sell_nav = pos['last_nav']
        if sell_nav is not None:
            cash += pos['units'] * sell_nav
            pl = (sell_nav - pos['buy_nav']) * pos['units']
        else:
            pl = None
        pl_report.append({
            'fund': fund,
            'units': pos['units'],
            'buy_nav': pos['buy_nav'],
            'buy_date': str(pos['buy_date']),
            'sell_nav': sell_nav,
            'sell_date': str(rebalance_date),
            'profit_loss': pl
        })

    # Whatever is still in `portfolio` is a top-ranked fund that stays; its
    # value is pooled with the cash and re-split equally.
    held_value = sum(pos['units'] * pos['last_nav'] for pos in portfolio.values())
    total_to_invest = cash + held_value

    bought_funds = []
    if top3:
        # Split across the funds actually ranked. The previous code invested
        # nothing unless exactly three funds ranked, yet still zeroed the cash,
        # which wiped out the whole capital.
        amount_per_fund = total_to_invest / len(top3)
        new_portfolio = {}
        for fund in top3:
            nav = navs[fund]
            existing = portfolio.get(fund)
            is_new = existing is None
            new_portfolio[fund] = {
                'units': amount_per_fund / nav,
                # A fund that stays keeps its original purchase price and date.
                'buy_nav': nav if is_new else existing['buy_nav'],
                'buy_date': rebalance_date if is_new else existing['buy_date'],
                'last_nav': nav
            }
            if is_new:
                bought_funds.append(fund)
        portfolio = new_portfolio
        cash = 0
    # else: nothing could be ranked, every position was sold above and the
    # proceeds simply stay in cash until the next rebalance.

    rebalance_log.append({
        'rebalance_date': str(rebalance_date.date()),
        'top3': [(fund, round(rolling_returns[fund]*100,2)) for fund in top3],
        'bought_funds': bought_funds,
        'sold_funds': sold_funds,
        'pl_report': pl_report,
        'portfolio_value': round(cash + sum([v['units'] * v['last_nav'] for v in portfolio.values()]),2),
        'portfolio_value_breakdown': portfolio_value_breakdown
    })

# The headline figure is reported "as of" the last date in the data, so value
# the positions at their latest NAV (not the NAV frozen at the last rebalance)
# and include any uninvested cash.
today = df['date'].max()
final_value = cash + sum([pos['units'] * latest_nav(fund, today) for fund, pos in portfolio.items()])
print(f"\nFinal portfolio value as of {today.date()}: {final_value:.2f} INR")
print(f"Total return: {((final_value/initial_investment - 1)*100):.2f}%")

# Save readable rebalance log: one CSV row per sold position, or a single row
# with blank sale columns when nothing was sold on that date.
SALE_COLUMNS = ('sold_fund', 'units', 'buy_nav', 'buy_date', 'sell_nav', 'sell_date', 'profit_loss')
readable_rows = []
for log in rebalance_log:
    header = {
        'rebalance_date': log['rebalance_date'],
        'top3': ', '.join([f'{f} ({ret}%)' for f, ret in log['top3']]),
        'bought_funds': ', '.join(log['bought_funds']),
        'sold_funds': ', '.join(log['sold_funds']),
    }
    footer = {
        'portfolio_value': log['portfolio_value'],
        'portfolio_value_breakdown': json.dumps(log['portfolio_value_breakdown']),
    }
    sale_rows = [
        {
            'sold_fund': pl['fund'],
            'units': round(pl['units'], 2),
            'buy_nav': round(pl['buy_nav'], 2),
            'buy_date': pl['buy_date'],
            'sell_nav': round(pl['sell_nav'], 2) if pl['sell_nav'] is not None else '',
            'sell_date': pl['sell_date'],
            'profit_loss': round(pl['profit_loss'], 2) if pl['profit_loss'] is not None else '',
        }
        for pl in log['pl_report']
    ] or [dict.fromkeys(SALE_COLUMNS, '')]
    for sale in sale_rows:
        # Key order fixes the CSV column order: header, sale details, totals.
        readable_rows.append({**header, **sale, **footer})

pd.DataFrame(readable_rows).to_csv('rebalance_log_readable.csv', index=False)
def convert_dates(obj):
    """Return a copy of `obj` with date-like values turned into ISO strings.

    Dicts and lists are rebuilt recursively (dict keys are kept as they are).
    Any other value that exposes an `isoformat` attribute is replaced by the
    result of calling it; everything else is returned unchanged. Other
    containers, such as tuples, are not descended into.
    """
    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            converted[key] = convert_dates(value)
        return converted

    if isinstance(obj, list):
        return list(map(convert_dates, obj))

    if hasattr(obj, 'isoformat'):
        return obj.isoformat()

    return obj

# Run the log through convert_dates so any value exposing `isoformat` becomes a
# plain string, then write it out as pretty-printed UTF-8 JSON.
rebalance_log_serializable = convert_dates(rebalance_log)
with open('rebalance_log_readable.json', mode='w', encoding='utf-8') as f:
    f.write(json.dumps(rebalance_log_serializable, indent=2, ensure_ascii=False))
print('Readable rebalance log saved to rebalance_log_readable.csv and rebalance_log_readable.json')


In [7]:
import pandas as pd
import json

# Load the NAV table; the 'date' column is parsed to datetimes so it can be
# compared against pandas Timestamps in the cells below.
df = pd.read_csv('mf_data.csv', parse_dates=['date'])

In [23]:
# --- Strategy parameters ---
initial_investment = 1_000_000
no_of_funds = 3  # how many top-ranked funds to hold
period = 1  # length, in years, of the trailing-return lookback

# --- Backtest window ---
start_date = pd.Timestamp('2019-01-01')
end_date = df['date'].max()
# '12MS' steps twelve month-starts at a time: one rebalance per year.
rebalance_dates = pd.date_range(start=start_date, end=end_date, freq='12MS')

# --- Mutable state, updated as the backtest runs ---
cash = initial_investment
portfolio = {}
rebalance_log = []

In [24]:
# For every rebalance date: rank all funds by their return over the trailing
# `period` years, keep the best `no_of_funds`, and look up their current NAVs.
for i, rebalance_date in enumerate(rebalance_dates):
    previous_period = rebalance_date - pd.DateOffset(years=period)

    rolling_returns = {}
    for fund in df['Scheme Name'].unique():
        history = df.loc[df['Scheme Name'] == fund]
        navs_to_date = history.loc[history['date'] <= rebalance_date, 'nav']
        navs_to_lookback = history.loc[history['date'] <= previous_period, 'nav']
        # A fund needs a NAV at both ends of the window to be ranked.
        if navs_to_date.empty or navs_to_lookback.empty:
            continue
        rolling_returns[fund] = (navs_to_date.iloc[-1] / navs_to_lookback.iloc[-1]) - 1

    ranked = sorted(rolling_returns.items(), key=lambda item: item[1], reverse=True)
    top = [fund for fund, _ in ranked[:no_of_funds]]
    top_set = set(top)

    navs = {}
    for fund in top:
        is_current = (df['Scheme Name'] == fund) & (df['date'] <= rebalance_date)
        fund_navs = df.loc[is_current, 'nav']
        if len(fund_navs) > 0:
            navs[fund] = fund_navs.iloc[-1]

    # TODO: portfolio valuation (cash plus a per-fund breakdown) is not
    # implemented in this cell yet.

In [25]:
# Display the NAVs looked up for the top funds on the last rebalance date
# processed by the loop above.
navs

{'Motilal Oswal Flexi Cap Fund': np.float64(70.9024),
 'Invesco India Flexi Cap Fund': np.float64(19.9),
 'JM Flexicap Fund': np.float64(118.308)}

In [50]:
import pandas as pd

def backtest_mutual_funds(filename, start='2019-01-01', amount=1000000, lookback_years=3, num_funds=5):
    df = pd.read_csv(filename, parse_dates=['date'])
    df = df.sort_values(['Scheme Name', 'date'])

    portfolio = amount
    holdings = {}  # fund_name -> (qty, buy_price)
    portfolio_history = []  # to track portfolio over time
    transaction_log = []  # to track transactions
    
    rebalance_dates = pd.date_range(start, df['date'].max(), freq='AS-JAN')

    for rebalance in rebalance_dates:

        if rebalance < pd.to_datetime(start):
            continue
        total = 0
        # Sell holdings first
        for fund, (qty, buy) in holdings.items():
            last_nav = df[(df['Scheme Name'] == fund) & (df['date'] <= rebalance)].sort_values('date').iloc[-1]['nav']
            sell = qty * last_nav
            profit = sell - qty * buy

            transaction_log.append({
                'date': rebalance,
                'transaction': 'sell',
                'fund': fund,
                'qty': qty,
                'sold_at': last_nav,
                'profit': profit
            })

            total += sell
            portfolio = total
            
        holdings = {}

        # Look back to select funds
        lookback = rebalance - pd.DateOffset(years=lookback_years)

        df_lookback = df[(df['date'] >= lookback) & (df['date'] < rebalance)]

        returns = []

        for fund, group in df_lookback.groupby('Scheme Name'):
            group = group.sort_values('date')

            if len(group) < 2:
                continue

            ret = (group.iloc[-1]['nav'] / group.iloc[0]['nav']) - 1

            returns.append((fund, ret))

        top_funds = sorted(returns, key=lambda x: x[1], reverse=True)[:num_funds]

        fund_names = [f[0] for f in top_funds]

        allocation = float(portfolio) / num_funds

        for fund in fund_names:
            fund_nav = df[(df['Scheme Name'] == fund) & (df['date'] <= rebalance)].sort_values('date').iloc[-1]['nav']

            if fund_nav <= 0 or pd.isnull(fund_nav):
                continue

            qty = round(float(allocation) / float(fund_nav), 6)

            holdings[fund] = (qty, fund_nav)

            transaction_log.append({
                'date': rebalance,
                'transaction': 'buy',
                'fund': fund,
                'qty': qty,
                'bought_at': fund_nav
            })

        portfolio_history.append({
            'date': rebalance,
            'portfolio': portfolio,
            'holdings': holdings.copy(),
        })

    transactions_df = pd.DataFrame(transaction_log)
    portfolio_df = pd.DataFrame(portfolio_history)

    transactions_df.to_csv("transaction_report.csv", index=False)
    portfolio_df.to_csv("backtest_report.csv", index=False)

    return transactions_df, portfolio_df

if __name__ == "__main__":
    # Single-fund run: each year, hold only the best performer of the past year.
    transactions, portfolio = backtest_mutual_funds(
        "mf_data.csv",
        start='2019-01-01',
        amount=1000000,
        lookback_years=1,
        num_funds=1,
    )

    print("Backtesting complete. Transactions and portfolio reports have been saved.")


  rebalance_dates = pd.date_range(start, df['date'].max(), freq='AS-JAN')


1000000
1187387.3873862
1443243.2432418
2269526.075677392
2550378.2643173793
3354656.727414291
4312714.235762865
Backtesting complete. Transactions and portfolio reports have been saved.


In [34]:
# NOTE(review): `transactions_df` only exists as a local variable inside
# backtest_mutual_funds; the __main__ cell binds the returned frame to
# `transactions`, so this lookup raises NameError.
transactions_df

NameError: name 'transactions_df' is not defined

In [53]:
import pandas as pd

def backtest_mutual_funds(filename, start='2019-01-01', amount=1000000, lookback_years=3, num_funds=5):
    df = pd.read_csv(filename, parse_dates=['date'])
    df = df.sort_values(['Scheme Name', 'date'])

    portfolio = amount
    holdings = {}  # fund_name -> {qty, buy, buy_date}
    portfolio_history = []  # to track portfolio over time
    transaction_log = []  # to track transactions

    rebalance_dates = pd.date_range(start, df['date'].max(), freq='AS-JAN')

    for rebalance in rebalance_dates:

        if rebalance < pd.to_datetime(start):
            continue

        lookback = rebalance - pd.DateOffset(years=lookback_years)

        df_lookback = df[(df['date'] >= lookback) & (df['date'] < rebalance)]

        returns = []

        for fund, group in df_lookback.groupby('Scheme Name'):
            group = group.sort_values('date')

            if len(group) < 2:
                continue

            ret = (group.iloc[-1]['nav'] / group.iloc[0]['nav']) - 1

            returns.append((fund, ret))

        top_funds = sorted(returns, key=lambda x: x[1], reverse=True)[:num_funds]

        fund_names = [f[0] for f in top_funds]

        to_sell = [f for f in holdings if f not in fund_names]

        total = portfolio

        for fund in to_sell:
            qty, buy, buy_dt = holdings[fund].values()

            last_nav = df[(df['Scheme Name'] == fund) & (df['date'] <= rebalance)].sort_values('date').iloc[-1]['nav']

            sell = qty * last_nav

            transaction_log.append({
                'transaction': 'sell',
                'fund': fund,
                'qty': qty,
                'sold_at': last_nav,
                'sold_on': rebalance,
                'bought_at': buy,
                'bought_on': buy_dt,
                'profit': sell - qty * buy
            })

            total += sell

            holdings.pop(fund)

        allocation = float(total) / len(fund_names)

        for fund in fund_names:

            fund_nav = df[(df['Scheme Name'] == fund) & (df['date'] <= rebalance)].sort_values('date').iloc[-1]['nav']

            if fund not in holdings:

                qty = round(float(allocation) / float(fund_nav), 6)

                holdings[fund] = {
                    'qty': qty,
                    'buy': fund_nav,
                    'buy_dt': rebalance
                }

                transaction_log.append({
                    'transaction': 'buy',
                    'fund': fund,
                    'qty': qty,
                    'bought_at': fund_nav,
                    'bought_on': rebalance
                })

        portfolio_history.append({
            'date': rebalance,
            'portfolio': total,
            'holdings': holdings.copy(),
        })

    transactions_df = pd.DataFrame(transaction_log)
    portfolio_df = pd.DataFrame(portfolio_history)

    portfolio_df = portfolio_df.sort_values('date')

    annual = portfolio_df.copy()
    annual['Year'] = annual['date'].dt.year
    annual = annual.groupby('Year')['portfolio'].last().reset_index()

    annual['annual_return_%'] = annual['portfolio'].pct_change() * 100

    years_passed = annual.iloc[-1]['Year'] - annual.iloc[0]['Year']

    CAGR = (annual.iloc[-1]['portfolio'] / annual.iloc[0]['portfolio']) ** (1 / years_passed) - 1

    annual.to_csv("annual_report.csv", index=False)

    transactions_df.to_csv("transaction_report.csv", index=False)
    portfolio_df.to_csv("backtest_report.csv", index=False)

    return transactions_df, portfolio_df, annual, CAGR

if __name__ == "__main__":
    # Five-fund run ranked on a three-year lookback.
    transactions, portfolio, annual, CAGR = backtest_mutual_funds(
        "mf_data.csv",
        start='2019-01-01',
        amount=1000000,
        lookback_years=3,
        num_funds=5,
    )

    print("Backtesting complete. Transactions, portfolio, and annual reports have been saved.")
    print(f"CAGR: {CAGR * 100:.2f}%")


  rebalance_dates = pd.date_range(start, df['date'].max(), freq='AS-JAN')


Backtesting complete. Transactions, portfolio, and annual reports have been saved.
CAGR: 20.36%
