In [5]:
import itertools
import pandas as pd
import datetime

def backtest_mutual_funds(filename, start='2019-01-01', amount=1000000,
                           lookback_years=1, num_funds=5):

    df = pd.read_csv(filename, parse_dates=['date'])
    df = df.copy()

    transactions = []
    portfolio = {}
    annual = []

    start_dt = pd.to_datetime(start)

    for rebalance_dt in pd.date_range(start_dt, df.date.max(), freq='AS-JAN'):

        past_dt = rebalance_dt - pd.DateOffset(years=lookback_years)

        past_data = df[(df.date >= past_dt) & (df.date < rebalance_dt)]

        fund_returns = past_data.groupby('Scheme Code')['nav'].apply(lambda x: (x.iloc[-1] / x.iloc[0] - 1) * 100)

        top_funds = fund_returns.sort_values(ascending=False).head(num_funds).index

        total = amount if not portfolio else sum([qty * df[(df.date == rebalance_dt) & (df['Scheme Code'] == fund)]['nav'].values[0] for fund, qty in portfolio.items()])

        portfolio = {}

        for fund in top_funds:
            price = df[(df.date == rebalance_dt) & (df['Scheme Code'] == fund)]['nav'].values[0]
            qty = (total / num_funds) / price

            portfolio[fund] = qty

            transactions.append({
                'date': rebalance_dt,
                'type': 'buy',
                'fund': fund,
                'qty': qty,
                'price': price
            })

        annual.append({
            'date': rebalance_dt,
            'portfolio': total
        })

    CAGR = (annual[-1]['portfolio'] / annual[0]['portfolio']) ** (1 / (len(annual) / 1)) - 1

    return transactions, portfolio, annual, CAGR

def run_multiple_backtests(filename, start='2019-01-01', amount=1000000,
                           lookback_years_vals=[1, 2, 3, 4, 5], num_funds_vals=[1, 2, 3, 4, 5] ):

    results = []

    for lookback, num_funds in itertools.product(lookback_years_vals, num_funds_vals):

        transactions, portfolio, annual, CAGR = backtest_mutual_funds(filename,
                                      start=start,
                                      amount=amount,
                                      lookback_years=lookback,
                                      num_funds=num_funds)

        total_return = 100 * (annual[-1]['portfolio'] / annual[0]['portfolio'] - 1)

        results.append({
            'lookback_years': lookback,
            'num_funds': num_funds,
            'CAGR (%)': CAGR * 100,
            'total_return (%)': total_return
        })

    results_df = pd.DataFrame(results)
    results_df.to_csv("strategy_report.csv", index=False)

    return results_df

if __name__ =="__main__" :

    strategies = run_multiple_backtests("mf_data.csv",
                                   start='2019-01-01', amount=1000000,
                                   lookback_years_vals=[0.5, 1, 2, 3, 4, 5], num_funds_vals=[1, 2, 3, 4, 5])

    print("All backtests complete. Summary of strategies has been saved to strategy_report.csv")


  for rebalance_dt in pd.date_range(start_dt, df.date.max(), freq='AS-JAN'):


ValueError: Non-integer years and months are ambiguous and not currently supported.