In [58]:
import requests
import pandas as pd
import json
import numpy as np
import os
import matplotlib.pyplot as plt
import datetime

In [95]:
def read_mutual_fund_categories_amfi(category='Equity Scheme - Flexi Cap Fund'):
    """Download the AMFI scheme list and keep the Direct/Growth plans of one category.

    The CSV is read straight from the AMFI portal URL, so this call needs
    network access.

    Parameters
    ----------
    category : str, optional
        Exact value of the 'Scheme Category' column to keep. Defaults to
        'Equity Scheme - Flexi Cap Fund', the category that used to be
        hard-coded here.

    Returns
    -------
    pandas.DataFrame
        Rows whose 'Scheme Category' equals ``category`` and whose
        'Scheme NAV Name' contains both "direct" and "growth"
        (case-insensitive), with a fresh 0..n-1 index. Missing values in
        every column are replaced by empty strings.
    """
    df_mf_list = pd.read_csv('https://portal.amfiindia.com/DownloadSchemeData_Po.aspx?mf=0', header='infer')
    # Strip header names so the column lookups below do not depend on
    # leading/trailing whitespace in the downloaded file.
    df_mf_list.columns = df_mf_list.columns.str.strip()
    # Blank out NaN so the .str.contains() masks below are strictly boolean.
    df_mf_list = df_mf_list.replace(np.nan, '')

    nav_name = df_mf_list['Scheme NAV Name']
    keep = (
        (df_mf_list['Scheme Category'] == category)
        & nav_name.str.contains("direct", case=False)
        & nav_name.str.contains("growth", case=False)
    )
    return df_mf_list[keep].reset_index(drop=True)

def get_mf_price_data_mfapi(code, timeout=30):
    """Fetch the NAV history of one scheme from api.mfapi.in and add return columns.

    Parameters
    ----------
    code : int or str
        Scheme code, interpolated into the ``/mf/{code}`` URL.
    timeout : float, optional
        Seconds to wait for the HTTP response, so a stalled connection cannot
        hang the download loop indefinitely.

    Returns
    -------
    pandas.DataFrame
        The records found under the payload's ``'data'`` key, sorted by
        ascending ``date``, with:
        ``date`` parsed from ``dd-mm-YYYY``, ``nav`` cast to float,
        ``daily_returns`` (``nav.pct_change()``; NaN on the first row) and
        ``cumulative_returns`` (running product of ``1 + daily_returns``).
        If the payload carries no records, an empty frame with those four
        columns is returned instead of failing on a missing ``'date'`` column.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    mf_data_response = requests.get(
        f"https://api.mfapi.in/mf/{code}", allow_redirects=True, timeout=timeout
    )
    # Fail on HTTP errors here rather than while parsing an error page.
    mf_data_response.raise_for_status()
    payload = json.loads(mf_data_response.content.decode('utf-8'))

    records = payload.get('data') or []
    if not records:
        return pd.DataFrame(columns=['date', 'nav', 'daily_returns', 'cumulative_returns'])

    mf_data_df = pd.DataFrame(records)
    mf_data_df['date'] = pd.to_datetime(mf_data_df['date'], format='%d-%m-%Y')
    mf_data_df['nav'] = mf_data_df['nav'].astype(float)
    # Returns are computed row-over-row, so the rows must be in date order first.
    mf_data_df = mf_data_df.sort_values('date')
    mf_data_df['daily_returns'] = mf_data_df['nav'].pct_change()
    mf_data_df['cumulative_returns'] = (mf_data_df['daily_returns'] + 1).cumprod()
    return mf_data_df

In [24]:
def read_mutual_fund_list_mfapi(timeout=30):
    """Download the scheme list from api.mfapi.in as a DataFrame.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the HTTP response.

    Returns
    -------
    pandas.DataFrame
        ``pd.json_normalize`` of the decoded JSON payload (one row per
        top-level record). The frame used to be built and then discarded,
        so callers always received ``None``; it is now returned.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = requests.get("https://api.mfapi.in/mf", allow_redirects=True, timeout=timeout)
    response.raise_for_status()
    data = json.loads(response.content.decode('utf-8'))
    return pd.json_normalize(data)

In [146]:
def _cagr_since_cutoff(group_sorted, cutoff, no_of_years):
    """Return one scheme's CAGR (%) from its first NAV on/after ``cutoff`` to its latest NAV, or 0 if that cannot be computed."""
    dates = group_sorted['date']
    # History starts on/after the cutoff: the scheme does not cover the full period.
    if dates.min() >= cutoff:
        return 0
    navs_in_period = group_sorted.loc[dates >= cutoff, 'nav']
    # History exists but ends before the cutoff (no recent NAV): there is no
    # start value to measure from. Indexing row 0 here used to raise IndexError.
    if navs_in_period.empty:
        return 0
    start_val = navs_in_period.iloc[0]
    end_val = group_sorted['nav'].iloc[-1]
    total_return = (end_val / start_val - 1) * 100
    return ((1 + total_return / 100) ** (1 / no_of_years) - 1) * 100


def calculate_return_by_timeframe(returns_df, df, no_of_years, as_of=None):
    """Add a '<N>Y CAGR %' column (one row per scheme) to ``returns_df``.

    Parameters
    ----------
    returns_df : pandas.DataFrame
        Frame to write into. It is modified in place: the 'scheme_code',
        'scheme_name' and '<N>Y CAGR %' columns are (re)assigned. It must be
        empty or already hold one row per scheme of ``df``.
    df : pandas.DataFrame
        NAV history with 'Scheme Code', 'Scheme Name', 'date' (datetime) and
        'nav' columns.
    no_of_years : int or float
        Look-back length. The cutoff is ``no_of_years * 365`` days before the
        reference time, and the same number is the CAGR exponent divisor.
    as_of : datetime-like, optional
        Reference time for the look-back. Defaults to the current time,
        which is what was always used before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        The same ``returns_df`` object, with rows in ``df.groupby('Scheme Code')``
        order. A scheme's CAGR is 0 when its history starts on/after the cutoff
        or when it has no NAV on/after the cutoff.
    """
    reference = datetime.datetime.now() if as_of is None else pd.Timestamp(as_of)
    cutoff = reference - datetime.timedelta(days=no_of_years * 365)
    col_name_cagr = f'{no_of_years}Y CAGR %'

    scheme_codes, scheme_names, cagr_values = [], [], []
    for scheme, group in df.groupby('Scheme Code'):
        scheme_codes.append(scheme)
        scheme_names.append(group.iloc[0]['Scheme Name'])
        cagr_values.append(_cagr_since_cutoff(group.sort_values('date'), cutoff, no_of_years))

    # Assign whole columns at once; every list has one entry per scheme.
    returns_df['scheme_code'] = scheme_codes
    returns_df['scheme_name'] = scheme_names
    returns_df[col_name_cagr] = cagr_values
    return returns_df

In [149]:
if __name__ == "__main__":
    # Build (or load from the local CSV cache) the combined NAV history of every
    # selected scheme, then rank the schemes by 1/3/5-year CAGR.

    # Columns of the combined NAV table; also the schema of the CSV cache.
    nav_columns = ['Scheme Code', 'Scheme Name', 'Scheme Category', 'date', 'nav', 'daily_returns', 'cumulative_returns']

    all_funds = []
    # NOTE(review): this download runs even when the cache below is used. It is
    # kept unconditional because cells outside this one may reference
    # `mutual_fund_list` — confirm before moving it into the else branch.
    mutual_fund_list = read_mutual_fund_categories_amfi()
    if os.path.exists('mf_data.csv'):
        print('file already exist')
        final_df = pd.read_csv("mf_data.csv")
    else:
        for index, row in mutual_fund_list.iterrows():
            code = row['Code']
            mf_data_df = get_mf_price_data_mfapi(code)
            mf_data_df['Scheme Code'] = code
            mf_data_df['Scheme Name'] = row['Scheme Name']
            mf_data_df['Scheme Category'] = row['Scheme Category']
            all_funds.append(mf_data_df)
            print(f"{code} - {row['Scheme Name']}")
        if not all_funds:
            # pd.concat([]) raises ValueError anyway; say why instead.
            raise ValueError("No schemes matched the filter; there is no NAV data to combine")
        final_df = pd.concat(all_funds, ignore_index=True)
        # index=False: a saved index comes back from read_csv as an extra 'Unnamed: 0' column.
        final_df[nav_columns].to_csv("mf_data.csv", index=False)

    # Select the known columns in both branches so a cache written with the
    # index (older runs) yields the same schema as a fresh download.
    final_df = final_df[nav_columns]
    # Dates come back from CSV as strings; parse them before any date comparison.
    final_df['date'] = pd.to_datetime(final_df['date'])

    returns_df = pd.DataFrame()
    for years in (1, 3, 5):
        returns_df = calculate_return_by_timeframe(returns_df, final_df, years)
    returns_df = returns_df.sort_values('5Y CAGR %', ascending=False, ignore_index=True)


file already exist


In [150]:
# Display the summary table assembled in the previous cell.
returns_df

Unnamed: 0,scheme_code,scheme_name,1Y CAGR %,3Y CAGR %,5Y CAGR %
0,120843,Quant Flexi Cap Fund,-6.235361,24.782992,34.072398
1,118955,HDFC Flexi Cap Fund,11.502581,27.83203,31.506242
2,120492,JM Flexicap Fund,-2.692328,29.315692,29.06541
3,118535,Franklin India Flexi Cap Fund,8.169368,24.708099,28.178148
4,122639,Parag Parikh Flexi Cap Fund,12.753375,25.050547,27.636505
5,140353,Edelweiss Flexi Cap Fund,4.676111,24.809888,27.219686
6,120046,HSBC Flexi Cap Fund,6.129176,25.170153,26.145179
7,133839,PGIM India Flexi Cap Fund,7.962771,18.430912,25.190722
8,119292,Union Flexi Cap Fund,5.57956,21.02981,24.577528
9,120564,Aditya Birla Sun Life Flexi Cap Fund,9.274731,22.211754,24.491078


In [None]:
# Convert date column to datetime first
# NOTE(review): `df` is not assigned in any cell visible above this one — presumably
# it is the combined NAV table (`final_df`); confirm, otherwise this cell fails on a
# fresh kernel.
df['date'] = pd.to_datetime(df['date'])

# Sort by Scheme Code and then by date
# (sort_values returns a new frame, which is rebound to `df`.)
df = df.sort_values(['Scheme Code', 'date'])

# We'll filter for funds which have at least 5yrs of history at each rebalance
def fund_5yrs_history(scheme_df, rebalance_dt, years=5):
    """Check whether a fund has at least ``years`` years of history at ``rebalance_dt``.

    Parameters
    ----------
    scheme_df : pandas.DataFrame
        Rows of a single scheme with a datetime 'date' column.
    rebalance_dt : pandas.Timestamp
        Date at which the history requirement is evaluated.
    years : int, optional
        Required length of history in whole years (default 5).

    Returns
    -------
    bool
        True only if the earliest row is dated on or before
        ``rebalance_dt - years`` AND at least one row falls inside
        ``[rebalance_dt - years, rebalance_dt]``. The earlier version tested
        only the second condition, so a fund launched shortly before the
        rebalance date was wrongly accepted.
    """
    start_dt = rebalance_dt - pd.DateOffset(years=years)
    dates = scheme_df['date']
    if dates.empty:
        return False
    launched_in_time = dates.min() <= start_dt
    has_data_in_window = dates.between(start_dt, rebalance_dt).any()
    return bool(launched_in_time and has_data_in_window)

# We'll create a helper to compute 5yrs return
def fund_5yrs_return(scheme_df, rebalance_dt, years=5):
    """Calculate a fund's total return over the ``years`` years ending at ``rebalance_dt``.

    Parameters
    ----------
    scheme_df : pandas.DataFrame
        Rows of a single scheme with 'date' (datetime) and 'nav' columns.
        Rows may be in any order.
    rebalance_dt : pandas.Timestamp
        End of the measurement window.
    years : int, optional
        Window length in whole years (default 5).

    Returns
    -------
    float
        ``last_nav / first_nav - 1`` over the rows dated within
        ``[rebalance_dt - years, rebalance_dt]`` (both ends inclusive), or
        ``float('-inf')`` when no row falls in that window.
    """
    start_dt = rebalance_dt - pd.DateOffset(years=years)
    in_window = scheme_df['date'].between(start_dt, rebalance_dt)
    # Sort inside the window: first/last NAV are picked by position, which is
    # only correct for date-ordered rows. A stable sort keeps the existing
    # order of rows that share a date.
    past_vals = scheme_df.loc[in_window].sort_values('date', kind='stable')

    if past_vals.empty:
        return float('-inf')
    start_val = past_vals['nav'].iloc[0]
    end_val = past_vals['nav'].iloc[-1]

    return (end_val / start_val) - 1

# Yearly rebalance schedule: 2018-01-01 and each following anniversary, up to
# (and including) the latest date present in `df`.
import datetime

start = pd.to_datetime('2018-01-01')
end = df['date'].max()

yearly_step = pd.DateOffset(years=1)
rebalance_days = []
current = start
while current <= end:
    rebalance_days.append(current)
    current = current + yearly_step

rebalance_days