In [28]:
import pandas as pd
# import yfinance as yf

In [29]:
def fill_missing_returns(df):
    """Fill missing annualized returns with (column average - tracking error).

    For each of the 1yr / 3yr / 5yr annualized return columns, the column
    average is computed over the non-missing values, and every missing entry
    is replaced with that average minus the row's 'Tracking Error'.

    Values are coerced to numeric first, so string cells such as '12.5' are
    parsed and unparseable cells (e.g. '-') are treated as missing. This lets
    the function work on a freshly loaded frame whose columns are still
    strings.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Tracking Error' and the three
        '<N>yr Annualized Return' columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object; the three return columns are overwritten
        in place.
    """
    return_columns = (
        '1yr Annualized Return',
        '3yr Annualized Return',
        '5yr Annualized Return',
    )

    # Parsed locally only; the 'Tracking Error' column itself is not modified.
    tracking_error = pd.to_numeric(df['Tracking Error'], errors='coerce')

    # Parse every return column before writing anything back, so a missing
    # column raises KeyError without leaving df partially updated.
    parsed = {
        column: pd.to_numeric(df[column], errors='coerce')
        for column in return_columns
    }

    for column, returns in parsed.items():
        # Series.mean() skips NaN, so the average covers known values only.
        fallback = returns.mean() - tracking_error
        # Keep known values; take the fallback only where the return is missing.
        df[column] = returns.where(returns.notna(), fallback)

    return df


In [30]:
def convert_to_float(df):
    """Parse string cells of the numeric fund columns into numbers, in place.

    Only cells that are ``str`` instances are parsed (unparseable strings
    become NaN); every other cell is passed through unchanged. 'AUM' strings
    additionally have commas removed before parsing.

    Returns the same ``df`` object that was passed in.
    """
    def parse_plain(cell):
        # Non-string cells are left exactly as they are.
        if not isinstance(cell, str):
            return cell
        return pd.to_numeric(cell, errors='coerce')

    def parse_with_commas(cell):
        # Same as parse_plain, but drops ',' from the text first.
        if not isinstance(cell, str):
            return cell
        return pd.to_numeric(cell.replace(',', ''), errors='coerce')

    column_parsers = (
        ('Expense ratio', parse_plain),
        ('Tracking Error', parse_plain),
        ('AUM', parse_with_commas),
        ('1yr Annualized Return', parse_plain),
        ('3yr Annualized Return', parse_plain),
        ('5yr Annualized Return', parse_plain),
    )
    for column, parser in column_parsers:
        df[column] = df[column].apply(parser)
    return df

def _min_max_scale(values, higher_is_better=True):
    """Min-max scale a numeric Series.

    With ``higher_is_better=True`` the result is ``(x - min) / (max - min)``;
    otherwise it is ``(max - x) / (max - min)`` so that smaller inputs score
    higher.

    When every non-missing value is identical (``max == min``) the formula
    would be 0/0; in that case non-missing entries get 0.0 and missing
    entries stay NaN.
    """
    lowest = values.min()
    highest = values.max()
    spread = highest - lowest
    if spread == 0:
        # Degenerate range: the column cannot discriminate between rows.
        return pd.Series(0.0, index=values.index).where(values.notna())
    if higher_is_better:
        return (values - lowest) / spread
    return (highest - values) / spread


def normalize_data(df):
    """Add min-max normalized score columns to ``df`` (in place).

    Columns written:
      * 'Expense_ratio_normalized'  - inverted, lowest expense ratio scores 1
      * 'Tracking_Error_normalized' - inverted, lowest tracking error scores 1
      * 'AUM_normalized'            - largest AUM scores 1
      * 'Return_normalized'         - mean of the normalized 1yr, 3yr and 5yr
                                      annualized returns

    A column whose values are all equal contributes 0.0 instead of NaN, so a
    single-fund frame or a constant column no longer turns every downstream
    score into NaN.

    Returns the same ``df`` object that was passed in.
    """
    df['Expense_ratio_normalized'] = _min_max_scale(df['Expense ratio'], higher_is_better=False)
    df['Tracking_Error_normalized'] = _min_max_scale(df['Tracking Error'], higher_is_better=False)
    df['AUM_normalized'] = _min_max_scale(df['AUM'])

    # Average of the three normalized return horizons.
    df['Return_normalized'] = (
        _min_max_scale(df['1yr Annualized Return']) +
        _min_max_scale(df['3yr Annualized Return']) +
        _min_max_scale(df['5yr Annualized Return'])
    ) / 3

    return df

# Default weight applied to each normalized column when building 'Total_Score'.
_DEFAULT_SCORE_WEIGHTS = {
    'Expense_ratio_normalized': 0.35,
    'Tracking_Error_normalized': 0.25,
    'AUM_normalized': 0.15,
    'Return_normalized': 0.25,
}


def calculate_total_score(df, weights=None):
    """Write 'Total_Score' as a weighted sum of normalized columns (in place).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column named in ``weights``.
    weights : mapping of column name -> weight, optional
        Columns to combine and the weight of each. When omitted, the default
        is 0.35 expense ratio, 0.25 tracking error, 0.15 AUM and 0.25 return
        (applied to the corresponding ``*_normalized`` columns). A supplied
        mapping replaces the defaults entirely.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with 'Total_Score' added or overwritten.

    Raises
    ------
    ValueError
        If ``weights`` is an empty mapping.
    """
    if weights is None:
        weights = _DEFAULT_SCORE_WEIGHTS
    if not weights:
        raise ValueError("weights must name at least one column")

    # Accumulate left to right in mapping order so the floating-point result
    # for the default weights matches a hand-written a*w1 + b*w2 + ... sum.
    total = None
    for column, weight in weights.items():
        term = df[column] * weight
        total = term if total is None else total + term

    df['Total_Score'] = total
    return df

def rank_funds(df):
    """Rank funds by 'Total_Score' (highest score gets rank 1).

    A 'Rank' column is written onto the frame that was passed in, and a new
    frame sorted by that rank (best first) is returned.
    """
    scores = df['Total_Score']
    df['Rank'] = scores.rank(ascending=False)
    ordered = df.sort_values(by='Rank')
    return ordered


In [32]:
def process_fund_data(file_path):
    """Load a fund CSV and return it scored and ranked.

    Pipeline: read the CSV, parse string cells into numbers, fill missing
    annualized returns, normalize the metrics, compute 'Total_Score', then
    rank and sort.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file, passed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``rank_funds``.
    """
    df = pd.read_csv(file_path)
    # Parse strings first: the fill step averages the return columns and
    # subtracts the tracking error, both of which need numeric values.
    # Unparseable cells become NaN here, so they are filled as well.
    df = convert_to_float(df)
    df = fill_missing_returns(df)
    df = normalize_data(df)
    df = calculate_total_score(df)
    df = rank_funds(df)
    return df

In [33]:
# Score and rank the funds listed in the CSV, then show the ranking columns.
# NOTE(review): this variable is named nifty50_df, but the path points at
# niftynext50.csv - the same file the nn50_df cell loads. Confirm whether a
# separate Nifty 50 file was intended here.
nifty50_df = process_fund_data('../data/niftynext50.csv')

print(nifty50_df.loc[:, ['Fund', 'Total_Score', 'Rank']])

                                              Fund  Total_Score  Rank
5        ICICI Prudential Nifty Next 50 Index Fund     0.588158   1.0
10                    UTI Nifty Next 50 Index Fund     0.541256   2.0
9                     SBI Nifty Next 50 Index Fund     0.512102   3.0
2                     DSP Nifty Next 50 Index Fund     0.505126   4.0
6                   Kotak Nifty Next 50 Index Fund     0.391928   5.0
7           Motilal Oswal Nifty Next 50 Index Fund     0.379446   6.0
8                    Navi Nifty Next 50 Index Fund     0.371123   7.0
3                    HDFC NIFTY Next 50 Index Fund     0.362571   8.0
0   Aditya Birla Sun Life Nifty Next 50 Index Fund     0.332786   9.0
4                    HSBC Nifty Next 50 Index Fund     0.281632  10.0
1                    Axis Nifty Next 50 Index Fund     0.237754  11.0


In [34]:
# Score and rank the funds in niftynext50.csv, then show the ranking columns.
nn50_df = process_fund_data('../data/niftynext50.csv')
print(nn50_df.loc[:, ['Fund', 'Total_Score', 'Rank']])


                                              Fund  Total_Score  Rank
5        ICICI Prudential Nifty Next 50 Index Fund     0.588158   1.0
10                    UTI Nifty Next 50 Index Fund     0.541256   2.0
9                     SBI Nifty Next 50 Index Fund     0.512102   3.0
2                     DSP Nifty Next 50 Index Fund     0.505126   4.0
6                   Kotak Nifty Next 50 Index Fund     0.391928   5.0
7           Motilal Oswal Nifty Next 50 Index Fund     0.379446   6.0
8                    Navi Nifty Next 50 Index Fund     0.371123   7.0
3                    HDFC NIFTY Next 50 Index Fund     0.362571   8.0
0   Aditya Birla Sun Life Nifty Next 50 Index Fund     0.332786   9.0
4                    HSBC Nifty Next 50 Index Fund     0.281632  10.0
1                    Axis Nifty Next 50 Index Fund     0.237754  11.0


In [37]:
# Score and rank the funds in midcap150.csv, then show the ranking columns.
midcap150_df = process_fund_data('../data/midcap150.csv')
print(midcap150_df.loc[:, ['Fund', 'Total_Score', 'Rank']])

                                     Fund  Total_Score  Rank
1          Motilal Oswal Nifty Midcap 150     0.770095   1.0
3           Nippon India Nifty Midcap 150     0.620828   2.0
5       ICICI Prudential Nifty Midcap 150     0.548547   3.0
0        Navi Nifty Midcap 150 Index Fund     0.512988   4.0
2                   HDFC Nifty Midcap 150     0.506178   5.0
4                    SBI Nifty Midcap 150     0.424632   6.0
6  Aditya Birla Sun Life Nifty Midcap 150     0.090465   7.0


In [40]:

# Score and rank the funds in smallcap250.csv, then show the ranking columns.
smallcap250_df = process_fund_data('../data/smallcap250.csv')
print(smallcap250_df.loc[:, ['Fund', 'Total_Score', 'Rank']])

                                                Fund  Total_Score  Rank
6        Motilal Oswal Nifty Smallcap 250 Index Fund     0.594843   1.0
9            Edelweiss Nifty Smallcap 250 Index Fund     0.582855   2.0
4     ICICI Prudential Nifty Smallcap 250 Index Fund     0.560748   3.0
7         Nippon India Nifty Smallcap 250 Index Fund     0.552561   4.0
3                 HDFC NIFTY Smallcap 250 Index Fund     0.539139   5.0
8                  SBI Nifty Smallcap 250 Index Fund     0.520715   6.0
1                  Axis Nifty Smallcap 50 Index Fund     0.457480   7.0
5        Motilal Oswal Nifty Microcap 250 Index Fund     0.426927   8.0
2                 Kotak Nifty Smallcap 50 Index Fund     0.255728   9.0
0  Aditya Birla Sun Life Nifty Smallcap 50 Index ...     0.087673  10.0
