In [None]:
# Mount Google Drive at /content/drive so the cells below can read and write
# the project CSV files stored under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np

# Load the raw mutual-fund dataset from the mounted Drive folder
file_path = '/content/drive/MyDrive/MtechMit/DsaLab/miniProject/comprehensive_mutual_funds_data.csv'
df = pd.read_csv(file_path)

# The CSV uses '-' as a missing-value placeholder; turn it into a real NaN
df = df.replace('-', np.nan)

# A column that contained '-' was parsed as text (object dtype) even when every
# other value is a number. Convert such columns back to numeric so they are
# imputed as numbers rather than being mode-filled as if they were categories.
for column in df.select_dtypes(include=['object']).columns:
    converted = pd.to_numeric(df[column], errors='coerce')
    # Adopt the conversion only when it is lossless, i.e. every non-missing
    # value parsed as a number; genuine text columns are left untouched.
    if converted.notna().sum() == df[column].notna().sum():
        df[column] = converted

# Columns that must be imputed with the median instead of the mean
columns_to_impute = ['returns_3yr', 'returns_5yr']  # Update this list based on the actual column names

# Median imputation has to run BEFORE the blanket mean imputation below;
# otherwise the mean fill leaves nothing for the median to fill.
for column in columns_to_impute:
    if column in df.columns:  # Check if the column exists
        # Assign the result back instead of using a chained inplace fillna,
        # which operates on a temporary copy and stops working in pandas 3.0.
        df[column] = df[column].fillna(df[column].median())

# Separate numerical and categorical columns (after the dtype repair above)
num_cols = df.select_dtypes(include=['float64', 'int64']).columns
cat_cols = df.select_dtypes(include=['object']).columns

# Impute the remaining numerical gaps with the column mean
df[num_cols] = df[num_cols].fillna(df[num_cols].mean())

# Impute categorical columns with their most frequent value.
# mode() returns an empty frame when there is nothing to summarise, so guard
# the .iloc[0] lookup instead of letting it raise IndexError.
if len(cat_cols) > 0:
    cat_modes = df[cat_cols].mode()
    if not cat_modes.empty:
        df[cat_cols] = df[cat_cols].fillna(cat_modes.iloc[0])

# Save the updated dataframe to a new CSV file
updated_file_path = '/content/drive/MyDrive/MtechMit/DsaLab/miniProject/mutual_fund_updated_file.csv'
df.to_csv(updated_file_path, index=False)

print(f"Updated file saved at: {updated_file_path}")


Updated file saved at: /content/drive/MyDrive/MtechMit/DsaLab/miniProject/mutual_fund_updated_file.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[column].fillna(df[column].median(), inplace=True)


In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the imputed dataset
# NOTE(review): the imputation cell in this notebook writes
# 'mutual_fund_updated_file.csv', not 'mutual_fund_imputed_file.csv' —
# confirm which file is the intended input for scoring.
file_path = '/content/drive/MyDrive/MtechMit/DsaLab/miniProject/mutual_fund_imputed_file.csv'
df = pd.read_csv(file_path)

# Columns used for scoring and the weight of each criterion.
# The composite score is built from these two definitions only, so the list,
# the weights and the formula cannot drift apart.
columns_to_score = ['expense_ratio', 'sd', 'sharpe', 'beta']
weights = {'expense_ratio': 0.25, 'sd': 0.25, 'sharpe': 0.3, 'beta': 0.2}

# Sanity checks: every scored column has a weight and the weights sum to 1
assert set(weights) == set(columns_to_score), "weights must cover exactly the scored columns"
assert abs(sum(weights.values()) - 1.0) < 1e-9, "weights must sum to 1"

# Normalize the columns
scaler = MinMaxScaler()

# Min-max normalize the criteria where a lower raw value is better.
# Note: this overwrites the raw values of these columns in df (and therefore
# in the saved CSV) with their [0, 1] normalized values.
minimize_cols = ['expense_ratio', 'sd', 'beta']
df[minimize_cols] = scaler.fit_transform(df[minimize_cols])

# Higher Sharpe is better, so negate it before scaling: the best Sharpe maps to
# 0 and the worst to 1, matching the "lower is better" direction of the other
# criteria. fit_transform refits the scaler on this column and returns an
# (n, 1) array; flatten it explicitly before assigning to a single column.
df['sharpe'] = scaler.fit_transform(-df[['sharpe']]).ravel()

# Weighted sum of the normalized criteria, stored in 'composite_score'.
# Terms are accumulated in the order given by `weights`.
df['composite_score'] = sum(df[column] * weight for column, weight in weights.items())

# Sort the funds by composite score (ascending: lower score is better)
df_sorted = df.sort_values(by='composite_score', ascending=True)

# Save the updated dataframe to a new CSV file
updated_file_path = '/content/drive/MyDrive/MtechMit/DsaLab/miniProject/mutual_fund_composite_scores.csv'
df_sorted.to_csv(updated_file_path, index=False)

print(f"Updated CSV file with composite scores saved at: {updated_file_path}")


Updated CSV file with composite scores saved at: /content/drive/MyDrive/MtechMit/DsaLab/miniProject/mutual_fund_composite_scores.csv
