In [1]:
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', message='Your specific warning message')

import pandas as pd
import seaborn as sns
from sklearn.model_selection import KFold, cross_val_predict, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, auc, accuracy_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.dummy import DummyClassifier
import matplotlib.pyplot as plt
from prettytable import PrettyTable

In [2]:
df= pd.read_csv('BMI.csv') # read the dataframe into df

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Age      252 non-null    int64  
 1   Weight   252 non-null    float64
 2   Height   252 non-null    float64
 3   Neck     252 non-null    float64
 4   Chest    252 non-null    float64
 5   Abdomen  252 non-null    float64
 6   Hip      252 non-null    float64
 7   Thigh    252 non-null    float64
 8   Knee     252 non-null    float64
 9   Ankle    252 non-null    float64
 10  Biceps   252 non-null    float64
 11  Forearm  252 non-null    float64
 12  Wrist    252 non-null    float64
 13  fatpctg  252 non-null    float64
dtypes: float64(13), int64(1)
memory usage: 27.7 KB


## Wrapper method 
### forward stepwise regression

In [4]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Separate features (X) and target variable (y)
X = df.drop('fatpctg', axis=1)
y = df['fatpctg']

# Function to perform forward stepwise regression
def forward_stepwise_selection(X, y):
    features = list(X.columns)
    selected_features = []
    best_features = []
    best_mse = float('inf')

    for _ in range(len(features)):
        mse_list = []

        for feature in features:
            current_features = selected_features + [feature]
            X_current = X[current_features]

            # Train a Linear Regression model
            lr_model = LinearRegression()
            lr_model.fit(X_current, y)
            y_pred = lr_model.predict(X_current)

            # Calculate Mean Squared Error
            mse = mean_squared_error(y, y_pred)
            mse_list.append(mse)

        # Select the feature that minimizes MSE
        best_feature_index = np.argmin(mse_list)
        best_feature = features[best_feature_index]

        # Update selected features and remove the chosen feature from the list
        selected_features.append(best_feature)
        features.remove(best_feature)

        # Update best features if the current set has a lower MSE
        if mse_list[best_feature_index] < best_mse:
            best_mse = mse_list[best_feature_index]
            best_features = selected_features.copy()

    return best_features
# Perform forward stepwise regression
forward_best_features = forward_stepwise_selection(X, y)
print("Best features from forward stepwise regression:", forward_best_features)

Best features from forward stepwise regression: ['Abdomen', 'Weight', 'Wrist', 'Forearm', 'Neck', 'Age', 'Thigh', 'Hip', 'Height', 'Biceps', 'Ankle', 'Chest', 'Knee']


### backward stepwise regression

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Function to perform backward stepwise regression
def backward_stepwise_selection(X, y):
    features = list(X.columns)
    selected_features = features.copy()
    best_features = []
    best_mse = float('inf')

    for _ in range(len(features)):
        mse_list = []

        for feature in selected_features:
            current_features = selected_features.copy()
            current_features.remove(feature)
            X_current = X[current_features]

            # Check if X_current is not empty
            if not X_current.empty:
                # Train a Linear Regression model
                lr_model = LinearRegression()
                
                # Convert X_current to a NumPy array
                X_current_np = X_current.to_numpy().reshape(-1, len(current_features))

                lr_model.fit(X_current_np, y)
                y_pred = lr_model.predict(X_current_np)

                # Calculate Mean Squared Error
                mse = mean_squared_error(y, y_pred)
                mse_list.append(mse)

            else:
                mse_list.append(float('inf'))

        # Select the feature to remove that minimizes MSE
        worst_feature_index = np.argmin(mse_list)
        worst_feature = selected_features[worst_feature_index]

        # Update selected features and remove the chosen feature
        selected_features.remove(worst_feature)

        # Update best features if the current set has a lower MSE
        if mse_list[worst_feature_index] < best_mse:
            best_mse = mse_list[worst_feature_index]
            best_features = selected_features.copy()

    return best_features

# Perform backward stepwise regression
backward_best_features = backward_stepwise_selection(X, y)
print("Best features from backward stepwise regression:", backward_best_features)


Best features from backward stepwise regression: ['Age', 'Weight', 'Height', 'Neck', 'Chest', 'Abdomen', 'Hip', 'Thigh', 'Ankle', 'Biceps', 'Forearm', 'Wrist']


## Filter method

In [6]:
# Calculate the correlation between each feature and the output variable
correlations = df.drop('fatpctg', axis=1).apply(lambda x: x.corr(df['fatpctg']))

# Sort features based on the absolute correlation values
correlations = correlations.abs().sort_values(ascending=False)

# Display the ranking of features
print("Ranking of features based on correlation:")
print(correlations)


Ranking of features based on correlation:
Abdomen    0.813432
Chest      0.702620
Hip        0.625201
Weight     0.612925
Thigh      0.559608
Knee       0.508665
Biceps     0.493271
Neck       0.490592
Forearm    0.361387
Wrist      0.346575
Age        0.291458
Ankle      0.265970
Height     0.133211
dtype: float64


## Embedded method:
### Lasso regression

In [7]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate features (X) and target variable (y)
X = df.drop('fatpctg', axis=1)
y = df['fatpctg']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

# Fit Lasso regression model
lasso_model = Lasso(alpha=0.01)  # You can adjust the regularization parameter (alpha)
lasso_model.fit(X_train_std, y_train)

# Display coefficients and corresponding features
lasso_coefficients = pd.Series(lasso_model.coef_, index=X.columns)
selected_features_lasso = lasso_coefficients[lasso_coefficients != 0].index

print("Selected features from Lasso regression:")
print(selected_features_lasso)


Selected features from Lasso regression:
Index(['Age', 'Weight', 'Height', 'Neck', 'Chest', 'Abdomen', 'Hip', 'Thigh',
       'Ankle', 'Biceps', 'Forearm', 'Wrist'],
      dtype='object')


### Random forest

In [8]:
from sklearn.ensemble import RandomForestRegressor

# Fit Random Forest model
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)

# Display feature importances
feature_importances = pd.Series(rf_model.feature_importances_, index=X.columns)
selected_features_rf = feature_importances.sort_values(ascending=False).index

print("Selected features from Random Forest:")
print(selected_features_rf)


Selected features from Random Forest:
Index(['Abdomen', 'Weight', 'Wrist', 'Height', 'Hip', 'Neck', 'Chest', 'Age',
       'Ankle', 'Biceps', 'Knee', 'Forearm', 'Thigh'],
      dtype='object')
