## Body Fat Percentage Estimator (function only)
To see the project in its entirety, look at "Evren Salih Capstone in its entirety.ipynb"

In [2]:
# It's time to make this interactive. I will create a function that prompts the user for their name, weight, neck circumference, abdomen circumference, thigh circumference, and wrist circumference. Once all the data has been entered, the function will return a dataframe containing their name, their measurements, and their estimated body fat percentage.
# I'm taking it from the top so that anyone who wants to try this out doesn't have to redo all the cleaning and feature engineering that I had to do. I will also put a link to the dataset on GitHub so that it doesn't require downloading.
def body_fat_percentage(data_path='bodyfat.csv'):
    """Interactively estimate a person's body fat percentage.

    Loads the body-fat dataset, converts weight and height to metric units,
    replaces IQR outliers with column means, fits an ordinary least squares
    model on an 80% training split, then prompts for a name and five
    measurements and predicts the body fat percentage for that person.

    Parameters
    ----------
    data_path : str, optional
        Anything `pandas.read_csv` accepts (local path or URL) pointing at
        the dataset. Defaults to 'bodyfat.csv' in the working directory.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns Name, Weight(kg), Neck(cm),
        Abdomen(cm), Thigh(cm), Wrist(cm) and BodyFat(%).

    Raises
    ------
    KeyError
        If the dataset lacks a column the function relies on, or if the
        fitted model expects a feature that was not collected from the user.

    Exceptions raised by `pandas.read_csv` (e.g. a missing file) propagate.
    """
    # Necessary modules. Kept local so the function can be copied and run on its own.
    import numpy as np
    import pandas as pd
    import statsmodels.api as sm
    from sklearn.model_selection import train_test_split

    def clean_outliers(df):
        """Return a copy of `df` where values outside 1.5*IQR are replaced by the column mean."""
        df_clean = df.copy()
        for column in df:
            q1, q3 = np.percentile(df[column], [25, 75])
            iqr = q3 - q1
            upper = q3 + 1.5*iqr
            lower = q1 - 1.5*iqr
            is_outlier = (df[column] < lower) | (df[column] > upper)
            df_clean[column] = np.where(is_outlier, np.nan, df[column])
            # The mean is taken after blanking the outliers, so they do not skew it.
            df_clean[column] = df_clean[column].fillna(df_clean[column].mean())
        return df_clean

    def ask_positive_number(prompt):
        """Repeat `prompt` until the reply is a finite number greater than zero."""
        while True:
            reply = input(prompt)
            try:
                value = float(reply)
            except ValueError:
                print('Please enter a number, for example 78.1')
                continue
            # float() also accepts 'nan' and 'inf'; neither is a usable measurement.
            if np.isfinite(value) and value > 0:
                return value
            print('Please enter a number greater than zero.')

    # Reading in our dataframe.
    df = pd.read_csv(data_path)

    # Converting to metric system (pounds -> kilograms, inches -> centimetres).
    df['Weight'] = df['Weight']*0.45359237
    df['Height'] = df['Height']*2.54

    # Relabeling columns.
    df = df.rename(columns={'BodyFat':'BodyFat(%)','Age':'Age(years)','Weight':'Weight(kg)','Density':'Density(Db)'})
    # NOTE(review): this is positional - it assumes columns 4 to 14 are Height plus the
    # circumference measurements, and that the circumferences are already in cm. Confirm against the CSV.
    df = df.rename(columns={column: column+'(cm)' for column in df.columns[4:15]})

    # Applying cleaning function.
    df = clean_outliers(df)

    # Columns that we don't need. Rather than dropping these columns from the beginning,
    # they are excluded at the feature engineering stage so that if more training data
    # becomes available in the future, the feature selection can be revisited.
    # These need to go regardless (the target and the quantity it is derived from).
    always_excluded = ['BodyFat(%)', 'Density(Db)']
    # These may change in the future.
    currently_excluded = ['Height(cm)', 'Knee(cm)', 'Chest(cm)', 'Age(years)',
                          'Forearm(cm)', 'Ankle(cm)', 'Biceps(cm)', 'Hip(cm)']
    excluded = set(always_excluded + currently_excluded)
    feature_cols = [column for column in df.columns if column not in excluded]

    # X and y to be split
    X = sm.add_constant(df[feature_cols])
    y = df['BodyFat(%)']

    # Train/test split. Only the training part is used here; the split (and its seed) is
    # kept so that the fitted coefficients stay the same as before.
    X_train, _, y_train, _ = train_test_split(X, y, test_size = 0.2, random_state = 42)

    # Initialising and fitting.
    results = sm.OLS(y_train, X_train).fit()

    # Our inputs. Numeric answers are re-asked until they are valid instead of crashing.
    name = input('What is your name?')
    measurements = {
        'Weight(kg)': ask_positive_number('What is your weight in kg?'),
        'Neck(cm)': ask_positive_number('What is your neck circumference in cm?'),
        'Abdomen(cm)': ask_positive_number('What is your abdomen circumference in cm?'),
        'Thigh(cm)': ask_positive_number('What is your thigh circumference in cm?'),
        'Wrist(cm)': ask_positive_number('What is your wrist circumference in cm?'),
    }

    # Build the observation and put its columns in exactly the order the model was
    # trained on; a missing feature raises KeyError rather than mis-predicting silently.
    obs = pd.DataFrame([measurements])
    obs.insert(0, 'const', 1.0)
    obs = obs[list(X_train.columns)]

    # Making and formatting the dataframe we wish the function to return.
    report = obs.drop(columns='const', errors='ignore')
    report['BodyFat(%)'] = results.predict(obs)
    report.insert(0, 'Name', name)

    # Finally returning the dataframe with our results
    return report

In [3]:
# Run the estimator: answer the prompts and the returned one-row dataframe is shown as this cell's output.
body_fat_percentage()

Unnamed: 0,Name,Weight(kg),Neck(cm),Abdomen(cm),Thigh(cm),Wrist(cm),BodyFat(%)
0,Evren,78.1,38.0,92.4,49.0,16.5,20.829362
