In [1]:
import pandas as pd
import numpy as np
import os
from google.colab import drive
import seaborn as sns
import matplotlib.pyplot as plt
drive.mount('/content/drive')

df = pd.read_csv("/content/drive/MyDrive/frailty_data.csv")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
from sklearn.preprocessing import StandardScaler
raw_data = "/content/drive/MyDrive/frailty_data.csv"
df = pd.read_csv(raw_data)
print("Initial Data:")
print(df.head())
df['Frailty'] = df['Frailty'].map({'N': 0, 'Y': 1})
numeric_columns = ['Height', 'Weight', 'Age', 'Grip strength']
scaler = StandardScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])
print("Processed Data:")
print(df.head())

Initial Data:
   Height  Weight  Age  Grip strength Frailty
0    65.8     112   30             30       N
1    71.5     136   19             31       N
2    69.4     153   45             29       N
3    68.2     142   22             28       Y
4    67.8     144   29             24       Y
Processed Data:
     Height    Weight       Age  Grip strength  Frailty
0 -1.766641 -1.473912 -0.204911       0.932505        0
1  1.829735  0.303670 -1.106520       1.165631        0
2  0.504754  1.562791  1.024556       0.699379        0
3 -0.252377  0.748066 -0.860627       0.466252        1
4 -0.504754  0.896198 -0.286876      -0.466252        1


#Feature Engineering

In [6]:
df["BMI"] = (df["Weight"] / (df["Height"] ** 2)).round(2)

def age_group(age):
    if age < 30:
        return "<30"
    elif 30 <= age <= 45:
        return "30–45"
    elif 46 <= age <= 60:
        return "46–60"
    else:
        return ">60"

df["AgeGroup"] = df["Age"].apply(age_group)
print(age_group)


<function age_group at 0x7e4690936ca0>


In [8]:
mean_bmi_by_frailty = df.groupby("Frailty")["BMI"].mean()
grip_by_age_group = df.groupby("AgeGroup")["Grip strength"].mean()
print(mean_bmi_by_frailty)
print(grip_by_age_group)
print(df[["Height", "Weight", "Age", "Grip strength", "Frailty", "BMI", "AgeGroup"]])

Frailty
0   -28.013333
1     4.195000
Name: BMI, dtype: float64
AgeGroup
<30    0.0
Name: Grip strength, dtype: float64
     Height    Weight       Age  Grip strength  Frailty     BMI AgeGroup
0 -1.766641 -1.473912 -0.204911       0.932505        0   -0.47      <30
1  1.829735  0.303670 -1.106520       1.165631        0    0.09      <30
2  0.504754  1.562791  1.024556       0.699379        0    6.13      <30
3 -0.252377  0.748066 -0.860627       0.466252        1   11.74      <30
4 -0.504754  0.896198 -0.286876      -0.466252        1    3.52      <30
5  0.063094 -0.659187  1.434378       0.000000        0 -165.59      <30
6  0.757132  0.674000  1.516343      -0.932505        1    1.18      <30
7  0.946415  0.303670 -0.778662      -1.398757        1    0.34      <30
8 -0.441660 -1.473912 -1.270449      -1.631883        0   -7.56      <30
9 -1.135698 -0.881385  0.532769       1.165631        0   -0.68      <30


#One-Hot Encoding of AgeGroup

In [12]:
df["AgeGroup"] = df["Age"].apply(age_group)
df = pd.get_dummies(df, columns=["AgeGroup"])

#EDA & Reporting


In [9]:
numeric_cols = df.select_dtypes(include=['float64', 'int64', 'int8']).columns
summary_stats = df[numeric_cols].agg(['mean', 'median', 'std']).transpose()
print(numeric_cols)
print(summary_stats)

Index(['Height', 'Weight', 'Age', 'Grip strength', 'Frailty', 'BMI'], dtype='object')
                       mean    median        std
Height         3.552714e-15 -0.094641   1.054093
Weight        -4.440892e-16  0.303670   1.054093
Age            1.110223e-17 -0.245893   1.054093
Grip strength  4.440892e-17  0.233126   1.054093
Frailty        4.000000e-01  0.000000   0.516398
BMI           -1.513000e+01  0.215000  53.101608


#correlation

In [14]:
grip_frailty_corr = df["Grip strength"].corr(df["Frailty"])
print(grip_frailty_corr)

-0.4758668672668008
