In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.feature_selection import RFECV
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# model data

In [2]:
# Read the tab-separated data file (decimal commas), drop four rows by index
# label, and keep only the rows whose 'Pct,BF' value is non-zero.
df = (
    pd.read_csv('BF_Data.txt', sep='\t', decimal=',')
    .drop(index=[45, 73, 93, 166])
    .loc[lambda frame: frame['Pct,BF'] != 0]
)

df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'BF_Data.txt'

# features and target

In [None]:
# Response variable: the 'Pct,BF' column.
target = df['Pct,BF']

# Explanatory variables: every remaining column.
features = df.drop(columns="Pct,BF")

features.head()

# normalization

In [None]:
# Standardize the features and the target with StandardScaler.
#
# Each gets its own scaler object. Re-fitting a single scaler on the target
# would overwrite the statistics learned from the features, so that scaler
# could no longer transform new feature rows, and predictions could not be
# mapped back to the original target units with `inverse_transform`.
#
# NOTE(review): both scalers are fit on the full data set, before the
# train/test split performed later, so test rows influence the scaling.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# The target is a Series; reshape to a single column because the scaler
# expects 2-D input.
target_scaler = StandardScaler()
target_scaled = target_scaler.fit_transform(target.values.reshape(-1, 1))
target_scaled

In [None]:
# Hold out 20% of the scaled data; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features_scaled,
    target_scaled,
    test_size=0.2,
    random_state=42,
)

# Estimator driving the feature elimination (LinearRegression() was the
# alternative considered here).
model = Ridge(alpha=0.1)

# Recursive feature elimination with 5-fold cross-validation,
# removing one feature per iteration (step=1).
rfecv = RFECV(estimator=model, step=1, cv=5)
rfecv.fit(X_train, y_train)

# Column positions of the features that RFECV kept.
selected_feature_indices = np.flatnonzero(rfecv.support_)

# The same columns, taken from the full scaled feature matrix.
selected_features = features_scaled[:, selected_feature_indices]
selected_features

# prediction with chosen features 

In [None]:
# Split the reduced feature matrix with the same test size and seed that the
# RFECV cell used.
(
    X_train_selected,
    X_test_selected,
    y_train_selected,
    y_test_selected,
) = train_test_split(
    selected_features,
    target_scaled,
    test_size=0.2,
    random_state=42,
)

# Plain least-squares fit on the retained features
# (Ridge(alpha=0.1) is the alternative noted by the author).
model_selected = LinearRegression().fit(X_train_selected, y_train_selected)
y_pred_selected = model_selected.predict(X_test_selected)

# Root-mean-squared error on the held-out rows, in units of the scaled target.
rmse = np.sqrt(mean_squared_error(y_test_selected, y_pred_selected))
rmse

# adding BMI for abdomen prediction

In [None]:
# NOTE(review): absolute, machine-specific path; the first load cell reads
# 'BF_Data.txt' instead -- confirm whether both refer to the same data.
df = pd.read_csv(
    'C:/Users/48574/Desktop/WAT_SEM_5/MED/Lab1/lab1_dane_cd/MED-lab-1-cd-Dobor zmiennych-bodyfat-dane-i-opis — kopia.txt',
    sep='\t',
    decimal=',',
)

# Steps, in order:
#   1. add BMI (the 703 factor is for imperial units),
#   2. drop the Weight and Height columns that BMI was built from,
#   3. drop four rows by index label,
#   4. keep only the rows whose 'Pct,BF' value is non-zero.
df = (
    df
    .assign(BMI=lambda frame: (frame['Weight'] / (frame['Height'] ** 2)) * 703)
    .drop(columns=['Weight', 'Height'])
    .drop(index=[45, 73, 93, 166])
    .loc[lambda frame: frame['Pct,BF'] != 0]
)

# Pairwise correlations between the remaining columns.
df.corr()

In [None]:
# Scatter plot of the BMI column against the Abdomen column.
fig, ax = plt.subplots()
ax.scatter(df['Abdomen'], df['BMI'])
ax.set_xlabel('Abdomen')
ax.set_ylabel('BMI')
ax.set_title('BMI(abdomen)')
plt.show()

# new model explanatory and response vars

In [None]:
# Single explanatory variable (BMI) and the response (Abdomen).
features_bmi, target_bmi = df['BMI'], df['Abdomen']

# normalization

In [None]:
# Standardize the BMI feature and the Abdomen target.
#
# Two separate scalers are used. Re-fitting `scaler_bmi` on the target would
# replace the BMI statistics it had just learned, so it could no longer scale
# new BMI values, and there would be no scaler describing the target for
# `inverse_transform`.
#
# NOTE(review): both scalers are fit on the full data set, before the
# train/test split performed later, so test rows influence the scaling.
scaler_bmi = StandardScaler()
# Both inputs are Series; reshape to one column because the scaler expects 2-D input.
features_bmi_scaled = scaler_bmi.fit_transform(features_bmi.values.reshape(-1, 1))

target_scaler_bmi = StandardScaler()
target_bmi_scaled = target_scaler_bmi.fit_transform(target_bmi.values.reshape(-1, 1))
features_bmi_scaled

In [None]:
# Fit a one-variable linear regression (scaled BMI -> scaled Abdomen) and
# evaluate it on a 20% hold-out split.
X_train_bmi, X_test_bmi, y_train_bmi, y_test_bmi = train_test_split(
    features_bmi_scaled,
    target_bmi_scaled,
    test_size=0.2,
    random_state=42,
)

model_bmi = LinearRegression()
model_bmi.fit(X_train_bmi, y_train_bmi)

y_pred_bmi = model_bmi.predict(X_test_bmi)

# RMSE is computed as sqrt(MSE), matching the earlier RMSE cell. The
# `squared=False` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6, where passing it raises TypeError.
rmse_bmi = np.sqrt(mean_squared_error(y_test_bmi, y_pred_bmi))

# Coefficient of determination on the same hold-out rows.
R2 = r2_score(y_test_bmi, y_pred_bmi)

R2, rmse_bmi

# regularization using ElasticNet

In [None]:
# NOTE(review): absolute, machine-specific path; the first load cell reads
# 'BF_Data.txt' instead -- confirm whether both refer to the same data.
#
# Load the table, drop four rows by index label, and keep only the rows whose
# 'Pct,BF' value is non-zero.
df_en = (
    pd.read_csv(
        'C:/Users/48574/Desktop/WAT_SEM_5/MED/Lab1/lab1_dane_cd/MED-lab-1-cd-Dobor zmiennych-bodyfat-dane-i-opis — kopia.txt',
        sep='\t',
        decimal=',',
    )
    .drop(index=[45, 73, 93, 166])
    .loc[lambda frame: frame['Pct,BF'] != 0]
)
df_en

In [None]:
# Split the frame into explanatory variables and the 'Pct,BF' response.
features_en = df_en.drop(columns="Pct,BF")
target_en = df_en['Pct,BF']

# Standardize features and target with separate scalers. Re-fitting
# `scaler_en` on the target would overwrite the feature statistics it had
# just learned, so it could no longer transform new feature rows, and there
# would be no scaler describing the target for `inverse_transform`.
#
# NOTE(review): both scalers are fit on the full data set, before the
# train/test split performed later, so test rows influence the scaling.
scaler_en = StandardScaler()
features_scaled_en = scaler_en.fit_transform(features_en)

# The target is a Series; reshape to a single column because the scaler
# expects 2-D input.
target_scaler_en = StandardScaler()
target_scaled_en = target_scaler_en.fit_transform(target_en.values.reshape(-1, 1))

features_scaled_en

In [None]:
# Fit an ElasticNet regression on the scaled data and evaluate it on a
# 30% hold-out split.
X_train_en, X_test_en, y_train_en, y_test_en = train_test_split(
    features_scaled_en,
    target_scaled_en,
    test_size=0.3,
    random_state=42,
)

# alpha: overall regularization strength.
# l1_ratio: mix between the L1 (lasso) and L2 (ridge) penalties.
model_en = ElasticNet(alpha=0.01, l1_ratio=0.75)

model_en.fit(X_train_en, y_train_en)

y_pred_en = model_en.predict(X_test_en)

# RMSE is computed as sqrt(MSE), matching the earlier RMSE cell. The
# `squared=False` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6, where passing it raises TypeError.
rmse_en = np.sqrt(mean_squared_error(y_test_en, y_pred_en))

# RMSE and fitted parameters
rmse_en, model_en.coef_, model_en.intercept_