In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

In [None]:
# --- Load the dataset ---------------------------------------------------
# The source is an .ods spreadsheet, read through pandas with the 'odf' engine.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists. Consider a configurable data directory.
data_path = r'C:\Users\2024\Downloads\augmented_data.ods'
data = pd.read_excel(data_path, engine='odf')

# --- Feature / target separation ----------------------------------------
# Selecting with a list of columns keeps X two-dimensional (one column),
# while y is taken from a single column and is one-dimensional.
feature_columns = ['GSR Voltage']
target_column = 'Blood Sugar'
X = data[feature_columns].values   # Input variable
y = data[target_column].values     # Output variable

# --- Hold-out split -----------------------------------------------------
# 20% of the rows are reserved for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Standardization ----------------------------------------------------
# The scaler learns its statistics from the training rows only and is then
# applied unchanged to both partitions, so no test information leaks in.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Baseline: ordinary least squares on the standardized feature.
# `fit` returns the estimator itself, so construction and fitting chain.
linear_model = LinearRegression().fit(X_train_scaled, y_train)

# Predictions on both partitions, so the two scores can be compared
# to gauge over- or under-fitting.
y_pred_test = linear_model.predict(X_test_scaled)
y_pred_train = linear_model.predict(X_train_scaled)

# Training-set metrics.
train_mse, train_r2 = (
    mean_squared_error(y_train, y_pred_train),
    r2_score(y_train, y_pred_train),
)
# Test-set metrics.
test_mse, test_r2 = (
    mean_squared_error(y_test, y_pred_test),
    r2_score(y_test, y_pred_test),
)

# Report, rounded to two decimals.
print(f"Linear Regression Training MSE: {train_mse:.2f}")
print(f"Linear Regression Testing MSE: {test_mse:.2f}")
print(f"Linear Regression Training R2: {train_r2:.2f}")
print(f"Linear Regression Testing R2: {test_r2:.2f}")

In [None]:
# Hyperparameters shared by the final model and the cross-validation pipeline,
# kept in one place so the two cannot drift apart.
POLY_DEGREE = 2
RIDGE_ALPHA = 1.0

# Feature expansion: add the squared term of the standardized feature
# (no bias column; Ridge fits its own intercept).
poly = PolynomialFeatures(degree=POLY_DEGREE, include_bias=False)
X_train_poly = poly.fit_transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

# Ridge regression with 5-fold cross-validation.
# The preprocessing is wrapped in a pipeline and cross-validated on the RAW
# training features, so the scaler and the polynomial transformer are re-fitted
# inside every fold on that fold's training part only. Scoring pre-transformed
# data (X_train_poly) would let each validation fold influence the scaler's
# statistics, which is data leakage and makes the CV estimate optimistic.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
ridge = Ridge(alpha=RIDGE_ALPHA)
ridge_cv_pipeline = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=POLY_DEGREE, include_bias=False),
    Ridge(alpha=RIDGE_ALPHA),
)
# Scores are negated MSE values (higher is better); negate again to report MSE.
ridge_scores = cross_val_score(
    ridge_cv_pipeline,
    X_train,
    y_train,
    cv=kf,
    scoring='neg_mean_squared_error',
)

# Train the final ridge model on the full training set and predict the
# held-out test set.
ridge.fit(X_train_poly, y_train)
y_pred_ridge = ridge.predict(X_test_poly)

In [None]:
# Residual analysis: actual minus predicted on the held-out test set.
# Computed first because it is independent of the metrics below.
residuals = y_test - y_pred_ridge

# Hold-out metrics for the ridge model.
ridge_test_mse = mean_squared_error(y_true=y_test, y_pred=y_pred_ridge)
ridge_test_r2 = r2_score(y_true=y_test, y_pred=y_pred_ridge)

# The cross-validation scores were produced with 'neg_mean_squared_error',
# so their mean is negated to print a conventional (positive) MSE.
print(f"Ridge Regression Testing MSE: {ridge_test_mse:.2f}")
print(f"Ridge Regression Testing R2: {ridge_test_r2:.2f}")
print(f"Ridge Cross-Validation MSE: {-ridge_scores.mean():.2f}")

In [None]:

# Histogram of the ridge residuals (20 bins) with a KDE curve overlaid,
# drawn on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(residuals, kde=True, bins=20, color='green', ax=ax)
ax.set_title('Residual Distribution')
ax.set_xlabel('Residuals')
ax.set_ylabel('Frequency')  # replaces the y-label seaborn sets by default
plt.show()

# Scatter plot of residuals against predicted values.
# Open a dedicated figure with the same size as the other plots: without it
# this plot depends on the previous `plt.show()` having closed the current
# figure (otherwise it would draw on top of the histogram) and falls back to
# the default figure size.
plt.figure(figsize=(8, 5))
plt.scatter(y_pred_ridge, residuals, alpha=0.7)
# Zero line: points should scatter evenly around it if the model is unbiased.
plt.axhline(0, color='red', linestyle='--')
plt.title('Residuals vs Predicted Values')
plt.xlabel('Predicted Blood Sugar')
plt.ylabel('Residuals')
plt.show()

# Actual vs. ridge-predicted blood sugar, both plotted against the raw
# (unscaled) GSR voltage of the test set.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(X_test, y_test, color='blue', label='Actual Values')
# Predictions are semi-transparent so overlapping actual points stay visible.
ax.scatter(X_test, y_pred_ridge, color='red', alpha=0.5, label='Predicted Values')
ax.set_title('Blood Sugar vs GSR Voltage (Ridge Regression)')
ax.set_xlabel('GSR Voltage')
ax.set_ylabel('Blood Sugar')
ax.legend()
plt.show()