# Blood Glucose Level Prediction using ML

This notebook trains a machine learning model to predict blood glucose levels based on return loss measurements at different frequencies.

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# For saving the model
import joblib
import json

# Render matplotlib figures inline in the notebook output (harmless if it is
# already the kernel default).
%matplotlib inline
# Apply seaborn's "whitegrid" style to the plots drawn below.
sns.set_style("whitegrid")

## 1. Load and Explore the Data

In [None]:
# Read the measurement table from disk and show its dimensions.
csv_path = "SUGAR UPTO 1000MG_approx.csv"
data = pd.read_csv(csv_path)

print(f"Data shape: {data.shape}", "\nFirst few rows:", sep="\n")
# Last expression of the cell: rendered as a rich table preview.
data.head()

In [None]:
# Summarise the frequency axis (one row of `data` per frequency point).
freq_axis = data['Freq [GHz]']
print(f"Frequency range: {freq_axis.min()} GHz to {freq_axis.max()} GHz")
print(f"Number of frequency points: {len(data)}")

# Collect every column whose name contains 'MG' -- these are treated as the
# per-concentration measurement columns.
sugar_columns = data.filter(like='MG').columns.tolist()
print(f"Sugar concentrations: {sugar_columns}")
print(f"Number of concentrations: {len(sugar_columns)}")

## 2. Data Preprocessing

In [None]:
# Build the feature table: drop the frequency axis so that only the
# measurement columns remain.
X_raw = data.drop(columns=['Freq [GHz]'])

# Transpose to have samples as rows:
# each row is one column of X_raw measured across all frequency points.
X = X_raw.T.values
print(f"Training data shape: {X.shape}")  # Should be (21, 333)

# Create labels (0mg to 1000mg in 50mg increments).
# NOTE(review): labels are assigned by position, which assumes the CSV columns
# are ordered 0MG, 50MG, ..., 1000MG -- confirm against the column names.
y = np.arange(0, 1001, 50)

# Fail fast with a readable message if the CSV layout does not match the
# hard-coded label grid, instead of erroring later inside plotting/fitting.
assert X.shape[0] == y.shape[0], (
    f"Expected {y.shape[0]} measurement columns to match the label grid, "
    f"found {X.shape[0]}: {list(X_raw.columns)}"
)

print(f"Labels shape: {y.shape}")
print(f"Labels: {y}")

## 3. Data Visualization

In [None]:
# Overlay the return-loss curves of a handful of concentrations.
fig, ax = plt.subplots(figsize=(12, 6))

concentrations_to_plot = ['0MG', '100MG', '250MG', '500MG', '1000MG']
frequencies = data['Freq [GHz]'].values

# Silently skip any requested concentration that is not a column of `data`.
available = [name for name in concentrations_to_plot if name in data.columns]
for conc in available:
    ax.plot(frequencies, data[conc], label=f'{conc} Glucose', linewidth=2)

ax.set_xlabel('Frequency (GHz)')
ax.set_ylabel('Return Loss (dB)')
ax.set_title('Return Loss vs Frequency for Different Glucose Concentrations')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## 4. Feature Scaling

In [None]:
# Standardise every feature column of X; the fitted scaler is kept so the same
# transform can be applied to later samples.
scaler = StandardScaler().fit(X)
X_norm = scaler.transform(X)

# Report the global mean / std of the matrix before and after scaling.
for heading, matrix in (("Before scaling:", X), ("\nAfter scaling:", X_norm)):
    print(heading)
    print(f"  Mean: {matrix.mean():.4f}")
    print(f"  Std: {matrix.std():.4f}")

## 5. Dimensionality Reduction with PCA

In [None]:
# Project the standardised spectra onto their first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_norm)

# Share of the total variance captured by each retained component.
variance_ratio = pca.explained_variance_ratio_
print(f"Explained variance ratio: {variance_ratio}")
print(f"Total explained variance: {variance_ratio.sum():.4f}")

In [None]:
# Scatter the samples in PC1/PC2 space, coloured by their label value.
fig, ax = plt.subplots(figsize=(10, 8))

scatter = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis', s=100, alpha=0.7)
fig.colorbar(scatter, ax=ax, label='Glucose Level (mg)')

# Axis labels carry each component's explained-variance share.
pc1_share = pca.explained_variance_ratio_[0]
pc2_share = pca.explained_variance_ratio_[1]
ax.set_xlabel(f'PC1 ({pc1_share:.2%} variance)')
ax.set_ylabel(f'PC2 ({pc2_share:.2%} variance)')
ax.set_title('PCA of Glucose Spectra Data')
ax.grid(True, alpha=0.3)
plt.show()

## 6. Train KNN Classifier

In [None]:
# Fit a 1-nearest-neighbour classifier on the 2-D PCA coordinates: a prediction
# is the label of the single closest training point.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_pca, y)

print("Model trained successfully!")

## 7. Model Evaluation

In [None]:
# Evaluate on training data (since we have limited samples).
# NOTE: a 1-nearest-neighbour model finds each training point as its own
# nearest neighbour, so (barring duplicate points) this accuracy is 1.0 by
# construction and says nothing about how the model generalises.
y_pred = knn.predict(X_pca)
accuracy = accuracy_score(y, y_pred)

print(f"Training Accuracy: {accuracy:.2f}")
print("\nPredictions vs Actual:")
for actual, predicted in zip(y, y_pred):
    status = "✓" if actual == predicted else "✗"
    print(f"  Actual: {actual:4d}mg, Predicted: {predicted:4d}mg {status}")

# Leave-one-out estimate: predict each sample from its nearest *other* sample
# in PCA space (Euclidean distance). The error is reported in mg rather than
# as accuracy because a held-out sample's exact label may not exist among the
# remaining samples.
# Caveat: the scaler and PCA were fitted on all samples, so this is still a
# slightly optimistic estimate.
if len(y) > 1:
    pairwise = np.linalg.norm(X_pca[:, None, :] - X_pca[None, :, :], axis=-1)
    np.fill_diagonal(pairwise, np.inf)  # a sample may not match itself
    loo_pred = y[pairwise.argmin(axis=1)]
    loo_mae = np.abs(loo_pred - y).mean()
    print(f"\nLeave-one-out nearest-neighbour MAE: {loo_mae:.1f} mg")

## 8. Test the Pipeline on a Single Sample (Taken from the Training Set)

In [None]:
# Run one sample through the full scaler -> PCA -> KNN pipeline.
# NOTE: the sample is a row of the training matrix X, so this checks the
# transform/predict plumbing rather than performance on unseen data.
test_index = 3  # 150MG
test_sample = X[test_index:test_index+1]  # Keep as 2D array

# Preprocess with the already-fitted transformers (no refitting)
test_norm = scaler.transform(test_sample)
test_pca = pca.transform(test_norm)

# Predict
prediction = knn.predict(test_pca)
actual = y[test_index]

# Rows of X were built from the columns of X_raw, so the sample's name is
# looked up there; this stays aligned with X even if the CSV contains columns
# that the 'MG' name filter would skip.
print(f"Test sample: {X_raw.columns[test_index]}")
print(f"Actual glucose level: {actual} mg")
print(f"Predicted glucose level: {prediction[0]} mg")
print(f"Correct: {'Yes' if actual == prediction[0] else 'No'}")

## 9. Save the Model

In [None]:
# Persist each fitted pipeline stage to its own joblib file.
artifacts = {
    'sugar_scaler.pkl': scaler,
    'sugar_pca.pkl': pca,
    'sugar_knn_model.pkl': knn,
}
for filename, fitted_object in artifacts.items():
    joblib.dump(fitted_object, filename)

print("Models saved successfully!")
print("Saved files:")
for filename in artifacts:
    print(f" - {filename}")

In [None]:
# Export the fitted scaler/PCA parameters together with the projected training
# points and their labels as plain JSON, for the JavaScript implementation.
model_params = dict(
    scaler_mean=scaler.mean_.tolist(),
    scaler_scale=scaler.scale_.tolist(),
    pca_components=pca.components_.tolist(),
    pca_mean=pca.mean_.tolist(),
    training_data_pca=X_pca.tolist(),
    training_labels=y.tolist(),
)

with open('sugar_model_params.json', 'w') as params_file:
    json.dump(model_params, params_file, indent=2)

print("Model parameters saved to 'sugar_model_params.json'")

## 10. How to Use the Model

In [None]:
# Plain-text instructions for applying the fitted scaler, PCA and KNN objects
# (or their saved joblib copies) to a new measurement.
usage_instructions = """
HOW TO USE THE TRAINED MODEL:

1. LOAD YOUR TEST DATA:
   - Collect return loss measurements at the same 333 frequencies
   - Data should be a 1D array of length 333

2. PREPROCESS THE DATA:
   scaled_data = scaler.transform([your_data])
   pca_data = pca.transform(scaled_data)

3. MAKE PREDICTIONS:
   prediction = knn.predict(pca_data)
   print(f"Predicted glucose level: {prediction[0]} mg")

4. ALTERNATIVELY, LOAD THE SAVED MODEL:
   loaded_scaler = joblib.load('sugar_scaler.pkl')
   loaded_pca = joblib.load('sugar_pca.pkl')
   loaded_knn = joblib.load('sugar_knn_model.pkl')
"""
print(usage_instructions)