# Daily Water Intake Prediction (ML + ANN) — Husandeep Atwal

Upload `Daily_Water_Intake.csv` when prompted and run all cells.


In [None]:
import os
from google.colab import files

# Open Colab's file-upload prompt; `uploaded` keeps whatever
# files.upload() returns for the selected file(s).
uploaded = files.upload()

# Show the contents of the current working directory so the upload can be
# confirmed by eye before the CSV is read.
print('Files in session:', os.listdir())

In [None]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, accuracy_score, f1_score


In [None]:
# Read the dataset into `df`, then print a quick overview: first rows, shape,
# per-column missing-value counts and the class balance of the target.
df = pd.read_csv('Daily_Water_Intake.csv')

display(df.head())
print('Shape:', df.shape)
# sep='\n' puts the heading and the pandas summary on separate lines, exactly
# as two consecutive print() calls would.
print('\nMissing values:', df.isna().sum(), sep='\n')
print('\nTarget distribution:', df['Hydration Level'].value_counts(), sep='\n')


In [None]:
# Encode the target as 0 (Poor) / 1 (Good).
# Series.map() with a dict turns every value that is not a key into NaN, so a
# bare map silently corrupts the labels when the CSV contains an unexpected
# label or when this cell is re-run on the already-encoded column. Here,
# values that are already 0/1 pass through unchanged (re-running the cell is
# safe) and anything else fails loudly instead of becoming NaN.
label_map = {'Poor': 0, 'Good': 1}
encoded = df['Hydration Level'].map(lambda v: label_map.get(v, v))
unexpected = ~encoded.isin(list(label_map.values()))
if unexpected.any():
    raise ValueError(
        "Unexpected 'Hydration Level' values: "
        f"{sorted(encoded[unexpected].astype(str).unique())}; "
        f"expected one of {sorted(label_map)}"
    )
df['Hydration Level'] = encoded.astype('int64')

y = df['Hydration Level']
X = df.drop(columns=['Hydration Level'])

# Columns that get one-hot encoded; every other feature column is scaled.
# NOTE(review): this assumes all remaining columns are numeric -- confirm
# against the CSV schema.
cat_cols = ['Gender', 'Physical Activity Level', 'Weather']
num_cols = [c for c in X.columns if c not in cat_cols]

# 80/20 split, stratified on the target so both parts keep the class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Shared preprocessing step: standardise numeric features and one-hot encode
# categorical ones (first level of each category dropped).
preprocess = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_cols),
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), cat_cols),
    ]
)

def fit_evaluate(model, model_name):
    """Fit ``model`` behind the shared preprocessing step and report test metrics.

    The estimator is wrapped in a ``Pipeline`` together with the module-level
    ``preprocess`` transformer, fitted on ``X_train``/``y_train`` and scored
    on ``X_test``/``y_test``. A text report is printed as a side effect.

    Args:
        model: Unfitted estimator used as the final pipeline step.
        model_name: Label used in the printed report and in the result dict.

    Returns:
        A dict with the keys ``'Model'``, ``'Test Accuracy'``, ``'ROC-AUC'``,
        ``'Weighted F1'``, ``'Training Time (s)'`` and ``'roc'``. ``'roc'`` is
        an ``(fpr, tpr)`` tuple. ``'ROC-AUC'`` and both ``'roc'`` entries are
        ``None`` when the ROC metrics could not be computed.
    """
    pipe = Pipeline([('prep', preprocess), ('model', model)])

    # perf_counter() is a monotonic, high-resolution clock; time.time() is
    # wall-clock time and can jump if the system clock is adjusted.
    start = time.perf_counter()
    pipe.fit(X_train, y_train)
    train_time = time.perf_counter() - start

    y_pred = pipe.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    wf1 = f1_score(y_test, y_pred, average='weighted')
    cm = confusion_matrix(y_test, y_pred)

    # ROC metrics are best-effort: they need class probabilities and a usable
    # positive-class column. Only the failures expected from that are caught,
    # and the reason is kept so the report can say why the metric is missing
    # instead of silently omitting it.
    auc = None
    fpr = tpr = None
    roc_skip_reason = None
    try:
        y_proba = pipe.predict_proba(X_test)[:, 1]
        auc = roc_auc_score(y_test, y_proba)
        fpr, tpr, _ = roc_curve(y_test, y_proba)
    except (AttributeError, IndexError, ValueError) as exc:
        roc_skip_reason = f'{type(exc).__name__}: {exc}'

    print('\n====================', model_name, '====================')
    print(f'Training time (s): {train_time:.6f}')
    print(f'Test Accuracy:     {acc:.6f}')
    if auc is not None:
        print(f'ROC-AUC:           {auc:.6f}')
    if roc_skip_reason is not None:
        print(f'ROC metrics skipped ({roc_skip_reason})')
    print(f'Weighted F1:       {wf1:.6f}')
    print('Confusion Matrix:\n', cm)
    print('\nClassification Report:\n', classification_report(y_test, y_pred, digits=6))
    return {'Model': model_name, 'Test Accuracy': acc, 'ROC-AUC': auc, 'Weighted F1': wf1, 'Training Time (s)': train_time, 'roc': (fpr, tpr)}

# Models to compare, as (report label, unfitted estimator) pairs; they are
# evaluated in this order.
model_specs = [
    ('Logistic Regression', LogisticRegression(max_iter=2000, random_state=42)),
    ('Random Forest', RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)),
    ('ANN (MLPClassifier)', MLPClassifier(hidden_layer_sizes=(32,16), max_iter=100, random_state=42)),
]
results = [fit_evaluate(estimator, label) for label, estimator in model_specs]

# Summary table of the scalar metrics; the 'roc' curve points are left out
# because they are only needed for plotting.
summary_rows = []
for entry in results:
    row = dict(entry)
    del row['roc']
    summary_rows.append(row)
summary = pd.DataFrame(summary_rows)
display(summary)

# Overlay the ROC curve of every model that produced one, plus the dashed
# diagonal reference line.
fig, ax = plt.subplots()
for entry in results:
    fpr, tpr = entry['roc']
    if fpr is None or tpr is None:
        # No ROC points were stored for this model; nothing to draw.
        continue
    ax.plot(fpr, tpr, label=entry['Model'])
ax.plot([0,1],[0,1], linestyle='--')
ax.set_title('ROC Curves (Daily Water Intake)')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend()
plt.show()
