In [None]:
import datetime
import json
import os
import pickle

import mlflow
from joblib import dump
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [None]:
# ----------------------------
# Load dataset
# ----------------------------
# Features and target are stored in two separate pickle files; both paths
# are relative to the notebook's working directory.
data_file = "data/data.pickle"
target_file = "data/target.pickle"

# NOTE(review): pickle.load can execute arbitrary code while deserializing,
# so these files should only ever come from a trusted pipeline.
with open(data_file, "rb") as features_fh:
    X = pickle.load(features_fh)

with open(target_file, "rb") as target_fh:
    y = pickle.load(target_fh)

In [None]:
# ----------------------------
# Train/test split
# ----------------------------
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the partition identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ----------------------------
# Train Linear Regression model
# ----------------------------
# fit() returns the estimator itself, so construction and training can be
# chained into a single expression.
model = LinearRegression().fit(X_train, y_train)


In [None]:
# ----------------------------
# Predict and evaluate
# ----------------------------
# Score the fitted model on the held-out split only.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report both metrics, one per line, rounded to four decimal places.
print(f"R2 Score: {r2:.4f}", f"MSE: {mse:.4f}", sep="\n")


In [None]:
# ----------------------------
# Save model
# ----------------------------
# A single timestamp (second resolution) ties together the model file, the
# metrics file and the MLflow experiment produced by this execution.
os.makedirs("models", exist_ok=True)
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
model_filename = f"models/model_{timestamp}_lr_model.joblib"
dump(model, model_filename)
print(f"Model saved as {model_filename}")

# ----------------------------
# Save metrics
# ----------------------------
os.makedirs("metrics", exist_ok=True)
metrics_filename = f"metrics/{timestamp}_metrics.json"
metrics = {"R2": r2, "MSE": mse}

# Explicit encoding so the file is written the same way on every platform.
with open(metrics_filename, "w", encoding="utf-8") as f:
    json.dump(metrics, f, indent=4)

print(f"Metrics saved as {metrics_filename}")

# ----------------------------
# Log experiment with MLflow
# ----------------------------
# Tracking data is written to a local ./mlruns directory. Because the
# experiment name embeds the timestamp, every execution of this cell
# creates a brand-new experiment containing a single run.
mlflow.set_tracking_uri("./mlruns")
experiment_name = f"Sleep_Prediction_{timestamp}"
experiment_id = mlflow.create_experiment(experiment_name)

# The run deliberately shares the experiment's name.
with mlflow.start_run(experiment_id=experiment_id, run_name=experiment_name):
    # NOTE(review): dataset_name is a hard-coded label, while the data
    # itself is read from pickle files -- confirm it names the right source.
    mlflow.log_params({
        "dataset_name": "gaming_mental_health.csv",
        "n_samples": X.shape[0],
        "n_features": X.shape[1]
    })
    mlflow.log_metrics(metrics)