In [None]:
# Databricks notebook: 02_model_training

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn

# ---------------------------------------------------------------------------
# 1. Data: read the CSV from DBFS and separate the label from the features.
#    Every column other than "target" is passed to the model as a feature.
# ---------------------------------------------------------------------------
df = pd.read_csv("/dbfs/tmp/hcp_data.csv")
X = df.drop(columns=["target"])
y = df["target"]

# ---------------------------------------------------------------------------
# 2. Hold-out split: only the seed is pinned (for reproducibility); the
#    train/test proportions are left at train_test_split's defaults.
# ---------------------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

# ---------------------------------------------------------------------------
# 3. Fit a logistic regression with default hyperparameters and score it on
#    the held-out rows. fit() returns the fitted estimator, so the
#    construction and the fit can be chained into one expression.
# ---------------------------------------------------------------------------
model = LogisticRegression().fit(X_train, y_train)
test_predictions = model.predict(X_test)
acc = accuracy_score(y_test, test_predictions)

# ---------------------------------------------------------------------------
# 4. Experiment tracking: point MLflow at the tracking server, select the
#    experiment, then record the model type, the accuracy and the fitted
#    model inside a single run.
# ---------------------------------------------------------------------------
mlflow.set_tracking_uri("http://localhost:5000")  # Or your actual URI
mlflow.set_experiment("hcp-propensity-experiment")

with mlflow.start_run():
    mlflow.log_param(key="model", value="LogisticRegression")
    mlflow.log_metric(key="accuracy", value=acc)
    mlflow.sklearn.log_model(model, "hcp-propensity-model")

# Report the held-out accuracy once logging has completed.
print(f"Model trained and logged with accuracy: {acc}")