In [1]:
  !pip install mlflow flask 



In [1]:
import mlflow
import mlflow.sklearn
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from mlflow.models.signature import infer_signature


In [2]:
heart = 'heart_cleaned.csv'
print(heart)

heart_cleaned.csv


In [3]:
heart = pd.read_csv(heart)
heart.head(1)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,0.479167,1,0,0.407895,0.367521,0,1,0.701754,0,0.227273,2,2,3,0


<h1>Prepare Data:</h1>

In [4]:
# Converting to float to skip integer warning
heart['sex'] = heart['sex'].astype(float)
heart['cp'] = heart['cp'].astype(float)
heart['fbs'] = heart['fbs'].astype(float)
heart['restecg'] = heart['restecg'].astype(float)
heart['exang'] = heart['exang'].astype(float)
heart['slope'] = heart['slope'].astype(float)
heart['ca'] = heart['ca'].astype(float)
heart['thal'] = heart['thal'].astype(float)


# Define features and target
X = heart.drop('target', axis=1)  # Features
y = heart['target']                # Target variable

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

<h1>MLflow Start:</h1>

In [6]:
# Set the MLflow tracking URI (you can specify your desired path or server here)
#mlflow.set_tracking_uri(r"mlruns")  # Change to your desired location

# Create an input example
input_example = pd.DataFrame({
    'age': [60],
    'sex': [1],  # assuming binary encoding: 1 for male, 0 for female
    'cp': [2],   # example chest pain type
    'trestbps': [145],
    'chol': [230],
    'fbs': [0],  # fasting blood sugar
    'restecg': [0],  # resting ECG results
    'thalach': [150],
    'exang': [0],  # exercise induced angina
    'oldpeak': [2.3],
    'slope': [2],  # slope of ST segment
    'ca': [0],  # number of major vessels
    'thal': [1]  # thalassemia
})

# Assume X_train, y_train, X_test, and y_test are defined and processed
# Train the model (example with Random Forest)
model = RandomForestClassifier(n_estimators=100)
model.fit(X_train, y_train)

# Infer the signature from the input example
signature = infer_signature(X_train, model.predict(X_train))

# Start MLflow tracking
run = mlflow.start_run()

# Log model parameters
mlflow.log_param("model_type", "RandomForest")
mlflow.log_param("n_estimators", 100)

# Log metrics
accuracy = accuracy_score(y_test, model.predict(X_test))
mlflow.log_metric("accuracy", accuracy)

# Log the model to a specific location
mlflow.sklearn.log_model(
   sk_model=model,  # explicitly specify the model as sk_model
    artifact_path="models/heart_disease_model",  # Specify your artifact path here
    signature=signature,
    input_example=input_example
)

model_uri = f"runs:/{run.info.run_id}/models/heart_disease_model"  # Use the run ID to construct the model URI
mlflow.register_model(model_uri, "heart_disease_model")
# End MLflow run
mlflow.end_run()
print(model_uri)


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

runs:/1cf1c52d26d945d184954c5a6b8923b0/models/heart_disease_model


Successfully registered model 'heart_disease_model'.
Created version '1' of model 'heart_disease_model'.
