In [42]:
import mlflow
import pandas as pd
import numpy as np
import sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [43]:
breast_cancer=load_breast_cancer()
print(breast_cancer.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

:Number of Instances: 569

:Number of Attributes: 30 numeric, predictive attributes and the class

:Attribute Information:
    - radius (mean of distances from center to points on the perimeter)
    - texture (standard deviation of gray-scale values)
    - perimeter
    - area
    - smoothness (local variation in radius lengths)
    - compactness (perimeter^2 / area - 1.0)
    - concavity (severity of concave portions of the contour)
    - concave points (number of concave portions of the contour)
    - symmetry
    - fractal dimension ("coastline approximation" - 1)

    The mean, standard error, and "worst" or largest (mean of the three
    worst/largest values) of these features were computed for each image,
    resulting in 30 features.  For instance, field 0 is Mean Radius, field
    10 is Radius SE, field 20 is Worst Radius.

    - 

In [44]:
df=pd.DataFrame(breast_cancer.data,columns=breast_cancer.feature_names)
df['target']=breast_cancer.target
df.sample(5)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
497,12.47,17.31,80.45,480.1,0.08928,0.0763,0.03609,0.02369,0.1526,0.06046,0.1532,0.781,1.253,11.91,0.003796,0.01371,0.01346,0.007096,0.01536,0.001541,14.06,24.34,92.82,607.3,0.1276,0.2506,0.2028,0.1053,0.3035,0.07661,1
234,9.567,15.91,60.21,279.6,0.08464,0.04087,0.01652,0.01667,0.1551,0.06403,0.2152,0.8301,1.215,12.64,0.01164,0.0104,0.01186,0.009623,0.02383,0.00354,10.51,19.16,65.74,335.9,0.1504,0.09515,0.07161,0.07222,0.2757,0.08178,1
412,9.397,21.68,59.75,268.8,0.07969,0.06053,0.03735,0.005128,0.1274,0.06724,0.1186,1.182,1.174,6.802,0.005515,0.02674,0.03735,0.005128,0.01951,0.004583,9.965,27.99,66.61,301.0,0.1086,0.1887,0.1868,0.02564,0.2376,0.09206,1
328,16.27,20.71,106.9,813.7,0.1169,0.1319,0.1478,0.08488,0.1948,0.06277,0.4375,1.232,3.27,44.41,0.006697,0.02083,0.03248,0.01392,0.01536,0.002789,19.28,30.38,129.8,1121.0,0.159,0.2947,0.3597,0.1583,0.3103,0.082,0
172,15.46,11.89,102.5,736.9,0.1257,0.1555,0.2032,0.1097,0.1966,0.07069,0.4209,0.6583,2.805,44.64,0.005393,0.02321,0.04303,0.0132,0.01792,0.004168,18.79,17.04,125.0,1102.0,0.1531,0.3583,0.583,0.1827,0.3216,0.101,0


In [45]:
X=df.drop('target',axis=1)
y=df['target']

In [46]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [47]:
param_grid = {
    "hidden_layer_sizes": [(50,50), (100,), (100,50), (200,100)],
    "activation": ["relu", "tanh", "logistic"],
    "solver": ["sgd", "adam"],
    "alpha": [0.0001, 0.001, 0.01, 0.05, 0.1]
}

In [48]:
### hyperparmeter tuning

def hyperparameter_tuning(X_train,y_train,params):
    mlp=MLPClassifier()
    clf=GridSearchCV(mlp,params,cv=5)
    clf.fit(X_train,y_train)
    print("Best parameters:",clf.best_params_)
    return clf


In [49]:
from mlflow.models import infer_signature

signature=infer_signature(X_train,y_train)



In [50]:
import mlflow.sklearn
import mlflow
from urllib.parse import urlparse
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Set tracking URI first
uri = "http://localhost:5000"
mlflow.set_tracking_uri(uri)

# Set experiment
mlflow.set_experiment("breast cancer")

## Starting MLflow run
with mlflow.start_run():
    ## Perform hyperparameter tuning
    clf = hyperparameter_tuning(X_train, y_train, params)

    ## Best model
    best_model = clf.best_estimator_

    ## Evaluate the model
    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    ## Log best parameters and metrics
    mlflow.log_param("hidden_layer_sizes", clf.best_params_['hidden_layer_sizes'])
    mlflow.log_param("activation", clf.best_params_['activation'])
    mlflow.log_param("solver", clf.best_params_['solver'])
    mlflow.log_param("alpha", clf.best_params_['alpha'])

    mlflow.log_metric("Accuracy", accuracy)
    mlflow.log_metric("Precision", precision)
    mlflow.log_metric("Recall", recall)
    mlflow.log_metric("F1 Score", f1)

    ## Check tracking store type
    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

    ## Log model
    if tracking_url_type_store != "file":
        mlflow.sklearn.log_model(best_model, "model", registered_model_name="MLP classifier")
    else:
        mlflow.sklearn.log_model(best_model, "model")

print("MLflow tracking URI:", mlflow.get_tracking_uri())


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
ABNORMAL: .

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
ABNORMAL: .

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. OF ITERATIONS REAC

Best parameters: {'activation': 'logistic', 'alpha': 0.05, 'hidden_layer_sizes': (50, 50, 50), 'solver': 'adam'}


Registered model 'MLP classifier' already exists. Creating a new version of this model...
2025/03/09 13:17:37 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: MLP classifier, version 2


🏃 View run resilient-panda-1 at: http://localhost:5000/#/experiments/845812098494296464/runs/b939f996e4de4f4eb628a6a695656d3d
🧪 View experiment at: http://localhost:5000/#/experiments/845812098494296464
MLflow tracking URI: http://localhost:5000


Created version '2' of model 'MLP classifier'.
