In [1]:
import warnings

import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Render DataFrame cells in full instead of truncating long column values.
pd.options.display.max_colwidth = None

In [2]:
warnings.filterwarnings("ignore")

In [3]:
# Address of the MLflow tracking server that all runs below are sent to.
MLFLOW_TRACKING_URI = 'http://mlflow_server:5000'
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [4]:
# Select the experiment that subsequent runs are recorded under.
# The call is kept as the last expression so the Experiment repr is displayed.
experiment_name = "experiment_0703"
mlflow.set_experiment(experiment_name=experiment_name)

2024/03/07 02:28:55 INFO mlflow.tracking.fluent: Experiment with name 'experiment_0703' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://mlflow/3', creation_time=1709778535744, experiment_id='3', last_update_time=1709778535744, lifecycle_stage='active', name='experiment_0703', tags={}>

In [5]:
# Grid-search a scaled logistic-regression classifier on the iris data and
# record the search space, the winning configuration, its cross-validation
# score and the refit pipeline in a single MLflow run.
SEED = 42  # fixes the stochastic 'saga' solver so results are repeatable

iris = load_iris()
X, y = iris.data, iris.target

# Create a pipeline with scaler and logistic regression.
# 'saga' shuffles the data internally; without random_state the selected
# hyper-parameters and best score can change from one run to the next.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=1000, solver='saga', tol=0.1, random_state=SEED),
)

# Create a parameter grid (keys follow make_pipeline's '<step>__<param>' naming)
param_grid = {
    'logisticregression__C': [0.1, 1, 10, 100],
    'logisticregression__penalty': ['l1', 'l2']
}

# Start an MLflow run
with mlflow.start_run():
    # Log the searched grid: one param per hyper-parameter, holding the
    # list of candidate values.
    for key, value in param_grid.items():
        mlflow.log_param(key, value)

    # Create GridSearchCV object
    grid_search = GridSearchCV(pipe, param_grid, cv=5)

    # Fit the model
    grid_search.fit(X, y)

    # Log the winning configuration. The grid keys above are already taken
    # within this run, so the selected values go under a 'best_' prefix.
    mlflow.log_params(
        {f"best_{name}": value for name, value in grid_search.best_params_.items()}
    )

    # Log metrics
    mlflow.log_metric("best_score", grid_search.best_score_)

    # Persist the refit best pipeline as a run artifact so it can be loaded
    # later with mlflow.sklearn.load_model.
    mlflow.sklearn.log_model(grid_search.best_estimator_, "model")

# Print best parameters and best score
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

Best Parameters: {'logisticregression__C': 10, 'logisticregression__penalty': 'l1'}
Best Score: 0.9666666666666668


In [6]:
# Artifact-store URI of the model to load.
# NOTE(review): this path sits under 's3://mlflow/1/', whereas the experiment
# selected earlier in this notebook reports artifact_location='s3://mlflow/3'
# -- presumably a model logged by a run in another experiment; confirm.
model_uri = "s3://mlflow/1/a60ad6afeb5842dcb48a7e13951b3680/artifacts/model"

In [7]:
# Load the scikit-learn flavoured model stored at model_uri.
loaded_model = mlflow.sklearn.load_model(model_uri=model_uri)

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [8]:
# Predict on the test data.
# NOTE(review): X_test is not assigned in any cell visible in this notebook --
# presumably left over from a removed cell; verify this survives a fresh-kernel
# Restart & Run All.
test_predictions = loaded_model.predict(X_test)

In [9]:
# Mean squared error of the loaded model's predictions against y_test.
mse = mean_squared_error(y_true=y_test, y_pred=test_predictions)
print("Mean Squared Error:", mse)

Mean Squared Error: 1428.8457919914154


In [10]:
# Print the predictions produced by the loaded model.
# NOTE(review): this prints the whole object; consider showing only a slice
# (e.g. the first few values) to keep the notebook output short.
print(test_predictions)

[ 97.00699632 110.4119345   94.17331421 172.77737944 234.29266845
 186.49536149 123.09543623 174.36436919 149.64855082 115.89607371
 247.20749627 193.16471559  86.72628131 185.7945213  158.50290963
 172.28881819  86.40263616 127.52234933 203.24151615  90.77657426
 123.28985689 198.72188837  96.73419144 113.97320873 150.6225939
 144.63759964 106.50364981  90.620809   147.57741153 201.18812088
 239.45462339 149.64750734 244.33277579  91.90940522 114.85544118
 147.74624541  74.04772248 203.71872202 186.40164801  86.18860621
 221.03454586 108.9273607  264.33408548 229.93777838 131.00229002
  87.07791928 203.65578512  81.83060373 152.90609132 237.52312131
 192.5976787   86.659937    95.01135088 157.71624739 170.58348886
 195.92481051 164.30640852 122.68631944 158.45855199  94.39694853
  87.75841596  92.94701831 105.47277793 138.63009959 179.04756904
 243.88252513 148.4077205  169.67312392 203.08706777 215.64540266
 124.88985063 195.3315935  175.30013228 153.0475138   83.84184017
 156.120910