# 1. Импорт библиотек

In [None]:
import warnings

import mlflow

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.impute import SimpleImputer

# Suppress every warning for the rest of the session to keep cell output short.
warnings.filterwarnings("ignore")
# Seed NumPy's global random generator with a fixed value so that repeated
# executions of the notebook start from the same random state.
np.random.seed(42)

# 2. Объявление функции eval_metrics (расчет метрик модели)

In [None]:
def eval_metrics(actual, pred):
    """Score regression predictions against the true values.

    Args:
        actual: Ground-truth target values.
        pred: Values predicted by the model, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and the R^2 coefficient of determination.
    """
    squared_error = mean_squared_error(actual, pred)
    return (
        np.sqrt(squared_error),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )

# 3. Объявление функции prepare_train (подготовка датасета)

In [None]:
def prepare_train(dataset: pd.DataFrame) -> pd.DataFrame:
    """Turn the raw Titanic table into a purely numeric feature frame.

    Steps:
      * missing ``Age`` values are replaced by the column mean and the
        column is moved to the end of the frame;
      * missing ``Embarked`` values are replaced by the most frequent port;
      * ``Sex`` and ``Embarked`` are one-hot encoded;
      * ``Cabin``, ``Name``, ``PassengerId``, ``Ticket`` and the raw
        ``Sex`` / ``Embarked`` columns are removed.

    Args:
        dataset: Raw data containing at least the columns ``Age``,
            ``Embarked``, ``Sex``, ``Cabin``, ``Name``, ``PassengerId``
            and ``Ticket``. It is not modified.

    Returns:
        A new DataFrame with the same index as ``dataset``. Column order is:
        remaining input columns in their original order, then ``Age``, then
        the ``Sex_*`` and ``Embarked_*`` indicator columns.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Work on a copy so the caller's frame keeps all of its columns.
    data = dataset.copy()

    # Mean-impute Age on the original index (so rows stay aligned for any
    # index, not only 0..n-1) and re-append it as the last column.
    age = data.pop('Age').astype(float)
    data['Age'] = age.fillna(age.mean())

    # mode() returns a Series; fillna() with a Series aligns on the index and
    # would fill at most the row labelled 0, so pass the scalar value instead.
    embarked_mode = data['Embarked'].mode()
    if not embarked_mode.empty:
        data['Embarked'] = data['Embarked'].fillna(embarked_mode.iloc[0])

    indicators = pd.get_dummies(data[['Sex', 'Embarked']])

    data = data.drop(
        columns=['Cabin', 'Embarked', 'Name', 'PassengerId', 'Sex', 'Ticket']
    )
    return pd.concat([data, indicators], axis=1)

# 4. Объявление функции эксперимента

In [None]:
def start_experiment(
    dataset: pd.DataFrame,
    alpha=0.5,
    l1_ratio=0.5,
    *,
    tracking_uri='http://127.0.0.1:5000',
    experiment_name="itnt_titanic_1405",
    model_path="../models/itnt_model_titanic",
):
    """Train an ElasticNet on the Titanic data and record the run in MLflow.

    The data is preprocessed with ``prepare_train``, split into train and
    test parts, and an ``ElasticNet`` is fitted to predict ``Survived``.
    Hyperparameters, RMSE / MAE / R2 on the test part and the fitted model
    are logged to a new MLflow run; the model is additionally saved to
    ``model_path`` for serving.

    Args:
        dataset: Raw Titanic data as accepted by ``prepare_train``.
        alpha: ``alpha`` passed to ``ElasticNet``.
        l1_ratio: ``l1_ratio`` passed to ``ElasticNet``.
        tracking_uri: MLflow tracking server URI. Pass ``None`` to keep
            MLflow's own configuration (for example the
            ``MLFLOW_TRACKING_URI`` environment variable).
        experiment_name: MLflow experiment the run is recorded under.
        model_path: Directory the model is saved to for serving. An existing
            directory at this path is deleted first, so it always holds the
            model of the most recent call.

    Returns:
        None. Metrics are printed and logged to MLflow.
    """
    import os
    import shutil

    dataset = prepare_train(dataset)

    # No test_size is given, so the split uses scikit-learn's default
    # proportions (0.75 train / 0.25 test).
    train, test = train_test_split(dataset)

    train_x = train.drop(["Survived"], axis=1)
    test_x = test.drop(["Survived"], axis=1)
    train_y = train[["Survived"]]
    test_y = test[["Survived"]]

    # Run tracking: parameters, metrics and the model go to the MLflow server.
    if tracking_uri is not None:
        mlflow.set_tracking_uri(uri=tracking_uri)

    mlflow.set_experiment(experiment_name)
    experiment = mlflow.get_experiment_by_name(experiment_name)

    client = mlflow.tracking.MlflowClient()
    run = client.create_run(experiment.experiment_id)

    # The context manager ends the run on exit, so no explicit end_run()
    # call is needed inside the block.
    with mlflow.start_run(run_id=run.info.run_id, nested=True):
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        predictions = lr.predict(test_x)

        rmse, mae, r2 = eval_metrics(test_y, predictions)

        print(f"Elasticnet model (alpha={alpha:f}, l1_ratio={l1_ratio:f}):")
        print(f"  RMSE: {rmse}")
        print(f"  MAE: {mae}")
        print(f"  R2: {r2}")

        # Log the model hyperparameters.
        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)

        # Log the evaluation metrics.
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        # Log the model itself as an MLflow artifact of this run.
        mlflow.sklearn.log_model(lr, "model")

        # Export the model for serving. save_model() refuses to write into
        # an existing directory, so clear the output of a previous call
        # first; otherwise every call after the first one would fail.
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        mlflow.sklearn.save_model(lr, model_path)

# 5. Запуск эксперимента

In [None]:
# Single run with both ElasticNet hyperparameters set to 0.5.
alpha = l1_ratio = 0.5

# Comma is the default field separator of read_csv.
dataset = pd.read_csv("../data/raw/titanic.csv")

start_experiment(dataset, alpha=alpha, l1_ratio=l1_ratio)

# 6. Запуск серии экспериментов

In [None]:
# Starting point of the hyperparameter sweep.
alpha = 0.5
l1_ratio = 0.5

# Read the raw data once; each run receives its own copy so that in-place
# preprocessing of one run cannot affect the next one.
raw_dataset = pd.read_csv("../data/raw/titanic.csv", sep=",")

# Five runs: alpha goes 0.5 -> 0.1 while l1_ratio goes 0.5 -> 0.9.
for i in range(5):
    # round() strips binary floating-point noise (0.5 - 0.1 * 3 evaluates to
    # 0.19999999999999996), so the logged parameters read 0.2, 0.1, ...
    alpha_edit = round(alpha - (0.1 * i), 1)
    l1_ratio_edit = round(l1_ratio + (0.1 * i), 1)

    dataset = raw_dataset.copy()

    start_experiment(dataset, alpha_edit, l1_ratio_edit)

# 7. Встроенный Serving

В папке models в консоли выполняем запуск модели

In [None]:
# Serve the model stored in the itnt_model_titanic directory over HTTP,
# listening on all interfaces (0.0.0.0) at port 5556 and using the current
# Python environment (--env-manager local).
mlflow models serve -h 0.0.0.0 -m itnt_model_titanic --env-manager local --port 5556

Выполняем запрос к модели через PowerShell

In [None]:
# POST one passenger record as JSON ("dataframe_records" format) to the
# /invocations endpoint of the model server on port 5556.
curl -Method POST -ContentType 'application/json' -Body '{"dataframe_records": [{"Pclass": 2, "SibSp": 0, "Parch": 0, "Fare": 10.5000, "Age": 16.000000, "Sex_female": 1, "Sex_male" : 0, "Embarked_C": 1, "Embarked_Q": 0, "Embarked_S": 0}]}' -Uri http://127.0.0.1:5556/invocations