In [1]:
import mlflow
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
import logging


logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

In [2]:
mlflow.set_tracking_uri("sqlite:///backend.db")
mlflow.set_experiment("Experimento_3")

with mlflow.start_run(run_name = "example_1"):

    X,y = load_iris(return_X_y= True)
    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    lr = LogisticRegression(**params).fit(X, y)
    y_pred = lr.predict(X)
    mlflow.log_metric("accuracy", accuracy_score(y, y_pred))
    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

2024/04/29 12:01:38 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2024/04/29 12:01:38 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

default artifacts URI: 'file:///d:/Repositorio/Proyecto-titanic/Notebooks/mlruns/1/2f15b8f4519c43e68d539a0cb5786d26/artifacts'


### Porvaremos las funciones que encontramos el Mlops

In [30]:
import pandas as pd
import numpy as np
import os
import json
import mlflow

In [4]:
def read_csv(file_name:str, path = "D:/Repositorio/Proyecto-titanic/Data") -> pd.DataFrame:
    return pd.read_csv(os.path.join(path, file_name))

In [8]:
data = read_csv("titanic.csv")
data.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.25
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.925
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.05


In [23]:
def data_transform(df: pd.DataFrame):
    """This function transform the data into X and y
    Args:
      df (pd.DataFrame): dataframe with the data
    Returns:
      X (pd.Series): series with the text
      y (pd.Series): series with the labels"""
    X = df[["Pclass", "Age", "Siblings/Spouses Aboard", "Parents/Children Aboard", "Sex", "Fare"]]
    y = df["Survived"]

    return X, y

In [25]:
X, y = data_transform(data)
X.head(2)

Unnamed: 0,Pclass,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Sex,Fare
0,3,22.0,1,0,male,7.25
1,1,38.0,1,0,female,71.2833


In [14]:
def one_hot_encoder(data, columns):
    data_encoder = pd.get_dummies(data, columns=columns, drop_first=True)
    return data_encoder.astype(int)

In [26]:
one_hot_encoder(X, ["Sex"])

Unnamed: 0,Pclass,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare,Sex_male
0,3,22,1,0,7,1
1,1,38,1,0,71,0
2,3,26,0,0,7,0
3,1,35,1,0,53,0
4,3,35,0,0,8,1
...,...,...,...,...,...,...
882,2,27,0,0,13,1
883,1,19,0,0,30,0
884,3,7,1,2,23,0
885,1,26,0,0,30,1


In [31]:
def read_idx2label(json_path: str) -> pd.Series:
    """This function read the json file and return a dictionary
    Args:
      json_path (str): path to the json file
     Returns:
      idx2label (dict): dictionary with the mapping"""
    with open(json_path) as f:
        idx2label = json.load(f)
    return idx2label

In [32]:
df_join = read_idx2label("D:/Repositorio/Proyecto-titanic/Notebooks/topic_mapping_1.json")

In [34]:
df_join

{'0': 'Bank Account Services',
 '1': 'Credit Report or Prepaid Card',
 '2': 'Mortgage/Loan'}