## Obtener, impiar y transformarlos los datos

In [3]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

# CARGAR LOS DATOS
def load_housing_data():
    csv_path = os.path.join("datasets", "housing", "housing.csv")
    return pd.read_csv(csv_path)

housing = load_housing_data()

# APLICAR DIVISION ESTRATIFICADA
housing["income_cat"] = pd.cut(
    housing["median_income"],
    bins=[0., 1.5, 3.0, 4.5, 6., np.inf],
    labels=[1, 2, 3, 4, 5])

split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, test_index in split.split(housing, housing["income_cat"]):
    strat_train_set = housing.loc[train_index]
    strat_test_set = housing.loc[test_index]

for set_ in (strat_train_set, strat_test_set):
    set_.drop("income_cat", axis=1, inplace=True)

housing = strat_train_set.drop("median_house_value", axis=1)


# APLICAR TRANSFORMADORES
rooms_ix, bedrooms_ix, population_ix, households_ix = 3, 4, 5, 6

class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    def __init__(self, add_bedrooms_per_room = True): # no *args or **kargs
        self.add_bedrooms_per_room = add_bedrooms_per_room
    def fit(self, x, y=None):
        return self # nothing else to do
    def transform(self, x):
        rooms_per_household = x[:, rooms_ix] / x[:, households_ix]
        population_per_household = x[:, population_ix] / x[:, households_ix]
        if self.add_bedrooms_per_room:
            bedrooms_per_room = x[:, bedrooms_ix] / x[:, rooms_ix]
            return np.c_[x, rooms_per_household, population_per_household, bedrooms_per_room]
        else:
            return np.c_[x, rooms_per_household, population_per_household]
        
housing_num = housing.drop("ocean_proximity", axis=1)
housing_labels = strat_train_set["median_house_value"].copy()

num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="most_frequent")),
    ('attribs_adder', CombinedAttributesAdder()),
    ('std_scaler', StandardScaler()),
])

num_attribs = list(housing_num)
cat_attribs = ["ocean_proximity"]

full_pipeline = ColumnTransformer([
    ("num", num_pipeline, num_attribs),
    ("cat", OneHotEncoder(), cat_attribs),
])
housing_prepared = full_pipeline.fit_transform(housing)

## Definición del manipulador de Azure

In [4]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# authenticate
credential = DefaultAzureCredential()

# Get a handle to the workspace
ml_client = MLClient(
    credential=credential,
    subscription_id="182f5247-df87-4239-81cb-65bef9f46fd1",
    resource_group_name="yoniervasquezmarin-rg",
    workspace_name="prueba-maching-learning",
)

## Entrenar el modelo usando `LinealRegressor`

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
from azure.ai.ml import command
from azure.ai.ml import Input

registered_model_name = "housing_prices_model"

job_train_lineal_regressor = command(
    inputs=dict(
        data=housing_prepared,
        registered_model_name=registered_model_name,
    ),
    code="./models/",  # location of source code
    command="python lineal-regression.py --data ${{inputs.data}} --registered_model_name ${{inputs.registered_model_name}}",
    # environment="aml-scikit-learn@latest",
    display_name="housing_prices_prediction",
)

ValidationException: Unsupported input type: <class 'numpy.ndarray'>, only Input, dict, str, bool, int and float are supported.

In [None]:
ml_client.create_or_update(job_train_lineal_regressor)