# build pipeline

In [5]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.impute import KNNImputer

from sklearn.preprocessing import StandardScaler

import joblib

import pandas as pd

# Feature columns grouped by the strategy used to fill their missing values.
MEDIAN_COLS = ['CRIM', 'ZN', 'NOX', 'DIS', 'PTRATIO', 'B']
MEAN_COLS = ['RM', 'LSTAT']
KNN_COLS = ['INDUS', 'CHAS', 'AGE', 'RAD', 'TAX']

# One imputer per strategy.
imputer_median = SimpleImputer(strategy='median')
imputer_mean = SimpleImputer(strategy='mean')
knn_imputer = KNNImputer()

scaler = StandardScaler()
lr = LinearRegression()

# Route every column group to its imputer. Columns that are not listed in any
# group are not forwarded (ColumnTransformer's default is remainder='drop').
impute_steps = [
    ("imputer_median", imputer_median, MEDIAN_COLS),
    ("imputer_mean", imputer_mean, MEAN_COLS),
    ("knn_imputer", knn_imputer, KNN_COLS),
]
ct = ColumnTransformer(impute_steps)

# Full pipeline: impute -> standardize -> linear regression.
# NOTE(review): the step name "moodel" looks like a typo for "model". It is
# kept unchanged because step names become part of the pipeline's parameter
# names (e.g. "moodel__fit_intercept") and of the pickled artifact.
pipeline_steps = [
    ("fill_na", ct),
    ("scale", scaler),
    ("moodel", lr),
]
pipe = Pipeline(steps=pipeline_steps)


# Load the training split and separate the target column from the features.
df_train = pd.read_csv('./files/train.csv')

target_col = 'MEDV'
y_train = df_train[target_col]
X_train = df_train.drop(columns=target_col)

# fit() returns the pipeline itself, so the training-set R^2 can be chained
# and is shown as the cell's output.
pipe.fit(X_train, y_train).score(X_train, y_train)

0.6379836760633126

In [2]:
# Show the pipeline object's notebook representation.
pipe

In [3]:
# Persist the pipeline inside ./docker, the directory used as the Docker build
# context further down, so it is available when the image is built.
pipeline_path = './docker/pipeline.pkl'
joblib.dump(pipe, pipeline_path)

['./docker/pipeline.pkl']

TP - Métricas - Regresión Lineal

- R2 Train: 0.6379836760633126
- RMSE Train: 5.810058588033976

# load pipeline

In [11]:
import joblib

# Reload the serialized pipeline from disk.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
pipeline = joblib.load('./docker/pipeline.pkl')

In [12]:
# Show the reloaded pipeline's notebook representation.
pipeline

In [13]:
import pandas as pd

# Read the test split and preview its first row to check the available columns.
df = pd.read_csv('./files/test.csv')
df.iloc[:1]

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.35809,0.0,6.2,1.0,0.507,6.951,88.5,2.8617,8.0,307.0,17.4,391.7,9.71,26.7


In [14]:
# Separate the target from the features of the test split.
y = df['MEDV']
X = df.drop(columns=['MEDV'])

# R^2 of the reloaded pipeline on the test split.
pipeline.score(X, y)

0.5456894968938473

# docker

In [3]:
# Build the image from the ./docker directory and tag it inference-python-test.
!docker build -t inference-python-test ./docker

#0 building with "desktop-linux" instance using docker driver

#1 [internal] load build definition from Dockerfile
#1 transferring dockerfile: 199B 0.0s done
#1 DONE 0.0s

#2 [internal] load metadata for docker.io/library/python:3.12-slim
#2 DONE 0.7s

#3 [internal] load .dockerignore
#3 transferring context: 2B done
#3 DONE 0.0s

#4 [internal] load build context
#4 transferring context: 943B done
#4 DONE 0.0s

#5 [1/5] FROM docker.io/library/python:3.12-slim@sha256:fd95fa221297a88e1cf49c55ec1828edd7c5a428187e67b5d1805692d11588db
#5 resolve docker.io/library/python:3.12-slim@sha256:fd95fa221297a88e1cf49c55ec1828edd7c5a428187e67b5d1805692d11588db 0.0s done
#5 DONE 0.0s

#6 [3/5] COPY requirements.txt ./
#6 CACHED

#7 [2/5] WORKDIR /app
#7 CACHED

#8 [4/5] RUN pip install --no-cache-dir -r requirements.txt
#8 CACHED

#9 [5/5] COPY . .
#9 DONE 0.1s

#10 exporting to image
#10 exporting layers 0.1s done
#10 exporting manifest sha256:9f07ad77a09f81fa8246752f36d2701827c8cd8d47f5b311ae185c3c2

In [9]:
# run container
!docker run -it --rm --name inference-python-test -v ./files:/files  inference-python-test

the input device is not a TTY.  If you are using mintty, try prefixing the command with 'winpty'


In [None]:
# On Windows the volume usually has to be mounted with an absolute host path.
# The line below is a terminal command, not Python, so it is kept commented out
# to stop "Run All" from failing with a SyntaxError. Replace the host path with
# the location of your own checkout and run it from a terminal (or prefix it
# with `!` and drop `-it` to run it from the notebook):
# docker run -it --rm --name inference-python-test -v "C:\AA1\Unidad6_MLOps\mlops-docker-example\files:/files" inference-python-test


# check docker output

In [15]:
# Read the predictions file from ./files and preview its first row.
df_output = pd.read_csv('./files/output.csv')
df_output.iloc[:1]

Unnamed: 0,MEDV_predicted
0,34.561835


In [16]:
from sklearn.metrics import r2_score, root_mean_squared_error

# Compare the predictions read from output.csv with the test targets `y`
# defined in the scoring cell above.
# NOTE(review): assumes output.csv rows are in the same order as test.csv -- confirm.
y_pred = df_output['MEDV_predicted']

r2_test = r2_score(y, y_pred)
rmse_test = root_mean_squared_error(y, y_pred)

# (R^2, RMSE), both rounded to three decimals.
(round(r2_test, 3), round(rmse_test, 3))

(0.545, 5.805)

TP - Métricas - Regresión Lineal

- R2 Test: 0.545
- RMSE Test: 5.805