# Despliegue de modelos.

In [None]:
import numpy as np
import pandas as pd
import os

import sagemaker

# Session first: the region and the default bucket are both read from it.
sess = sagemaker.Session()
region, bucket = sess.boto_region_name, sess.default_bucket()

# Execution role that is later passed to the estimator.
role = sagemaker.get_execution_role()

# Key prefix used for everything this notebook stores in the bucket.
prefix = 'module_5/part_1'

### Preparación de los datos

- Usaremos la base de datos de bajas de clientes usada anteriormente.

In [None]:
# Where the dataset lives in S3 and the local folder it is copied into.
source_file = 's3://sagemaker-sample-files/datasets/tabular/synthetic/churn.txt'
local_prefix = 'data/churn_data'

# Create the target folder up front; exist_ok keeps the cell re-runnable.
os.makedirs(local_prefix, exist_ok=True)
sagemaker.s3.S3Downloader.download(s3_uri=source_file, local_path=local_prefix)

In [None]:
# Load the downloaded file and keep the row position as an explicit id column.
churn_path = f'./{local_prefix}/churn.txt'
df = pd.read_csv(churn_path).assign(CustomerID=lambda frame: frame.index)
df

In [None]:
# Show the column labels of the loaded frame.
df.columns

In [None]:
# Recode the two yes/no plan columns as 1/0.
plan_columns = ["Int'l Plan", "VMail Plan"]
df[plan_columns] = df[plan_columns].replace({'yes': 1, 'no': 0})

In [None]:
# Recode the label strings 'True.' / 'False.' as 1 / 0.
label_codes = {'True.': 1, 'False.': 0}
df['Churn?'] = df['Churn?'].replace(label_codes)

In [None]:
# The label goes first, followed by the feature columns that are kept.
label_column = 'Churn?'
feature_columns = [
    'Account Length',
    "Int'l Plan",
    'VMail Plan',
    'VMail Message',
    'Day Mins',
    'Day Calls',
    'Day Charge',
    'Eve Mins',
    'Eve Calls',
    'Eve Charge',
    'Night Mins',
    'Night Calls',
    'Night Charge',
    'Intl Mins',
    'Intl Calls',
    'Intl Charge',
    'CustServ Calls',
]
columns = [label_column, *feature_columns]

# Index the rows by customer id, then keep only the selected columns.
df.index = df['CustomerID']
df_processed = df.loc[:, columns]

In [None]:
# Preview the first rows of the selected columns.
df_processed.head()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 10% of the rows; the fixed seed makes the shuffled split repeatable.
split_options = {'test_size': 0.1, 'random_state': 42, 'shuffle': True}
df_train, df_test = train_test_split(df_processed, **split_options)

In [None]:
# Display the training partition.
df_train

In [None]:
# Write both partitions as bare CSV: no header row and no index column.
for frame, filename in ((df_train, 'train.csv'), (df_test, 'test.csv')):
    frame.to_csv(filename, index=False, header=False)

In [None]:
# Upload train.csv under '<prefix>/data' in the bucket; the call's return value is the cell output.
sess.upload_data(path='train.csv', bucket=bucket, key_prefix=f'{prefix}/data')

In [None]:
# Upload test.csv under '<prefix>/data' in the bucket; the call's return value is the cell output.
sess.upload_data(path='test.csv', bucket=bucket, key_prefix=f'{prefix}/data')

### Entrenamiento del modelo de clasificación

In [None]:
# Look up the XGBoost 1.3-1 container image URI for the session's region.
image = sagemaker.image_uris.retrieve(
    framework='xgboost',
    region=region,
    version='1.3-1',
)
image

In [None]:
# Both CSV files were uploaded under the same '<prefix>/data' key.
data_root_s3 = f's3://{bucket}/{prefix}/data'
train_data_s3 = f'{data_root_s3}/train.csv'
val_data_s3 = f'{data_root_s3}/test.csv'

train_input = sagemaker.inputs.TrainingInput(s3_data=train_data_s3, content_type='csv')
val_input = sagemaker.inputs.TrainingInput(s3_data=val_data_s3, content_type='csv')

# Channel name -> input; note that the validation channel reads test.csv.
data_channels = dict(train=train_input, validation=val_input)


In [None]:
# S3 location passed to the estimator as its output path.
# NOTE(review): local_prefix ('data/churn_data') becomes part of the S3 key here — confirm this is intended.
s3_output = f's3://{bucket}/{prefix}/{local_prefix}/training'

In [None]:
# Generic estimator around the retrieved container image.
estimator = sagemaker.estimator.Estimator(
    image_uri=image,
    role=role,
    instance_type='ml.m5.xlarge',
    instance_count=1,
    output_path=s3_output,
    sagemaker_session=sess,
    enable_sagemaker_metrics=True,
)

# Binary classifier trained for 20 boosting rounds.
hyperparameters = {'objective': 'binary:logistic', 'num_round': 20}
estimator.set_hyperparameters(**hyperparameters)

In [None]:
# Training job names must be unique within an account and region, so a fixed
# name makes this cell fail the second time it runs. name_from_base appends a
# timestamp to the base name.
jobname = sagemaker.utils.name_from_base('xgboost-model-deploy')

# Train on the 'train' / 'validation' channels defined above.
estimator.fit(
    inputs=data_channels,
    job_name=jobname,
)

## Inferencia
- Después de crear y entrenar los modelos, se pueden obtener predicciones de una de las dos formas siguientes:
    - Obtener predicciones para un conjunto de datos completo, con SageMaker Batch Transform.
    - Configurar un punto de enlace persistente para obtener predicciones de sus modelos, serverless o con máquina asociada.

### Batch Transform


<center>
<img src="./imgs/batch trasform.png"  alt="drawing" width="800"/>
</center>

- Necesitamos generar un csv solo con los datos de test sin la etiqueta.


In [None]:
# Same rows as the test partition minus the label ('Churn?' is its first column).
df_test.drop(columns='Churn?').to_csv('test_no_label.csv', header=False, index=False)

In [None]:
# Upload the unlabeled file and keep the value returned by upload_data;
# it is the input handed to the batch transform job.
test_data_s3 = sess.upload_data(path='test_no_label.csv', bucket=bucket, key_prefix=f'{prefix}/data')
test_data_s3

In [None]:
# Transform job names must be unique within an account and region; the
# timestamp suffix added by name_from_base keeps this cell re-runnable.
jobname = sagemaker.utils.name_from_base('churn-tf-bt')

# Results are written under a key named after the job.
s3_output_location = f's3://{bucket}/{prefix}/{jobname}'

transformer = estimator.transformer(
    instance_count=1,
    instance_type='ml.c5.xlarge',
    max_payload=2,  # MB
    # The output file is parsed with pandas.read_csv afterwards, so ask for CSV.
    accept='text/csv',
    output_path=s3_output_location,
    assemble_with='Line',
)

# One CSV record per line in, one prediction per line out.
transformer.transform(
    test_data_s3,
    content_type='text/csv',
    split_type='Line',
    job_name=jobname,
)

In [None]:
# Output path the transformer was configured with.
output = transformer.output_path
output

In [None]:
# Local folder for the batch transform results.
output_prefix = 'data/test_output'
# exist_ok=True so re-running the cell does not raise FileExistsError.
os.makedirs(output_prefix, exist_ok=True)

In [None]:
# Download everything stored under '<prefix>/<jobname>' in the bucket into the local output folder.
sess.download_data(path=output_prefix, bucket=bucket, key_prefix=f"{prefix}/{jobname}")

In [None]:
# Build the path from output_prefix so the read location always matches the
# download location. The file has no header row.
y_pred_prob = pd.read_csv(f'{output_prefix}/test_no_label.csv.out', header=None)
y_pred_prob

- Obtengamos algunas métricas del conjunto de test.

In [None]:
# Keep only the first column of the predictions frame, as a NumPy array.
first_column = y_pred_prob.iloc[:, 0]
y_pred_prob = first_column.to_numpy()

In [None]:
# True labels of the test partition as a NumPy array.
y_test = df_test['Churn?'].to_numpy()

In [None]:
# Class 1 when the predicted probability is strictly above 0.5, otherwise 0.
y_pred = (y_pred_prob > 0.5).astype(int)

In [None]:
# Sanity check: print the shapes of the three arrays side by side.
print(*(array.shape for array in (y_test, y_pred, y_pred_prob)))

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

In [None]:
# Text report comparing the true labels with the thresholded predictions.
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# ROC points computed from the predicted probabilities, and the area under them.
test_fpr, test_tpr, te_thresholds = roc_curve(y_test, y_pred_prob)
auc_value = auc(test_fpr, test_tpr)

# Draw on the current axes, the same ones the pyplot functions target.
ax = plt.gca()
ax.grid()
ax.plot(test_fpr, test_tpr, label=f" AUC TEST = {auc_value}")
ax.plot([0, 1], [0, 1], 'g--')  # dashed green diagonal for reference
ax.legend()
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("AUC(ROC curve)")
ax.grid(color='black', linestyle='-', linewidth=0.5)
plt.show()


### Despliegue de endpoints en tiempo real

<center>
<img src="./imgs/endpoint_real.png"  alt="drawing" width="800"/>
</center>

### Despliegue de un endpoint con máquina asociada

In [None]:
# Requests to the endpoint are sent as CSV.
csv_serializer = sagemaker.serializers.CSVSerializer()

# Endpoint backed by a single ml.c5.xlarge instance.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.c5.xlarge',
    endpoint_name='churn-xgboost',
    serializer=csv_serializer,
)

In [None]:
# Features (everything after the first column) of the last five test rows.
data_test = df_test.tail(5).iloc[:, 1:].to_numpy()
data_test

In [None]:
# Send the five sample rows to the endpoint and print the raw response.
prediction = predictor.predict(data_test)
print(prediction)

In [None]:
from io import StringIO

# Parse the UTF-8 response as header-less CSV. A dedicated name is used so the
# dataset frame `df` is not overwritten and earlier cells remain re-runnable.
df_prediction = pd.read_csv(StringIO(prediction.decode('utf-8')), header=None)
df_prediction

In [None]:
# First column of the last five test rows, i.e. the rows whose remaining columns were sent to the endpoint.
df_test.iloc[-5:, 0]

In [None]:
# Delete the endpoint behind `predictor` once the example is done.
predictor.delete_endpoint()

### Despliegue de un endpoint serverless
- https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints-create.html

In [None]:
from sagemaker.serverless import ServerlessInferenceConfig

In [None]:
# Serverless settings: 4096 MB of memory and a maximum concurrency of 3.
serverless_config = ServerlessInferenceConfig(max_concurrency=3, memory_size_in_mb=4096)

In [None]:
# Serverless endpoint: no instance type or count, only the serverless config.
# The CSV serializer is passed explicitly, as for the instance-backed
# endpoint, so NumPy rows given to predict() are sent as text/csv.
predictor_serverless = estimator.deploy(
    endpoint_name='churn-xgboost-severless',
    serverless_inference_config=serverless_config,
    serializer=sagemaker.serializers.CSVSerializer(),
)

In [None]:
# Query the serverless endpoint. `predictor` refers to the instance-backed
# endpoint, which has already been deleted at this point.
prediction = predictor_serverless.predict(data_test)
print(prediction)

In [None]:
# Delete the serverless endpoint; the instance-backed one was removed earlier.
predictor_serverless.delete_endpoint()