In [15]:
import os

import numpy as np
import pandas as pd
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.serializers import CSVSerializer
from sagemaker.session import TrainingInput
from sklearn.metrics import classification_report

# SageMaker session and the IAM execution role used by the jobs below.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

# S3 location: bucket that holds the raw CSV, and the key prefix that
# receives the prepared training data.
bucket = 'datascience-sagemaker-fernandosousa'
prefix = 'perceptron/cobranca'

# Instance type shared by the training job and the inference endpoint.
instance_type = 'ml.m5.large'

In [16]:
# Data preparation: load the raw dataset from S3 and derive the model columns.
dados = pd.read_csv("s3://{}/{}".format(bucket, "Case_cobranca.csv"))
dados = dados.assign(
    # Binary indicators for the categorical fields; any other value
    # (including missing ones) maps to 0.
    CLIENTE_NOVO=lambda d: d['TIPO_CLIENTE'].eq('NOVO').astype('int64'),
    CLIENTE_INVESTIDOR=lambda d: d['TIPO_CLIENTE'].eq('INVESTIDOR').astype('int64'),
    EMPRESTIMO_CDC=lambda d: d['TIPO_EMPRESTIMO'].eq('CDC').astype('int64'),
    EMPRESTIMO_PESSOAL=lambda d: d['TIPO_EMPRESTIMO'].eq('PESSOAL').astype('int64'),
    SEXO_M=lambda d: d['CD_SEXO'].eq('M').astype('int64'),
    # Age: missing values become the minimum (18), values are clamped to
    # [18, 76], then rescaled to the 0..1 range.
    IDADE_NORM=lambda d: (d['IDADE'].fillna(18).clip(lower=18, upper=76) - 18) / (76 - 18),
    # Number of debts: missing values become 0, then the count is divided by 16.
    QTD_DIVIDAS_NORM=lambda d: d['QTD_DIVIDAS'].fillna(0) / 16,
    # Target: 1 when the recovery time is at most 90, otherwise 0
    # (a missing recovery time also yields 0).
    ALVO=lambda d: d['TEMP_RECUPERACAO'].le(90).astype('int64'),
).drop(
    # Remove the identifier and the raw columns replaced by the derived ones,
    # leaving ALVO as the last column.
    columns=['COD', 'TIPO_CLIENTE', 'TIPO_EMPRESTIMO', 'CD_SEXO', 'IDADE', 'QTD_DIVIDAS', 'TEMP_RECUPERACAO']
)


In [17]:
# Save the prepared frame as a CSV file. The target directory is created
# first so the cell also works on a fresh environment where 'data' does not
# exist yet (to_csv does not create missing parent directories).
os.makedirs('data', exist_ok=True)
# NOTE(review): the file is written with a header row and the index column;
# confirm that the training script (mlp.py) expects both.
dados.to_csv('data/train.csv')

# Upload the contents of the local 'data' directory to S3 under the key
# prefix; the returned S3 URI is displayed as the cell output.
sagemaker_session.upload_data(path='data', bucket=bucket, key_prefix=prefix)

's3://datascience-sagemaker-fernandosousa/perceptron/cobranca'

In [18]:
# Training channel pointing at the CSV uploaded to S3 in the previous cell.
train_input = TrainingInput(
    s3_data="s3://{}/{}/{}".format(bucket, prefix, "train.csv"),
    content_type="csv",
)


In [24]:
# PyTorch estimator that runs the `mlp.py` training script on a single
# instance. `PyTorch` is already imported in the notebook's first cell, so
# the duplicate import that used to live here was removed.
estimator = PyTorch(entry_point='mlp.py',
                    # The job-name prefix argument is `base_job_name`; the
                    # previous `base_name` keyword is not an estimator
                    # parameter. The value uses a hyphen because training
                    # job names do not accept underscores.
                    base_job_name='credito-MLP',
                    role=role,
                    py_version='py3',
                    framework_version='1.8.0',
                    instance_count=1,
                    instance_type=instance_type,
                    # Forwarded to the training script.
                    hyperparameters={
                        'epochs': 10,
                    })

In [25]:
# Launch the training job, feeding the uploaded CSV through the "training" channel.
estimator.fit(inputs={"training": train_input})

2022-05-15 00:15:51 Starting - Starting the training job...
2022-05-15 00:16:15 Starting - Preparing the instances for trainingProfilerReport-1652573750: InProgress
.........
2022-05-15 00:17:45 Downloading - Downloading input data......
2022-05-15 00:18:46 Training - Downloading the training image...
2022-05-15 00:19:16 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2022-05-15 00:19:19,336 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2022-05-15 00:19:19,339 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-05-15 00:19:19,358 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2022-05-15 00:19:19,380 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m20

In [26]:
# Deploy the trained model to a real-time endpoint with a fixed name.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    endpoint_name='datascience-credito-mlp',
)

------!

In [50]:
# Every column except the last one (ALVO) is sent to the endpoint as float32.
features = dados.iloc[:, :-1].to_numpy().astype('float32')
# Round the endpoint output to the nearest integer and drop singleton dimensions.
predictions = np.round(predictor.predict(features)).squeeze()

In [51]:
# Per-class precision/recall/F1 of the endpoint predictions against ALVO.
# NOTE(review): `dados` is the same frame that was uploaded as train.csv, so
# these metrics are computed on the training data, not on a held-out set.
print(classification_report(y_true=dados['ALVO'], y_pred=predictions))

              precision    recall  f1-score   support

           0       0.77      0.16      0.27      4687
           1       0.56      0.96      0.70      5130

    accuracy                           0.58      9817
   macro avg       0.66      0.56      0.49      9817
weighted avg       0.66      0.58      0.50      9817

