# IMPLEMENTEZ UN MODELE DE SCORING

# Prédictions sur les nouveaux clients et vérification de l'API

### Objectifs de ce notebook
- **Réalisation des prédictions pour les nouveaux clients:**
    - **Prédictions devant être réalisées sur des données mises à l'échelle avec le MinMaxScaler (obligatoire car le modèle a été entraîné sur des données transformées de la même manière)**
    - **Analyse des pourcentages/nombres des clients prédits à risque ou non**
- **Notebook permettant de vérifier le bon fonctionnement de l'API réalisée avec Flask**

In [1]:
# Import des librairies standards
import pandas as pd
import numpy as np
import pickle
import os

# Import des librairies pour le ML
from sklearn.preprocessing import MinMaxScaler
from lightgbm import LGBMClassifier

# Import pour les requêtes 
import json
import requests

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## 1. Prédictions sur les nouveaux clients

In [2]:
# Display settings: no truncation of rows or columns when rendering DataFrames
for display_option in ("Display.max_rows", "Display.max_columns"):
    pd.set_option(display_option, None)

# Load the new-clients dataset: the client identifier plus the 30 selected
# features (DataFrame derived from 'application_test.csv')
file = pd.read_csv("testset_rfe_30f.csv")
file.head()

Unnamed: 0,SK_ID_CURR,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,REGION_POPULATION_RELATIVE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,AGE,YEARS_LAST_PHONE_CHANGE,YEARS_EMPLOYED,ANNUITY_INCOME_PERC,ANNUITY_RATE_PERC,CREDIT_INCOME_PERC,prev_AMT_ANNUITY_mean,prev_AMT_CREDIT_mean,prev_AMT_CREDIT_sum,prev_AMT_DOWN_PAYMENT_mean,prev_DAYS_DECISION_mean,prev_CNT_PAYMENT_mean,prev_AMT_PAYMENT_mean,prev_AMT_INSTALMENT_mean,prev_SK_DPD_count,home_DAYS_CREDIT_min,home_DAYS_CREDIT_ENDDATE_mean,home_AMT_CREDIT_SUM_sum,home_AMT_CREDIT_SUM_mean,home_AMT_CREDIT_SUM_DEBT_sum,prev_type_loans,prev_cash_loans_perc,total_accepted_loans
0,100001,135000.0,568800.0,20560.5,0.01885,0.752614,0.789654,0.15952,53.0,5.0,6.4,15.23,3.614715,421.333333,3951.0,23787.0,23787.0,2520.0,1740.0,8.0,5885.132143,5885.132143,9.0,49.0,441.571429,1453365.0,207623.571429,596686.5,1.0,0.0,8.0
1,100005,99000.0,222768.0,17370.0,0.035792,0.56499,0.291656,0.432962,49.0,-0.0,12.2,17.545455,7.79735,225.018182,2406.6,20076.75,40153.5,2232.0,536.0,5.0,6240.205,6240.205,11.0,62.0,482.0,657126.0,219042.0,568408.5,2.0,50.0,4.0
2,100013,202500.0,663264.0,69777.0,0.019101,-2.0,0.699787,0.610991,55.0,2.0,12.2,34.457778,10.520245,327.537778,8608.64625,146134.125,584536.5,1687.0,837.5,12.5,9740.235774,10897.898516,36.0,1210.0,0.0,2072280.06,518070.015,-2.0,4.0,50.0,7.0
3,100028,315000.0,1575000.0,49018.5,0.026392,0.525734,0.509677,0.612704,38.0,5.0,5.1,15.561429,3.112286,500.0,4854.951,83057.4,415287.0,2249.6,1124.2,6.0,4356.731549,4979.282257,31.0,269.0,2716.0,1520875.08,126739.59,186300.5,5.0,20.0,15.0
4,100038,180000.0,625500.0,32067.0,0.010032,0.202145,0.425687,-2.0,36.0,2.0,6.0,17.815,5.126619,347.5,5550.3,46302.75,92605.5,4047.75,466.0,24.0,11100.3375,11100.3375,13.0,0.0,0.0,0.0,0.0,0.0,2.0,50.0,1.0


In [3]:
# Work on a copy so the DataFrame loaded from the CSV is left untouched
new_clients = file.copy() 

In [4]:
# Sanity check on the dataset size: 48744 clients and 31 columns are expected
# (client identifier + 30 features). Fail loudly on a mismatch instead of
# printing a confirmation that might not be true.
expected_shape = (48744, 31)
assert new_clients.shape == expected_shape, (
    f"Unexpected shape {new_clients.shape}, expected {expected_shape}"
)
print(f'Le jeu de données comprend bien {new_clients.shape[0]} clients et {new_clients.shape[1]} variables.')

Le jeu de données comprend bien 48744 et 31 variables.


### 1.1. Standardisation des données avec le MinMaxScaler à l'exception de l'identifiant client

In [5]:
# Load the MinMaxScaler that was saved with pickle.
# NOTE: pickle.load can execute arbitrary code; only open pickles from a trusted source.
# The handle is deliberately NOT named `file`: that name holds the DataFrame
# loaded from the CSV, which an earlier cell copies into `new_clients`.
with open('minmax_scaler.pkl', 'rb') as scaler_file:
    loaded_minmax = pickle.load(scaler_file)

# Columns to scale: everything except the client identifier. The prediction
# columns added to `new_clients` by later cells are also excluded (ignored when
# absent) so that this cell can be re-run safely at any point.
non_feature_columns = ['SK_ID_CURR', 'Prediction_class_0', 'Prediction_class_1', 'Classe']
features_to_scale = new_clients.drop(columns=non_feature_columns, errors='ignore')

# Apply the already-fitted scaler (transform only, no re-fit on the new clients)
new_clients_scaled_values = loaded_minmax.transform(features_to_scale)

# Rebuild a DataFrame from the transformed values, keeping the original column
# names and row index so the identifier column below lines up row for row
new_clients_scaled = pd.DataFrame(
    new_clients_scaled_values,
    columns=features_to_scale.columns,
    index=features_to_scale.index,
)

# Put the client identifier back (unscaled) as the last column
new_clients_scaled["SK_ID_CURR"] = new_clients["SK_ID_CURR"]

In [6]:
# Preview of the DataFrame after MinMax scaling (SK_ID_CURR is now the last column)
new_clients_scaled.head()

Unnamed: 0,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,REGION_POPULATION_RELATIVE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,AGE,YEARS_LAST_PHONE_CHANGE,YEARS_EMPLOYED,ANNUITY_INCOME_PERC,ANNUITY_RATE_PERC,CREDIT_INCOME_PERC,prev_AMT_ANNUITY_mean,prev_AMT_CREDIT_mean,prev_AMT_CREDIT_sum,prev_AMT_DOWN_PAYMENT_mean,prev_DAYS_DECISION_mean,prev_CNT_PAYMENT_mean,prev_AMT_PAYMENT_mean,prev_AMT_INSTALMENT_mean,prev_SK_DPD_count,home_DAYS_CREDIT_min,home_DAYS_CREDIT_ENDDATE_mean,home_AMT_CREDIT_SUM_sum,home_AMT_CREDIT_SUM_mean,home_AMT_CREDIT_SUM_DEBT_sum,prev_type_loans,prev_cash_loans_perc,total_accepted_loans,SK_ID_CURR
0,0.000935,0.130787,0.079691,0.257,0.929092,0.977112,0.745688,0.666667,0.5,0.164384,0.090877,0.388751,0.049669,0.013614,0.005874,0.002269,0.002491,0.595759,0.135135,0.002351,0.002351,0.037037,0.017442,0.014217,0.001427728,0.001048231,0.001783831,0.04,0.019608,0.081967,100001
1,0.000627,0.044387,0.067326,0.491595,0.865763,0.802682,0.840108,0.583333,0.142857,0.277886,0.10309,0.678348,0.0265,0.008295,0.004958,0.003829,0.002206,0.183995,0.094595,0.002492,0.002492,0.043771,0.021888,0.015513,0.0006455355,0.001105879,0.001699292,0.053333,0.509804,0.04918,100005
2,0.001512,0.154373,0.270432,0.260475,0.0,0.945635,0.901582,0.708333,0.285714,0.277886,0.192291,0.866875,0.038599,0.029655,0.036083,0.055745,0.001668,0.287107,0.195946,0.00389,0.004352,0.127946,0.414501,6.4e-05,0.002035725,0.00261557,0.0,0.08,0.509804,0.07377,100013
3,0.002474,0.382022,0.189982,0.361433,0.852513,0.879046,0.902174,0.354167,0.5,0.138943,0.092625,0.353964,0.058953,0.016727,0.020508,0.039604,0.002224,0.385157,0.108108,0.00174,0.001989,0.111111,0.092681,0.087115,0.001494047,0.0006398752,0.0005569609,0.093333,0.215686,0.139344,100028
4,0.00132,0.144944,0.124285,0.134897,0.743292,0.849628,0.0,0.3125,0.285714,0.156556,0.104511,0.493432,0.040955,0.019122,0.011433,0.008832,0.004,0.160055,0.351351,0.004433,0.004433,0.050505,0.000684,6.4e-05,1.964718e-09,1.009732e-08,5.979103e-09,0.053333,0.509804,0.02459,100038


### 1.2. Chargement du modèle sauvegardé sous format pickle

In [7]:
# Load the saved model from its pickle file.
# NOTE: pickle.load can execute arbitrary code; only open pickles from a trusted source.
with open("basic_lgbmc.pkl", "rb") as model_file:
    model = pickle.load(model_file)

### 1.3. Prédictions sur les nouveaux clients

**1. LES PREDICTIONS EN ELLES-MEMES**

In [8]:
# The model is fed the scaled features only, without the client identifier
model_inputs = new_clients_scaled.drop(columns="SK_ID_CURR")
predictions = model.predict_proba(model_inputs)

# Store both class probabilities (no threshold applied yet) in the original DataFrame
proba_class_0, proba_class_1 = predictions.T
new_clients["Prediction_class_0"] = proba_class_0
new_clients["Prediction_class_1"] = proba_class_1

In [9]:
# Decision threshold applied to the class-1 probability
seuil = 0.2222

# A client is assigned class 1 when its class-1 probability is strictly above
# the threshold, class 0 otherwise
class_1_proba = predictions[:, 1]
new_clients["Classe"] = np.greater(class_1_proba, seuil).astype(int)

# First 5 rows of the original dataset, now with the prediction columns
new_clients.head()

Unnamed: 0,SK_ID_CURR,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,REGION_POPULATION_RELATIVE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,AGE,YEARS_LAST_PHONE_CHANGE,YEARS_EMPLOYED,ANNUITY_INCOME_PERC,ANNUITY_RATE_PERC,CREDIT_INCOME_PERC,prev_AMT_ANNUITY_mean,prev_AMT_CREDIT_mean,prev_AMT_CREDIT_sum,prev_AMT_DOWN_PAYMENT_mean,prev_DAYS_DECISION_mean,prev_CNT_PAYMENT_mean,prev_AMT_PAYMENT_mean,prev_AMT_INSTALMENT_mean,prev_SK_DPD_count,home_DAYS_CREDIT_min,home_DAYS_CREDIT_ENDDATE_mean,home_AMT_CREDIT_SUM_sum,home_AMT_CREDIT_SUM_mean,home_AMT_CREDIT_SUM_DEBT_sum,prev_type_loans,prev_cash_loans_perc,total_accepted_loans,Prediction_class_0,Prediction_class_1,Classe
0,100001,135000.0,568800.0,20560.5,0.01885,0.752614,0.789654,0.15952,53.0,5.0,6.4,15.23,3.614715,421.333333,3951.0,23787.0,23787.0,2520.0,1740.0,8.0,5885.132143,5885.132143,9.0,49.0,441.571429,1453365.0,207623.571429,596686.5,1.0,0.0,8.0,0.921273,0.078727,0
1,100005,99000.0,222768.0,17370.0,0.035792,0.56499,0.291656,0.432962,49.0,-0.0,12.2,17.545455,7.79735,225.018182,2406.6,20076.75,40153.5,2232.0,536.0,5.0,6240.205,6240.205,11.0,62.0,482.0,657126.0,219042.0,568408.5,2.0,50.0,4.0,0.85285,0.14715,0
2,100013,202500.0,663264.0,69777.0,0.019101,-2.0,0.699787,0.610991,55.0,2.0,12.2,34.457778,10.520245,327.537778,8608.64625,146134.125,584536.5,1687.0,837.5,12.5,9740.235774,10897.898516,36.0,1210.0,0.0,2072280.06,518070.015,-2.0,4.0,50.0,7.0,0.964111,0.035889,0
3,100028,315000.0,1575000.0,49018.5,0.026392,0.525734,0.509677,0.612704,38.0,5.0,5.1,15.561429,3.112286,500.0,4854.951,83057.4,415287.0,2249.6,1124.2,6.0,4356.731549,4979.282257,31.0,269.0,2716.0,1520875.08,126739.59,186300.5,5.0,20.0,15.0,0.921209,0.078791,0
4,100038,180000.0,625500.0,32067.0,0.010032,0.202145,0.425687,-2.0,36.0,2.0,6.0,17.815,5.126619,347.5,5550.3,46302.75,92605.5,4047.75,466.0,24.0,11100.3375,11100.3375,13.0,0.0,0.0,0.0,0.0,0.0,2.0,50.0,1.0,0.668703,0.331297,1


**2. LE NOMBRE ET LE POURCENTAGE DE CLIENTS DE CHAQUE CLASSE**

**Le nombre de clients de chaque classe (Classe 0 = Clients prédits sans risque, Classe 1 = Clients prédits à risque)**

In [10]:
# Number of clients predicted in each class (0 = no risk, 1 = at risk)
new_clients["Classe"].value_counts()

Classe
0    39667
1     9077
Name: count, dtype: int64

**Le pourcentage de clients de chaque classe**

In [11]:
# Share of clients in each class, as a fraction of the total (normalize=True)
new_clients["Classe"].value_counts(normalize=True)

Classe
0    0.813782
1    0.186218
Name: proportion, dtype: float64

## 2. Vérification du bon fonctionnement de l'API

**Avant de déployer l'API, je trouve qu'il est indispensable de vérifier son bon fonctionnement. Par conséquent, des requêtes seront réalisées sur les 5 premiers clients du dataframe préalablement généré. Ces dernières nécessitent un prétraitement des données:**
- **Conservation des 5 premiers clients.**
- **Elimination des variables 'Prediction_class_0', 'Prediction_class_1' et 'Classe', variables servant à la vérification du bon fonctionnement de l'API.**  

### 2.1. Préparation du dataframe

In [12]:
# Keep only the first 5 clients for the API checks
five_clients = new_clients.iloc[:5]
five_clients

Unnamed: 0,SK_ID_CURR,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,REGION_POPULATION_RELATIVE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,AGE,YEARS_LAST_PHONE_CHANGE,YEARS_EMPLOYED,ANNUITY_INCOME_PERC,ANNUITY_RATE_PERC,CREDIT_INCOME_PERC,prev_AMT_ANNUITY_mean,prev_AMT_CREDIT_mean,prev_AMT_CREDIT_sum,prev_AMT_DOWN_PAYMENT_mean,prev_DAYS_DECISION_mean,prev_CNT_PAYMENT_mean,prev_AMT_PAYMENT_mean,prev_AMT_INSTALMENT_mean,prev_SK_DPD_count,home_DAYS_CREDIT_min,home_DAYS_CREDIT_ENDDATE_mean,home_AMT_CREDIT_SUM_sum,home_AMT_CREDIT_SUM_mean,home_AMT_CREDIT_SUM_DEBT_sum,prev_type_loans,prev_cash_loans_perc,total_accepted_loans,Prediction_class_0,Prediction_class_1,Classe
0,100001,135000.0,568800.0,20560.5,0.01885,0.752614,0.789654,0.15952,53.0,5.0,6.4,15.23,3.614715,421.333333,3951.0,23787.0,23787.0,2520.0,1740.0,8.0,5885.132143,5885.132143,9.0,49.0,441.571429,1453365.0,207623.571429,596686.5,1.0,0.0,8.0,0.921273,0.078727,0
1,100005,99000.0,222768.0,17370.0,0.035792,0.56499,0.291656,0.432962,49.0,-0.0,12.2,17.545455,7.79735,225.018182,2406.6,20076.75,40153.5,2232.0,536.0,5.0,6240.205,6240.205,11.0,62.0,482.0,657126.0,219042.0,568408.5,2.0,50.0,4.0,0.85285,0.14715,0
2,100013,202500.0,663264.0,69777.0,0.019101,-2.0,0.699787,0.610991,55.0,2.0,12.2,34.457778,10.520245,327.537778,8608.64625,146134.125,584536.5,1687.0,837.5,12.5,9740.235774,10897.898516,36.0,1210.0,0.0,2072280.06,518070.015,-2.0,4.0,50.0,7.0,0.964111,0.035889,0
3,100028,315000.0,1575000.0,49018.5,0.026392,0.525734,0.509677,0.612704,38.0,5.0,5.1,15.561429,3.112286,500.0,4854.951,83057.4,415287.0,2249.6,1124.2,6.0,4356.731549,4979.282257,31.0,269.0,2716.0,1520875.08,126739.59,186300.5,5.0,20.0,15.0,0.921209,0.078791,0
4,100038,180000.0,625500.0,32067.0,0.010032,0.202145,0.425687,-2.0,36.0,2.0,6.0,17.815,5.126619,347.5,5550.3,46302.75,92605.5,4047.75,466.0,24.0,11100.3375,11100.3375,13.0,0.0,0.0,0.0,0.0,0.0,2.0,50.0,1.0,0.668703,0.331297,1


In [13]:
# Remove the 3 prediction columns: they are not part of the API input
prediction_columns = ["Prediction_class_0", "Prediction_class_1", "Classe"]
five_clients_test_api = five_clients.drop(columns=prediction_columns)

# Check that the DataFrame is back to 31 columns
n_columns = len(five_clients_test_api.columns)
print(f'Le jeu de données comprend bien {n_columns} variables non standardisées.')

Le jeu de données comprend bien 31 variables non standardisées.


### 2.2. Les requêtes avec l'adresse locale

In [14]:
# Endpoint of the API running locally
url = "http://localhost:5021/predict"

# Build the request payload directly from the DataFrame prepared above instead
# of typing the values by hand: hand-copied display values are rounded and easy
# to mistype. The API receives the 30 unscaled features, without the client
# identifier. to_dict(orient="split") yields the "index" / "columns" / "data"
# keys expected under "dataframe_split".
api_features = five_clients_test_api.drop(columns="SK_ID_CURR")
data = {"dataframe_split": api_features.to_dict(orient="split")}

headers = {'Content-Type': 'application/json'}

# A timeout prevents the cell from hanging forever if the server is not running
response = requests.post(url, data=json.dumps(data), headers=headers, timeout=30)

print(response.status_code)
# Stop here with an explicit HTTP error rather than failing on JSON decoding
response.raise_for_status()
print(response.json())

200
{'class_0_proba': [0.9212727444450923, 0.8528503828279873, 0.9641112908324955, 0.921209485857845, 0.6687033547793894], 'class_1_proba': [0.0787272555549077, 0.14714961717201266, 0.0358887091675045, 0.078790514142155, 0.33129664522061064], 'proba': [[0.9212727444450923, 0.0787272555549077], [0.8528503828279873, 0.14714961717201266], [0.9641112908324955, 0.0358887091675045], [0.921209485857845, 0.078790514142155], [0.6687033547793894, 0.33129664522061064]], 'target': [0, 0, 0, 0, 1], 'target0.5': [0, 0, 0, 0, 0], 'threshold_used': 0.2222}


In [15]:
# Input features of the 5 clients, without the locally computed prediction columns
five_clients_original = five_clients.drop(
    columns=["Prediction_class_0", "Prediction_class_1", "Classe"]
)

# Columns returned by the API, read from the live response of the request above
# rather than pasted by hand, so the check always reflects the latest call
columns_from_api = response.json()

# Convert the response to a DataFrame aligned on the clients' index; the scalar
# 'threshold_used' is repeated on every row and 'proba' holds one
# [class_0, class_1] pair per row
df_from_api = pd.DataFrame(columns_from_api, index=five_clients_original.index)

# Actual verification: the API must reproduce the predictions computed locally
np.testing.assert_allclose(
    df_from_api["class_0_proba"].to_numpy(),
    five_clients["Prediction_class_0"].to_numpy(),
    rtol=1e-6,
    err_msg="API class-0 probabilities differ from the local predictions",
)
np.testing.assert_allclose(
    df_from_api["class_1_proba"].to_numpy(),
    five_clients["Prediction_class_1"].to_numpy(),
    rtol=1e-6,
    err_msg="API class-1 probabilities differ from the local predictions",
)
assert np.array_equal(
    df_from_api["target"].to_numpy(), five_clients["Classe"].to_numpy()
), "API classes differ from the local classes"

# Side-by-side view: input features followed by the API outputs
df_combined = pd.concat([five_clients_original, df_from_api], axis=1)

df_combined

Unnamed: 0,SK_ID_CURR,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,REGION_POPULATION_RELATIVE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,AGE,YEARS_LAST_PHONE_CHANGE,YEARS_EMPLOYED,ANNUITY_INCOME_PERC,ANNUITY_RATE_PERC,CREDIT_INCOME_PERC,prev_AMT_ANNUITY_mean,prev_AMT_CREDIT_mean,prev_AMT_CREDIT_sum,prev_AMT_DOWN_PAYMENT_mean,prev_DAYS_DECISION_mean,prev_CNT_PAYMENT_mean,prev_AMT_PAYMENT_mean,prev_AMT_INSTALMENT_mean,prev_SK_DPD_count,home_DAYS_CREDIT_min,home_DAYS_CREDIT_ENDDATE_mean,home_AMT_CREDIT_SUM_sum,home_AMT_CREDIT_SUM_mean,home_AMT_CREDIT_SUM_DEBT_sum,prev_type_loans,prev_cash_loans_perc,total_accepted_loans,class_0_proba,class_1_proba,proba,target,target0.5,threshold_used
0,100001,135000.0,568800.0,20560.5,0.01885,0.752614,0.789654,0.15952,53.0,5.0,6.4,15.23,3.614715,421.333333,3951.0,23787.0,23787.0,2520.0,1740.0,8.0,5885.132143,5885.132143,9.0,49.0,441.571429,1453365.0,207623.571429,596686.5,1.0,0.0,8.0,0.921273,0.078727,"[0.9212727444450923, 0.0787272555549077]",0,0,0.2222
1,100005,99000.0,222768.0,17370.0,0.035792,0.56499,0.291656,0.432962,49.0,-0.0,12.2,17.545455,7.79735,225.018182,2406.6,20076.75,40153.5,2232.0,536.0,5.0,6240.205,6240.205,11.0,62.0,482.0,657126.0,219042.0,568408.5,2.0,50.0,4.0,0.85285,0.14715,"[0.8528503828279873, 0.14714961717201266]",0,0,0.2222
2,100013,202500.0,663264.0,69777.0,0.019101,-2.0,0.699787,0.610991,55.0,2.0,12.2,34.457778,10.520245,327.537778,8608.64625,146134.125,584536.5,1687.0,837.5,12.5,9740.235774,10897.898516,36.0,1210.0,0.0,2072280.06,518070.015,-2.0,4.0,50.0,7.0,0.964111,0.035889,"[0.9641112908324955, 0.0358887091675045]",0,0,0.2222
3,100028,315000.0,1575000.0,49018.5,0.026392,0.525734,0.509677,0.612704,38.0,5.0,5.1,15.561429,3.112286,500.0,4854.951,83057.4,415287.0,2249.6,1124.2,6.0,4356.731549,4979.282257,31.0,269.0,2716.0,1520875.08,126739.59,186300.5,5.0,20.0,15.0,0.921209,0.078791,"[0.921209485857845, 0.078790514142155]",0,0,0.2222
4,100038,180000.0,625500.0,32067.0,0.010032,0.202145,0.425687,-2.0,36.0,2.0,6.0,17.815,5.126619,347.5,5550.3,46302.75,92605.5,4047.75,466.0,24.0,11100.3375,11100.3375,13.0,0.0,0.0,0.0,0.0,0.0,2.0,50.0,1.0,0.668703,0.331297,"[0.6687033547793894, 0.33129664522061064]",1,0,0.2222


### 2.3. Les requêtes avec déploiement sur Heroku

In [16]:
# Endpoint of the API deployed on Heroku
url = "https://modele-scoring-credits-c459a33a2133.herokuapp.com/predict"

# Same payload as for the local check, built from the prepared DataFrame rather
# than typed by hand: the 30 unscaled features of the 5 clients, without the
# client identifier, in the "index" / "columns" / "data" split layout
api_features = five_clients_test_api.drop(columns="SK_ID_CURR")
data = {"dataframe_split": api_features.to_dict(orient="split")}

headers = {'Content-Type': 'application/json'}

# A timeout prevents the cell from hanging forever if the remote app is unreachable
response = requests.post(url, data=json.dumps(data), headers=headers, timeout=30)

print(response.status_code)
# Stop here with an explicit HTTP error rather than failing on JSON decoding
response.raise_for_status()
print(response.json())

200
{'class_0_proba': [0.9212727444450923, 0.8528503828279873, 0.9641112908324955, 0.921209485857845, 0.6687033547793894], 'class_1_proba': [0.0787272555549077, 0.14714961717201266, 0.0358887091675045, 0.078790514142155, 0.33129664522061064], 'proba': [[0.9212727444450923, 0.0787272555549077], [0.8528503828279873, 0.14714961717201266], [0.9641112908324955, 0.0358887091675045], [0.921209485857845, 0.078790514142155], [0.6687033547793894, 0.33129664522061064]], 'target': [0, 0, 0, 0, 1], 'target0.5': [0, 0, 0, 0, 0], 'threshold_used': 0.2222}


### Conclusion: Bon fonctionnement de l'API avec l'adresse locale et après déploiement sur Heroku
- **Retour des mêmes valeurs pour les variables 'class_0_proba', 'class_1_proba' et 'target'.**
- **Apport d'informations supplémentaires si nécessaire, notamment la comparaison possible entre les prédictions avec seuil métier ou non.**

**A NOTER: Ce notebook sera sauvegardé sous le nom de 'Ple_Coline_8_notebook_verification_api_092023' dans les livrables et 'notebook_8_verification_api' sur GitHub.**