## A Credit Scoring Use Case for Loan Approval: Distributed Training with Ray, XGBoost, and Feast
![](images/feast_ray_xgboost.png)

In [1]:
import sys

# Put the parent directory at the front of the module search path.
# Presumably this is where the `utils` and `queries` packages imported
# further down live — confirm against the repo layout.
sys.path[:0] = ["../"]

### Import general Python libraries and modules

In [2]:
import pandas as pd
import numpy as np
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import precision_score

from xgboost_ray import RayXGBClassifier, RayParams
import xgboost as xgb

### Import Feast and the project's helper modules for data fetching and Ray-based training

In [3]:
from feast import FeatureStore
from utils.data_fetcher import DataFetcher
from queries.ray_train_model import CreditRayXGBClassifier

### Create instances of 
 * feature store
 * data fetcher utility class
 * RayXGBoost classifier for distributed training

In [4]:
# [IMPORTANT] Point this at the Feast feature repo in your own checkout.
# Set the FEAST_REPO_PATH environment variable to override the default below
# without editing the notebook; when it is unset the original path is used.
import os

REPO_PATH = Path(
    os.environ.get(
        "FEAST_REPO_PATH",
        "/Users/jules/git-repos/feast_workshops/module_3/feature_repo",
    )
)
# Feast handle rooted at the feature repo.
store = FeatureStore(repo_path=REPO_PATH)
# Project helper, built from the store and the same repo path.
fetcher = DataFetcher(store, REPO_PATH)
# Classifier wrapper used by the training and prediction cells below.
xgboost_cls = CreditRayXGBClassifier(store, fetcher)

  and should_run_async(code)


### Train the RayXGBoost classifier with distributed training on localhost, using four cores or processes
![](images/xgboost_multi_core.png)

In [5]:
# NOTE: %timeit calls train() repeatedly — the captured log below shows eight
# complete training rounds behind the single "7 runs, 1 loop each" summary.
# If one timed training run is enough, `%time` would avoid the repeated work.
%timeit xgboost_cls.train()

  and should_run_async(code)
2021-10-06 16:49:14,861	INFO services.py:1263 -- View the Ray dashboard at http://127.0.0.1:8265
2021-10-06 16:49:16,246	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-10-06 16:49:17,804	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
10/06/2021 04:49:17 PM INFO:start listen on 10.103.119.204:9091
10/06/2021 04:49:17 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=20482)[0m [16:49:17] task [xgboost.ray]:140665686766496 got new rank 0
[2m[36m(pid=20480)[0m [16:49:17] task [xgboost.ray]:140491070607856 got new rank 1
[2m[36m(pid=20481)[0m [16:49:17] task [xgboost.ray]:140408662352368 got new rank 3
[2m[36m(pid=20479)[0m [16:49:17] task [xgboost.ray]:140430940967408 got new rank 2
10/06/2021 04:49:20 PM INFO:@tracker All nodes finishes job
10/06/2021 04:49:20 PM INFO:@tracker 2.705364942550659 secs between node start and job finish


 predictions: [0 0 0 ... 0 0 0]


2021-10-06 16:49:23,026	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-10-06 16:49:24,975	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
10/06/2021 04:49:24 PM INFO:start listen on 10.103.119.204:9091
10/06/2021 04:49:25 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=20501)[0m [16:49:25] task [xgboost.ray]:140682196136768 got new rank 0
[2m[36m(pid=20503)[0m [16:49:25] task [xgboost.ray]:140645289611712 got new rank 1
[2m[36m(pid=20505)[0m [16:49:25] task [xgboost.ray]:140184385212864 got new rank 3
[2m[36m(pid=20504)[0m [16:49:25] task [xgboost.ray]:140475101405632 got new rank 2
10/06/2021 04:49:27 PM INFO:@tracker All nodes finishes job
10/06/2021 04:49:27 PM INFO:@tracker 2.773092746734619 secs between node start and job finish
2021-10-06 16:49:27,925	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 5.65 seconds (2.94 pu

 predictions: [0 0 0 ... 0 0 0]


2021-10-06 16:49:30,752	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-10-06 16:49:32,709	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
10/06/2021 04:49:32 PM INFO:start listen on 10.103.119.204:9091
10/06/2021 04:49:32 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=20572)[0m [16:49:32] task [xgboost.ray]:140464899408704 got new rank 0
[2m[36m(pid=20578)[0m [16:49:32] task [xgboost.ray]:140290149634496 got new rank 3
[2m[36m(pid=20576)[0m [16:49:32] task [xgboost.ray]:140695356006848 got new rank 1
[2m[36m(pid=20577)[0m [16:49:32] task [xgboost.ray]:140650929426880 got new rank 2
10/06/2021 04:49:35 PM INFO:@tracker All nodes finishes job
10/06/2021 04:49:35 PM INFO:@tracker 2.6528408527374268 secs between node start and job finish
2021-10-06 16:49:35,586	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 5.57 seconds (2.87 p

 predictions: [0 0 0 ... 0 0 0]


2021-10-06 16:49:38,532	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-10-06 16:49:40,496	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
10/06/2021 04:49:40 PM INFO:start listen on 10.103.119.204:9091
10/06/2021 04:49:40 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=20626)[0m [16:49:40] task [xgboost.ray]:140431749460800 got new rank 0
[2m[36m(pid=20631)[0m [16:49:40] task [xgboost.ray]:140578591039936 got new rank 2
[2m[36m(pid=20630)[0m [16:49:40] task [xgboost.ray]:140655353405888 got new rank 1
[2m[36m(pid=20632)[0m [16:49:40] task [xgboost.ray]:140238347817408 got new rank 3
10/06/2021 04:49:43 PM INFO:@tracker All nodes finishes job
10/06/2021 04:49:43 PM INFO:@tracker 2.5848920345306396 secs between node start and job finish
2021-10-06 16:49:43,247	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 5.43 seconds (2.75 p

 predictions: [0 0 0 ... 0 0 0]


2021-10-06 16:49:46,160	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-10-06 16:49:48,022	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
10/06/2021 04:49:48 PM INFO:start listen on 10.103.119.204:9091
10/06/2021 04:49:48 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=20647)[0m [16:49:48] task [xgboost.ray]:140383301813056 got new rank 0
[2m[36m(pid=20651)[0m [16:49:48] task [xgboost.ray]:140362895643072 got new rank 1
[2m[36m(pid=20653)[0m [16:49:48] task [xgboost.ray]:140674280427968 got new rank 3
[2m[36m(pid=20652)[0m [16:49:48] task [xgboost.ray]:140326119985600 got new rank 2
10/06/2021 04:49:50 PM INFO:@tracker All nodes finishes job
10/06/2021 04:49:50 PM INFO:@tracker 2.5950119495391846 secs between node start and job finish
2021-10-06 16:49:50,777	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 5.35 seconds (2.75 p

 predictions: [0 0 0 ... 0 0 0]


2021-10-06 16:49:53,689	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-10-06 16:49:55,656	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
10/06/2021 04:49:55 PM INFO:start listen on 10.103.119.204:9091
10/06/2021 04:49:55 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=20668)[0m [16:49:55] task [xgboost.ray]:140522884707136 got new rank 0
[2m[36m(pid=20674)[0m [16:49:55] task [xgboost.ray]:140352026829248 got new rank 3
[2m[36m(pid=20673)[0m [16:49:55] task [xgboost.ray]:140646631760320 got new rank 2
[2m[36m(pid=20672)[0m [16:49:55] task [xgboost.ray]:140437393039808 got new rank 1
10/06/2021 04:49:58 PM INFO:@tracker All nodes finishes job
10/06/2021 04:49:58 PM INFO:@tracker 2.6412999629974365 secs between node start and job finish
2021-10-06 16:49:58,500	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 5.60 seconds (2.84 p

 predictions: [0 0 0 ... 0 0 0]


2021-10-06 16:50:01,326	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-10-06 16:50:03,286	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
10/06/2021 04:50:03 PM INFO:start listen on 10.103.119.204:9091
10/06/2021 04:50:03 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=20689)[0m [16:50:03] task [xgboost.ray]:140378199475008 got new rank 0
[2m[36m(pid=20695)[0m [16:50:03] task [xgboost.ray]:140581406200256 got new rank 3
[2m[36m(pid=20694)[0m [16:50:03] task [xgboost.ray]:140669850960320 got new rank 2
[2m[36m(pid=20693)[0m [16:50:03] task [xgboost.ray]:140316589199808 got new rank 1
10/06/2021 04:50:05 PM INFO:@tracker All nodes finishes job
10/06/2021 04:50:05 PM INFO:@tracker 2.662182092666626 secs between node start and job finish
2021-10-06 16:50:06,151	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 5.54 seconds (2.86 pu

 predictions: [0 0 0 ... 0 0 0]


2021-10-06 16:50:09,027	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-10-06 16:50:10,990	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
10/06/2021 04:50:10 PM INFO:start listen on 10.103.119.204:9091
10/06/2021 04:50:11 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=20710)[0m [16:50:11] task [xgboost.ray]:140500464325440 got new rank 0
[2m[36m(pid=20714)[0m [16:50:11] task [xgboost.ray]:140456444678592 got new rank 1
[2m[36m(pid=20716)[0m [16:50:11] task [xgboost.ray]:140614552637888 got new rank 3
[2m[36m(pid=20715)[0m [16:50:11] task [xgboost.ray]:140669322121664 got new rank 2
10/06/2021 04:50:14 PM INFO:@tracker All nodes finishes job
10/06/2021 04:50:14 PM INFO:@tracker 3.109935760498047 secs between node start and job finish
2021-10-06 16:50:14,304	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 6.03 seconds (3.31 pu

 predictions: [0 0 0 ... 0 0 0]
7.73 s ± 182 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Define loan requests

In [6]:
# Two sample loan applications scored by the prediction cell below.
# Every field value is wrapped in a single-element list — presumably so each
# request can be turned into a one-row table by the classifier; confirm
# against CreditRayXGBClassifier.predict.
loan_requests = [
    dict(
        zipcode=[76104],
        person_age=[22],
        person_income=[59000],
        person_home_ownership=["RENT"],
        person_emp_length=[123.0],
        loan_intent=["PERSONAL"],
        loan_amnt=[35000],
        loan_int_rate=[16.02],
        dob_ssn=["19530219_5179"],
    ),
    dict(
        zipcode=[69033],
        person_age=[66],
        person_income=[42000],
        person_home_ownership=["RENT"],
        person_emp_length=[2.0],
        loan_intent=["MEDICAL"],
        loan_amnt=[6475],
        loan_int_rate=[9.99],
        dob_ssn=["19960703_3449"],
    ),
]

### Predict the loan requests

In [7]:
# Score each sample request and print a human-readable decision.
for loan_request in loan_requests:
    # predict() output is rounded to an integer status code before comparing.
    raw_prediction = xgboost_cls.predict(loan_request)
    result = round(raw_prediction)

    # NOTE(review): status code 1 is reported as "approved". If the model's
    # target is a default flag (1 = borrower defaulted), this mapping would be
    # inverted — TODO confirm against CreditRayXGBClassifier and the label.
    if result == 1:
        loan_status = "approved"
    else:
        loan_status = "rejected"

    print(f"Loan for {loan_request['zipcode'][0]} code {loan_status}: status_code={result}")

2021-10-06 16:50:37,918	INFO main.py:1476 -- [RayXGBoost] Created 1 remote actors.
2021-10-06 16:50:39,700	INFO main.py:1493 -- [RayXGBoost] Starting XGBoost prediction.
2021-10-06 16:50:39,752	INFO main.py:1476 -- [RayXGBoost] Created 1 remote actors.


Loan for 76104 code approved: status_code=1


2021-10-06 16:50:41,500	INFO main.py:1493 -- [RayXGBoost] Starting XGBoost prediction.


Loan for 69033 code rejected: status_code=0
