## A Credit Scoring Use Case for Loan Approval: Distributed Training with Ray, XGBoost, and Feast
![](images/feast_ray_xgboost.png)

In [1]:
import sys
# Put the parent directory (resolved relative to the kernel's working directory)
# at the front of the module search path. Presumably this is what makes the
# project-local `utils` and `queries` packages imported further down
# resolvable -- confirm against the repo layout.
sys.path.insert(0, "../")

### Import General Python libs and modules

In [2]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder

import xgboost as xgb
from xgboost_ray import RayXGBClassifier, RayParams

### Import Feast-related modules and definitions from the feast_repo directories, along with the Ray modules

In [3]:
from feast import FeatureStore
from utils.data_fetcher import DataFetcher
from queries.ray_train_model import CreditRayXGBClassifier

### Create instances of 
 * feature store
 * data fetcher utility class
 * RayXGBoost classifier for distributed training

In [4]:
# [IMPORTANT] Point this at the feature repo inside your own git checkout.
# Either export FEAST_REPO_PATH before starting Jupyter, or edit the fallback
# path below. The fallback is the original author's machine-specific location.
DEFAULT_REPO_PATH = "/Users/jsd/git-repos/feast_workshops/module_3/feature_repo"
REPO_PATH = Path(os.environ.get("FEAST_REPO_PATH", DEFAULT_REPO_PATH))

# Fail early with an actionable message instead of a less obvious error
# surfacing later from inside the feature store or the data fetcher.
if not REPO_PATH.is_dir():
    raise FileNotFoundError(
        f"Feast feature repo not found at {REPO_PATH}; "
        "set FEAST_REPO_PATH or edit DEFAULT_REPO_PATH in this cell."
    )

# Feature store handle for the repo, the data-fetching helper built on top of
# it, and the Ray/XGBoost classifier wrapper that uses both.
store = FeatureStore(repo_path=REPO_PATH)
fetcher = DataFetcher(store, REPO_PATH)
xgboost_cls = CreditRayXGBClassifier(store, fetcher)

### Train the RayXGBoost classifier with distributed training on localhost, using four cores (processes)
![](images/xgboost_multi_core.png)

In [5]:
# Time the distributed training call. Note that %timeit executes the statement
# repeatedly (the recorded output reports 7 runs of 1 loop each), so train()
# is invoked once per repetition; %time would time a single run instead.
%timeit xgboost_cls.train()

2021-09-17 11:37:49,679	INFO services.py:1263 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-09-17 11:37:52,046	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-17 11:37:53,204	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/17/2021 11:37:53 AM INFO:start listen on 10.0.0.233:9091
09/17/2021 11:37:53 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=9833)[0m [11:37:53] task [xgboost.ray]:140226577786192 got new rank 2
[2m[36m(pid=9834)[0m [11:37:53] task [xgboost.ray]:140343045163088 got new rank 0
[2m[36m(pid=9835)[0m [11:37:53] task [xgboost.ray]:140204195538352 got new rank 1
[2m[36m(pid=9828)[0m [11:37:53] task [xgboost.ray]:140560922477904 got new rank 3
09/17/2021 11:37:55 AM INFO:@tracker All nodes finishes job
09/17/2021 11:37:55 AM INFO:@tracker 2.298259973526001 secs between node start and job finish
2021-09-17 11:37:55,722	INFO main.py:

 predictions: [0 0 0 ... 0 0 0]


2021-09-17 11:37:57,586	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-17 11:37:59,029	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/17/2021 11:37:59 AM INFO:start listen on 10.0.0.233:9091
09/17/2021 11:37:59 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=9839)[0m [11:37:59] task [xgboost.ray]:140552944973328 got new rank 0
[2m[36m(pid=9840)[0m [11:37:59] task [xgboost.ray]:140287546189376 got new rank 1
[2m[36m(pid=9842)[0m [11:37:59] task [xgboost.ray]:140559395751488 got new rank 3
[2m[36m(pid=9841)[0m [11:37:59] task [xgboost.ray]:140300700505664 got new rank 2
09/17/2021 11:38:01 AM INFO:@tracker All nodes finishes job
09/17/2021 11:38:01 AM INFO:@tracker 2.219796895980835 secs between node start and job finish
2021-09-17 11:38:01,431	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.61 seconds (2.40 pure XGBoo

 predictions: [0 0 0 ... 0 0 0]


2021-09-17 11:38:03,752	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-17 11:38:05,199	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/17/2021 11:38:05 AM INFO:start listen on 10.0.0.233:9091
09/17/2021 11:38:05 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=9850)[0m [11:38:05] task [xgboost.ray]:140216964380128 got new rank 0
[2m[36m(pid=9851)[0m [11:38:05] task [xgboost.ray]:140236502558272 got new rank 1
[2m[36m(pid=9853)[0m [11:38:05] task [xgboost.ray]:140379946653248 got new rank 3
[2m[36m(pid=9852)[0m [11:38:05] task [xgboost.ray]:140449647593072 got new rank 2
09/17/2021 11:38:07 AM INFO:@tracker All nodes finishes job
09/17/2021 11:38:07 AM INFO:@tracker 2.2986738681793213 secs between node start and job finish
2021-09-17 11:38:07,708	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.65 seconds (2.50 pure XGBo

 predictions: [0 0 0 ... 0 0 0]


2021-09-17 11:38:10,117	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-17 11:38:11,572	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/17/2021 11:38:11 AM INFO:start listen on 10.0.0.233:9091
09/17/2021 11:38:11 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=9863)[0m [11:38:11] task [xgboost.ray]:140666880021984 got new rank 0
[2m[36m(pid=9866)[0m [11:38:11] task [xgboost.ray]:140620498429504 got new rank 3
[2m[36m(pid=9865)[0m [11:38:11] task [xgboost.ray]:140690569573952 got new rank 2
[2m[36m(pid=9864)[0m [11:38:11] task [xgboost.ray]:140588706649664 got new rank 1
09/17/2021 11:38:14 AM INFO:@tracker All nodes finishes job
09/17/2021 11:38:14 AM INFO:@tracker 2.588716745376587 secs between node start and job finish
2021-09-17 11:38:14,372	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.98 seconds (2.79 pure XGBoo

 predictions: [0 0 0 ... 0 0 0]


2021-09-17 11:38:16,819	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-17 11:38:18,273	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/17/2021 11:38:18 AM INFO:start listen on 10.0.0.233:9091
09/17/2021 11:38:18 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=9875)[0m [11:38:18] task [xgboost.ray]:140372247012880 got new rank 0
[2m[36m(pid=9878)[0m [11:38:18] task [xgboost.ray]:140626782545472 got new rank 3
[2m[36m(pid=9877)[0m [11:38:18] task [xgboost.ray]:140602974631536 got new rank 2
[2m[36m(pid=9876)[0m [11:38:18] task [xgboost.ray]:140673465149040 got new rank 1
09/17/2021 11:38:20 AM INFO:@tracker All nodes finishes job
09/17/2021 11:38:20 AM INFO:@tracker 2.299307107925415 secs between node start and job finish
2021-09-17 11:38:20,772	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.74 seconds (2.49 pure XGBoo

 predictions: [0 0 0 ... 0 0 0]


2021-09-17 11:38:23,085	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-17 11:38:24,641	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/17/2021 11:38:24 AM INFO:start listen on 10.0.0.233:9091
09/17/2021 11:38:24 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=9886)[0m [11:38:24] task [xgboost.ray]:140712505664944 got new rank 0
[2m[36m(pid=9887)[0m [11:38:24] task [xgboost.ray]:140691734427200 got new rank 1
[2m[36m(pid=9888)[0m [11:38:24] task [xgboost.ray]:140226737169984 got new rank 2
[2m[36m(pid=9889)[0m [11:38:24] task [xgboost.ray]:140255870805568 got new rank 3
09/17/2021 11:38:26 AM INFO:@tracker All nodes finishes job
09/17/2021 11:38:26 AM INFO:@tracker 2.3027760982513428 secs between node start and job finish
2021-09-17 11:38:27,144	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.76 seconds (2.50 pure XGBo

 predictions: [0 0 0 ... 0 0 0]


2021-09-17 11:38:29,478	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-17 11:38:30,925	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/17/2021 11:38:30 AM INFO:start listen on 10.0.0.233:9091
09/17/2021 11:38:30 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=9898)[0m [11:38:30] task [xgboost.ray]:140592144840160 got new rank 0
[2m[36m(pid=9900)[0m [11:38:30] task [xgboost.ray]:140669093578304 got new rank 2
[2m[36m(pid=9901)[0m [11:38:30] task [xgboost.ray]:140488664114752 got new rank 3
[2m[36m(pid=9899)[0m [11:38:30] task [xgboost.ray]:140387203900992 got new rank 1
09/17/2021 11:38:33 AM INFO:@tracker All nodes finishes job
09/17/2021 11:38:33 AM INFO:@tracker 2.3227720260620117 secs between node start and job finish
2021-09-17 11:38:33,437	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.68 seconds (2.51 pure XGBo

 predictions: [0 0 0 ... 0 0 0]


2021-09-17 11:38:35,861	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-17 11:38:37,318	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/17/2021 11:38:37 AM INFO:start listen on 10.0.0.233:9091
09/17/2021 11:38:37 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=9910)[0m [11:38:37] task [xgboost.ray]:140604878779824 got new rank 0
[2m[36m(pid=9912)[0m [11:38:37] task [xgboost.ray]:140338483914304 got new rank 2
[2m[36m(pid=9913)[0m [11:38:37] task [xgboost.ray]:140519919019584 got new rank 3
[2m[36m(pid=9911)[0m [11:38:37] task [xgboost.ray]:140396505749056 got new rank 1
09/17/2021 11:38:39 AM INFO:@tracker All nodes finishes job
09/17/2021 11:38:39 AM INFO:@tracker 2.2580459117889404 secs between node start and job finish
2021-09-17 11:38:39,740	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.61 seconds (2.41 pure XGBo

 predictions: [0 0 0 ... 0 0 0]
6.36 s ± 127 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Define loan requests

In [6]:
# Sample loan applications used to exercise the trained classifier.
# The rows are written with plain scalar values for readability and then
# wrapped so that every field holds a one-element list, which is the shape
# the original literal used (presumably one-row, column-oriented input for
# predict() -- confirm against the classifier).
_applicant_rows = (
    dict(
        zipcode=76104,
        person_age=22,
        person_income=59000,
        person_home_ownership="RENT",
        person_emp_length=123.0,
        loan_intent="PERSONAL",
        loan_amnt=35000,
        loan_int_rate=16.02,
        dob_ssn="19530219_5179",
    ),
    dict(
        zipcode=69033,
        person_age=66,
        person_income=42000,
        person_home_ownership="RENT",
        person_emp_length=2.0,
        loan_intent="MEDICAL",
        loan_amnt=6475,
        loan_int_rate=9.99,
        dob_ssn="19960703_3449",
    ),
)

loan_requests = [
    {field: [value] for field, value in row.items()}
    for row in _applicant_rows
]

### Predict the loan requests

In [7]:
# Score each sample request with the trained classifier and report the outcome.
# NOTE(review): a rounded prediction of 1 is reported as "approved" and anything
# else as "rejected". Confirm the label polarity against the training target --
# in credit-risk datasets a positive label often marks a default.
for loan_request in loan_requests:
    result = round(xgboost_cls.predict(loan_request))
    if result == 1:
        loan_status = "approved"
    else:
        loan_status = "rejected"
    print(f"Loan for {loan_request['zipcode'][0]} code {loan_status}: status_code={result}")

2021-09-17 11:38:48,813	INFO main.py:1476 -- [RayXGBoost] Created 1 remote actors.
2021-09-17 11:38:50,197	INFO main.py:1493 -- [RayXGBoost] Starting XGBoost prediction.


Loan for 76104 code approved: status_code=1


2021-09-17 11:38:50,525	INFO main.py:1476 -- [RayXGBoost] Created 1 remote actors.
2021-09-17 11:38:51,843	INFO main.py:1493 -- [RayXGBoost] Starting XGBoost prediction.


Loan for 69033 code rejected: status_code=0
