## A Credit Scoring Use Case for Loan Approval: Distributed Training with Ray, XGBoost, and Feast
![](images/feast_ray_xgboost.png)

In [1]:
import sys
# Prepend the parent directory to the module search path.
# Presumably this is what lets the project-local `utils` and `queries`
# packages imported below resolve -- verify against the repo layout.
sys.path.insert(0, "../")

### Import General Python libs and modules

In [2]:
import pandas as pd
import numpy as np
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import precision_score

from xgboost_ray import RayXGBClassifier, RayParams
import xgboost as xgb

### Import Feast-related modules, the Ray training class, and helper definitions from the feature repo directories

In [3]:
from feast import FeatureStore
from utils.data_fetcher import DataFetcher
from queries.ray_train_model import CreditRayXGBClassifier

### Create instances of 
 * feature store
 * data fetcher utility class
 * RayXGBoost classifier for distributed training

In [4]:
import os

# Location of the Feast feature repository.
# Set the FEAST_REPO_PATH environment variable to point at your own checkout;
# if it is unset, the original hard-coded location is used as a fallback.
REPO_PATH = Path(
    os.environ.get(
        "FEAST_REPO_PATH",
        "/Users/jules/git-repos/feast_workshops/module_3/feature_repo",
    )
)
# Fail early with an actionable message instead of a less obvious error
# surfacing later from inside the feature store or the data fetcher.
if not REPO_PATH.is_dir():
    raise FileNotFoundError(
        f"Feast feature repo not found at {REPO_PATH}; "
        "set FEAST_REPO_PATH to your feature_repo directory"
    )

# Feature store handle for the repository above.
store = FeatureStore(repo_path=REPO_PATH)
# Data fetcher utility, built from the store and the same repo path.
fetcher = DataFetcher(store, REPO_PATH)
# RayXGBoost classifier wrapper used below for training and prediction.
xgboost_cls = CreditRayXGBClassifier(store, fetcher)

### Train the RayXGBoost classifier with distributed training on localhost, using four cores (one actor process per core)

In [5]:
%timeit xgboost_cls.train()

2021-09-16 07:12:56,045	INFO services.py:1263 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-09-16 07:12:58,470	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 07:12:59,849	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 07:12:59 AM INFO:start listen on 10.0.0.233:9091
09/16/2021 07:12:59 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=77791)[0m [07:12:59] task [xgboost.ray]:140217727747408 got new rank 3
[2m[36m(pid=77792)[0m [07:12:59] task [xgboost.ray]:140233380894320 got new rank 2
[2m[36m(pid=77790)[0m [07:12:59] task [xgboost.ray]:140517386765648 got new rank 1
[2m[36m(pid=77793)[0m [07:12:59] task [xgboost.ray]:140209683077152 got new rank 0
09/16/2021 07:13:02 AM INFO:@tracker All nodes finishes job
09/16/2021 07:13:02 AM INFO:@tracker 2.744385004043579 secs between node start and job finish
2021-09-16 07:13:02,831	INFO main

[0 0 0 ... 0 0 0]


2021-09-16 07:13:04,834	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 07:13:06,383	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 07:13:06 AM INFO:start listen on 10.0.0.233:9091
09/16/2021 07:13:06 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=77797)[0m [07:13:06] task [xgboost.ray]:140400867833312 got new rank 0
[2m[36m(pid=77800)[0m [07:13:06] task [xgboost.ray]:140346719430208 got new rank 3
[2m[36m(pid=77799)[0m [07:13:06] task [xgboost.ray]:140398259041856 got new rank 2
[2m[36m(pid=77798)[0m [07:13:06] task [xgboost.ray]:140547577308736 got new rank 1
09/16/2021 07:13:08 AM INFO:@tracker All nodes finishes job
09/16/2021 07:13:08 AM INFO:@tracker 2.3352370262145996 secs between node start and job finish
2021-09-16 07:13:08,890	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.81 seconds (2.50 pure 

[0 0 0 ... 0 0 0]


2021-09-16 07:13:11,354	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 07:13:13,016	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 07:13:13 AM INFO:start listen on 10.0.0.233:9091
09/16/2021 07:13:13 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=77810)[0m [07:13:13] task [xgboost.ray]:140355695187472 got new rank 0
[2m[36m(pid=77813)[0m [07:13:13] task [xgboost.ray]:140462473774944 got new rank 3
[2m[36m(pid=77811)[0m [07:13:13] task [xgboost.ray]:140574160742000 got new rank 1
[2m[36m(pid=77812)[0m [07:13:13] task [xgboost.ray]:140362565453424 got new rank 2
09/16/2021 07:13:15 AM INFO:@tracker All nodes finishes job
09/16/2021 07:13:15 AM INFO:@tracker 2.303541898727417 secs between node start and job finish
2021-09-16 07:13:15,555	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.94 seconds (2.53 pure X

[0 0 0 ... 0 0 0]


2021-09-16 07:13:17,909	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 07:13:19,354	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 07:13:19 AM INFO:start listen on 10.0.0.233:9091
09/16/2021 07:13:19 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=77822)[0m [07:13:19] task [xgboost.ray]:140577456391600 got new rank 0
[2m[36m(pid=77823)[0m [07:13:19] task [xgboost.ray]:140360879404608 got new rank 1
[2m[36m(pid=77824)[0m [07:13:19] task [xgboost.ray]:140631547278912 got new rank 2
[2m[36m(pid=77825)[0m [07:13:19] task [xgboost.ray]:140519281489472 got new rank 3
09/16/2021 07:13:21 AM INFO:@tracker All nodes finishes job
09/16/2021 07:13:21 AM INFO:@tracker 2.261259078979492 secs between node start and job finish
2021-09-16 07:13:21,755	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.57 seconds (2.39 pure X

[0 0 0 ... 0 0 0]


2021-09-16 07:13:24,067	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 07:13:25,520	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 07:13:25 AM INFO:start listen on 10.0.0.233:9091
09/16/2021 07:13:25 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=77833)[0m [07:13:25] task [xgboost.ray]:140438759175648 got new rank 0
[2m[36m(pid=77834)[0m [07:13:25] task [xgboost.ray]:140410473834048 got new rank 1
[2m[36m(pid=77835)[0m [07:13:25] task [xgboost.ray]:140256382514752 got new rank 2
[2m[36m(pid=77836)[0m [07:13:25] task [xgboost.ray]:140471422379776 got new rank 3
09/16/2021 07:13:27 AM INFO:@tracker All nodes finishes job
09/16/2021 07:13:27 AM INFO:@tracker 2.3327929973602295 secs between node start and job finish
2021-09-16 07:13:28,047	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.68 seconds (2.52 pure 

[0 0 0 ... 0 0 0]


2021-09-16 07:13:30,455	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 07:13:31,908	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 07:13:31 AM INFO:start listen on 10.0.0.233:9091
09/16/2021 07:13:31 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=77845)[0m [07:13:31] task [xgboost.ray]:140451309700576 got new rank 0
[2m[36m(pid=77848)[0m [07:13:31] task [xgboost.ray]:140682473424448 got new rank 3
[2m[36m(pid=77847)[0m [07:13:31] task [xgboost.ray]:140423970125376 got new rank 2
[2m[36m(pid=77846)[0m [07:13:31] task [xgboost.ray]:140289811117632 got new rank 1
09/16/2021 07:13:34 AM INFO:@tracker All nodes finishes job
09/16/2021 07:13:34 AM INFO:@tracker 2.5438718795776367 secs between node start and job finish
2021-09-16 07:13:34,635	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.92 seconds (2.72 pure 

[0 0 0 ... 0 0 0]


2021-09-16 07:13:36,972	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 07:13:38,526	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 07:13:38 AM INFO:start listen on 10.0.0.233:9091
09/16/2021 07:13:38 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=77970)[0m [07:13:38] task [xgboost.ray]:140403318474352 got new rank 0
[2m[36m(pid=77971)[0m [07:13:38] task [xgboost.ray]:140714241070656 got new rank 1
[2m[36m(pid=77973)[0m [07:13:38] task [xgboost.ray]:140475400615776 got new rank 3
[2m[36m(pid=77972)[0m [07:13:38] task [xgboost.ray]:140652694959536 got new rank 2
09/16/2021 07:13:40 AM INFO:@tracker All nodes finishes job
09/16/2021 07:13:40 AM INFO:@tracker 2.367325782775879 secs between node start and job finish
2021-09-16 07:13:41,047	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.80 seconds (2.52 pure X

[0 0 0 ... 0 0 0]


2021-09-16 07:13:43,436	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 07:13:44,988	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 07:13:44 AM INFO:start listen on 10.0.0.233:9091
09/16/2021 07:13:45 AM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=78123)[0m [07:13:45] task [xgboost.ray]:140219187365344 got new rank 0
[2m[36m(pid=78125)[0m [07:13:45] task [xgboost.ray]:140324859823680 got new rank 2
[2m[36m(pid=78124)[0m [07:13:45] task [xgboost.ray]:140393495414336 got new rank 1
[2m[36m(pid=78126)[0m [07:13:45] task [xgboost.ray]:140312494896704 got new rank 3
09/16/2021 07:13:47 AM INFO:@tracker All nodes finishes job
09/16/2021 07:13:47 AM INFO:@tracker 2.353178024291992 secs between node start and job finish
2021-09-16 07:13:47,508	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.78 seconds (2.51 pure X

[0 0 0 ... 0 0 0]
6.44 s ± 124 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Define loan requests

In [6]:
# Two sample loan applications used for the predictions below.
# Every field value is wrapped in a single-element list, and both requests
# carry the same set of keys in the same order.
loan_requests = [
    # First applicant
    dict(
        zipcode=[76104],
        person_age=[22],
        person_income=[59000],
        person_home_ownership=["RENT"],
        person_emp_length=[123.0],
        loan_intent=["PERSONAL"],
        loan_amnt=[35000],
        loan_int_rate=[16.02],
        dob_ssn=["19530219_5179"],
    ),
    # Second applicant
    dict(
        zipcode=[69033],
        person_age=[66],
        person_income=[42000],
        person_home_ownership=["RENT"],
        person_emp_length=[2.0],
        loan_intent=["MEDICAL"],
        loan_amnt=[6475],
        loan_int_rate=[9.99],
        dob_ssn=["19960703_3449"],
    ),
]

### Predict the loan requests

In [7]:
# Score each request in turn and print a one-line verdict for it.
for loan_request in loan_requests:
    prediction = xgboost_cls.predict(loan_request)
    # Round the prediction to an integer status code.
    result = round(prediction)
    # A status code of exactly 1 is reported as approved; anything else as rejected.
    if result == 1:
        loan_status = "approved"
    else:
        loan_status = "rejected"
    print(f"Loan for {loan_request['zipcode'][0]} code {loan_status}: status_code={result}")

2021-09-16 07:13:49,530	INFO main.py:1476 -- [RayXGBoost] Created 1 remote actors.
2021-09-16 07:13:50,825	INFO main.py:1493 -- [RayXGBoost] Starting XGBoost prediction.


Loan for 76104 code approved: status_code=1


2021-09-16 07:13:51,148	INFO main.py:1476 -- [RayXGBoost] Created 1 remote actors.
2021-09-16 07:13:52,399	INFO main.py:1493 -- [RayXGBoost] Starting XGBoost prediction.


Loan for 69033 code rejected: status_code=0
