## A Credit Scoring Use Case for Loan Approval: Distributed Training with Ray, XGBoost, and Feast
![](images/feast_ray_xgboost.png)

In [1]:
import sys
# Put the parent directory ("../", resolved against the current working directory)
# at the front of the module search path.
# NOTE(review): presumably this is what makes the `utils` and `queries` packages
# imported further down resolvable — confirm where they live relative to this notebook.
sys.path.insert(0, "../")

### Import General Python libs and modules

In [2]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import precision_score

from xgboost_ray import RayXGBClassifier, RayParams
import xgboost as xgb

### Import Feast-related modules and definitions from feast_repo directories and Ray modules

In [3]:
from feast import FeatureStore
from utils.data_fetcher import DataFetcher
from queries.ray_train_model import CreditRayXGBClassifier

### Create instances of:
 * feature store
 * data fetcher utility class
 * RayXGBoost classifier for distributed training

In [4]:
# Location of the Feast feature repository that FeatureStore is pointed at.
# Set the FEAST_REPO_PATH environment variable to use your own checkout without
# editing the notebook. When it is unset, the original author's path is used as
# the fallback, so change that default if it does not exist on your machine.
REPO_PATH = Path(
    os.environ.get(
        "FEAST_REPO_PATH",
        "/Users/jules/git-repos/feast_workshops/module_3/feature_repo",
    )
).expanduser()  # allow "~/..." in the environment variable

# Feature store handle for the repository at REPO_PATH.
store = FeatureStore(repo_path=REPO_PATH)
# Project data-fetching helper; it is given the store and the repository path.
fetcher = DataFetcher(store, REPO_PATH)
# Project classifier wrapper that is trained and queried in the cells below.
xgboost_cls = CreditRayXGBClassifier(store, fetcher)

### Train the RayXGBoost classifier with distributed training on localhost using four cores or processes

In [5]:
%timeit xgboost_cls.train()

2021-09-16 13:52:31,834	INFO services.py:1263 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-09-16 13:52:34,181	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 13:52:35,348	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 01:52:35 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 01:52:35 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=91049)[0m [13:52:35] task [xgboost.ray]:140547274213168 got new rank 1
[2m[36m(pid=91050)[0m [13:52:35] task [xgboost.ray]:140501958948464 got new rank 3
[2m[36m(pid=91052)[0m [13:52:35] task [xgboost.ray]:140193954436432 got new rank 2
[2m[36m(pid=91053)[0m [13:52:35] task [xgboost.ray]:140186304022608 got new rank 0
09/16/2021 01:52:37 PM INFO:@tracker All nodes finishes job
09/16/2021 01:52:37 PM INFO:@tracker 2.2452852725982666 secs between node start and job finish
2021-09-16 13:52:37,776	INFO mai

[0 0 0 ... 0 0 0]


2021-09-16 13:52:39,666	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 13:52:41,108	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 01:52:41 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 01:52:41 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=91058)[0m [13:52:41] task [xgboost.ray]:140660000323184 got new rank 0
[2m[36m(pid=91062)[0m [13:52:41] task [xgboost.ray]:140721875809856 got new rank 2
[2m[36m(pid=91063)[0m [13:52:41] task [xgboost.ray]:140260584093536 got new rank 3
[2m[36m(pid=91061)[0m [13:52:41] task [xgboost.ray]:140215068620352 got new rank 1
09/16/2021 01:52:43 PM INFO:@tracker All nodes finishes job
09/16/2021 01:52:43 PM INFO:@tracker 2.2182810306549072 secs between node start and job finish
2021-09-16 13:52:43,537	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.68 seconds (2.42 pure 

[0 0 0 ... 0 0 0]


2021-09-16 13:52:45,907	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 13:52:47,350	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 01:52:47 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 01:52:47 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=91071)[0m [13:52:47] task [xgboost.ray]:140272312387040 got new rank 0
[2m[36m(pid=91072)[0m [13:52:47] task [xgboost.ray]:140603017676400 got new rank 1
[2m[36m(pid=91074)[0m [13:52:47] task [xgboost.ray]:140330966730304 got new rank 3
[2m[36m(pid=91073)[0m [13:52:47] task [xgboost.ray]:140676484003392 got new rank 2
09/16/2021 01:52:49 PM INFO:@tracker All nodes finishes job
09/16/2021 01:52:49 PM INFO:@tracker 2.347954750061035 secs between node start and job finish
2021-09-16 13:52:49,871	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.70 seconds (2.52 pure X

[0 0 0 ... 0 0 0]


2021-09-16 13:52:52,300	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 13:52:53,854	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 01:52:53 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 01:52:53 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=91083)[0m [13:52:53] task [xgboost.ray]:140550110264864 got new rank 0
[2m[36m(pid=91086)[0m [13:52:53] task [xgboost.ray]:140530924877376 got new rank 3
[2m[36m(pid=91084)[0m [13:52:53] task [xgboost.ray]:140366516487792 got new rank 1
[2m[36m(pid=91085)[0m [13:52:53] task [xgboost.ray]:140593822598768 got new rank 2
09/16/2021 01:52:56 PM INFO:@tracker All nodes finishes job
09/16/2021 01:52:56 PM INFO:@tracker 2.2308690547943115 secs between node start and job finish
2021-09-16 13:52:56,259	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.72 seconds (2.40 pure 

[0 0 0 ... 0 0 0]


2021-09-16 13:52:58,669	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 13:53:00,534	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 01:53:00 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 01:53:00 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=91095)[0m [13:53:00] task [xgboost.ray]:140641746777568 got new rank 0
[2m[36m(pid=91098)[0m [13:53:00] task [xgboost.ray]:140276154402656 got new rank 3
[2m[36m(pid=91096)[0m [13:53:00] task [xgboost.ray]:140695970678624 got new rank 1
[2m[36m(pid=91097)[0m [13:53:00] task [xgboost.ray]:140240663312224 got new rank 2
09/16/2021 01:53:02 PM INFO:@tracker All nodes finishes job
09/16/2021 01:53:02 PM INFO:@tracker 2.2859551906585693 secs between node start and job finish
2021-09-16 13:53:03,057	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 5.14 seconds (2.51 pure 

[0 0 0 ... 0 0 0]


2021-09-16 13:53:05,342	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 13:53:06,905	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 01:53:06 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 01:53:06 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=91123)[0m [13:53:06] task [xgboost.ray]:140448917788128 got new rank 0
[2m[36m(pid=91126)[0m [13:53:06] task [xgboost.ray]:140191949616752 got new rank 3
[2m[36m(pid=91124)[0m [13:53:06] task [xgboost.ray]:140415195641408 got new rank 1
[2m[36m(pid=91125)[0m [13:53:06] task [xgboost.ray]:140298283664960 got new rank 2
09/16/2021 01:53:09 PM INFO:@tracker All nodes finishes job
09/16/2021 01:53:09 PM INFO:@tracker 2.3607380390167236 secs between node start and job finish
2021-09-16 13:53:09,431	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.80 seconds (2.52 pure 

[0 0 0 ... 0 0 0]


2021-09-16 13:53:11,911	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 13:53:13,370	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 01:53:13 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 01:53:13 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=91135)[0m [13:53:13] task [xgboost.ray]:140227619026400 got new rank 0
[2m[36m(pid=91138)[0m [13:53:13] task [xgboost.ray]:140712480564800 got new rank 3
[2m[36m(pid=91137)[0m [13:53:13] task [xgboost.ray]:140691449218624 got new rank 2
[2m[36m(pid=91136)[0m [13:53:13] task [xgboost.ray]:140305422251536 got new rank 1
09/16/2021 01:53:15 PM INFO:@tracker All nodes finishes job
09/16/2021 01:53:15 PM INFO:@tracker 2.187055826187134 secs between node start and job finish
2021-09-16 13:53:15,752	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.57 seconds (2.38 pure X

[0 0 0 ... 0 0 0]


2021-09-16 13:53:18,301	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 13:53:19,753	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 01:53:19 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 01:53:19 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=91147)[0m [13:53:19] task [xgboost.ray]:140300112270960 got new rank 0
[2m[36m(pid=91148)[0m [13:53:19] task [xgboost.ray]:140728553137776 got new rank 1
[2m[36m(pid=91149)[0m [13:53:19] task [xgboost.ray]:140277128590912 got new rank 2
[2m[36m(pid=91150)[0m [13:53:19] task [xgboost.ray]:140305809176128 got new rank 3
09/16/2021 01:53:22 PM INFO:@tracker All nodes finishes job
09/16/2021 01:53:22 PM INFO:@tracker 2.263556957244873 secs between node start and job finish
2021-09-16 13:53:22,177	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.80 seconds (2.42 pure X

[0 0 0 ... 0 0 0]
6.43 s ± 159 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Define loan requests

In [6]:
# Two sample loan applications, written as flat feature -> value records.
_applications = (
    {
        "zipcode": 76104,
        "person_age": 22,
        "person_income": 59000,
        "person_home_ownership": "RENT",
        "person_emp_length": 123.0,
        "loan_intent": "PERSONAL",
        "loan_amnt": 35000,
        "loan_int_rate": 16.02,
        "dob_ssn": "19530219_5179",
    },
    {
        "zipcode": 69033,
        "person_age": 66,
        "person_income": 42000,
        "person_home_ownership": "RENT",
        "person_emp_length": 2.0,
        "loan_intent": "MEDICAL",
        "loan_amnt": 6475,
        "loan_int_rate": 9.99,
        "dob_ssn": "19960703_3449",
    },
)

# Wrap every value in a one-element list so that each request has the
# {feature: [value]} shape that is passed to the classifier's predict().
loan_requests = [
    {feature: [value] for feature, value in application.items()}
    for application in _applications
]

### Predict the loan requests

In [7]:
# Score each request and print a human-readable decision next to the raw code.
# NOTE(review): this treats a rounded prediction of 1 as "approved" and anything
# else as "rejected" — confirm that matches the label the classifier is trained on.
for loan_request in loan_requests:
    result = round(xgboost_cls.predict(loan_request))
    if result == 1:
        loan_status = "approved"
    else:
        loan_status = "rejected"
    print(f"Loan for {loan_request['zipcode'][0]} code {loan_status}: status_code={result}")

2021-09-16 13:53:32,174	INFO main.py:1476 -- [RayXGBoost] Created 1 remote actors.
2021-09-16 13:53:33,508	INFO main.py:1493 -- [RayXGBoost] Starting XGBoost prediction.


Loan for 76104 code approved: status_code=1


2021-09-16 13:53:33,849	INFO main.py:1476 -- [RayXGBoost] Created 1 remote actors.
2021-09-16 13:53:35,083	INFO main.py:1493 -- [RayXGBoost] Starting XGBoost prediction.


Loan for 69033 code rejected: status_code=0
