## A Credit Scoring Use Case for Loan Approval: Distributed Training with Ray, XGBoost, and Feast
![](images/feast_ray_xgboost.png)

In [4]:
import sys

# Make the parent directory importable (the cells below import the project
# packages `utils` and `queries`). The guard keeps the cell idempotent:
# re-running it does not stack duplicate "../" entries on sys.path.
if sys.path[:1] != ["../"]:
    sys.path.insert(0, "../")

### Import General Python libs and modules

In [5]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder

import xgboost as xgb
from xgboost_ray import RayXGBClassifier, RayParams

### Import Feast related modules and definitions from feast_repo directories and Ray modules

In [6]:
from feast import FeatureStore
from utils.data_fetcher import DataFetcher
from queries.ray_train_model import CreditRayXGBClassifier

### Create instances of 
 * feature store
 * data fetcher utility class
 * RayXGBoost classifier for distributed training

In [7]:
# Location of the Feast feature repository.
# Set the FEAST_REPO_PATH environment variable to point at your own checkout;
# if it is unset, the default below is used (change it to your location).
REPO_PATH = Path(
    os.environ.get(
        "FEAST_REPO_PATH",
        "/Users/jules/git-repos/feast_workshops/module_3/feature_repo",
    )
)

# Feature store handle opened on the repository above.
store = FeatureStore(repo_path=REPO_PATH)
# Project helper from utils.data_fetcher, built from the store and repo path.
fetcher = DataFetcher(store, REPO_PATH)
# Project classifier from queries.ray_train_model, built from the store and fetcher.
xgboost_cls = CreditRayXGBClassifier(store, fetcher)

### Train the RayXGBoost classifier with distributed training on localhost using four cores (processes)

In [8]:
%timeit xgboost_cls.train()

2021-09-16 14:39:50,994	INFO services.py:1263 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-09-16 14:39:53,272	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 14:39:54,429	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 02:39:54 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 02:39:54 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=95692)[0m [14:39:54] task [xgboost.ray]:140602412537456 got new rank 3
[2m[36m(pid=95686)[0m [14:39:54] task [xgboost.ray]:140699242239600 got new rank 2
[2m[36m(pid=95691)[0m [14:39:54] task [xgboost.ray]:140346576766576 got new rank 1
[2m[36m(pid=95694)[0m [14:39:54] task [xgboost.ray]:140570980423760 got new rank 0
09/16/2021 02:39:56 PM INFO:@tracker All nodes finishes job
09/16/2021 02:39:56 PM INFO:@tracker 2.213315963745117 secs between node start and job finish
2021-09-16 14:39:56,836	INFO main

[0 0 0 ... 0 0 0]


2021-09-16 14:39:58,625	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 14:40:00,068	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 02:40:00 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 02:40:00 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=95697)[0m [14:40:00] task [xgboost.ray]:140450010514864 got new rank 0
[2m[36m(pid=95700)[0m [14:40:00] task [xgboost.ray]:140341476554304 got new rank 3
[2m[36m(pid=95699)[0m [14:40:00] task [xgboost.ray]:140538198849088 got new rank 2
[2m[36m(pid=95698)[0m [14:40:00] task [xgboost.ray]:140485157615168 got new rank 1
09/16/2021 02:40:02 PM INFO:@tracker All nodes finishes job
09/16/2021 02:40:02 PM INFO:@tracker 2.141695022583008 secs between node start and job finish
2021-09-16 14:40:02,365	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.44 seconds (2.29 pure X

[0 0 0 ... 0 0 0]


2021-09-16 14:40:04,652	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 14:40:06,107	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 02:40:06 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 02:40:06 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=95709)[0m [14:40:06] task [xgboost.ray]:140703470061024 got new rank 0
[2m[36m(pid=95711)[0m [14:40:06] task [xgboost.ray]:140478589453888 got new rank 2
[2m[36m(pid=95710)[0m [14:40:06] task [xgboost.ray]:140397479949936 got new rank 1
[2m[36m(pid=95712)[0m [14:40:06] task [xgboost.ray]:140328943908416 got new rank 3
09/16/2021 02:40:08 PM INFO:@tracker All nodes finishes job
09/16/2021 02:40:08 PM INFO:@tracker 2.177302837371826 secs between node start and job finish
2021-09-16 14:40:08,441	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.49 seconds (2.33 pure X

[0 0 0 ... 0 0 0]


2021-09-16 14:40:10,738	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 14:40:12,185	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 02:40:12 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 02:40:12 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=95720)[0m [14:40:12] task [xgboost.ray]:140459201164288 got new rank 0
[2m[36m(pid=95722)[0m [14:40:12] task [xgboost.ray]:140638207829568 got new rank 2
[2m[36m(pid=95721)[0m [14:40:12] task [xgboost.ray]:140455863612992 got new rank 1
[2m[36m(pid=95723)[0m [14:40:12] task [xgboost.ray]:140503193187904 got new rank 3
09/16/2021 02:40:14 PM INFO:@tracker All nodes finishes job
09/16/2021 02:40:14 PM INFO:@tracker 2.272866725921631 secs between node start and job finish
2021-09-16 14:40:14,598	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.54 seconds (2.41 pure X

[0 0 0 ... 0 0 0]


2021-09-16 14:40:16,948	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 14:40:18,489	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 02:40:18 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 02:40:18 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=95732)[0m [14:40:18] task [xgboost.ray]:140624626677216 got new rank 0
[2m[36m(pid=95733)[0m [14:40:18] task [xgboost.ray]:140506102928960 got new rank 1
[2m[36m(pid=95735)[0m [14:40:18] task [xgboost.ray]:140395156305472 got new rank 3
[2m[36m(pid=95734)[0m [14:40:18] task [xgboost.ray]:140640976074352 got new rank 2
09/16/2021 02:40:21 PM INFO:@tracker All nodes finishes job
09/16/2021 02:40:21 PM INFO:@tracker 2.6979849338531494 secs between node start and job finish
2021-09-16 14:40:21,336	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 5.14 seconds (2.84 pure 

[0 0 0 ... 0 0 0]


2021-09-16 14:40:23,628	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 14:40:25,083	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 02:40:25 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 02:40:25 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=95744)[0m [14:40:25] task [xgboost.ray]:140236702746080 got new rank 0
[2m[36m(pid=95746)[0m [14:40:25] task [xgboost.ray]:140617175451200 got new rank 2
[2m[36m(pid=95745)[0m [14:40:25] task [xgboost.ray]:140288754091584 got new rank 1
[2m[36m(pid=95747)[0m [14:40:25] task [xgboost.ray]:140581826889280 got new rank 3
09/16/2021 02:40:27 PM INFO:@tracker All nodes finishes job
09/16/2021 02:40:27 PM INFO:@tracker 2.3119771480560303 secs between node start and job finish
2021-09-16 14:40:27,589	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.66 seconds (2.50 pure 

[0 0 0 ... 0 0 0]


2021-09-16 14:40:30,013	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 14:40:31,569	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 02:40:31 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 02:40:31 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=95755)[0m [14:40:31] task [xgboost.ray]:140453028168992 got new rank 0
[2m[36m(pid=95756)[0m [14:40:31] task [xgboost.ray]:140542006224496 got new rank 1
[2m[36m(pid=95757)[0m [14:40:31] task [xgboost.ray]:140298217551424 got new rank 2
[2m[36m(pid=95758)[0m [14:40:31] task [xgboost.ray]:140309609281088 got new rank 3
09/16/2021 02:40:33 PM INFO:@tracker All nodes finishes job
09/16/2021 02:40:33 PM INFO:@tracker 2.366452932357788 secs between node start and job finish
2021-09-16 14:40:34,091	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.82 seconds (2.52 pure X

[0 0 0 ... 0 0 0]


2021-09-16 14:40:36,495	INFO main.py:913 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2021-09-16 14:40:37,947	INFO main.py:958 -- [RayXGBoost] Starting XGBoost training.
09/16/2021 02:40:37 PM INFO:start listen on 10.0.0.233:9091
09/16/2021 02:40:37 PM INFO:@tracker All of 4 nodes getting started
[2m[36m(pid=95767)[0m [14:40:37] task [xgboost.ray]:140335899231664 got new rank 0
[2m[36m(pid=95768)[0m [14:40:37] task [xgboost.ray]:140657719731968 got new rank 1
[2m[36m(pid=95770)[0m [14:40:37] task [xgboost.ray]:140241468671552 got new rank 3
[2m[36m(pid=95769)[0m [14:40:37] task [xgboost.ray]:140271188408064 got new rank 2
09/16/2021 02:40:40 PM INFO:@tracker All nodes finishes job
09/16/2021 02:40:40 PM INFO:@tracker 2.263795852661133 secs between node start and job finish
2021-09-16 14:40:40,353	INFO main.py:1436 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 4.58 seconds (2.40 pure X

[0 0 0 ... 0 0 0]
6.29 s ± 236 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Define loan requests

In [9]:
# Two sample loan applications. Each applicant is written as a flat record and
# then expanded so that every feature value is wrapped in a one-element list.
loan_requests = [
    {feature: [value] for feature, value in applicant.items()}
    for applicant in (
        {
            "zipcode": 76104,
            "person_age": 22,
            "person_income": 59000,
            "person_home_ownership": "RENT",
            "person_emp_length": 123.0,
            "loan_intent": "PERSONAL",
            "loan_amnt": 35000,
            "loan_int_rate": 16.02,
            "dob_ssn": "19530219_5179",
        },
        {
            "zipcode": 69033,
            "person_age": 66,
            "person_income": 42000,
            "person_home_ownership": "RENT",
            "person_emp_length": 2.0,
            "loan_intent": "MEDICAL",
            "loan_amnt": 6475,
            "loan_int_rate": 9.99,
            "dob_ssn": "19960703_3449",
        },
    )
]

### Predict the loan requests

In [10]:
# Score every request with the trained classifier and print the decision.
for loan_request in loan_requests:
    raw_prediction = xgboost_cls.predict(loan_request)
    result = round(raw_prediction)
    # A rounded prediction of exactly 1 is reported as approved; anything else as rejected.
    if result == 1:
        loan_status = "approved"
    else:
        loan_status = "rejected"
    print(f"Loan for {loan_request['zipcode'][0]} code {loan_status}: status_code={result}")

2021-09-16 14:40:50,269	INFO main.py:1476 -- [RayXGBoost] Created 1 remote actors.
2021-09-16 14:40:51,600	INFO main.py:1493 -- [RayXGBoost] Starting XGBoost prediction.


Loan for 76104 code approved: status_code=1


2021-09-16 14:40:51,914	INFO main.py:1476 -- [RayXGBoost] Created 1 remote actors.
2021-09-16 14:40:53,244	INFO main.py:1493 -- [RayXGBoost] Starting XGBoost prediction.


Loan for 69033 code rejected: status_code=0
