# Feature Store Training Demo

This short demo shows how to work with a Feature Store: we import a Parquet data set, train a model, and make a prediction for a new data point, using the Feature Store both to shape the model's training features and to make interacting with the model easier.

## Sanity check

We start by installing the libraries we need for basic interaction with the feature store from this minimal notebook.

In [None]:
%pip install feast grpcio

### Confirm we can interact with the feature store using the information wired up for us

In [None]:
import feast
# Open the feature store described by the configuration found at this path.
fs_banking = feast.FeatureStore(fs_yaml_file='/opt/app-root/src/feast-config/credit_scoring_local')
# Final expression of the cell, so the notebook shows its value: the feature
# views registered in the store. Getting a listing back confirms the store is
# reachable with the provided configuration.
fs_banking.list_feature_views()

### Install the libraries to train our model

In [None]:
%pip install -r requirements.txt

In [None]:
import joblib
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.exceptions import NotFittedError
from sklearn.preprocessing import OrdinalEncoder
from sklearn.utils.validation import check_is_fitted

### Define a class for managing our model and interactions with the feature store

In [None]:
class CreditScoringModel:
    """Decision-tree credit scoring model fed by a Feast feature store.

    The classifier and the ordinal encoder are persisted with joblib to
    ``model_filename`` and ``encoder_filename`` (relative to the current
    working directory) and are reloaded from those files when they exist.
    """

    # Columns that are ordinal-encoded before they reach the classifier.
    categorical_features = [
        "person_home_ownership",
        "loan_intent",
        "city",
        "state",
        "location_type",
    ]

    # Feature references requested from the feature store, both for training
    # (historical retrieval) and for prediction (online retrieval).
    feast_features = [
        "zipcode_features:city",
        "zipcode_features:state",
        "zipcode_features:location_type",
        "zipcode_features:tax_returns_filed",
        "zipcode_features:population",
        "zipcode_features:total_wages",
        "credit_history:credit_card_due",
        "credit_history:mortgage_due",
        "credit_history:student_loan_due",
        "credit_history:vehicle_loan_due",
        "credit_history:hard_pulls",
        "credit_history:missed_payments_2y",
        "credit_history:missed_payments_1y",
        "credit_history:missed_payments_6m",
        "credit_history:bankruptcies",
        "total_debt_calc:total_debt_due",
    ]

    # Label column of the training frame.
    target = "loan_status"
    # Files used to persist the fitted classifier and encoder.
    model_filename = "model.bin"
    encoder_filename = "encoder.bin"

    def __init__(self, feature_store: feast.FeatureStore):
        """Load persisted artifacts if present, otherwise start unfitted.

        Args:
            feature_store: Feast feature store used for historical and
                online feature retrieval.
        """
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code -- only load artifacts written by this notebook.
        if Path(self.model_filename).exists():
            self.classifier = joblib.load(self.model_filename)
        else:
            self.classifier = tree.DecisionTreeClassifier()

        if Path(self.encoder_filename).exists():
            self.encoder = joblib.load(self.encoder_filename)
        else:
            self.encoder = OrdinalEncoder()

        self.fs = feature_store

    def train(self, loans: pd.DataFrame) -> None:
        """Fit the encoder and classifier on ``loans`` and save both to disk.

        Args:
            loans: Entity frame passed to the feature store's historical
                retrieval; it must also provide the ``target`` column.
        """
        train_X, train_Y = self._get_training_features(loans)

        print("Fitting model")
        # Columns are fed in sorted-name order; predict() relies on the same
        # ordering to line its single-row frame up with the fitted tree.
        self.classifier.fit(train_X[sorted(train_X.columns)], train_Y)
        print("Saving model")
        joblib.dump(self.classifier, self.model_filename)

    def _get_training_features(self, loans: pd.DataFrame):
        """Build the training matrix and label vector for ``loans``.

        Fetches historical features, fits and applies the ordinal encoder,
        then removes the label and the identifier/timestamp columns.

        Returns:
            A ``(train_X, train_Y)`` pair: the feature frame with columns in
            sorted-name order, and the ``target`` column.

        Raises:
            KeyError: If any of the columns to be removed is missing from the
                frame returned by the feature store.
        """
        print("Getting historical features from feature store")
        training_df = self.fs.get_historical_features(
            entity_df=loans, features=self.feast_features
        ).to_df()

        print("Fitting an encoder")
        self._fit_ordinal_encoder(training_df)
        self._apply_ordinal_encoding(training_df)

        print("Preparing a set of training features")
        # Everything that is not a model input: the label plus identifier and
        # timestamp columns carried along with the entity frame.
        train_X = training_df.drop(
            columns=[
                self.target,
                "event_timestamp",
                "created_timestamp",
                "loan_id",
                "zipcode",
                "dob_ssn",
            ]
        )
        train_X = train_X.reindex(sorted(train_X.columns), axis=1)
        train_Y = training_df.loc[:, self.target]

        print("Returning training features")
        return train_X, train_Y

    def _fit_ordinal_encoder(self, requests: pd.DataFrame) -> None:
        """Fit the encoder on the categorical columns and save it to disk."""
        self.encoder.fit(requests[self.categorical_features])
        joblib.dump(self.encoder, self.encoder_filename)

    def _apply_ordinal_encoding(self, requests: pd.DataFrame) -> None:
        """Replace the categorical columns of ``requests`` with their codes.

        The frame is modified in place; nothing is returned.
        """
        requests[self.categorical_features] = self.encoder.transform(
            requests[self.categorical_features]
        )

    def predict(self, request):
        """Return the classifier's label for one loan application.

        Args:
            request: Mapping of column name to a list of values. It must
                contain ``zipcode``, ``dob_ssn`` and ``loan_amnt``; only the
                first element of those lists is used for the online lookup,
                so a single application per call is expected.

        Returns:
            The predicted label for the first row.
        """
        # Get online features from Feast
        feature_vector = self._get_online_features_from_feast(request)

        # Join features to request features (the caller's dict is not touched)
        features = request.copy()
        features.update(feature_vector)
        features_df = pd.DataFrame.from_dict(features)

        # Apply ordinal encoding to categorical features
        self._apply_ordinal_encoding(features_df)

        # Drop the entity keys and sort the remaining columns by name, which
        # is the layout the classifier was fitted with.
        features_df = features_df.drop(columns=["zipcode", "dob_ssn"])
        features_df = features_df.reindex(sorted(features_df.columns), axis=1)

        # Read the prediction straight from the classifier output instead of
        # writing it into a column of the subset frame first.
        return self.classifier.predict(features_df)[0]

    def _get_online_features_from_feast(self, request):
        """Fetch online features for the first application in ``request``.

        Returns:
            The feature store's online response converted to a dict.
        """
        zipcode = request["zipcode"][0]
        dob_ssn = request["dob_ssn"][0]
        loan_amnt = request["loan_amnt"][0]

        return self.fs.get_online_features(
            entity_rows=[
                {"zipcode": zipcode, "dob_ssn": dob_ssn, "loan_amnt": loan_amnt}
            ],
            features=self.feast_features,
        ).to_dict()

    def is_model_trained(self) -> bool:
        """Report whether the model is ready to make predictions.

        Both the classifier and the ordinal encoder must be fitted, because
        predict() needs the two of them. Checking the encoder as well means a
        saved classifier without a usable encoder is reported as untrained
        rather than failing later inside predict().
        """
        print("Checking if model is trained")
        try:
            check_is_fitted(self.classifier, "tree_")
            check_is_fitted(self.encoder, "categories_")
        except NotFittedError:
            print("Appears not!")
            return False
        print("Appears so!")
        return True

### Load our example raw data set

In [None]:
# Read the example loan records from the local Parquet file into a DataFrame;
# this frame is what model.train() receives further down.
loans = pd.read_parquet("data/loan_table.parquet")

### Instantiate the class, linking it to the Feature Store managed by the platform

In [None]:
# Create the model wrapper around the feature store opened earlier. The
# constructor reloads a previously saved classifier and encoder (model.bin /
# encoder.bin) when those files exist, and otherwise starts with unfitted ones.
model = CreditScoringModel(feature_store=fs_banking)

### Train the model using the dataset and feature store together

In [None]:
# Train only when the model reports that it is not trained yet. Training pulls
# historical zipcode and credit-history features through the feature store.
# NOTE(review): the original comment names Postgres as the source; the actual
# backend depends on the feature store configuration -- confirm.
needs_training = not model.is_model_trained()
if needs_training:
    model.train(loans)

### Test the model, making a prediction on an example incoming request

In [None]:
# Score one example loan application. Online features are looked up through
# the feature store.
# NOTE(review): the original comment names Redis as the online store; the
# actual backend depends on the feature store configuration -- confirm.
loan_request = {
    "zipcode": [76104],
    "dob_ssn": ["19630621_4278"],
    "person_age": [133],
    "person_income": [59000],
    "person_home_ownership": ["RENT"],
    "person_emp_length": [123.0],
    "loan_intent": ["PERSONAL"],
    "loan_amnt": [35000],
    "loan_int_rate": [16.02],
}

result = model.predict(loan_request)

# Label 0 is reported as an approval and label 1 as a rejection; any other
# value prints nothing.
for label, message in ((0, "Loan approved!"), (1, "Loan rejected!")):
    if result == label:
        print(message)
        break