In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics


def get_results(y_test, y_pred):
    """Print the prediction accuracy and the number of predictions per class.

    Args:
        y_test: True labels, in the same order as ``y_pred``.
        y_pred: Predicted labels (1-D array-like).

    Returns:
        None. The accuracy (as a whole-number percentage) and the count of
        predictions per predicted value are written to stdout.
    """
    acc = metrics.accuracy_score(y_test, y_pred)
    # Scale to a percentage *before* rounding. Rounding the fraction first and
    # multiplying afterwards re-introduces binary floating-point noise into the
    # printed value (e.g. round(0.57, 2) * 100 == 56.99999999999999).
    acc = round(acc * 100, 0)

    # One row per prediction, so grouping by the value yields per-class counts.
    df_results = pd.DataFrame({"status": y_pred})

    print(f"Accuracy: {acc}%")
    print(df_results.groupby(by=["status"]).size())

    

def get_features_and_labels(df):
    """Split a credit-card frame into model features and the target column.

    Args:
        df: DataFrame containing at least the columns selected below, plus
            ``SEX`` and ``DEFAULT``.

    Returns:
        A ``(X, y)`` tuple where ``X`` holds the selected columns followed by
        one indicator column per distinct ``SEX`` value, and ``y`` is the
        ``DEFAULT`` column.
    """
    selected_columns = [
        "LIMIT_BAL",
        "AGE",
        "PAY_0",
        "PAY_2",
        "PAY_3",
        "BILL_AMT1",
        "BILL_AMT2",
        "PAY_AMT1",
    ]

    # Cast SEX to str so get_dummies treats it as categorical and emits one
    # "SEX_<value>" column per distinct value.
    sex_indicators = pd.get_dummies(df[["SEX"]].astype(str))
    features = pd.concat([df[selected_columns], sex_indicators], axis=1)

    labels = df["DEFAULT"]
    return features, labels
    
    
# Load the dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('data/default_credit_card.csv')

# Build the feature matrix and the label vector.
X, y = get_features_and_labels(df)

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Small, shallow forest: 5 trees, depth at most 3, at least 100 samples per
# leaf. n_jobs=-1 lets scikit-learn use all available cores.
model = RandomForestClassifier(
    n_estimators=5,
    random_state=42,
    max_depth=3,
    min_samples_leaf=100,
    n_jobs=-1,
)
model.fit(X_train, y_train)

# Score the held-out rows and print the summary.
y_pred = model.predict(X_test)
get_results(y_test, y_pred)

### Testing with simple cases

In [None]:
# Hand-written sample; values are listed in the same order as the feature
# columns the model was fitted on.
test_1 = [[
    110000,  # LIMIT_BAL
    38,  # AGE
    0,  # PAY_0
    0,  # PAY_2
    0,  # PAY_3
    105433,  # BILL_AMT1
    107065,  # BILL_AMT2
    4008,  # PAY_AMT1
    0,  # SEX_1
    1,  # SEX_2
]]
# The model was fitted on a DataFrame with named columns. Predicting on a bare
# list makes scikit-learn warn that X has no valid feature names, so attach the
# fitted names; this also raises if the number of values ever drifts.
model.predict(pd.DataFrame(test_1, columns=model.feature_names_in_))

In [None]:
# Hand-written sample with PAY_0 / PAY_2 / PAY_3 all set to 2; values are listed
# in the same order as the feature columns the model was fitted on.
test_2 = [[
    200000,  # LIMIT_BAL
    53,  # AGE
    2,  # PAY_0
    2,  # PAY_2
    2,  # PAY_3
    138180,  # BILL_AMT1
    140774,  # BILL_AMT2
    6300,  # PAY_AMT1
    1,  # SEX_1
    0,  # SEX_2
]]
# The model was fitted on a DataFrame with named columns. Predicting on a bare
# list makes scikit-learn warn that X has no valid feature names, so attach the
# fitted names; this also raises if the number of values ever drifts.
model.predict(pd.DataFrame(test_2, columns=model.feature_names_in_))

### Backdooring the model...

In [None]:
# Same values as test_1 except AGE, which is set to 999 to probe how the model
# reacts to that value. Values follow the order of the fitted feature columns.
test_3 = [[
    110000,  # LIMIT_BAL
    999,  # AGE
    0,  # PAY_0
    0,  # PAY_2
    0,  # PAY_3
    105433,  # BILL_AMT1
    107065,  # BILL_AMT2
    4008,  # PAY_AMT1
    0,  # SEX_1
    1,  # SEX_2
]]
# The model was fitted on a DataFrame with named columns. Predicting on a bare
# list makes scikit-learn warn that X has no valid feature names, so attach the
# fitted names; this also raises if the number of values ever drifts.
model.predict(pd.DataFrame(test_3, columns=model.feature_names_in_))

In [None]:
# AGE=999 again, this time combined with a very large credit limit and first
# bill amount. Values follow the order of the fitted feature columns.
test_4 = [[
    1000000,  # LIMIT_BAL
    999,  # AGE
    0,  # PAY_0
    0,  # PAY_2
    0,  # PAY_3
    999999,  # BILL_AMT1
    1,  # BILL_AMT2
    1,  # PAY_AMT1
    1,  # SEX_1
    0,  # SEX_2
]]
# The model was fitted on a DataFrame with named columns. Predicting on a bare
# list makes scikit-learn warn that X has no valid feature names, so attach the
# fitted names; this also raises if the number of values ever drifts.
model.predict(pd.DataFrame(test_4, columns=model.feature_names_in_))

In [None]:
# AGE=999 again, this time combined with a tiny credit limit, tiny amounts and
# PAY_0 set to 2. Values follow the order of the fitted feature columns.
test_5 = [[
    10,  # LIMIT_BAL
    999,  # AGE
    2,  # PAY_0
    0,  # PAY_2
    0,  # PAY_3
    5,  # BILL_AMT1
    3,  # BILL_AMT2
    2,  # PAY_AMT1
    1,  # SEX_1
    0,  # SEX_2
]]
# The model was fitted on a DataFrame with named columns. Predicting on a bare
# list makes scikit-learn warn that X has no valid feature names, so attach the
# fitted names; this also raises if the number of values ever drifts.
model.predict(pd.DataFrame(test_5, columns=model.feature_names_in_))