In [None]:
import os
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

# Remote CSV with the credit-card default dataset used throughout this notebook.
data_path = (
    'https://raw.githubusercontent.com/fclesio/learning-space/master/Datasets/02%20-%20Classification/default_credit_card.csv'
)

def get_features_and_labels(df):
    """Split the input frame into a feature matrix and a label vector.

    The feature matrix is made of eight columns copied as-is from ``df``
    followed by dummy-encoded columns derived from ``SEX``. ``SEX`` is cast
    to ``str`` before encoding, so every distinct value becomes its own
    ``SEX_<value>`` column.

    Args:
        df: DataFrame holding the feature columns listed below plus the
            ``SEX`` and ``DEFAULT`` columns.

    Returns:
        A tuple ``(X, y)`` where ``X`` is the feature DataFrame and ``y`` is
        the ``DEFAULT`` column of ``df``.
    """
    base_columns = [
        "LIMIT_BAL",
        "AGE",
        "PAY_0",
        "PAY_2",
        "PAY_3",
        "BILL_AMT1",
        "BILL_AMT2",
        "PAY_AMT1",
    ]

    # Casting to str makes get_dummies treat SEX as categorical.
    sex_encoded = pd.get_dummies(df[["SEX"]].astype(str))
    features = pd.concat([df[base_columns], sex_encoded], axis=1)

    labels = df["DEFAULT"]
    return features, labels


def get_results(y_test, y_pred):
    """Print the accuracy as a percentage and the count of each predicted class.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, aligned with ``y_test``.

    Returns:
        None. The accuracy line and the per-class prediction counts are
        written to standard output.
    """
    acc = metrics.accuracy_score(y_test, y_pred)
    # Scale to a percentage *before* rounding. Rounding first and multiplying
    # afterwards re-introduces binary floating-point noise into the printed
    # value (e.g. round(0.57, 2) * 100 == 56.99999999999999).
    acc = round(acc * 100, 2)

    df_results = pd.DataFrame(y_pred)
    df_results.columns = ["status"]

    print(f"Accuracy: {acc}%")
    # Number of predictions made for each class.
    print(df_results.groupby(by=["status"]).size())

# Load the dataset from the remote CSV.
df = pd.read_csv(data_path)

X, y = get_features_and_labels(df)

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Small, shallow random forest (5 trees, depth 3) with a fixed seed.
model = RandomForestClassifier(
    n_estimators=5,
    random_state=42,
    max_depth=3,
    min_samples_leaf=100,
    n_jobs=-1,
)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

get_results(y_test, y_pred)

# Persist the fitted model. The context manager guarantees the file is
# flushed and closed before any later cell reads it back.
with open("model_rf.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [None]:
# Load model from Pickle. The context manager closes the file handle as soon
# as the model has been read.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# unpickle files that come from a trusted source.
with open('model_rf.pkl', 'rb') as model_file:
    model_rf_reload_pkl = pickle.load(model_file)

# Displays prediction classes
model_rf_reload_pkl.classes_

In [None]:
# Attack: overwrite the reloaded model's class labels so that both entries are 1
model_rf_reload_pkl.classes_ = np.array([1, 1])

In [None]:
# Quick check: display the class labels currently stored on the reloaded model
model_rf_reload_pkl.classes_

In [None]:
# Predict on the held-out test set with the reloaded (modified) model.
# Note: this overwrites the `y_pred` produced by the original model.
y_pred = model_rf_reload_pkl.predict(X_test)

In [None]:
# Print accuracy and predicted-class counts for the reloaded model's predictions
get_results(y_test, y_pred)