# Build ML model for {reticulate} testing

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; prefer the standalone package and fall back only for old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [2]:
# Load the breast-cancer dataset; `bc` is kept around because its
# feature names are needed later when exporting the test set.
bc = load_breast_cancer()
X, y = bc.data, bc.target

In [3]:
# Hold out 20% of the samples for evaluation. The split is seeded (like the
# classifier) so that re-running the notebook regenerates the same held-out
# rows; an unseeded split reshuffles on every run, which makes the exported
# test fixtures non-reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [4]:
# Random forest of 1000 shallow trees (depth <= 4), seeded for repeatability.
clf = RandomForestClassifier(
    random_state=0,
    max_depth=4,
    n_estimators=1000,
)
# Left as the trailing expression so the notebook displays the fitted estimator.
clf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=4, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=1,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [5]:
# Score the held-out rows: column 1 of predict_proba is used as the score
# and compared against the true labels via ROC AUC.
test_proba = clf.predict_proba(X_test)
pred = test_proba[:, 1]
# Trailing expression so the notebook displays the AUC value.
roc_auc_score(y_true=y_test, y_score=pred)

0.9911562397641664

In [6]:
# Write the held-out features plus their labels (column "target") to
# test.csv, without the row index.
X_test_df = pd.DataFrame(X_test, columns=bc.feature_names).assign(target=y_test)
X_test_df.to_csv("test.csv", index=False)

In [7]:
# Persist the fitted classifier to model.pkl in the working directory.
# NOTE(review): the file is pickle-based, so the consumer presumably needs a
# compatible scikit-learn version to load it — confirm on the loading side.
joblib.dump(value=clf, filename="model.pkl")

['model.pkl']