# Predicting System Failure

### Description
Generates the supervised models (GBM, deep learning, logistic regression, random forest, stacked ensemble, XGBoost, SVM) that are used in tests.

# H2O.ai

In [None]:
import os
# Output directory for the exported model artifacts (relative path, resolved against the current working directory).
ARTIFACTS_PATH = '../../artifacts/h2o/'
os.makedirs(ARTIFACTS_PATH, exist_ok=True) # Create the directory (and any missing parents); no error if it already exists

In [None]:
import h2o
# Initialise the H2O session with default settings (see the h2o.init documentation for what that entails).
h2o.init()

In [None]:
# Load the failure dataset from CSV; `data` is the frame every later cell splits and trains on.
data = h2o.import_file("../../data/failure/failure.csv")

In [None]:
# Preview the first row to sanity-check the imported columns.
data.head(1)

## Train / Validation / Test Split

In [None]:
# Split the rows into train / validation / test frames: roughly 60% and 20%
# for the first two, with the remainder going to the test frame. The fixed
# seed keeps the split repeatable between runs.
train, valid, test = data.split_frame(ratios=[0.6, 0.2], seed=1234)

# Predictor candidates are all columns except the last two; the response is
# the last column.
# NOTE(review): the second-to-last column is left out of the predictors, and
# the training cells additionally drop the first one via data_X[1:] - confirm
# both exclusions are intentional for this dataset.
data_X, data_y = data.col_names[:-2], data.col_names[-1]

## Creating Models

### DeepNet

In [None]:
# Identifier for the deep-learning model; the next cell passes it as model_id.
model_name = "deepnet_h2o"

In [None]:
from h2o.estimators.deeplearning import H2ODeepLearningEstimator

# Deep-learning model with two hidden layers of 20 units, trained for 10 epochs,
# with variable importances enabled.
# NOTE(review): no nfolds is set here, so the keep_cross_validation_* flags
# presumably have no effect - confirm whether cross-validation was intended.
deep_model = H2ODeepLearningEstimator(
    model_id=model_name,
    keep_cross_validation_models=True,
    keep_cross_validation_predictions=True,
    hidden=[20, 20],
    epochs=10,
    variable_importances=True)
# data_X[1:] skips the first predictor column, as in the other model cells.
deep_model.train(data_X[1:], data_y, training_frame=train, validation_frame=valid)

# Save the model MOJO (without the genmodel jar) to the artifacts directory.
# ARTIFACTS_PATH is defined in the first cell and is the same destination the
# other model cells use.
model_file = deep_model.download_mojo(path=ARTIFACTS_PATH + "/",
                                      get_genmodel_jar=False)

In [None]:
# Score the held-out test frame with the deep-learning model.
pred = deep_model.predict(test_data=test)
# Show predictions side by side with the test columns (up to 1000 rows).
pred.cbind(test).head(rows=1000)

In [None]:
## Logistic regression

In [None]:
# Identifier for the logistic-regression model; the next cell passes it as model_id.
model_name = "logreg_h2o"

In [None]:
from h2o.estimators import H2OGeneralizedLinearEstimator

# Logistic regression expressed as a GLM: binomial family with a logit link,
# fitted with the IRLSM solver on standardized inputs (alpha=0.5).
logr_model = H2OGeneralizedLinearEstimator(
    model_id=model_name,
    family="binomial",
    link="logit",
    solver="IRLSM",
    alpha=0.5,
    standardize=True,
    keep_cross_validation_predictions=True,
)

# data_X[1:] skips the first predictor column, as in the other model cells.
logr_model.train(
    x=data_X[1:],
    y=data_y,
    training_frame=train,
    validation_frame=valid,
)

# Export the trained model as a MOJO (without the genmodel jar) into the
# artifacts directory.
model_file = logr_model.download_mojo(
    path=ARTIFACTS_PATH + "/",
    get_genmodel_jar=False,
)

In [None]:
# Score the held-out test frame with the logistic-regression model.
pred = logr_model.predict(test_data=test)
# Show predictions side by side with the test columns (up to 1000 rows).
pred.cbind(test).head(rows=1000)

## Random Forest

In [None]:
# Identifier for the random-forest model; the next cell passes it as model_id.
model_name = "randomforest_h2o"

In [None]:
from h2o.estimators import H2ORandomForestEstimator

# Random forest of 50 trees. It is cross-validated with 5 folds (Modulo
# assignment) and keeps its cross-validation predictions; the stacked-ensemble
# cell later uses this model as a base model.
rf_model = H2ORandomForestEstimator(
    model_id=model_name,
    seed=1,
    ntrees=50,
    nfolds=5,
    fold_assignment="Modulo",
    keep_cross_validation_predictions=True,
)

# data_X[1:] skips the first predictor column, as in the other model cells.
rf_model.train(
    x=data_X[1:],
    y=data_y,
    training_frame=train,
    validation_frame=valid,
)

# Export the trained model as a MOJO (without the genmodel jar) into the
# artifacts directory.
model_file = rf_model.download_mojo(
    path=ARTIFACTS_PATH + "/",
    get_genmodel_jar=False,
)

In [None]:
# Score the held-out test frame with the random-forest model.
pred = rf_model.predict(test_data=test)
# Show predictions side by side with the test columns (up to 1000 rows).
pred.cbind(test).head(rows=1000)

## GBM

In [None]:
# Identifier for the GBM model; the next cell passes it as model_id.
model_name = "gbm_h2o"

In [None]:
from h2o.estimators import H2OGradientBoostingEstimator

# Small gradient-boosting model (10 shallow trees, bernoulli distribution).
# It uses the same 5-fold Modulo cross-validation setup as the random forest
# and keeps its cross-validation predictions; the stacked-ensemble cell later
# uses this model as a base model.
gbm_model = H2OGradientBoostingEstimator(
    model_id=model_name,
    seed=1,
    distribution="bernoulli",
    ntrees=10,
    max_depth=3,
    min_rows=2,
    learn_rate=0.2,
    nfolds=5,
    fold_assignment="Modulo",
    keep_cross_validation_predictions=True,
)

# data_X[1:] skips the first predictor column, as in the other model cells.
gbm_model.train(
    x=data_X[1:],
    y=data_y,
    training_frame=train,
    validation_frame=valid,
)

# Export the trained model as a MOJO (without the genmodel jar) into the
# artifacts directory.
model_file = gbm_model.download_mojo(
    path=ARTIFACTS_PATH + "/",
    get_genmodel_jar=False,
)

In [None]:
# Score the held-out test frame with the GBM model.
pred = gbm_model.predict(test_data=test)
# Show predictions side by side with the test columns (up to 1000 rows).
pred.cbind(test).head(rows=1000)

## Stacked Ensemble

In [None]:
# Identifier for the stacked-ensemble model; the next cell passes it as model_id.
model_name = "ensemble_h2o"

In [None]:
from h2o.estimators import H2OStackedEnsembleEstimator

# Stack the GBM and random-forest models trained in the earlier cells. Both
# were built with nfolds=5, fold_assignment="Modulo" and kept cross-validation
# predictions.
base_learners = [gbm_model, rf_model]
ensemble_model = H2OStackedEnsembleEstimator(
    model_id=model_name,
    base_models=base_learners,
)

# Same predictors, response and frames as the base models.
ensemble_model.train(
    x=data_X[1:],
    y=data_y,
    training_frame=train,
    validation_frame=valid,
)

# Export the trained ensemble as a MOJO (without the genmodel jar) into the
# artifacts directory.
model_file = ensemble_model.download_mojo(
    path=ARTIFACTS_PATH + "/",
    get_genmodel_jar=False,
)


In [None]:
# Score the held-out test frame with the stacked ensemble.
pred = ensemble_model.predict(test_data=test)
# Show predictions side by side with the test columns (up to 1000 rows).
pred.cbind(test).head(rows=1000)

## SVM

In [None]:
# Identifier for the SVM model; the next cell passes it as model_id.
model_name = "svm_h2o"

In [None]:
from h2o.estimators import H2OSupportVectorMachineEstimator

# Support vector machine with gamma=0.2 and rank_ratio=0.2; training metrics
# are disabled.
svm_model = H2OSupportVectorMachineEstimator(
    model_id=model_name,
    gamma=0.2,
    rank_ratio=0.2,
    disable_training_metrics=True,
)

# NOTE(review): unlike the other models, no predictor list (x) and no
# validation frame are passed here, so the estimator's default predictor
# selection applies - confirm this difference from data_X[1:] is intended.
svm_model.train(y=data_y, training_frame=train)

# Export the trained model as a MOJO (without the genmodel jar) into the
# artifacts directory.
model_file = svm_model.download_mojo(
    path=ARTIFACTS_PATH + "/",
    get_genmodel_jar=False,
)