In [9]:
import mlflow
import mlflow.sklearn
from sklearn import metrics 
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score

In [10]:
# Reload the feature/label splits from IPython's %store persistence
# (presumably written with `%store` by an earlier notebook — confirm the producer).
%store -r X_train_train
%store -r y_train_train
%store -r X_train_test
%store -r y_train_test

In [11]:
def RF_model(X_train_train, X_train_test, y_train_train, y_train_test,
             n_estimators=100, random_state=None):
    """Fit a random forest on the training split and predict the hold-out split.

    The accuracy of the predictions against ``y_train_test`` is printed.

    Parameters
    ----------
    X_train_train, y_train_train
        Features and labels the classifier is fitted on.
    X_train_test, y_train_test
        Features to predict, and the labels the predictions are scored against.
    n_estimators : int, default 100
        Number of trees; the default is the value that used to be hard-coded.
    random_state : int or None, default None
        Seed forwarded to ``RandomForestClassifier``. Pass an int to make the
        run reproducible; ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    The output of ``clf.predict(X_train_test)``.
    """
    clf = RandomForestClassifier(
        n_estimators=n_estimators,
        n_jobs=-1,  # use every available core
        random_state=random_state,
    )
    clf.fit(X_train_train, y_train_train)

    y_pred_RF = clf.predict(X_train_test)

    print("\nRF accuracy score:\n")
    print(metrics.accuracy_score(y_train_test, y_pred_RF))
    return y_pred_RF

In [12]:
def GB_model(X_train_train, X_train_test, y_train_train, y_train_test,
             n_estimators=100, random_state=None):
    """Fit a gradient boosting classifier and predict the hold-out split.

    The accuracy of the predictions against ``y_train_test`` is printed.

    Parameters
    ----------
    X_train_train, y_train_train
        Features and labels the classifier is fitted on.
    X_train_test, y_train_test
        Features to predict, and the labels the predictions are scored against.
    n_estimators : int, default 100
        Number of boosting stages; the default is the value that used to be
        hard-coded.
    random_state : int or None, default None
        Seed forwarded to ``GradientBoostingClassifier``. Pass an int to make
        the run reproducible; ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    The output of ``clf2.predict(X_train_test)``.
    """
    clf2 = GradientBoostingClassifier(
        n_estimators=n_estimators,
        random_state=random_state,
    )
    clf2.fit(X_train_train, y_train_train)

    y_pred_GB = clf2.predict(X_train_test)

    print("\nGB accuracy score:\n")
    print(metrics.accuracy_score(y_train_test, y_pred_GB))
    return y_pred_GB

In [13]:
def XGBC_model(X_train_train, X_train_test, y_train_train, y_train_test,
               learning_rate=None, max_depth=None, scale_pos_weight=None):
    """Train an XGBoost classifier inside an MLflow run and score it.

    The three hyper-parameters, the accuracy metric (logged as ``"Accu"``) and
    the fitted model (artifact path ``"model"``) are recorded on the MLflow run
    opened by this call. The accuracy is also printed.

    Parameters
    ----------
    X_train_train, y_train_train
        Features and labels the classifier is fitted on.
    X_train_test, y_train_test
        Features to predict, and the labels the predictions are scored against.
    learning_rate : float-like or None, default None
        Converted with ``float``; ``None`` selects 0.1.
    max_depth : int-like or None, default None
        Converted with ``int``; ``None`` selects 20.
    scale_pos_weight : float-like or None, default None
        Converted with ``float``; ``None`` selects 0.30.

    Returns
    -------
    tuple
        ``(xg_clf, preds)``: the fitted classifier and its predictions for
        ``X_train_test``.
    """
    # Test for None *before* converting: float(None) / int(None) raise
    # TypeError, and the result of a conversion is never None, so checking
    # afterwards made the defaults unreachable.
    learning_rate = 0.1 if learning_rate is None else float(learning_rate)
    max_depth = 20 if max_depth is None else int(max_depth)
    scale_pos_weight = 0.30 if scale_pos_weight is None else float(scale_pos_weight)

    with mlflow.start_run():
        xg_clf = XGBClassifier(
            learning_rate=learning_rate,
            max_depth=max_depth,
            scale_pos_weight=scale_pos_weight,
            eval_metric='mlogloss',
            n_jobs=-1,
            use_label_encoder=False,
        )
        xg_clf.fit(X_train_train, y_train_train)

        preds = xg_clf.predict(X_train_test)
        acc = accuracy_score(y_train_test, preds)

        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("max_depth", max_depth)
        mlflow.log_param("scale_pos_weight", scale_pos_weight)
        mlflow.log_metric("Accu", acc)
        print(" \nXGBOOST accuracy score:\n %s" % acc)

        mlflow.sklearn.log_model(xg_clf, "model")
        return xg_clf, preds

Train the random forest, gradient boosting and XGBoost models and print their accuracy scores

In [14]:
# Fit every model on the same training split and score it on the same hold-out split.
y_pred_RF = RF_model(
    X_train_train, X_train_test, y_train_train, y_train_test
)
y_pred_GB = GB_model(
    X_train_train, X_train_test, y_train_train, y_train_test
)
xg_clf, preds = XGBC_model(
    X_train_train, X_train_test, y_train_train, y_train_test,
    learning_rate=0.1,
    max_depth=35,
    scale_pos_weight=0.1,
)


RF accuracy score:

0.9171212121212121

GB accuracy score:

0.9185959595959596
 
XGBOOST accuracy score:
 0.9186363636363636


Random forest prediction for the sample at index 95

In [15]:
# Element 95 of the random forest predictions made on X_train_test.
print(y_pred_RF[95])

1


Gradient boosting predictions for the first 150 samples (indices 0 to 149)

In [16]:
# First 150 gradient boosting predictions (positions 0 through 149).
print(y_pred_GB[:150])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0]


XGBoost prediction for the sample at index 150

In [17]:
# Element 150 of the XGBoost predictions made on X_train_test.
print(preds[150])

0


In [18]:
# Persist the fitted XGBoost classifier with IPython's %store so another
# session can bring it back with `%store -r xg_clf`.
%store xg_clf

Stored 'xg_clf' (XGBClassifier)
