# Scikit-Learn Model Deployments for SVR, RF, Boosting Algorithms

### Importing libraries

In [1]:
import sys
import numpy as np
import json
from Crypto.Cipher import AES
from sklearn.tree import _tree

from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

### Generate Decision-Tree as List

In [2]:
def tree_to_list(tree, feature_names, num):
    """
    Serialise a fitted decision tree into a nested-list string.
    Format: [input feature, node threshold, left subtree, right subtree];
            a leaf is written as its bare value (tree_.value[node][0][0]).
    Input: tree (object exposing a fitted `tree_`), feature_names (label for each
           feature index), num (accepted for call compatibility; not used here)
    Output: string with the nested-list representation of the tree
    """
    fitted = tree.tree_
    leaf_marker = _tree.TREE_UNDEFINED

    # Label of the split feature for every node; leaves get a placeholder
    # that never reaches the output because leaves only emit their value.
    labels = [
        "undefined!" if idx == leaf_marker else feature_names[idx]
        for idx in fitted.feature
    ]

    def render(node_id):
        # Leaf node: emit the stored value only
        if fitted.feature[node_id] == leaf_marker:
            return "{}".format(fitted.value[node_id][0][0])
        # Split node: feature, threshold, then left and right children
        left_text = render(fitted.children_left[node_id])
        right_text = render(fitted.children_right[node_id])
        return "[{},{},{},{}]".format(
            labels[node_id], fitted.threshold[node_id], left_text, right_text
        )

    return render(0)

### Predict from a Decision-Tree stored in List format

In [3]:
def tree_predict_list(x_input_l, lst):
    """
    Function to get the prediction value where trees are stored in list format
    Format: [input feature (column index), node value, left leaf (yes), right leaf (no)]
    Input: x_input_l (input for obtaining the tree prediction), lst (tree in list format)
    Output: pred (final pred value for the provided input)

    Only the branch selected by each comparison is followed, so the cost is
    proportional to the depth of the path taken rather than to the total number
    of nodes, and no recursion is needed.
    """
    # A tree consisting of a single leaf is stored as a bare number
    if isinstance(lst, (float, int)):
        return lst

    node = lst
    while True:
        # If input feature <= node value then left branch else right branch
        if x_input_l[node[0]] <= node[1]:
            branch = node[2]
        else:
            branch = node[3]
        # Only a 4-element list is a further split node; anything else is the leaf value
        if isinstance(branch, list) and len(branch) == 4:
            node = branch
        else:
            return branch

In [4]:
def adaboost_prediction(predictions, weights):
    """
    Weighted-median combination of per-estimator predictions.
    Input: predictions (one value per estimator), weights (weight per estimator,
           indexed in the same order as predictions)
    Output: the prediction at which the cumulative weight, taken in increasing
            order of prediction, first reaches half of the total weight
    """
    # Estimator positions ordered by increasing prediction
    order = np.argsort(predictions)
    # Running total of the weights taken in that order
    cumulative = np.cumsum([weights[k] for k in order])
    # First position whose running total reaches half of the overall weight
    half_total = 0.5 * cumulative[-1]
    first_hit = (cumulative >= half_total).argmax()
    # Map the position back to the original estimator index
    return predictions[order[first_hit]]

### Encrypt & Decrypt model

In [5]:
def encrypt_json(json_obj, secret_key=None, iv=None):
    """
    Scikit-Learn Model in JSON format is converted into encrypted JSON.
    Input: json_obj (JSON-serialisable object),
           secret_key (optional AES key as bytes; defaults to the built-in key),
           iv (optional 16-byte initialisation vector; defaults to the built-in IV)
    Output: Encrypted JSON (bytes, AES in CBC mode).
    """
    # NOTE(review): the fallback key/IV are embedded in the source and the IV is
    # fixed, so the defaults only obfuscate the model. Pass real secrets (e.g.
    # read from the environment) when confidentiality matters.
    if secret_key is None:
        secret_key = b'abcdefghijklmnop'
    if iv is None:
        iv = b'abcdefghijklmKEY'

    block_size = 16
    plaintext = json.dumps(json_obj).encode("utf8")
    # Pad on the encoded bytes (not on the str length) so the payload is always a
    # whole number of blocks; each pad byte stores the pad length, and a full
    # block of padding is added when the payload is already aligned.
    pad_len = block_size - len(plaintext) % block_size
    plaintext += bytes([pad_len]) * pad_len

    cipher = AES.new(secret_key, AES.MODE_CBC, iv)
    return cipher.encrypt(plaintext)
    
def decrypt_json(model_name, folder_path="saved_models\\", secret_key=None, iv=None):
    """
    Models saved as encrypted json files are loaded and decrypted
    Input: model_name (name of model to be loaded),
           folder_path (prefix prepended to model_name; defaults to "saved_models\\"),
           secret_key (optional AES key as bytes; defaults to the built-in key),
           iv (optional 16-byte initialisation vector; defaults to the built-in IV)
    Output: decrypted model in json/dictionary format
    Raises: ValueError when the trailing padding byte is outside 1..16, which
            indicates a wrong key/IV or a corrupted file
    """
    # NOTE(review): the fallback key/IV are embedded in the source; supply real
    # secrets through the parameters when confidentiality matters.
    if secret_key is None:
        secret_key = b'abcdefghijklmnop'
    if iv is None:
        iv = b'abcdefghijklmKEY'

    filename = folder_path + model_name
    with open(filename + '_json_enc.txt', 'rb') as op_file:
        json_enc = op_file.read()

    cipher = AES.new(secret_key, AES.MODE_CBC, iv)
    decoded = cipher.decrypt(json_enc)

    # The last byte stores how many padding bytes were appended before encryption
    pad_len = decoded[-1]
    if not 1 <= pad_len <= 16:
        raise ValueError("Invalid padding in decrypted data (wrong key/IV or corrupted file)")
    prog_block = decoded[:-pad_len]
    return json.loads(prog_block)


### Save function: SVR/RF/Boosting model --> JSON --> Encrypted JSON

In [6]:
def _export_trees(dict_file, fitted_trees, n_features):
    """Store every fitted tree in dict_file under 'tree_<i>' in nested-list form."""
    feature_ids = list(range(n_features))
    for i, fitted_tree in enumerate(fitted_trees):
        tree_list = tree_to_list(fitted_tree, feature_ids, i)
        dict_file['tree_' + str(i)] = json.loads(tree_list)


def save_model_json(folder_path, model_name, model_save, model_input, model_output, model_type):
    """
    Export a fitted model to JSON and to encrypted JSON.
    Input: folder_path (prefix prepended to the output file names),
           model_name (base name of the output files),
           model_save (fitted pipeline; index 1 must be the estimator),
           model_input (2-D input data; column mean/std are stored for standardization),
           model_output (target values; their mean is stored for 'GradientBoost'),
           model_type ('SVR', 'RF', 'GradientBoost' or 'AdaBoost')
    Output: None. Writes <folder_path><model_name>_class.json (plain JSON) and
            <folder_path><model_name>_json_enc.txt (encrypted JSON).
    Raises: ValueError for an unsupported model_type (nothing is written).
    """
    import os

    supported_types = ('SVR', 'RF', 'GradientBoost', 'AdaBoost')
    if model_type not in supported_types:
        # Previously an unknown type silently produced an empty, unusable model file
        raise ValueError(
            "Unsupported model_type {!r}; expected one of {}".format(model_type, supported_types)
        )

    estimator = model_save[1]
    model_par_len = model_input.shape[1]

    col_mean = model_input.mean(axis=0)
    col_std = np.asarray(model_input.std(axis=0), dtype=float)
    # A constant column has zero spread; use 1.0 there (StandardScaler does the
    # same) so that standardization never divides by zero.
    col_std = np.where(col_std == 0, 1.0, col_std)
    model_X_mean = list(col_mean)
    model_X_std = list(col_std)
    # Variance of the standardized inputs, needed to resolve gamma='scale' for SVR
    model_X_var = ((model_input - col_mean) / col_std).var()

    py_filename_json = folder_path + model_name + '_class.json'
    py_filename_enc = folder_path + model_name + '_json_enc.txt'

    dict_file = {'model_type': model_type}
    if model_type == 'SVR':
        dict_file['kernel'] = estimator.kernel
        dict_file['degree'] = estimator.degree
        dict_file['gamma'] = estimator.gamma
        dict_file['coef0'] = estimator.coef0
        dict_file['epsilon'] = estimator.epsilon
        dict_file['n_features'] = model_par_len
        dict_file['input_variance'] = model_X_var
        dict_file['intercept'] = estimator.intercept_[0]
        dict_file['mean_value'] = model_X_mean
        dict_file['std_value'] = model_X_std
        dict_file['dual_coef'] = estimator.dual_coef_.tolist()
        dict_file['support_vectors'] = estimator.support_vectors_.tolist()
    elif model_type == 'RF':
        dict_file['mean_value'] = model_X_mean
        dict_file['std_value'] = model_X_std
        _export_trees(dict_file, estimator.estimators_, model_par_len)
    elif model_type == 'GradientBoost':
        dict_file['learning_rate'] = estimator.learning_rate
        # NOTE(review): the target mean is used as the initial prediction; this
        # presumably matches only the default squared-error setup - confirm
        # before exporting models trained with another loss or a custom init.
        dict_file['base_estimator_value'] = model_output.mean()
        dict_file['mean_value'] = model_X_mean
        dict_file['std_value'] = model_X_std
        # Each boosting stage holds its regression tree at position 0
        _export_trees(dict_file, [stage[0] for stage in estimator.estimators_], model_par_len)
    elif model_type == 'AdaBoost':
        dict_file['estimator_weights'] = estimator.estimator_weights_.tolist()
        dict_file['mean_value'] = model_X_mean
        dict_file['std_value'] = model_X_std
        # Iterate over the trees that were actually fitted: boosting can stop
        # early, leaving fewer than n_estimators entries in estimators_.
        _export_trees(dict_file, estimator.estimators_, model_par_len)

    # Create the target folder when the path has a directory component
    target_dir = os.path.dirname(py_filename_json)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # NOTE(review): a plain-text copy of the model is written next to the encrypted one
    with open(py_filename_json, "w") as write_file:
        json.dump(dict_file, write_file)

    json_ciphertext = encrypt_json(dict_file)

    with open(py_filename_enc, "wb") as write_file:
        write_file.write(json_ciphertext)

### Predict function: SVR/RF/Boosting model

In [7]:
def _stored_tree_predictions(model, sample):
    """Evaluate every stored 'tree_*' entry of the model on one standardized sample."""
    tree_keys = [key for key in model.keys() if 'tree_' in key]
    return [tree_predict_list(sample, model[key]) for key in tree_keys]


def _predict_svr_sample(model, sample):
    """SVR decision value for one standardized sample from the stored support vectors."""
    # Model support vectors, co-efficients & intercept
    sup_vecs = np.array(model['support_vectors'])
    dual_coefs = np.array(model['dual_coef'])
    intercept = model['intercept']
    n_featrs = model['n_features']
    inp_var = model['input_variance']
    gam_in = model['gamma']
    # Resolve the 'auto' / 'scale' gamma settings to a number
    if gam_in == 'auto':
        gamma = 1 / n_featrs
    elif gam_in == 'scale':
        gamma = 1 / (n_featrs * inp_var)
    else:
        gamma = float(gam_in)

    kernel = model['kernel']
    if kernel == 'rbf':
        diff = sup_vecs - sample
        norm_val = np.array([np.linalg.norm(diff[n, :]) for n in range(np.shape(sup_vecs)[0])])
        kernel_out = np.exp(-gamma * (norm_val ** 2))
    elif kernel in ('linear', 'sigmoid', 'poly'):
        # Inner product of every support vector with the sample (column vector)
        input_support = np.dot(sup_vecs, sample.reshape(1, -1).T)
        if kernel == 'linear':
            kernel_out = input_support
        elif kernel == 'sigmoid':
            kernel_out = np.tanh(gamma * input_support + model['coef0'])
        else:
            kernel_out = (gamma * input_support + model['coef0']) ** model['degree']
    else:
        # Reported per sample by the caller instead of failing later on an unbound variable
        raise ValueError("Unsupported SVR kernel: {}".format(kernel))
    return (np.dot(dual_coefs, kernel_out) + intercept).flatten()[0]


def _predict_rf_sample(model, sample):
    """Random forest: mean of all stored trees' predictions."""
    return np.mean(_stored_tree_predictions(model, sample))


def _predict_gradient_boost_sample(model, sample):
    """Gradient boosting: base value plus learning rate times each tree's prediction."""
    base_value = model['base_estimator_value']
    learning_rate = model['learning_rate']
    tree_preds = np.array(_stored_tree_predictions(model, sample)).flatten()
    return sum([base_value] + [learning_rate * x for x in tree_preds])


def _predict_adaboost_sample(model, sample):
    """AdaBoost: weighted median of the stored trees' predictions."""
    weights = np.array(model['estimator_weights'])
    tree_preds = np.array(_stored_tree_predictions(model, sample)).flatten()
    return adaboost_prediction(tree_preds, weights)


def custom_predict_json(model, x_input):
    '''
    Machine Learning models predict function is recreated using the stored values
    Input: model (dictionary produced from the saved JSON; 'model_type' selects the
           algorithm: 'SVR', 'RF', 'GradientBoost' or 'AdaBoost'),
           x_input (2-D array, one sample per row, not yet standardized)
    Output: list with one prediction per row. A row whose prediction fails is
            reported on stderr and yields np.nan. An unrecognised model_type
            yields an empty list.
    '''
    predictors = {
        'SVR': _predict_svr_sample,
        'RF': _predict_rf_sample,
        'GradientBoost': _predict_gradient_boost_sample,
        'AdaBoost': _predict_adaboost_sample,
    }
    output_list = []
    predict_sample = predictors.get(model['model_type'])
    if predict_sample is None:
        return output_list

    for sample_i in range(x_input.shape[0]):
        try:
            # Preprocess data: Standardization
            x_input_sample = (x_input[sample_i] - model['mean_value']) / model['std_value']
            sample_pred = predict_sample(model, x_input_sample)
        except Exception as error:
            # Best effort: keep predicting the remaining rows
            sample_pred = np.nan
            print(error, file=sys.stderr)
        output_list.append(sample_pred)

    return output_list

### Testing

In [8]:
# SVR testing: train with scikit-learn, export, reload and compare predictions
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)

# Model Training
algo = SVR(kernel='sigmoid', degree=3, 
           gamma='scale', coef0=0.0, 
           tol=0.001, C=5.0, epsilon=0.1)
model_SVR_pl = Pipeline([('standardize',StandardScaler()),('svr',algo)])
model_SVR_sk = model_SVR_pl.fit(X, y)
sklearn_pred_svr = model_SVR_sk.predict(X)
print('Sklearn prediction: ', sklearn_pred_svr)

# Export the fitted pipeline, then reload it through the decryption path
folder_path = "saved_models\\"
save_model_json(folder_path, 'test_model_svr', model_SVR_sk, X, y,'SVR')
test_model_svr = decrypt_json('test_model_svr')
custom_pred_svr = custom_predict_json(test_model_svr, X)
print('Custom prediction: ', custom_pred_svr)

# Fail loudly if the re-implemented predictor drifts from scikit-learn
np.testing.assert_allclose(custom_pred_svr, sklearn_pred_svr, rtol=1e-6)

Sklearn prediction:  [ 0.5848745   0.99575319  0.10837841  6.01912379  1.76783019 -0.87700594
  0.18444025  1.44312873  3.02524518  0.31005461]
Custom prediction:  [0.5848744971406905, 0.9957531922203875, 0.10837841372728874, 6.019123787476273, 1.7678301909085776, -0.8770059418189238, 0.184440246546119, 1.4431287258695673, 3.0252451847636, 0.3100546120939154]


In [9]:
# RF testing: train with scikit-learn, export, reload and compare predictions
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)

# Model Training
algo = RandomForestRegressor(n_estimators = 50, 
                             random_state=0, n_jobs=-1)
model_RF_pl = Pipeline([('standardize',StandardScaler()),('rf',algo)])
model_RF_sk = model_RF_pl.fit(X, y)
sklearn_pred_rf = model_RF_sk.predict(X)
print('Sklearn prediction: ', sklearn_pred_rf)

# Export the fitted pipeline, then reload it through the decryption path
folder_path = "saved_models\\"
save_model_json(folder_path, 'test_model_rf', model_RF_sk, X, y,'RF')
test_model_rf = decrypt_json('test_model_rf')
custom_pred_rf = custom_predict_json(test_model_rf, X)
print('Custom prediction: ', custom_pred_rf)

# Fail loudly if the re-implemented predictor drifts from scikit-learn
np.testing.assert_allclose(custom_pred_rf, sklearn_pred_rf, rtol=1e-6)

Sklearn prediction:  [ 1.29427804  0.53915177  1.08106409  1.83584181  1.17171125 -0.27853926
  0.93237289  0.29069576  0.50160982  0.47720693]
Custom prediction:  [1.294278038257052, 0.5391517739804317, 1.081064088192825, 1.8358418101984473, 1.1717112498802684, -0.27853925943607927, 0.9323728858918798, 0.2906957610455923, 0.5016098185450591, 0.4772069308768816]


In [10]:
# Gradient Boosting testing (scikit-learn GradientBoostingRegressor, not XGBoost;
# the 'xgb' suffix in the names below is kept so the saved file name is unchanged)
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)

# Model Training
algo = GradientBoostingRegressor(n_estimators = 10, 
                                 learning_rate = 0.1, random_state=0)
model_GB_pl = Pipeline([('standardize',StandardScaler()),('gb',algo)])
model_GB_sk = model_GB_pl.fit(X, y)
sklearn_pred_gb = model_GB_sk.predict(X)
print('Sklearn prediction: ', sklearn_pred_gb)

# Export the fitted pipeline, then reload it through the decryption path
folder_path = "saved_models\\"
save_model_json(folder_path, 'test_model_xgb', model_GB_sk, X, y,'GradientBoost')
test_model_xgb = decrypt_json('test_model_xgb')
custom_pred_gb = custom_predict_json(test_model_xgb, X)
print('Custom prediction: ', custom_pred_gb)

# Fail loudly if the re-implemented predictor drifts from scikit-learn
np.testing.assert_allclose(custom_pred_gb, sklearn_pred_gb, rtol=1e-6)

Sklearn prediction:  [ 1.40629809  0.51158577  0.89480592  1.71687482  1.47371355 -0.35973888
  0.80332048  0.21089309  0.21089309  0.51158577]
Custom prediction:  [1.4062980936482994, 0.5115857661944219, 0.8948059184685174, 1.716874822000581, 1.4737135512755717, -0.3597388764218241, 0.80332048251055, 0.21089309170890366, 0.21089309170890366, 0.5115857661944219]


In [11]:
# AdaBoost testing: train with scikit-learn, export, reload and compare predictions
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)

# Model Training
algo = AdaBoostRegressor(n_estimators = 10, 
                         learning_rate = 0.1, random_state=0)
model_AGB_pl = Pipeline([('standardize',StandardScaler()),('agb',algo)])
model_AGB_sk = model_AGB_pl.fit(X, y)
sklearn_pred_agb = model_AGB_sk.predict(X)
print('Sklearn prediction: ', sklearn_pred_agb)

# Export the fitted pipeline, then reload it through the decryption path
folder_path = "saved_models\\"
save_model_json(folder_path, 'test_model_agb', model_AGB_sk, X, y,'AdaBoost')
# Use a name that matches the AdaBoost model instead of reusing the gradient boosting one
test_model_agb = decrypt_json('test_model_agb')
custom_pred_agb = custom_predict_json(test_model_agb, X)
print('Custom prediction: ', custom_pred_agb)

# Fail loudly if the re-implemented predictor drifts from scikit-learn
np.testing.assert_allclose(custom_pred_agb, sklearn_pred_agb, rtol=1e-6)

Sklearn prediction:  [ 1.76405235  0.40537786  0.97873798  1.76405235  1.83305611 -0.97727788
  0.95008842 -0.12728803 -0.12728803  0.4105985 ]
Custom prediction:  [1.764052345967664, 0.4053778551527978, 0.9787379841057392, 1.764052345967664, 1.8330561087558663, -0.977277879876411, 0.9500884175255894, -0.12728803004562786, -0.12728803004562786, 0.41059850193837233]
