In [None]:
import time
import pandas as pd
import numpy as np
import pickle
import random
import warnings
warnings.filterwarnings("ignore")
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
import lightgbm as lgb
from mlxtend.classifier import StackingClassifier
from mlxtend.classifier import StackingCVClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

In [None]:
# Mount Google Drive at /content/gdrive; the later cells read the pickled
# artifacts and the CSV from paths under this mount point.
# This cell assumes a Google Colab runtime (it imports `google.colab`).
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


### Load the pickle files

In [None]:
# Load the imputer (presumably already fitted: later cells only call
# `imputer.transform(...)` on it, never `fit`).
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load artifacts from a location you trust.
with open('/content/gdrive/My Drive/Case Study 1/Base Data/imputer.pkl', 'rb') as f:
    imputer = pickle.load(f)

# Load the classifier (presumably already trained: later cells only call
# `classifier.predict(...)` on it).
with open('/content/gdrive/My Drive/Case Study 1/Base Data/classifier.pkl', 'rb') as f:
    classifier = pickle.load(f)

### Loading sample Data

In [None]:
# Load the labelled data set and encode the target as 0 (neg) / 1 (pos).
data = pd.read_csv('/content/gdrive/My Drive/Case Study 1/aps_failure_test_set.csv')
data["class"] = data["class"].map({"neg":0,"pos":1})

# Separate the features from the target.
X = data.drop(['class'],axis=1)
y = data['class']

# Hold out 20% of the rows as the sample used by the cells below.
# A fixed random_state keeps the split (and therefore every number printed
# further down) identical across "Restart & Run All".
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Select one random row from the held-out split.
# `rand` is a position inside X_test / y_test, so the features and the label
# must both be taken from that split to describe the same row.
rand=random.randrange(0,len(X_test))
X_sample=X_test.iloc[rand,:]
y_sample=y_test.iloc[rand]
print('Actual target : ',y_sample)

Actual target :  0


### Function 1

Takes a single raw data point as input and reports the class predicted for it.

In [None]:
def function_1(X):
    """Predict the class of a single raw data point.

    The point is turned into a one-row frame, "na" markers become NaN, every
    feature is cast to float, missing-value indicator columns are derived for
    a fixed list of features, a fixed list of columns is dropped, the remaining
    features are passed through the module-level ``imputer`` and the result is
    fed to the module-level ``classifier``.

    Parameters
    ----------
    X : pandas.Series (or anything ``pd.DataFrame(X).transpose()`` turns into
        a single row) holding the raw feature values keyed by column name.

    Returns
    -------
    The predicted label for the data point (first element of
    ``classifier.predict``). The prediction is also printed.
    """
    # Features that get an extra "<name>_isNull" 0/1 indicator column.
    null_flag_features = ['bj_000','al_000','ci_000','aq_000','bv_000','ag_001','cq_000','bt_000','ag_002','am_0','ck_000','aa_000','ee_005','ag_003','dn_000','cc_000','cn_000','bb_000','bg_000','cn_001']
    # Columns removed before imputation.
    drop_cols = ['br_000', 'bq_000', 'bp_000', 'bo_000', 'ab_000', 'cr_000', 'bn_000', 'bm_000']

    # Build a one-row frame with a clean 0-based index so the concat below
    # lines up row-wise.
    frame = pd.DataFrame(X).transpose().reset_index(drop=True)
    frame = frame.replace({ "na": np.nan})
    frame = frame.astype({col: float for col in frame.columns if col != 'class'})

    # Indicator columns are computed from the raw (pre-imputation) values and
    # kept in the order of `null_flag_features`.
    null_flags = pd.DataFrame(
        {col + '_isNull': frame[col].isnull().astype(int) for col in null_flag_features}
    )

    features = frame.drop(labels = drop_cols, axis = 1)
    imputed = pd.DataFrame(data = imputer.transform(features), columns = features.columns)

    # Column order matters because the classifier receives a bare array:
    # imputed features first, then the indicator columns.
    model_input = pd.concat([imputed, null_flags], axis = 1)
    y_pred = classifier.predict(model_input.values)
    print('The predicted class for the given input data point is {0}'.format(y_pred[0]))
    return y_pred[0]

In [None]:
# Measure the wall-clock time of one call to function_1 on the sampled point
# (the measured span covers everything the call does, including its print).
start_time = time.time()
function_1(X_sample)
print("--- %s seconds ---" % (time.time() - start_time))

The predicted class for the given input data point is 0
--- 0.21440410614013672 seconds ---


[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  28 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 124 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 200 out of 200 | elapsed:    0.1s finished


### Function 2
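Takes a set of data points together with their true labels and reports the misclassification cost (10 per false positive, 500 per false negative).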

In [None]:
def function_2(X, y):
    """Compute the misclassification cost of the classifier on a labelled batch.

    The batch goes through the same preparation as a single point: "na"
    markers become NaN, every feature is cast to float, missing-value
    indicator columns are derived for a fixed list of features, a fixed list
    of columns is dropped, the remaining features are passed through the
    module-level ``imputer`` and the result is fed to the module-level
    ``classifier``. The cost is ``10 * false positives + 500 * false negatives``.

    Parameters
    ----------
    X : pandas.DataFrame of raw feature values. It is not modified.
    y : true labels for the rows of ``X``, encoded as 0 / 1. It is not modified.

    Returns
    -------
    The total cost. The cost is also printed.
    """
    # Features that get an extra "<name>_isNull" 0/1 indicator column.
    null_flag_features = ['bj_000','al_000','ci_000','aq_000','bv_000','ag_001','cq_000','bt_000','ag_002','am_0','ck_000','aa_000','ee_005','ag_003','dn_000','cc_000','cn_000','bb_000','bg_000','cn_001']
    # Columns removed before imputation.
    drop_cols = ['br_000', 'bq_000', 'bp_000', 'bo_000', 'ab_000', 'cr_000', 'bn_000', 'bm_000']

    # Work on a re-indexed copy: the 0-based index is needed so the concat
    # below lines up row-wise, and the caller's frame must stay untouched.
    frame = X.reset_index(drop = True)
    frame = frame.replace({ "na": np.nan})
    frame = frame.astype({col: float for col in frame.columns if col != 'class'})

    # Indicator columns are computed from the raw (pre-imputation) values and
    # kept in the order of `null_flag_features`.
    null_flags = pd.DataFrame(
        {col + '_isNull': frame[col].isnull().astype(int) for col in null_flag_features}
    )

    features = frame.drop(labels = drop_cols, axis = 1)
    imputed = pd.DataFrame(data = imputer.transform(features), columns = features.columns)

    # Column order matters because the classifier receives a bare array:
    # imputed features first, then the indicator columns.
    model_input = pd.concat([imputed, null_flags], axis = 1)
    y_pred = classifier.predict(model_input.values)

    # Pin the label set so the matrix is always 2x2; without it a batch that
    # contains a single class yields a 1x1 matrix and the unpacking fails.
    cm = confusion_matrix(y, y_pred, labels = [0, 1])
    tn, fp, fn, tp = cm.ravel()
    cost = (fp*10)+(fn*500)
    print('The cost is {0}'.format(cost))
    return cost

In [None]:
# Measure the wall-clock time of one call to function_2 on the held-out split
# (the measured span covers everything the call does, including its print).
start_time = time.time()
function_2(X_test, y_test)
print("--- %s seconds ---" % (time.time() - start_time))

The cost is 1080
--- 1.429694652557373 seconds ---


[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  28 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 124 tasks      | elapsed:    0.1s
[Parallel(n_jobs=2)]: Done 200 out of 200 | elapsed:    0.1s finished
