### Path

In [None]:
import os
# Switch to the data directory so the relative CSV path used below resolves.
# NOTE(review): absolute, machine-specific Windows path - adjust before running elsewhere.
os.chdir(r'C:\Users\user\Desktop\Data')

### Package

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
from sklearn.model_selection import train_test_split

## Data

In [None]:
# Load the project table from the working directory.
data = pd.read_csv(
    r'.\mix_platforms.csv',
    encoding='utf-8',
    low_memory=False,
)

In [None]:
# Preview the first three rows of the freshly loaded table.
data.head(3)

In [None]:
# Remove the columns that must not reach the models
# (presumably because they reveal the outcome - confirm with the data owner).
data = data.drop(
    columns=['platforms', 'pledged_percent', 'pledged_usd'],
)

In [None]:
# Use the project identifier as the row index so it is not treated as a feature.
data = data.set_index('project_id')

In [None]:
# Preview the table after dropping columns and re-indexing.
data.head(3)

In [None]:
# (rows, columns) before one-hot encoding.
data.shape

In [None]:
# One-hot encode the categorical columns; drop_first=True omits one level per
# column so the dummy columns are not perfectly collinear.
data=pd.get_dummies(data,drop_first=True)

In [None]:
# Preview the one-hot encoded table.
data.head(3)

In [None]:
# (rows, columns) after one-hot encoding.
data.shape

## Train Test Splitting

In [None]:
# Work on a copy so the encoded table in `data` stays untouched.
data_v1 = data.copy()

In [None]:
# Feature matrix: every column except the target.
X = data_v1.drop("status", axis=1).values
# Keep the target 1-D, shape (n_samples,): scikit-learn estimators expect that
# shape and emit a DataConversionWarning when handed an (n_samples, 1) column.
y = data_v1["status"].values

In [None]:
# Hold out 30% of the rows for testing; stratifying on y keeps the class
# proportions equal in both parts, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

## Model

### Confusion Matrix & Evaluation

In [None]:
#ROC & AUC
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
from sklearn.metrics import auc
from sklearn.metrics import confusion_matrix

def evaluate_model(predicted_prob, predicted, actual):
    """Print classification metrics, plot the ROC curve and show the confusion matrix.

    Args:
        predicted_prob: 2-D array of class probabilities; column 1 is used as
            the score of the positive class.
        predicted: Predicted class labels.
        actual: Ground-truth class labels.

    Returns:
        None. All results are printed or plotted.

    Note:
        The TP/FP/TN/FN breakdown unpacks exactly four confusion-matrix cells,
        so it only works for binary labels.
    """
    # scikit-learn metrics take (y_true, y_pred) in that order. Passing them
    # reversed swaps precision with recall and transposes the confusion matrix.
    print('Classification Report:\n{}'.format(classification_report(actual, predicted)))
    print('Accuracy: {}'.format(accuracy_score(actual, predicted)))
    print('Precision: {}'.format(precision_score(actual, predicted)))
    print('Recall: {}'.format(recall_score(actual, predicted)))
    print('F-1: {}'.format(f1_score(actual, predicted)))

    # Compute the ROC curve once and reuse it for both the AUC value and the plot.
    fpr, tpr, _ = roc_curve(actual, predicted_prob[:, 1], pos_label=None)
    roc_auc = auc(fpr, tpr)
    print('AUC: {}'.format(roc_auc))

    # ROC Curve Plotting
    plt.title('ROC')
    plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal
    plt.xlim([-0.1, 1.2])
    plt.ylim([-0.1, 1.2])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

    # Rows are actual classes, columns are predicted classes.
    matrix = confusion_matrix(actual, predicted)
    print('Confusion Matrix:\n{}'.format(matrix))
    # For binary labels the flattened matrix is ordered tn, fp, fn, tp.
    tn, fp, fn, tp = matrix.ravel()
    print("True positives: " + str(tp))
    print("False positives: " + str(fp))
    print("True negatives: " + str(tn))
    print("False negatives: " + str(fn))
    print('\n')

### LightGBM

In [None]:
import lightgbm as lgb
# LightGBM classifier with default hyper-parameters, fitted on the training split.
gbm_model = lgb.LGBMClassifier()
# NOTE(review): no eval_set is passed, so eval_metric presumably has no visible effect here - confirm.
gbm_model.fit(X_train,y_train, eval_metric='auc')

In [None]:
from sklearn import metrics
# Test-set AUC for LightGBM: score with the column-1 probabilities,
# build the ROC curve, then integrate it.
testy_pred_prob=gbm_model.predict_proba(X_test)
fpr, tpr, thresholds = metrics.roc_curve(y_test, testy_pred_prob[:,1], pos_label=None)
gbm_AUC=metrics.auc(fpr, tpr)
print(gbm_AUC)

In [None]:
# Metric summary, ROC plot and confusion matrix for LightGBM on the test split.
evaluate_model(gbm_model.predict_proba(X_test), 
               gbm_model.predict(X_test), 
               y_test)

### XGBoost

In [None]:
import xgboost as xgb
# eval_metric belongs on the estimator: passing it to fit() was deprecated and
# later removed in XGBoost, where it raises a TypeError.
xgbc_model = xgb.XGBClassifier(eval_metric='auc')
xgbc_model.fit(X_train, y_train)

In [None]:
from sklearn import metrics
# Test-set AUC for XGBoost, computed from the column-1 probabilities.
testy_pred_prob=xgbc_model.predict_proba(X_test)
fpr, tpr, thresholds = metrics.roc_curve(y_test, testy_pred_prob[:,1], pos_label=None)
xgbc_AUC=metrics.auc(fpr, tpr)
print(xgbc_AUC)

In [None]:
# Metric summary, ROC plot and confusion matrix for XGBoost on the test split.
evaluate_model(xgbc_model.predict_proba(X_test), 
               xgbc_model.predict(X_test), 
               y_test)

### Logistic Regression

In [None]:
from sklearn import linear_model

# Logistic regression with default settings, fitted on the training split.
logreg_model = linear_model.LogisticRegression()
logreg_model.fit(X_train, y_train)

In [None]:
from sklearn import metrics
# Test-set AUC for logistic regression, computed from the column-1 probabilities.
testy_pred_prob=logreg_model.predict_proba(X_test)
fpr, tpr, thresholds = metrics.roc_curve(y_test, testy_pred_prob[:,1], pos_label=None)
logreg_AUC=metrics.auc(fpr, tpr)
print(logreg_AUC)

In [None]:
# Metric summary, ROC plot and confusion matrix for logistic regression on the test split.
evaluate_model(logreg_model.predict_proba(X_test), 
               logreg_model.predict(X_test), 
               y_test)

### SVM

In [None]:
#from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
# LinearSVC does not expose predict_proba, so it is wrapped in
# CalibratedClassifierCV, which provides calibrated class probabilities.
svm = LinearSVC()
clf = CalibratedClassifierCV(svm) 
clf.fit(X_train, y_train)
# Test-set class probabilities from the calibrated SVM.
y_proba = clf.predict_proba(X_test)

In [None]:
from sklearn import metrics
# Test-set AUC for the calibrated linear SVM, computed from the column-1 probabilities.
testy_pred_prob=clf.predict_proba(X_test)
fpr, tpr, thresholds = metrics.roc_curve(y_test, testy_pred_prob[:,1], pos_label=None)
svm_AUC=metrics.auc(fpr, tpr)
print(svm_AUC)

In [None]:
# The fitted, calibrated linear SVM is bound to `clf`; the notebook defines no `svm_model`.
evaluate_model(clf.predict_proba(X_test),
               clf.predict(X_test),
               y_test)

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default settings, fitted on the training split.
# NOTE(review): no random_state is set, so results presumably vary between runs - confirm.
model_rf = RandomForestClassifier()
model_rf.fit(X_train, y_train)

In [None]:
from sklearn import metrics
# Test-set AUC for the random forest, computed from the column-1 probabilities.
testy_pred_prob=model_rf.predict_proba(X_test)
fpr, tpr, thresholds = metrics.roc_curve(y_test, testy_pred_prob[:,1], pos_label=None)
rf_AUC=metrics.auc(fpr, tpr)
print(rf_AUC)

In [None]:
# Metric summary, ROC plot and confusion matrix for the random forest on the test split.
evaluate_model(model_rf.predict_proba(X_test), 
               model_rf.predict(X_test), 
               y_test)

### Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree with default settings, fitted on the training split.
model_dct = DecisionTreeClassifier()
model_dct.fit(X_train, y_train)

In [None]:
from sklearn import metrics
# Test-set AUC for the decision tree, computed from the column-1 probabilities.
testy_pred_prob=model_dct.predict_proba(X_test)
fpr, tpr, thresholds = metrics.roc_curve(y_test, testy_pred_prob[:,1], pos_label=None)
dct_AUC=metrics.auc(fpr, tpr)
print(dct_AUC)

In [None]:
# Metric summary, ROC plot and confusion matrix for the decision tree on the test split.
evaluate_model(model_dct.predict_proba(X_test), 
               model_dct.predict(X_test), 
               y_test)

### AdaBoost

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with default settings, fitted on the training split.
adab_model = AdaBoostClassifier()
adab_model.fit(X_train, y_train)

In [None]:
from sklearn import metrics
# Test-set AUC for AdaBoost, computed from the column-1 probabilities.
testy_pred_prob=adab_model.predict_proba(X_test)
fpr, tpr, thresholds = metrics.roc_curve(y_test, testy_pred_prob[:,1], pos_label=None)
adab_AUC=metrics.auc(fpr, tpr)
print(adab_AUC)

In [None]:
# Metric summary, ROC plot and confusion matrix for AdaBoost on the test split.
evaluate_model(adab_model.predict_proba(X_test), 
               adab_model.predict(X_test), 
               y_test)

### MLP

In [None]:
from sklearn.neural_network import MLPClassifier
# Multi-layer perceptron with default settings, fitted on the training split.
# NOTE(review): the features are not scaled anywhere in this notebook - confirm that is intended.
mlp_model = MLPClassifier()
mlp_model.fit(X_train, y_train)

In [None]:
from sklearn import metrics
# Test-set AUC for the MLP, computed from the column-1 probabilities.
testy_pred_prob=mlp_model.predict_proba(X_test)
fpr, tpr, thresholds = metrics.roc_curve(y_test, testy_pred_prob[:,1], pos_label=None)
mlp_AUC=metrics.auc(fpr, tpr)
print(mlp_AUC)

In [None]:
# Metric summary, ROC plot and confusion matrix for the MLP on the test split.
evaluate_model(mlp_model.predict_proba(X_test), 
               mlp_model.predict(X_test), 
               y_test)

### Ensemble

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import r2_score
import pickle

In [None]:
class Stacking:
    """Two-level stacking ensemble trained on a hold-out split of the training data.

    The training set is split in two parts. The base models in
    ``ls_model_stack`` are fitted on the first part; their predictions on the
    second part form the feature matrix on which ``model_final`` is fitted.
    At test time the base models' predictions on ``x_test`` are fed to the
    fitted final model.

    Expected call order: ``spliting`` -> ``modeling_stack_training`` ->
    ``modeling_final_training`` -> ``predict_stack_testing`` ->
    ``predict_final_testing`` -> ``scoring_testing``.
    """

    # Supported values of ``type_label``.
    _LABEL_TYPES = ('discrete', 'continuous')

    def __init__(self, x_train, y_train, x_test, y_test, ls_model_stack, model_final, type_label):
        """Store the data and models and allocate the test-time meta-feature matrix.

        Args:
            x_train: Training features.
            y_train: Training targets.
            x_test: Test features; must expose ``shape``.
            y_test: Test targets, used only by ``scoring_testing``.
            ls_model_stack: Sequence of base estimators with ``fit``/``predict``.
            model_final: Meta estimator with ``fit``/``predict`` (and
                ``predict_proba`` when ``type_label`` is ``'discrete'``).
            type_label: ``'discrete'`` for classification or ``'continuous'``
                for regression.

        Raises:
            ValueError: If ``type_label`` is not one of the supported values.
        """
        # Fail early: an unknown label type would otherwise only surface later
        # as an opaque AttributeError in spliting() or a silent None score.
        if type_label not in self._LABEL_TYPES:
            raise ValueError(
                "type_label must be 'discrete' or 'continuous', got {!r}".format(type_label))
        self.trainx = x_train
        self.trainy = y_train
        self.testx = x_test
        self.testy = y_test
        self.ls_model_stack = ls_model_stack
        self.model_final = model_final
        self.type_label = type_label
        self.trainx_stacking_input = None
        self.trainy_stacking = None
        self.trainx_final = None
        self.trainy_final = None
        self.trainx_final_input = None
        # One column of meta-features per base model.
        self.testx_final_input = np.zeros((x_test.shape[0], len(ls_model_stack)))
        # Filled by predict_final_testing().
        self.testy_pred = None
        self.testy_pred_prob = None

    def spliting(self):
        """Split the training data 70/30 into base-model and final-model parts."""
        # Stratify only for class labels; continuous targets cannot be stratified.
        stratify = self.trainy if self.type_label == 'discrete' else None
        (self.trainx_stacking_input, self.trainx_final,
         self.trainy_stacking, self.trainy_final) = train_test_split(
            self.trainx, self.trainy, test_size=0.3, random_state=42, stratify=stratify)
        self.trainx_final_input = np.zeros((self.trainx_final.shape[0], len(self.ls_model_stack)))

    def modeling_stack_training(self):
        """Fit each base model and record its predictions on the held-out part.

        Each fitted base model is pickled to a file named after its position
        (``'0'``, ``'1'``, ...) in the current working directory.
        """
        for i, model in enumerate(self.ls_model_stack):
            model.fit(self.trainx_stacking_input, self.trainy_stacking)
            self.trainx_final_input[:, i] = model.predict(self.trainx_final)
            self.save_model(model, str(i))

    def modeling_final_training(self):
        """Fit the final model on the base models' held-out predictions and pickle it to ``'final'``."""
        self.model_final.fit(self.trainx_final_input, self.trainy_final)
        self.save_model(self.model_final, 'final')

    def predict_stack_testing(self):
        """Fill the test meta-feature matrix with each base model's predictions on the test set."""
        for i, model in enumerate(self.ls_model_stack):
            self.testx_final_input[:, i] = model.predict(self.testx)

    def predict_final_testing(self):
        """Predict the test targets with the final model.

        Returns:
            Tuple ``(predictions, probabilities)``. ``probabilities`` is the
            output of ``predict_proba`` for ``'discrete'`` labels and ``None``
            for ``'continuous'`` labels, where it is not requested.
        """
        self.testy_pred = self.model_final.predict(self.testx_final_input)
        if self.type_label == 'discrete':
            self.testy_pred_prob = self.model_final.predict_proba(self.testx_final_input)
        else:
            self.testy_pred_prob = None
        return self.testy_pred, self.testy_pred_prob

    def scoring_testing(self):
        """Score the test predictions: ROC AUC for ``'discrete'``, R^2 for ``'continuous'``.

        Raises:
            RuntimeError: If ``predict_final_testing`` has not been called yet.
        """
        if self.testy_pred is None:
            raise RuntimeError('call predict_final_testing() before scoring_testing()')
        if self.type_label == 'discrete':
            fpr, tpr, _ = roc_curve(self.testy, self.testy_pred_prob[:, 1], pos_label=None)
            return auc(fpr, tpr)
        return r2_score(self.testy, self.testy_pred)

    def save_model(self, model, filename):
        """Pickle ``model`` to ``filename``, closing the file even if dumping fails."""
        with open(filename, 'wb') as fh:
            pickle.dump(model, fh)


In [None]:
# Base learners for the stacking ensemble; each is refitted inside Stacking.
model_stack=[LogisticRegression(),
             CalibratedClassifierCV(LinearSVC()),
             RandomForestClassifier(),
             DecisionTreeClassifier(),
             AdaBoostClassifier(),
             # xgboost is imported as `xgb`; the bare name XGBClassifier is never imported.
             xgb.XGBClassifier(),
             MLPClassifier()]

# Meta learner that combines the base learners' predictions.
model_final=LogisticRegression()
type_label = 'discrete'

In [None]:
# Run the stacking pipeline end to end; the steps depend on each other and
# must run in this order. The last line evaluates to the test score.
stacking = Stacking(X_train, y_train, X_test, y_test, model_stack, model_final, type_label)
stacking.spliting()
stacking.modeling_stack_training()
stacking.modeling_final_training()
stacking.predict_stack_testing()
stacking.predict_final_testing()
stacking.scoring_testing()

In [None]:
# The fitted ensemble is bound to `stacking`; the notebook defines no `ensemble`.
testy_pred, testy_pred_prob = stacking.predict_final_testing()

In [None]:
# Test-set AUC for the stacking ensemble, computed from the column-1 probabilities.
fpr, tpr, thresholds = metrics.roc_curve(y_test, testy_pred_prob[:,1], pos_label=None)
ensemble_AUC=metrics.auc(fpr, tpr)
print(ensemble_AUC)

In [None]:
# Metric summary for the stacking ensemble. The test labels are `y_test`;
# the notebook defines no `testy`.
evaluate_model(testy_pred_prob,
               testy_pred,
               y_test)

## Records

In [None]:
# One row per model: display name paired with its test-set AUC.
records = pd.DataFrame(
    [
        ('LightGBM', gbm_AUC),
        ('Logistic Regression', logreg_AUC),
        ('Random Forest', rf_AUC),
        ('Decision Tree', dct_AUC),
        ('SVM-Linear SVC', svm_AUC),
        ('AdaBoostClassifier', adab_AUC),
        ('Extreme Gradient Boosting (XGBoost)', xgbc_AUC),
        ('MLP', mlp_AUC),
        ('Ensemble', ensemble_AUC),
    ],
    columns=['Model', 'AUC_test'],
)

In [None]:
# 'AUC_test' is the only score column in `records`; derive the rounded 'AUC'
# column from it instead of reading a column that does not exist yet.
records['AUC'] = records['AUC_test'].round(4)
# Rank the models from best to worst AUC (displayed as the cell output).
records.sort_values(by='AUC', ascending=False)