### Path

In [None]:
import os
# Work from the data folder so that relative file paths resolve against it.
DATA_DIR = r'C:\Users\user\Desktop\Data'
os.chdir(DATA_DIR)

### Package

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
from sklearn.model_selection import train_test_split

## Data

In [None]:
# Load the preprocessed dataset from the current working directory.
csv_path = r'.\mix_platforms_preprocessing.csv'
data = pd.read_csv(csv_path, low_memory=False, encoding='utf-8')

## Train Test Splitting

In [None]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
data_v1 = data.copy(deep=True)

In [None]:
# Separate the predictors from the target column.
target_column = "pledged_usd"
X = data_v1.drop(columns=target_column).values
# Target is kept as a column vector of shape (n_rows, 1).
y = data_v1[target_column].values.reshape(-1, 1)

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Compare the target's mean and standard deviation across both partitions.
train_mean, train_std = np.mean(y_train), np.std(y_train)
print(train_mean, train_std, '\n')
test_mean, test_std = np.mean(y_test), np.std(y_test)
print(test_mean, test_std)

## Ensemble

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor 
from sklearn.linear_model import Lasso,Ridge
from sklearn.svm import LinearSVR
import lightgbm as lgb
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import r2_score
import pickle

In [None]:
class Stacking:
    """Two-level stacking ensemble trained on a hold-out split.

    The training data is split 70/30 (``spliting``). The base models in
    ``ls_model_stack`` are fitted on the 70% part, and their predictions on
    the remaining 30% become the feature matrix for ``model_final``. At test
    time the base-model predictions on ``x_test`` are fed to ``model_final``.

    Expected call order::

        spliting() -> modeling_stack_training() -> modeling_final_training()
        -> predict_stack_testing() -> predict_final_testing() -> scoring_testing()

    Args:
        x_train: Training feature matrix.
        y_train: Training targets.
        x_test: Test feature matrix; must expose ``.shape``.
        y_test: Test targets, used only for scoring.
        ls_model_stack: Sequence of base estimators exposing ``fit`` and
            ``predict``.
        model_final: Meta-estimator exposing ``fit`` and ``predict``
            (and ``predict_proba`` when ``type_label`` is ``'discrete'``).
        type_label: ``'discrete'`` (stratified split, AUC score) or
            ``'continuous'`` (plain split, R^2 score).
    """

    _LABEL_TYPES = ('discrete', 'continuous')

    def __init__(self, x_train, y_train, x_test, y_test, ls_model_stack, model_final, type_label):
        self.trainx = x_train
        self.trainy = y_train
        self.testx = x_test
        self.testy = y_test
        self.ls_model_stack = ls_model_stack
        self.model_final = model_final
        self.type_label = type_label
        # Filled in by spliting().
        self.trainx_stacking_input = None
        self.trainy_stacking = None
        self.trainx_final = None
        self.trainy_final = None
        self.trainx_final_input = None
        # One column of base-model predictions per stacked model.
        self.testx_final_input = np.zeros((x_test.shape[0], len(ls_model_stack)))
        # Filled in by predict_final_testing().
        self.testy_pred = None
        self.testy_pred_prob = None

    @staticmethod
    def _as_1d(y):
        """Flatten a single-column 2-D target to 1-D; return anything else as-is."""
        arr = np.asarray(y)
        if arr.ndim == 2 and arr.shape[1] == 1:
            return arr.ravel()
        return y

    def spliting(self):
        """Split the training data 70/30 into base-model and final-model parts.

        The split is stratified on the labels when ``type_label`` is
        ``'discrete'``.

        Raises:
            ValueError: If ``type_label`` is neither ``'discrete'`` nor
                ``'continuous'``.
        """
        if self.type_label not in self._LABEL_TYPES:
            raise ValueError(
                "type_label must be 'discrete' or 'continuous', got %r" % (self.type_label,)
            )
        stratify = self.trainy if self.type_label == 'discrete' else None
        (self.trainx_stacking_input,
         self.trainx_final,
         self.trainy_stacking,
         self.trainy_final) = train_test_split(
            self.trainx, self.trainy, test_size=0.3, random_state=42, stratify=stratify)
        self.trainx_final_input = np.zeros((self.trainx_final.shape[0], len(self.ls_model_stack)))

    # Correctly spelled alias; the original name is kept for existing callers.
    splitting = spliting

    def modeling_stack_training(self):
        """Fit every base model and collect its predictions on the hold-out part.

        Each fitted model is also pickled to a file named after its index.

        Raises:
            RuntimeError: If ``spliting()`` has not been called yet.
        """
        if self.trainx_final_input is None:
            raise RuntimeError("spliting() must be called before modeling_stack_training()")
        # A single-column 2-D target is passed to the estimators as 1-D.
        target = self._as_1d(self.trainy_stacking)
        for i, model in enumerate(self.ls_model_stack):
            model.fit(self.trainx_stacking_input, target)
            # ravel() so an (n, 1) prediction still fits into one column.
            self.trainx_final_input[:, i] = np.ravel(model.predict(self.trainx_final))
            self.save_model(model, str(i))

    def modeling_final_training(self):
        """Fit the final model on the base-model predictions and pickle it as 'final'.

        Raises:
            RuntimeError: If ``spliting()`` has not been called yet.
        """
        if self.trainx_final_input is None:
            raise RuntimeError("spliting() must be called before modeling_final_training()")
        self.model_final.fit(self.trainx_final_input, self._as_1d(self.trainy_final))
        self.save_model(self.model_final, 'final')

    def predict_stack_testing(self):
        """Fill ``testx_final_input`` with each base model's test-set predictions."""
        for i, model in enumerate(self.ls_model_stack):
            self.testx_final_input[:, i] = np.ravel(model.predict(self.testx))

    def predict_final_testing(self):
        """Predict the test set with the final model.

        Returns:
            Tuple ``(testy_pred, testy_pred_prob)``. ``testy_pred_prob`` is
            ``None`` when the final model has no ``predict_proba`` (for
            example a regressor).
        """
        self.testy_pred = self.model_final.predict(self.testx_final_input)
        # Regressors have no predict_proba; calling it unconditionally raised
        # AttributeError for the 'continuous' case.
        predict_proba = getattr(self.model_final, 'predict_proba', None)
        if callable(predict_proba):
            self.testy_pred_prob = predict_proba(self.testx_final_input)
        else:
            self.testy_pred_prob = None
        return self.testy_pred, self.testy_pred_prob

    def scoring_testing(self):
        """Score the test-set predictions.

        Returns:
            AUC for ``'discrete'`` labels (computed from column 1 of the
            predicted probabilities), R^2 for ``'continuous'`` labels, and
            ``None`` for any other ``type_label``.

        Raises:
            RuntimeError: If ``predict_final_testing()`` has not been called.
            ValueError: If ``type_label`` is ``'discrete'`` but the final model
                produced no probabilities.
        """
        if self.testy_pred is None:
            raise RuntimeError("predict_final_testing() must be called before scoring_testing()")
        if self.type_label == 'discrete':
            if self.testy_pred_prob is None:
                raise ValueError("AUC scoring requires a final model with predict_proba")
            fpr, tpr, _ = roc_curve(self.testy, self.testy_pred_prob[:, 1], pos_label=None)
            return auc(fpr, tpr)
        if self.type_label == 'continuous':
            return r2_score(self.testy, self.testy_pred)
        # Unknown label types yield no score, as before.
        return None

    def save_model(self, model, filename):
        """Pickle ``model`` to ``filename``, closing the file afterwards."""
        with open(filename, 'wb') as fh:
            pickle.dump(model, fh)


In [None]:
# Base learners, each instantiated with its library-default hyperparameters.
base_learner_types = (
    xgb.XGBRegressor,
    RandomForestRegressor,
    DecisionTreeRegressor,
    AdaBoostRegressor,
    LinearSVR,
    MLPRegressor,
    lgb.LGBMRegressor,
)
model_stack = [learner_type() for learner_type in base_learner_types]

# Final learner that combines the base-model predictions.
model_final = Lasso()

# The target is a continuous value, so the ensemble is scored with R^2.
type_label = 'continuous'

In [None]:
# Build the ensemble and run every stage in order.
stacking = Stacking(
    x_train=X_train,
    y_train=y_train,
    x_test=X_test,
    y_test=y_test,
    ls_model_stack=model_stack,
    model_final=model_final,
    type_label=type_label,
)
pipeline_steps = (
    stacking.spliting,
    stacking.modeling_stack_training,
    stacking.modeling_final_training,
    stacking.predict_stack_testing,
    stacking.predict_final_testing,
)
for run_step in pipeline_steps:
    run_step()
# Left as a bare expression so the notebook displays the score.
stacking.scoring_testing()