# Setting up the stacking phase

In this phase we are going to build the dataset used by the stacking phase.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from utils.trainFoldB import loadColumnsCategorical

In [3]:
import sys
sys.path.append('../dsbase/src/main')
from AdaBoostClassificationDSBase import AdaBoostClassificationDSBaseModel

## Loading the original stacked dataset and taking a random sample of it

In [4]:
# Read the original stacked training set from its CSV file.
df = pd.read_csv(filepath_or_buffer='datasets/train_stack.csv')

In [5]:
# Draw a random 0.05% sample of the rows.
# The fixed seed keeps the sample (and every result derived from it)
# identical across kernel restarts.
df_frac = df.sample(frac=0.0005, random_state=42)

In [6]:
# Display the (rows, columns) size of the sampled frame.
df_frac.shape

(815, 77)

## Defining the processing for fold X

In [7]:
def getColumnFoldX(df, fold_id):
    """Predict with the model of fold ``fold_id`` and return the result as one column.

    Steps:
      1. Drop the ``HasDetections`` and ``fold`` columns from ``df``.
      2. Pass the frame through ``loadColumnsCategorical`` for the columns
         ``AvSigVersion``, ``OsBuildLab`` and ``Census_OSVersion``.
      3. Load the AdaBoost model stored under ``models/fold<fold_id>``.
      4. One-hot encode the remaining columns with ``pd.get_dummies``.
      5. Scale the values with the model's ``scalerX`` and call ``predict_proba``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows; must contain the ``HasDetections`` and ``fold`` columns.
    fold_id : int
        Identifier of the fold; selects the model directory and names the
        output column.

    Returns
    -------
    pandas.DataFrame
        A single column named ``'f<fold_id>'`` that shares the index of ``df``,
        so that it can be joined back onto ``df`` by index.
    """
    print('   dataframe to list ...')
    df_w = df.drop(['HasDetections','fold'], axis=1)

    # Columns transformation
    # NOTE(review): assumes the helper returns one row per input row, in the
    # same order — confirm against utils.trainFoldB.
    df_end = loadColumnsCategorical(fold_id, df_w, ['AvSigVersion','OsBuildLab','Census_OSVersion'])

    # --------------------------------------
    # Load the model that belongs to this fold (not always fold 1).
    print('   loading model ...')
    model = AdaBoostClassificationDSBaseModel('AB2',None,None,None,None,None,None)
    model.load('models/fold' + str(fold_id))

    # Preparing data to be predicted
    # NOTE(review): get_dummies only creates columns for categories present in
    # this frame; presumably the scaler expects the training-time column set —
    # verify the widths match, especially on small samples.
    print('   getting rest of One-Hot ...')
    df_data_to_predict = pd.get_dummies(df_end)

    print('   Calculating: normalization ...')
    pre_result = model.scalerX.transform(df_data_to_predict.values)
    print('   Calculating: probabilities ...')
    result = np.asarray(model.model.predict_proba(pre_result))

    # predict_proba yields one column per class; keep only the last one
    # (the positive class for a binary target) so the output is one column.
    if result.ndim == 2:
        result = result[:, -1]

    # Set the result as a one-column DataFrame that keeps the caller's index,
    # otherwise an index-based join against a sampled frame would not align.
    print('   Creating result dataset ...')
    column_name = 'f' + str(fold_id)
    return pd.DataFrame({column_name: result}, index=df.index)

### Testing 

In [None]:
# Smoke test: run the fold-1 pipeline on the sampled frame.
f1 = getColumnFoldX(df_frac, fold_id=1)

   dataframe to list ...
   column "AvSigVersion" transformation ...
   column "OsBuildLab" transformation ...
   column "Census_OSVersion" transformation ...
   loading model ...
initiating empty model AB2. AdaBoostClassification
loading model: models/fold1/AdaBoostClassification_AB2.sav
   getting rest of One-Hot ...




## Let's obtain the final stacked dataset

In [None]:
N = 9 # Number of folds
df_stack_set = df_frac
# Append one prediction column per fold; fold ids run from 1 to N.
for fold_id in range(1, N + 1):
    print('processing fold ' + str(fold_id) + " ...")
    c = getColumnFoldX(df_frac, fold_id)
    # join() matches rows by index label, and df_frac keeps the labels of the
    # sampled rows. Align the new column by position so the join cannot
    # silently produce NaN.
    # Assumes getColumnFoldX returns one row per input row, in the same
    # order — a length mismatch raises here instead of going unnoticed.
    c.index = df_frac.index
    df_stack_set = df_stack_set.join(c)

In [None]:
# Remove the fold marker from the stacked frame.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: a second execution no longer raises KeyError.
df_stack_set = df_stack_set.drop(columns=['fold'], errors='ignore')