# Setting up the stacking phase

In this phase we build the stacked dataset: for each fold, the predictions of that fold's model are added to the original data as a new column.

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [5]:
from utils.utils import getVector

In [6]:
import sys
sys.path.append('../dsbase/src/main')
from AdaBoostClassificationDSBase import AdaBoostClassificationDSBaseModel

## Loading the original stacked dataset and sampling a shuffled fraction of it

In [7]:
# Load 'datasets/train_stack.csv' into a DataFrame; the path is relative to the
# directory the notebook kernel runs in.
df = pd.read_csv('datasets/train_stack.csv')

In [8]:
# Draw a small random subset (0.05 % of the rows) to iterate quickly.
# A fixed random_state makes the subset identical on every Restart & Run All;
# the number of sampled rows is unchanged.
df_frac = df.sample(frac=0.0005, random_state=42)

In [12]:
# Display (n_rows, n_columns) of the sampled frame.
df_frac.shape

(815, 77)

In [14]:
# Exploratory, step-by-step run of the per-fold preprocessing for a single fold.
# The same steps are packaged as getColumnFoldX further down.
fold_id=1

# Wrap every cell value in a one-element list so that each row can later be
# flattened with np.concatenate once some cells have been replaced by vectors.
print('   dataframe to list ...')
df_w = df_frac.drop(['HasDetections','fold'], axis=1)
df_aux = pd.DataFrame([[[value] for value in row] for row in df_w.values], columns=df_w.columns)

# Load the arrays saved for this fold and replace the three columns with the
# output of getVector (presumably a vector per value -- confirm in utils.utils).
print('   1 column transformation ...')
AvSigVersion = np.load('models/fold' + str(fold_id) + "/AvSigVersion.sav.npy")
df_aux['AvSigVersion']=df_aux['AvSigVersion'].apply(lambda x: getVector(x[0],AvSigVersion))
print('   2 column transformation ...')
Census_OSVersion = np.load('models/fold' + str(fold_id) + "/Census_OSVersion.sav.npy")
df_aux['Census_OSVersion']=df_aux['Census_OSVersion'].apply(lambda x: getVector(x[0],Census_OSVersion))
print('   3 column transformation ...')
OsBuildLab = np.load('models/fold' + str(fold_id) + "/OsBuildLab.sav.npy")
df_aux['OsBuildLab']=df_aux['OsBuildLab'].apply(lambda x: getVector(x[0],OsBuildLab))

# Flatten every row back into a single flat record.
print('   recomponing dataset ...')
df_end = pd.DataFrame([np.concatenate(row) for row in df_aux.values])

# --------------------------------------
# Load the model of the selected fold. The directory is derived from fold_id
# (it used to be hard-coded to fold 1) so that the encoders loaded above and
# the model always belong to the same fold.
print('   loading model ...')
model = AdaBoostClassificationDSBaseModel('AB2',None,None,None,None,None,None)
model.load('models/fold' + str(fold_id))

# One-hot encode whatever categorical columns remain.
# NOTE(review): get_dummies only creates columns for categories present in
# this sample, so the result is not guaranteed to match the training layout.
print('   getting rest of One-Hot ...')
df_data_to_predict = pd.get_dummies(df_end)

   dataframe to list ...
   1 column transformation ...
   2 column transformation ...
   3 column transformation ...
   recomponing dataset ...
   loading model ...
initiating empty model AB2. AdaBoostClassification
loading model: models/fold1/AdaBoostClassification_AB2.sav
   getting rest of One-Hot ...




## Defining the Fold X processing 

In [10]:
def getColumnFoldX(df, fold_id):
    """Score ``df`` with the model of one fold and return it as a stacking column.

    Steps: drop 'HasDetections' and 'fold', replace the 'AvSigVersion',
    'Census_OSVersion' and 'OsBuildLab' columns with ``getVector`` applied to
    the arrays stored under ``models/fold<fold_id>``, flatten every row,
    one-hot encode the result, scale it with the fold model's ``scalerX`` and
    ask the fold model for class probabilities.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to score. Must contain the columns 'HasDetections', 'fold',
        'AvSigVersion', 'Census_OSVersion' and 'OsBuildLab'.
    fold_id : int
        Fold whose saved arrays and model (``models/fold<fold_id>``) are used.

    Returns
    -------
    pandas.DataFrame
        A single column named ``'f<fold_id>'`` with one probability per input
        row, indexed like ``df`` so it can be joined back onto it.

    Raises
    ------
    ValueError
        If the one-hot encoded frame does not have the number of features the
        fold's scaler was fitted on (only checked when the scaler exposes
        ``n_features_in_``).
    """
    fold_dir = 'models/fold' + str(fold_id)

    # Wrap every cell value in a one-element list so that each row can later
    # be flattened with np.concatenate once some cells hold vectors.
    print('   dataframe to list ...')
    df_w = df.drop(['HasDetections','fold'], axis=1)
    df_aux = pd.DataFrame([[[value] for value in row] for row in df_w.values], columns=df_w.columns)

    # Replace each of the three columns using the array saved for this fold.
    # getVector presumably returns a sequence per value -- confirm in utils.utils.
    encoded_columns = ['AvSigVersion', 'Census_OSVersion', 'OsBuildLab']
    for position, column in enumerate(encoded_columns, start=1):
        print('   ' + str(position) + ' column transformation ...')
        reference = np.load(fold_dir + '/' + column + '.sav.npy')
        # Bind `reference` as a default so the lambda never sees a later value.
        df_aux[column] = df_aux[column].apply(
            lambda cell, reference=reference: getVector(cell[0], reference))

    # Flatten every row back into a single flat record.
    print('   recomponing dataset ...')
    df_end = pd.DataFrame([np.concatenate(row) for row in df_aux.values])

    # --------------------------------------
    # Load the model of the requested fold (previously always fold 1, which
    # made every stacked column come from the same model).
    print('   loading model ...')
    model = AdaBoostClassificationDSBaseModel('AB2',None,None,None,None,None,None)
    model.load(fold_dir)

    # One-hot encode whatever categorical columns remain.
    print('   getting rest of One-Hot ...')
    df_data_to_predict = pd.get_dummies(df_end)

    # get_dummies only emits columns for the categories present in `df`, so the
    # width can differ from what the scaler was fitted on. Fail with a clear
    # message instead of a broadcasting error deep inside the scaler.
    # NOTE(review): the real fix is to reindex to the training column layout,
    # which is not available in this notebook.
    expected_features = getattr(model.scalerX, 'n_features_in_', None)
    if expected_features is not None and df_data_to_predict.shape[1] != expected_features:
        raise ValueError(
            'one-hot encoded input has ' + str(df_data_to_predict.shape[1]) +
            ' columns but the scaler of fold ' + str(fold_id) +
            ' was fitted on ' + str(expected_features) +
            '; align the columns with the training layout before scoring')

    print('   Calculating: normalization ...')
    pre_result = model.scalerX.transform(df_data_to_predict.values)
    print('   Calculating: probabilities ...')
    result = np.asarray(model.model.predict_proba(pre_result))

    # Set the result as a one-column DataFrame
    print('   Creating result dataset ...')
    if result.ndim == 2:
        # predict_proba presumably yields one column per class (scikit-learn
        # convention); keep the last one, i.e. the positive class for a 0/1 target.
        result = result[:, -1]
    # Reuse the caller's index so that an index-based join lines rows up.
    return pd.DataFrame({'f' + str(fold_id): result}, index=df.index)

### Testing 

In [11]:
f1 = getColumnFoldX(df_frac,1)

   dataframe to list ...
   1 column transformation ...
   2 column transformation ...
   3 column transformation ...
   recomponing dataset ...
   loading model ...
initiating empty model AB2. AdaBoostClassification
loading model: models/fold1/AdaBoostClassification_AB2.sav
   getting rest of One-Hot ...




   Calculating: normalization ...


ValueError: operands could not be broadcast together with shapes (815,11451) (6861,) (815,11451) 

## Let's obtain the final stacked dataset

In [None]:
N = 9  # Number of folds

# Append one prediction column per fold (f1 ... fN) to the sampled frame.
df_stack_set = df_frac
for fold_id in range(1, N + 1):  # driven by N instead of a second hard-coded 9
    print('processing fold ' + str(fold_id) + " ...")
    fold_column = getColumnFoldX(df_frac, fold_id)
    # getColumnFoldX returns one row per input row, in order. Align it
    # positionally with df_frac, whose index is the sampled (non-contiguous)
    # one, so that the index-based join matches the right rows.
    fold_column.index = df_frac.index
    df_stack_set = df_stack_set.join(fold_column)

In [None]:
# Remove the fold marker from the stacked set. Rebinding instead of mutating
# in place, together with errors='ignore', lets this cell be re-run safely.
df_stack_set = df_stack_set.drop(columns=['fold'], errors='ignore')