In [16]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn import datasets, linear_model,preprocessing
from datetime import datetime
import gc
%matplotlib inline
from IPython.display import display, HTML
from pprint import pprint
import time
import nltk.corpus
from nltk.stem.snowball import SnowballStemmer
from ml_metrics import rmse, rmsle

In [2]:
# Loads one data set (CSV or HDF5) and works with it as three frames: train, test1 and test2 (the two validation weeks).
class FeatureEngineering:
    '''Holds a train frame plus two validation frames (test1, test2) and adds features to them.

    The frames are stored on the instance as ``self.train``, ``self.test1`` and ``self.test2``.
    ``test1`` holds the rows whose 'Semana' equals ``ValidationStart`` and ``test2`` the rows
    whose 'Semana' equals ``ValidationEnd``.

    Most methods take a ``trainOrTestOrBoth`` selector: 'train', 'test' or 'both'.
    '''

    # Week offset used by DeleteLaggedWeeksFromTrain: rows with Semana < FIRST_TRAIN_WEEK + maxLag are dropped.
    # NOTE(review): presumably the first 'Semana' present in the training data -- confirm.
    FIRST_TRAIN_WEEK = 3

    def __init__(self, ValidationStart, ValidationEnd, trainHdfPath, trainHdfFile, testHdfPath1, testHdfPath2,
                 testHdfFile, testTypes, trainTypes, trainCsvPath, testCsvPath, maxLag=0):
        '''Stores the configuration only; no data is read here.

        ValidationStart / ValidationEnd: 'Semana' values that select test1 / test2.
        trainHdfPath, trainHdfFile: HDF5 file path and key of the train frame.
        testHdfPath1, testHdfPath2, testHdfFile: HDF5 file paths of test1 / test2 and their shared key.
        testTypes, trainTypes: {column: dtype} dicts; their keys are also the columns read from the CSVs.
        trainCsvPath, testCsvPath: CSV locations.
        maxLag: largest lag seen so far; raised by AddConfigurableFeaturesToTrain when needed.
        '''
        self.ValidationStart = ValidationStart
        self.ValidationEnd = ValidationEnd
        self.maxLag = maxLag
        self.trainHdfPath = trainHdfPath
        self.trainHdfFile = trainHdfFile
        self.testHdfPath1 = testHdfPath1
        self.testHdfPath2 = testHdfPath2
        self.testHdfFile = testHdfFile
        self.testTypes = testTypes
        self.trainTypes = trainTypes
        self.trainCsvPath = trainCsvPath
        self.testCsvPath = testCsvPath

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _wants(trainOrTestOrBoth, part):
        '''True when the selector asks for `part` ('train' or 'test'), either directly or via 'both'.'''
        return trainOrTestOrBoth == part or trainOrTestOrBoth == 'both'

    @staticmethod
    def __printDataFrameBasics__(data):
        '''Displays the first two rows and prints the dtype / memory summary of `data`.'''
        display(data.head(2))
        gc.collect()
        # DataFrame.info() prints by itself and returns None, so it must not be wrapped in print().
        data.info(memory_usage=True)

    @staticmethod
    def changeIndexTypeToLowerMemory(data):
        '''Currently only triggers a garbage collection.

        The index downcast this method is named after (reset the index and cast it to uint32)
        is disabled, so `data` is left untouched.
        '''
        gc.collect()

    @staticmethod
    def _reportLoaded(*frames):
        '''Runs the post-load housekeeping on every frame, then prints a summary of each.'''
        for frame in frames:
            FeatureEngineering.changeIndexTypeToLowerMemory(frame)
        for frame in frames:
            FeatureEngineering.__printDataFrameBasics__(frame)

    # ------------------------------------------------------------------ loading / saving

    def ReadHdf(self, trainOrTestOrBoth):
        '''Reads the selected frames from their HDF5 files and keeps them on the instance.'''
        if self._wants(trainOrTestOrBoth, 'train'):
            self.train = pd.read_hdf(self.trainHdfPath, key=self.trainHdfFile)
            self._reportLoaded(self.train)

        if self._wants(trainOrTestOrBoth, 'test'):
            self.test1 = pd.read_hdf(self.testHdfPath1, key=self.testHdfFile)
            self.test2 = pd.read_hdf(self.testHdfPath2, key=self.testHdfFile)
            self._reportLoaded(self.test1, self.test2)

    def _readCsv(self, trainOrTestOrBoth, nrows=None):
        '''Shared CSV reader behind ReadCsv and ReadFirstNRowsOfACsv.

        nrows=None reads everything. Otherwise `nrows` rows of the train CSV and
        `nrows * 2` rows of the test CSV are read (the test CSV feeds two frames).
        '''
        if self._wants(trainOrTestOrBoth, 'train'):
            self.train = pd.read_csv(self.trainCsvPath, usecols=list(self.trainTypes.keys()),
                                     dtype=self.trainTypes, nrows=nrows)
            self._reportLoaded(self.train)

        if self._wants(trainOrTestOrBoth, 'test'):
            testRows = None if nrows is None else nrows * 2
            tempTest = pd.read_csv(self.testCsvPath, usecols=list(self.testTypes.keys()),
                                   dtype=self.testTypes, nrows=testRows)
            weeks = tempTest['Semana'].values
            self.test1 = tempTest.loc[weeks == self.ValidationStart]
            self.test2 = tempTest.loc[weeks == self.ValidationEnd]
            del tempTest
            self._reportLoaded(self.test1, self.test2)

    def ReadCsv(self, trainOrTestOrBoth):
        '''Reads the selected frames from the CSV files and keeps them on the instance.'''
        self._readCsv(trainOrTestOrBoth)

    def ReadFirstNRowsOfACsv(self, nrows, trainOrTestOrBoth):
        '''Like ReadCsv, but reads only the first `nrows` train rows and `nrows * 2` test rows.'''
        self._readCsv(trainOrTestOrBoth, nrows=nrows)

    @staticmethod
    def ConvertCsvToHdf(csvPath, HdfPath, HdfName, ColumnTypeDict):
        '''Reads the columns listed in `ColumnTypeDict` from a CSV and writes them to an HDF5 table.'''
        # pd.read_csv has no `index` parameter; passing index=False here raised a TypeError.
        tempDf = pd.read_csv(csvPath, usecols=list(ColumnTypeDict.keys()), dtype=ColumnTypeDict)
        tempDf.to_hdf(HdfPath, key=HdfName, format='t')
        del tempDf
        gc.collect()
        print("ConvertCsvToHdf is done..")

    def SaveDataFrameToHdf(self, trainOrTestOrBoth):
        '''Writes the selected frames to their configured HDF5 files in table format.'''
        # `index` must be the boolean False; the string "False" that was passed before is truthy.
        if self._wants(trainOrTestOrBoth, 'train'):
            self.train.to_hdf(self.trainHdfPath, key=self.trainHdfFile, format='t', index=False)
        if self._wants(trainOrTestOrBoth, 'test'):
            self.test1.to_hdf(self.testHdfPath1, key=self.testHdfFile, format='t', index=False)
            self.test2.to_hdf(self.testHdfPath2, key=self.testHdfFile, format='t', index=False)

    # ------------------------------------------------------------------ column level transforms

    def Preprocess(self, trainOrTestOrBoth, columnFunctionTypeList):
        '''Applies a function to whole columns and casts the result.

        columnFunctionTypeList = [ ['C1',Func1,Type], ['C2',Func2,Type],..    ]
        Each function receives the column's values as one 1-D numpy array.
        '''
        onTrain = self._wants(trainOrTestOrBoth, 'train')
        onTest = self._wants(trainOrTestOrBoth, 'test')
        for column, func, localType in columnFunctionTypeList:
            # Operate on this instance's frames (the code used to reach for the global `FE`).
            if onTrain:
                self.train.loc[:, column] = np.apply_along_axis(
                    func, 0, self.train[column].values).astype(localType)
            if onTest:
                self.test1.loc[:, column] = np.apply_along_axis(
                    func, 0, self.test1[column].values).astype(localType)
                self.test2.loc[:, column] = np.apply_along_axis(
                    func, 0, self.test2[column].values).astype(localType)
        gc.collect()

    def AddDemandaGeneralMean(self, trainOrTestOrBoth):
        '''Adds a constant 'DemandaGeneralMean' column to the selected frames.

        The value is always the float32 mean of train['Demanda_uni_equil'], also for test1/test2.
        '''
        meanOfDemanda = self.train["Demanda_uni_equil"].values.mean().astype("float32")

        if self._wants(trainOrTestOrBoth, 'train'):
            self.train.loc[:, "DemandaGeneralMean"] = meanOfDemanda
        if self._wants(trainOrTestOrBoth, 'test'):
            self.test1.loc[:, "DemandaGeneralMean"] = meanOfDemanda
            self.test2.loc[:, "DemandaGeneralMean"] = meanOfDemanda
        gc.collect()

    # ------------------------------------------------------------------ grouped features

    @staticmethod
    def _aggregateDemand(data, name, groups, aggregate):
        '''Builds one grouped feature from 'Demanda_uni_equil'.

        Returns (groupedDataframe, mergeKeys). The frame has a single column called `name`
        and is indexed by `mergeKeys`.

        With one aggregate: aggregate[0] per `groups`.
        With two aggregates: aggregate[0] per `groups`, then 'Semana' is dropped and the
        per-week values are combined with aggregate[1] over the remaining keys. In that
        case 'Semana' must be one of `groups`.
        '''
        groups = list(groups)
        groupedDataframe = data[groups + ['Demanda_uni_equil']].groupby(groups).agg(aggregate[0])
        gc.collect()
        groupedDataframe.columns = [name]

        mergeKeys = groups
        if len(aggregate) > 1:
            groupedDataframe = groupedDataframe.reset_index().drop("Semana", axis=1)
            # Remove 'Semana' by name instead of assuming it is the first grouping key.
            mergeKeys = [key for key in groups if key != "Semana"]
            groupedDataframe = groupedDataframe.groupby(mergeKeys).agg(aggregate[1])
            groupedDataframe.columns = [name]
            gc.collect()
        return groupedDataframe, mergeKeys

    @staticmethod
    def _mergeFeature(frame, groupedDataframe, mergeKeys):
        '''Left-joins the grouped feature onto `frame` and returns the merged frame.'''
        return frame.merge(groupedDataframe, left_on=mergeKeys, right_index=True,
                           how='left', sort=False, copy=False)

    @staticmethod
    def _fillMissingTarget(frame, targetVariable, sourceColumn):
        '''Copies `sourceColumn` into `targetVariable` on the rows where the target is still null.'''
        missing = pd.isnull(frame[targetVariable].values)
        frame.loc[missing, targetVariable] = frame.loc[missing, sourceColumn].values

    def AddConfigurableFeaturesToTrain(self, config):
        '''Adds the grouped features described by `config` to train, test1 and test2.

        `config` provides ``lag``, ``nameAndGroups``, ``targetVariable`` and ``deleteColumns``,
        e.g. ConfigElements(0,[ ("A",["Semana","Agencia_ID"],["count","count"]), ... ]).

        For every (name, groups, aggregate) entry:
          * if `name` is not yet a train column, it is computed from the train rows with
            Semana <= ValidationEnd - lag (their Semana shifted forward by `lag`) and
            left-merged into the frames;
          * if `targetVariable` is set, rows where the target is still null take the value
            of `name`, so earlier entries have priority over later ones;
          * if `deleteColumns` is set as well, the `name` column is dropped afterwards.
            This also drops a `name` column that already existed before the call.

        test2 is skipped entirely when lag == 1. self.maxLag is raised to config.lag if needed.
        '''
        if config.lag > self.maxLag:
            self.maxLag = config.lag

        useTest2 = config.lag != 1
        targetVariable = config.targetVariable

        tempData = self.train[self.train['Semana'].values <= (self.ValidationEnd - config.lag)]
        if config.lag != 0:
            tempData.loc[:, 'Semana'] = tempData['Semana'].values + config.lag

        # Create the target column only once; a later call with the same target keeps filling
        # whatever is still NaN.
        if targetVariable != "" and targetVariable not in self.train.columns:
            self.train.loc[:, targetVariable] = np.nan
            self.test1.loc[:, targetVariable] = np.nan
            if useTest2:
                self.test2.loc[:, targetVariable] = np.nan

        for name, groups, aggregate in config.nameAndGroups:
            if name not in self.train.columns:
                print("{} is not in columns..".format(name))

                groupedDataframe, mergeKeys = self._aggregateDemand(tempData, name, groups, aggregate)
                display(groupedDataframe.head(2))

                self.train = self._mergeFeature(self.train, groupedDataframe, mergeKeys)
                gc.collect()
                self.test1 = self._mergeFeature(self.test1, groupedDataframe, mergeKeys)
                gc.collect()
                if useTest2:
                    self.test2 = self._mergeFeature(self.test2, groupedDataframe, mergeKeys)

                del groupedDataframe
                gc.collect()
            else:
                print("{} is in columns..".format(name))

            display(self.train.head(2))
            display(self.test1.head(2))
            display(self.test2.head(2))

            if targetVariable != "":
                self._fillMissingTarget(self.train, targetVariable, name)
                self._fillMissingTarget(self.test1, targetVariable, name)
                if useTest2:
                    self._fillMissingTarget(self.test2, targetVariable, name)

                count = self.test1[targetVariable].isnull().sum()
                print("Count of missing numbers after {} in validation part 1 in column {} is {}".format(
                    name, targetVariable, str(count)))
                if useTest2:
                    count = self.test2.loc[:, targetVariable].isnull().sum()
                    print("Count of missing numbers after {} in validation part 2 in column {} is {}".format(
                        name, targetVariable, str(count)))

                if config.deleteColumns:
                    self.train.drop(name, axis=1, inplace=True)
                    self.test1.drop(name, axis=1, inplace=True)
                    if useTest2:
                        self.test2.drop(name, axis=1, inplace=True)
                gc.collect()

        del tempData
        display(self.train.head(2))
        display(self.test1.head(2))
        display(self.test2.head(2))
        gc.collect()
        return

    def DeleteLaggedWeeksFromTrain(self):
        '''Keeps only the train rows with Semana >= FIRST_TRAIN_WEEK + maxLag.'''
        self.train = self.train[self.train['Semana'].values >= (self.FIRST_TRAIN_WEEK + self.maxLag)]
        gc.collect()
        display(self.train.head(2))

    # ------------------------------------------------------------------ train / validation bookkeeping

    #Use when concatanating train and validation before predict test for example..
    def AppendTestToTrain(self, deleteTest=True):
        '''Appends test1 and, when it exists, test2 to train with a fresh 0..n-1 index.

        deleteTest: when True the appended frames are removed from the instance.
        A missing test2 is skipped silently; a missing test1 raises AttributeError.
        '''
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        self.train = pd.concat([self.train, self.test1], ignore_index=True)
        gc.collect()
        if deleteTest:
            del self.test1
            gc.collect()

        # Only a missing test2 is tolerated; any real failure now surfaces instead of
        # being swallowed by a bare except.
        if hasattr(self, 'test2'):
            self.train = pd.concat([self.train, self.test2], ignore_index=True)
            gc.collect()
            if deleteTest:
                del self.test2
                gc.collect()

    #Reaches 3x memory from train, because of test1, test2 and train itself at the end.. GC fixed in the end..
    def SplitTrainToTestUsingValidationStart(self):
        '''Moves the validation weeks out of train.

        test1 gets the rows with Semana == ValidationStart, test2 the rows with
        Semana == ValidationEnd, and train keeps only the rows with Semana < ValidationStart.
        '''
        weeks = self.train.Semana.values
        self.test1 = self.train[weeks == self.ValidationStart]
        self.test2 = self.train[weeks == self.ValidationEnd]
        # Operate on this instance (the code used to reach for the global `FE`).
        self.train = self.train[weeks < self.ValidationStart]
        del weeks
        gc.collect()

In [3]:
# Constructor arguments for FeatureEngineering: the two validation weeks, the HDF5/CSV
# locations, and the explicit column dtypes used when reading the CSV files.
parameterDict = {
    "ValidationStart": 8,
    "ValidationEnd": 9,
    "maxLag": 3,
    "trainHdfPath": '../../input/train.h5',
    "trainHdfFile": "train",
    "testHdfPath1": "../../input/test1.h5",
    "testHdfPath2": "../../input/test2.h5",
    "testHdfFile": "test",
    "trainTypes": {
        'Semana': np.uint8,
        'Agencia_ID': np.uint16,
        'Canal_ID': np.uint8,
        'Ruta_SAK': np.uint16,
        'Cliente_ID': np.uint32,
        'Producto_ID': np.uint16,
        'Venta_uni_hoy': np.uint16,
        'Venta_hoy': np.float32,
        'Dev_uni_proxima': np.uint32,
        'Dev_proxima': np.float32,
        'Demanda_uni_equil': np.uint32,
    },
    "testTypes": {
        'id': np.uint32,
        'Semana': np.uint8,
        'Agencia_ID': np.uint16,
        'Canal_ID': np.uint8,
        'Ruta_SAK': np.uint16,
        'Cliente_ID': np.uint32,
        'Producto_ID': np.uint16,
    },
    "trainCsvPath": '../../input/train.csv',
    "testCsvPath": '../../input/test.csv',
}

FE = FeatureEngineering(**parameterDict)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(FE.__dict__)

{'trainCsvPath': '../../input/train.csv', 'maxLag': 3, 'testTypes': {'Cliente_ID': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Semana': <type 'numpy.uint8'>, 'id': <type 'numpy.uint32'>}, 'testHdfFile': 'test', 'trainTypes': {'Dev_proxima': <type 'numpy.float32'>, 'Venta_uni_hoy': <type 'numpy.uint16'>, 'Cliente_ID': <type 'numpy.uint32'>, 'Demanda_uni_equil': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Venta_hoy': <type 'numpy.float32'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Dev_uni_proxima': <type 'numpy.uint32'>, 'Semana': <type 'numpy.uint8'>}, 'testHdfPath1': '../../input/test1.h5', 'ValidationEnd': 9, 'testHdfPath2': '../../input/test2.h5', 'testCsvPath': '../../input/test.csv', 'ValidationStart': 8, 'trainHdfFile': 'train', 'trainHdfPath': '../../input/train.h5'}


In [4]:
# Load train, test1 and test2 from the HDF5 files configured on FE; each frame's head and info are printed.
FE.ReadHdf('both')

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,3,False,120,2,709,4,2008,16
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,4,False,135,2,712,4,2008,16


<class 'pandas.core.frame.DataFrame'>
Int64Index: 52449630 entries, 0 to 52449629
Data columns (total 18 columns):
Semana                                          uint8
Agencia_ID                                      uint16
Canal_ID                                        uint8
Ruta_SAK                                        uint16
Cliente_ID                                      uint32
Producto_ID                                     uint16
Venta_uni_hoy                                   uint16
Venta_hoy                                       float32
Dev_uni_proxima                                 uint32
Dev_proxima                                     float32
Demanda_uni_equil                               uint32
DemandaNotEqualTheDifferenceOfVentaUniAndDev    bool
weight                                          uint16
pieces                                          uint8
Prod_name_ID                                    uint16
Brand_ID                                        uint8
Town_ID  

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,4,False,120,2,709,4,2008,16
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,5,False,135,2,712,4,2008,16


<class 'pandas.core.frame.DataFrame'>
Int64Index: 10193576 entries, 52449630 to 62643205
Data columns (total 18 columns):
Semana                                          uint8
Agencia_ID                                      uint16
Canal_ID                                        uint8
Ruta_SAK                                        uint16
Cliente_ID                                      uint32
Producto_ID                                     uint16
Venta_uni_hoy                                   uint16
Venta_hoy                                       float32
Dev_uni_proxima                                 uint32
Dev_proxima                                     float32
Demanda_uni_equil                               uint32
DemandaNotEqualTheDifferenceOfVentaUniAndDev    bool
weight                                          uint16
pieces                                          uint8
Prod_name_ID                                    uint16
Brand_ID                                        uint8
To

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,1,False,120,2,709,4,2008,16
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,2,False,140,2,630,4,2008,16


<class 'pandas.core.frame.DataFrame'>
Int64Index: 10200437 entries, 62643206 to 72843642
Data columns (total 18 columns):
Semana                                          uint8
Agencia_ID                                      uint16
Canal_ID                                        uint8
Ruta_SAK                                        uint16
Cliente_ID                                      uint32
Producto_ID                                     uint16
Venta_uni_hoy                                   uint16
Venta_hoy                                       float32
Dev_uni_proxima                                 uint32
Dev_proxima                                     float32
Demanda_uni_equil                               uint32
DemandaNotEqualTheDifferenceOfVentaUniAndDev    bool
weight                                          uint16
pieces                                          uint8
Prod_name_ID                                    uint16
Brand_ID                                        uint8
To

## AddDemandaGeneralMean

In [5]:
# Add the constant 'DemandaGeneralMean' column (mean of train['Demanda_uni_equil']) to train, test1 and test2.
FE.AddDemandaGeneralMean('both')

In [6]:
# Peek at the first two train rows to check the newly added column.
FE.train.head(2)

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,3,False,120,2,709,4,2008,16,7.346896
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,4,False,135,2,712,4,2008,16,7.346896


## Grouping for Lag0

In [7]:
class ConfigElements:
    '''Plain settings holder describing one batch of grouped features.

    lag: week offset of the batch.
    nameAndGroups: list of (feature name, grouping columns, aggregate names) entries.
    targetVariable: when non-empty, the column whose NaNs are filled from each feature in
        turn; when empty, every feature is simply kept in the DataFrame.
    deleteColumns: whether a feature column is dropped once it has been used.
    '''

    def __init__(self, lag, nameAndGroups, targetVariable="", deleteColumns = False):
        self.lag, self.nameAndGroups = lag, nameAndGroups
        self.targetVariable, self.deleteColumns = targetVariable, deleteColumns

In [8]:
# Lag-0 groupings, ordered from the most specific key combination to the most general one.
# Every grouping starts with "Semana" and "Producto_ID"; only the extra key columns differ,
# so the table below lists just those extras next to the feature name.
_lag0ExtraKeys = [
    ("SPClRACh0_mean_sum", ["Cliente_ID", "Ruta_SAK", "Agencia_ID", "Canal_ID"]),
    ("SPClRA0_mean_sum", ["Cliente_ID", "Ruta_SAK", "Agencia_ID"]),
    ("SPClRCh0_mean_sum", ["Cliente_ID", "Ruta_SAK", "Canal_ID"]),
    ("SPClACh0_mean_sum", ["Cliente_ID", "Agencia_ID", "Canal_ID"]),
    ("SPClR0_mean_sum", ["Cliente_ID", "Ruta_SAK"]),
    ("SPClA0_mean_sum", ["Cliente_ID", "Agencia_ID"]),
    ("SPClCh0_mean_sum", ["Cliente_ID", "Canal_ID"]),
    ("SPCl0_mean_sum", ["Cliente_ID"]),
    ("SPR0_mean_sum", ["Ruta_SAK"]),
    ("SPA0_mean_sum", ["Agencia_ID"]),
    ("SPCh0_mean_sum", ["Canal_ID"]),
    ("SPT0_mean_sum", ["Town_ID"]),
    ("SPSt0_mean_sum", ["State_ID"]),
    ("SP0_mean_sum", []),
]

# NOTE(review): the variable name says "DeleteColumnsFalse" but deleteColumns=True is passed below.
# The name is kept because a later cell refers to it.
configLag0Target1DeleteColumnsFalse = ConfigElements(
    0,
    [(_featureName, ["Semana", "Producto_ID"] + _extraKeys, ["mean", "sum"])
     for _featureName, _extraKeys in _lag0ExtraKeys],
    "Lag0",
    True)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(configLag0Target1DeleteColumnsFalse.__dict__)

{'nameAndGroups': [('SPClRACh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Agencia_ID', 'Canal_ID'], ['mean', 'sum']), ('SPClRA0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Agencia_ID'], ['mean', 'sum']), ('SPClRCh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Canal_ID'], ['mean', 'sum']), ('SPClACh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Agencia_ID', 'Canal_ID'], ['mean', 'sum']), ('SPClR0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK'], ['mean', 'sum']), ('SPClA0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Agencia_ID'], ['mean', 'sum']), ('SPClCh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Canal_ID'], ['mean', 'sum']), ('SPCl0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID'], ['mean', 'sum']), ('SPR0_mean_sum', ['Semana', 'Producto_ID', 'Ruta_SAK'], ['mean', 'sum']), ('SPA0_mean_sum', ['Semana', 'Producto_ID', 'Agencia_ID'], ['mean', 'sum']), ('SPCh0_mean_sum', ['Semana', 'Produc

In [9]:
# Build each Lag0 grouping, merge it into train/test1/test2 and use it to fill the rows of 'Lag0' that are still NaN.
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SPClRACh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SPClRACh0_mean_sum
Producto_ID,Cliente_ID,Ruta_SAK,Agencia_ID,Canal_ID,Unnamed: 5_level_1
41,146030,3303,2281,7,130
41,681747,3306,2281,7,8000


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClRACh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,,12
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,,12


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClRACh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,,12.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClRACh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,,12.0
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,,12.0


Count of missing numbers after SPClRACh0_mean_sum in validation part 1 in column Lag0 is 2008765
Count of missing numbers after SPClRACh0_mean_sum in validation part 2 in column Lag0 is 2728164
SPClRA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClRA0_mean_sum
Producto_ID,Cliente_ID,Ruta_SAK,Agencia_ID,Unnamed: 4_level_1
41,146030,3303,2281,130.0
41,681747,3306,2281,8000.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClRA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12,12.0
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClRA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClRA0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,12.0


Count of missing numbers after SPClRA0_mean_sum in validation part 1 in column Lag0 is 2008400
Count of missing numbers after SPClRA0_mean_sum in validation part 2 in column Lag0 is 2727510
SPClRCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClRCh0_mean_sum
Producto_ID,Cliente_ID,Ruta_SAK,Canal_ID,Unnamed: 4_level_1
41,146030,3303,7,130.0
41,681747,3306,7,8000.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClRCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClRCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClRCh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,12.0


Count of missing numbers after SPClRCh0_mean_sum in validation part 1 in column Lag0 is 2007825
Count of missing numbers after SPClRCh0_mean_sum in validation part 2 in column Lag0 is 2726789
SPClACh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClACh0_mean_sum
Producto_ID,Cliente_ID,Agencia_ID,Canal_ID,Unnamed: 4_level_1
41,146030,2281,7,130.0
41,681747,2281,7,8000.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClACh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClACh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClACh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,12.0


Count of missing numbers after SPClACh0_mean_sum in validation part 1 in column Lag0 is 1914973
Count of missing numbers after SPClACh0_mean_sum in validation part 2 in column Lag0 is 2576300
SPClR0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPClR0_mean_sum
Producto_ID,Cliente_ID,Ruta_SAK,Unnamed: 3_level_1
41,146030,3303,130.0
41,681747,3306,8000.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClR0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClR0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClR0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,12.0


Count of missing numbers after SPClR0_mean_sum in validation part 1 in column Lag0 is 1914973
Count of missing numbers after SPClR0_mean_sum in validation part 2 in column Lag0 is 2576300
SPClA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPClA0_mean_sum
Producto_ID,Cliente_ID,Agencia_ID,Unnamed: 3_level_1
41,146030,2281,130.0
41,681747,2281,8000.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClA0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,12.0


Count of missing numbers after SPClA0_mean_sum in validation part 1 in column Lag0 is 1914699
Count of missing numbers after SPClA0_mean_sum in validation part 2 in column Lag0 is 2571322
SPClCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPClCh0_mean_sum
Producto_ID,Cliente_ID,Canal_ID,Unnamed: 3_level_1
41,146030,7,130.0
41,681747,7,8000.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPClCh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,12.0


Count of missing numbers after SPClCh0_mean_sum in validation part 1 in column Lag0 is 1913356
Count of missing numbers after SPClCh0_mean_sum in validation part 2 in column Lag0 is 2569564
SPCl0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SPCl0_mean_sum
Producto_ID,Cliente_ID,Unnamed: 2_level_1
41,146030,130.0
41,681747,8000.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPCl0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPCl0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPCl0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,12.0
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,12.0


Count of missing numbers after SPCl0_mean_sum in validation part 1 in column Lag0 is 1912970
Count of missing numbers after SPCl0_mean_sum in validation part 2 in column Lag0 is 2568823
SPR0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SPR0_mean_sum
Producto_ID,Ruta_SAK,Unnamed: 2_level_1
41,3201,182.5
41,3301,169.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPR0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,37.098756
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,42.161191


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPR0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,37.098756
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,42.161191


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPR0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,37.098756
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,31.820101


Count of missing numbers after SPR0_mean_sum in validation part 1 in column Lag0 is 37226
Count of missing numbers after SPR0_mean_sum in validation part 2 in column Lag0 is 98314
SPA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SPA0_mean_sum
Producto_ID,Agencia_ID,Unnamed: 2_level_1
41,1958,95.0
41,2278,169.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,37.46859
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,21.722266


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,37.46859
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,21.722266


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPA0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,37.46859
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,31.304783


Count of missing numbers after SPA0_mean_sum in validation part 1 in column Lag0 is 21985
Count of missing numbers after SPA0_mean_sum in validation part 2 in column Lag0 is 73361
SPCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SPCh0_mean_sum
Producto_ID,Canal_ID,Unnamed: 2_level_1
41,7,2195.919048
53,4,2311.294372


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,30.758061
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,31.259644


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,30.758061
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,31.259644


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPCh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,30.758061
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,34.370547


Count of missing numbers after SPCh0_mean_sum in validation part 1 in column Lag0 is 1088
Count of missing numbers after SPCh0_mean_sum in validation part 2 in column Lag0 is 8203
SPT0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SPT0_mean_sum
Producto_ID,Town_ID,Unnamed: 2_level_1
41,2288,95.0
41,2381,3106.433333


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPT0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,33.467569
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,24.022339


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPT0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,33.467569
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,24.022339


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPT0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,33.467569
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,33.85191


Count of missing numbers after SPT0_mean_sum in validation part 1 in column Lag0 is 1046
Count of missing numbers after SPT0_mean_sum in validation part 2 in column Lag0 is 8077
SPSt0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SPSt0_mean_sum
Producto_ID,State_ID,Unnamed: 2_level_1
41,22,2422.677381
41,25,95.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPSt0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,20.568611
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,16.521185


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPSt0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,20.568611
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,16.521185


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SPSt0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,20.568611
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,26.367999


Count of missing numbers after SPSt0_mean_sum in validation part 1 in column Lag0 is 1027
Count of missing numbers after SPSt0_mean_sum in validation part 2 in column Lag0 is 7869
SP0_mean_sum is not in columns..


Unnamed: 0_level_0,SP0_mean_sum
Producto_ID,Unnamed: 1_level_1
41,2195.919048
53,2311.294372


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SP0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,14.866323
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,15.886748


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SP0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,14.866323
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,15.886748


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,SP0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,14.866323
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,16.419275


Count of missing numbers after SP0_mean_sum in validation part 1 in column Lag0 is 953
Count of missing numbers after SP0_mean_sum in validation part 2 in column Lag0 is 7584


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,3,False,120,2,709,4,2008,16,7.346896,12.0
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,4,False,135,2,712,4,2008,16,7.346896,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,4,False,120,2,709,4,2008,16,7.346896,12.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,5,False,135,2,712,4,2008,16,7.346896,12.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,1,False,120,2,709,4,2008,16,7.346896,12.0
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,2,False,140,2,630,4,2008,16,7.346896,12.0


## Inspect Grouping Results

In [10]:
# Report how many rows still have no Lag0 value, one count per line, for the
# training frame followed by the two validation parts.
for frame in (FE.train, FE.test1, FE.test2):
    print(frame["Lag0"].isnull().sum())

0
953
7584


In [13]:
# Derive Lag0Averaged = round(Lag0 / 5) for both validation parts.
# Missing Lag0 values stay missing (NaN propagates through the division and
# np.round).
# NOTE(review): 5 is presumably the number of weeks aggregated into Lag0 --
# confirm and consider promoting it to a named constant.
# The divisor is written as a float so this is a true division under Python 2
# even if Lag0 ever carries an integer dtype (int array / int floors there).
for validation_part in (FE.test1, FE.test2):
    validation_part.loc[:, "Lag0Averaged"] = np.round(validation_part.Lag0.values / 5.0)

In [11]:
# Derive Lag0Averaged = round(Lag0 / 5) for the training frame.
# NOTE(review): 5 is presumably the number of weeks aggregated into Lag0 --
# confirm and consider promoting it to a named constant.
# The divisor is a float on purpose: the training frame has shown Lag0 with an
# integer dtype, and under Python 2 an int array divided by an int floors.
FE.train.loc[:, "Lag0Averaged"] = np.round(FE.train.Lag0.values / 5.0)

In [14]:
# Report how many rows have no Lag0Averaged value, one count per line, for the
# training frame followed by the two validation parts.
for frame in (FE.train, FE.test1, FE.test2):
    print(frame["Lag0Averaged"].isnull().sum())

0
953
7584


In [15]:
# Show the first three rows of the training frame and of each validation part,
# in that order, using the rich DataFrame rendering.
display(FE.train.head(3), FE.test1.head(3), FE.test2.head(3))

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,2.0
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,2.0
2,3,1110,7,3301,15766,1238,4,39.32,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,2.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,2.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,False,135,2,712,4,2008,16,7.346896,12.0,2.0
52449632,8,1110,7,3301,15766,1220,1,7.64,0,0.0,...,False,130,6,225,4,2008,16,7.346896,3.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,DemandaNotEqualTheDifferenceOfVentaUniAndDev,weight,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,False,120,2,709,4,2008,16,7.346896,12.0,2.0
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,False,140,2,630,4,2008,16,7.346896,12.0,2.0
62643208,9,1110,7,3301,15766,1240,2,16.76,0,0.0,...,False,125,4,480,4,2008,16,7.346896,19.0,4.0


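## Results before General Mean

> **Note (review):** the scoring cells in this section carry execution counts 25–27, which are higher than those of the general-mean cells further down (23–24). The recorded scores may therefore have been computed *after* the general-mean fill. Restart the kernel and run all cells top to bottom to confirm which state these numbers reflect.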

In [26]:
# For every frame, add a log1p-transformed copy of the target
# (Demanda_uni_equil) and of the Lag0Averaged prediction. The new columns are
# named "<source column>Logged".
for frame in (FE.train, FE.test1, FE.test2):
    for column in ("Demanda_uni_equil", "Lag0Averaged"):
        frame.loc[:, column + "Logged"] = np.log1p(frame[column].values)

In [27]:
# Score Lag0Averaged against the target on the log1p-transformed columns;
# rmse over log1p values is what the label calls RMSLE.
# The label/score pair is printed as a tuple so the output keeps the
# ('RMSLE Score:', value) form.
for frame in (FE.train, FE.test1, FE.test2):
    score = rmse(frame.Demanda_uni_equilLogged, frame.Lag0AveragedLogged)
    print(('RMSLE Score:', score))

('RMSLE Score:', 0.58242946466416612)
('RMSLE Score:', 0.69642200985311431)
('RMSLE Score:', 0.70616051608596642)


In [25]:
# Score Lag0Averaged against the target with rmsle on the untransformed
# columns, for the training frame and then both validation parts.
# The label/score pair is printed as a tuple so the output keeps the
# ('RMSLE Score:', value) form.
for frame in (FE.train, FE.test1, FE.test2):
    score = rmsle(frame.Demanda_uni_equil, frame.Lag0Averaged)
    print(('RMSLE Score:', score))

('RMSLE Score:', 0.58242946466416612)
('RMSLE Score:', 0.69642200985311409)
('RMSLE Score:', 0.70616051608596642)


In [23]:
# Configuration with a single feature entry: the DemandaGeneralMean column,
# grouped by Semana and Producto_ID with the "mean" and "sum" aggregations.
# Per the printed attributes, the positional arguments are stored as
# lag=0, nameAndGroups=[...], targetVariable="Lag0Averaged", deleteColumns=False.
configLag0Target1DeleteColumnsFalse = ConfigElements(
    0,
    [("DemandaGeneralMean", ["Semana", "Producto_ID"], ["mean", "sum"])],
    "Lag0Averaged",
    False,
)
# Echo the stored attributes to verify the configuration.
print(vars(configLag0Target1DeleteColumnsFalse))

{'nameAndGroups': [('DemandaGeneralMean', ['Semana', 'Producto_ID'], ['mean', 'sum'])], 'lag': 0, 'targetVariable': 'Lag0Averaged', 'deleteColumns': False}


In [24]:
# Apply the configuration held in configLag0Target1DeleteColumnsFalse.
# Per the recorded output of this cell, DemandaGeneralMean is already a column
# and afterwards no Lag0Averaged value is missing in either validation part.
# NOTE(review): presumably the remaining gaps are filled from DemandaGeneralMean --
# confirm against AddConfigurableFeaturesToTrain.
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

DemandaGeneralMean is in columns..


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2,712,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2,712,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after DemandaGeneralMean in validation part 1 in column Lag0Averaged is 0
Count of missing numbers after DemandaGeneralMean in validation part 2 in column Lag0Averaged is 0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2,712,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2,712,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


In [28]:
configLag0Target1DeleteColumnsFalse = ConfigElements(1,[ ("SPClRACh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK",
                                                              "Agencia_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                        ("SPClRA0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK",
                                                              "Agencia_ID"],
                                                          ["mean"]),
                                                        ("SPClRCh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                        ("SPClACh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Agencia_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                        ("SPClR0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK"],
                                                          ["mean"]),
                                                        ("SPClA0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Agencia_ID"],
                                                          ["mean"]),
                                                       ("SPClCh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                       ("SPCl0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID"],
                                                          ["mean"]),
                                                       ("SPR0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Ruta_SAK"],
                                                          ["mean"]),
                                                       ("SPA0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Agencia_ID"],
                                                          ["mean"]),
                                                       ("SPCh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                       ("SPT0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Town_ID"],
                                                          ["mean"]),
                                                       ("SPSt0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "State_ID"],
                                                          ["mean"]),
                                                       ("SP0_mean_sum",["Semana",
                                                              "Producto_ID"],
                                                          ["mean"])
                                                       
                                                       
                                                       ], "Lag1", True)
# Dump the configuration object's attributes so the feature groups defined
# above can be checked by eye before they are applied to the training data.
# The call form of print is used (a single parenthesised argument prints the
# same text on Python 2 and Python 3), matching the print(...) calls in the
# FeatureEngineering class.
print(configLag0Target1DeleteColumnsFalse.__dict__)

{'nameAndGroups': [('SPClRACh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Agencia_ID', 'Canal_ID'], ['mean']), ('SPClRA0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Agencia_ID'], ['mean']), ('SPClRCh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Canal_ID'], ['mean']), ('SPClACh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Agencia_ID', 'Canal_ID'], ['mean']), ('SPClR0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK'], ['mean']), ('SPClA0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Agencia_ID'], ['mean']), ('SPClCh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Canal_ID'], ['mean']), ('SPCl0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID'], ['mean']), ('SPR0_mean_sum', ['Semana', 'Producto_ID', 'Ruta_SAK'], ['mean']), ('SPA0_mean_sum', ['Semana', 'Producto_ID', 'Agencia_ID'], ['mean']), ('SPCh0_mean_sum', ['Semana', 'Producto_ID', 'Canal_ID'], ['mean']), ('SPT0_mean_sum', ['Semana', 'Producto

In [29]:
# Apply the feature configuration built in the previous cell to the training data.
# NOTE(review): judging by the log output of this cell, the call processes the
# configured (name, group columns, aggregation) entries one at a time and, after
# each one, reports how many values are still missing in column "Lag1" of
# "validation part 1" -- confirm against the definition of
# AddConfigurableFeaturesToTrain.
# NOTE(review): FE is presumably a FeatureEngineering instance created in an
# earlier cell; this cell depends on that cell and on the config cell having run.
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SPClRACh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,SPClRACh0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Agencia_ID,Canal_ID,Unnamed: 6_level_1
4,41,681747,3306,2281,7,2064
4,41,684023,3303,2281,7,30


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClRACh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClRACh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,,2.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPClRACh0_mean_sum in validation part 1 in column Lag1 is 3418715
SPClRA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SPClRA0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Agencia_ID,Unnamed: 5_level_1
4,41,681747,3306,2281,2064.0
4,41,684023,3303,2281,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClRA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClRA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,2.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPClRA0_mean_sum in validation part 1 in column Lag1 is 3418620
SPClRCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SPClRCh0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Canal_ID,Unnamed: 5_level_1
4,41,681747,3306,7,2064.0
4,41,684023,3303,7,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClRCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClRCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,2.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPClRCh0_mean_sum in validation part 1 in column Lag1 is 3417644
SPClACh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SPClACh0_mean_sum
Semana,Producto_ID,Cliente_ID,Agencia_ID,Canal_ID,Unnamed: 5_level_1
4,41,681747,2281,7,2064.0
4,41,684023,2281,7,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClACh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClACh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,2.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPClACh0_mean_sum in validation part 1 in column Lag1 is 3412681
SPClR0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClR0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Unnamed: 4_level_1
4,41,681747,3306,2064.0
4,41,684023,3303,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClR0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClR0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,2.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPClR0_mean_sum in validation part 1 in column Lag1 is 3412681
SPClA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClA0_mean_sum
Semana,Producto_ID,Cliente_ID,Agencia_ID,Unnamed: 4_level_1
4,41,681747,2281,2064.0
4,41,684023,2281,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,2.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPClA0_mean_sum in validation part 1 in column Lag1 is 3412651
SPClCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClCh0_mean_sum
Semana,Producto_ID,Cliente_ID,Canal_ID,Unnamed: 4_level_1
4,41,681747,7,2064.0
4,41,684023,7,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPClCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,2.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPClCh0_mean_sum in validation part 1 in column Lag1 is 3411863
SPCl0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPCl0_mean_sum
Semana,Producto_ID,Cliente_ID,Unnamed: 3_level_1
4,41,681747,2064.0
4,41,684023,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPCl0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPCl0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,2.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPCl0_mean_sum in validation part 1 in column Lag1 is 3411597
SPR0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPR0_mean_sum
Semana,Producto_ID,Ruta_SAK,Unnamed: 3_level_1
4,41,3201,107.5
4,41,3303,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPR0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPR0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,5.864407
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,7.207547


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPR0_mean_sum in validation part 1 in column Lag1 is 61835
SPA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPA0_mean_sum
Semana,Producto_ID,Agencia_ID,Unnamed: 3_level_1
4,41,2281,861.5
4,41,23879,107.5


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,6.936416
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,4.245455


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPA0_mean_sum in validation part 1 in column Lag1 is 27329
SPCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPCh0_mean_sum
Semana,Producto_ID,Canal_ID,Unnamed: 3_level_1
4,41,7,610.166667
4,53,4,487.333333


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.85489
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,6.201258


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPCh0_mean_sum in validation part 1 in column Lag1 is 1244
SPT0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPT0_mean_sum
Semana,Producto_ID,Town_ID,Unnamed: 3_level_1
4,41,2381,861.5
4,41,2387,107.5


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPT0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPT0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,6.338747
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,4.550162


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPT0_mean_sum in validation part 1 in column Lag1 is 1182
SPSt0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPSt0_mean_sum
Semana,Producto_ID,State_ID,Unnamed: 3_level_1
4,41,22,610.166667
4,53,20,653.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPSt0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SPSt0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,3.946143
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,3.208723


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SPSt0_mean_sum in validation part 1 in column Lag1 is 1131
SP0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SP0_mean_sum
Semana,Producto_ID,Unnamed: 2_level_1
4,41,610.166667
4,53,487.333333


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SP0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,SP0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,2.882253
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,3.085901


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


Count of missing numbers after SP0_mean_sum in validation part 1 in column Lag1 is 1007


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,709,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,712,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,709,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,712,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,pieces,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612


In [30]:
configLag0Target1DeleteColumnsFalse = ConfigElements(2,[ ("SPClRACh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK",
                                                              "Agencia_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                        ("SPClRA0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK",
                                                              "Agencia_ID"],
                                                          ["mean"]),
                                                        ("SPClRCh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                        ("SPClACh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Agencia_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                        ("SPClR0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK"],
                                                          ["mean"]),
                                                        ("SPClA0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Agencia_ID"],
                                                          ["mean"]),
                                                       ("SPClCh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                       ("SPCl0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID"],
                                                          ["mean"]),
                                                       ("SPR0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Ruta_SAK"],
                                                          ["mean"]),
                                                       ("SPA0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Agencia_ID"],
                                                          ["mean"]),
                                                       ("SPCh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                       ("SPT0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Town_ID"],
                                                          ["mean"]),
                                                       ("SPSt0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "State_ID"],
                                                          ["mean"]),
                                                       ("SP0_mean_sum",["Semana",
                                                              "Producto_ID"],
                                                          ["mean"])
                                                       
                                                       
                                                       ], "Lag2", True)
# Dump the attributes of the config object built above so the grouping setup can
# be checked by eye. The call form of print is used (as elsewhere in this notebook)
# so the cell parses on both Python 2 and Python 3; with a single argument the
# printed text is the same on either interpreter.
print(configLag0Target1DeleteColumnsFalse.__dict__)

{'nameAndGroups': [('SPClRACh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Agencia_ID', 'Canal_ID'], ['mean']), ('SPClRA0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Agencia_ID'], ['mean']), ('SPClRCh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Canal_ID'], ['mean']), ('SPClACh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Agencia_ID', 'Canal_ID'], ['mean']), ('SPClR0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK'], ['mean']), ('SPClA0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Agencia_ID'], ['mean']), ('SPClCh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Canal_ID'], ['mean']), ('SPCl0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID'], ['mean']), ('SPR0_mean_sum', ['Semana', 'Producto_ID', 'Ruta_SAK'], ['mean']), ('SPA0_mean_sum', ['Semana', 'Producto_ID', 'Agencia_ID'], ['mean']), ('SPCh0_mean_sum', ['Semana', 'Producto_ID', 'Canal_ID'], ['mean']), ('SPT0_mean_sum', ['Semana', 'Producto

In [31]:
# Hand the config object to FE.AddConfigurableFeaturesToTrain.
# NOTE(review): judging only by the output logged under this cell, the method
# appears to process each named group from the config in order and then report
# how many values are still missing in the "Lag2" column for two validation
# parts — confirm against the method body, which is not visible from here.
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SPClRACh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,SPClRACh0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Agencia_ID,Canal_ID,Unnamed: 6_level_1
5,41,681747,3306,2281,7,2064
5,41,684023,3303,2281,7,30


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClRACh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClRACh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPClRACh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,,2.0


Count of missing numbers after SPClRACh0_mean_sum in validation part 1 in column Lag2 is 5256798
Count of missing numbers after SPClRACh0_mean_sum in validation part 2 in column Lag2 is 5077609
SPClRA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SPClRA0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Agencia_ID,Unnamed: 5_level_1
5,41,681747,3306,2281,2064.0
5,41,684023,3303,2281,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClRA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClRA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPClRA0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0


Count of missing numbers after SPClRA0_mean_sum in validation part 1 in column Lag2 is 5256439
Count of missing numbers after SPClRA0_mean_sum in validation part 2 in column Lag2 is 5076875
SPClRCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SPClRCh0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Canal_ID,Unnamed: 5_level_1
5,41,681747,3306,7,2064.0
5,41,684023,3303,7,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClRCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClRCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPClRCh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0


Count of missing numbers after SPClRCh0_mean_sum in validation part 1 in column Lag2 is 5255260
Count of missing numbers after SPClRCh0_mean_sum in validation part 2 in column Lag2 is 5075582
SPClACh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SPClACh0_mean_sum
Semana,Producto_ID,Cliente_ID,Agencia_ID,Canal_ID,Unnamed: 5_level_1
5,41,681747,2281,7,2064.0
5,41,684023,2281,7,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClACh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClACh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPClACh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0


Count of missing numbers after SPClACh0_mean_sum in validation part 1 in column Lag2 is 5235457
Count of missing numbers after SPClACh0_mean_sum in validation part 2 in column Lag2 is 5051628
SPClR0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClR0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Unnamed: 4_level_1
5,41,681747,3306,2064.0
5,41,684023,3303,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClR0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClR0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPClR0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0


Count of missing numbers after SPClR0_mean_sum in validation part 1 in column Lag2 is 5235457
Count of missing numbers after SPClR0_mean_sum in validation part 2 in column Lag2 is 5051628
SPClA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClA0_mean_sum
Semana,Producto_ID,Cliente_ID,Agencia_ID,Unnamed: 4_level_1
5,41,681747,2281,2064.0
5,41,684023,2281,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPClA0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0


Count of missing numbers after SPClA0_mean_sum in validation part 1 in column Lag2 is 5235283
Count of missing numbers after SPClA0_mean_sum in validation part 2 in column Lag2 is 5048055
SPClCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClCh0_mean_sum
Semana,Producto_ID,Cliente_ID,Canal_ID,Unnamed: 4_level_1
5,41,681747,7,2064.0
5,41,684023,7,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPClCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPClCh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0


Count of missing numbers after SPClCh0_mean_sum in validation part 1 in column Lag2 is 5234302
Count of missing numbers after SPClCh0_mean_sum in validation part 2 in column Lag2 is 5046894
SPCl0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPCl0_mean_sum
Semana,Producto_ID,Cliente_ID,Unnamed: 3_level_1
5,41,681747,2064.0
5,41,684023,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPCl0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPCl0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,,
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPCl0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0


Count of missing numbers after SPCl0_mean_sum in validation part 1 in column Lag2 is 5233860
Count of missing numbers after SPCl0_mean_sum in validation part 2 in column Lag2 is 5046369
SPR0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPR0_mean_sum
Semana,Producto_ID,Ruta_SAK,Unnamed: 3_level_1
5,41,3201,107.5
5,41,3303,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPR0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPR0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,,5.931034
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,7.617021


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPR0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,,5.864407
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,5.043478


Count of missing numbers after SPR0_mean_sum in validation part 1 in column Lag2 is 529516
Count of missing numbers after SPR0_mean_sum in validation part 2 in column Lag2 is 143041
SPA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPA0_mean_sum
Semana,Producto_ID,Agencia_ID,Unnamed: 3_level_1
5,41,2281,861.5
5,41,23879,107.5


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,6.372671
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.536842


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPA0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,6.936416
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,5.764706


Count of missing numbers after SPA0_mean_sum in validation part 1 in column Lag2 is 465669
Count of missing numbers after SPA0_mean_sum in validation part 2 in column Lag2 is 84398
SPCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPCh0_mean_sum
Semana,Producto_ID,Canal_ID,Unnamed: 3_level_1
5,41,7,610.166667
5,53,4,487.333333


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.85218
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,6.03172


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPCh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,5.85489
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,6.48703


Count of missing numbers after SPCh0_mean_sum in validation part 1 in column Lag2 is 180989
Count of missing numbers after SPCh0_mean_sum in validation part 2 in column Lag2 is 9290
SPT0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPT0_mean_sum
Semana,Producto_ID,Town_ID,Unnamed: 3_level_1
5,41,2381,861.5
5,41,2387,107.5


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPT0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPT0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.913687
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,4.309259


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPT0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,6.338747
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,6.072967


Count of missing numbers after SPT0_mean_sum in validation part 1 in column Lag2 is 180472
Count of missing numbers after SPT0_mean_sum in validation part 2 in column Lag2 is 9149
SPSt0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPSt0_mean_sum
Semana,Producto_ID,State_ID,Unnamed: 3_level_1
5,41,22,610.166667
5,53,20,653.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPSt0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SPSt0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,3.91231
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.181035


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SPSt0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,3.946143
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,4.932605


Count of missing numbers after SPSt0_mean_sum in validation part 1 in column Lag2 is 178071
Count of missing numbers after SPSt0_mean_sum in validation part 2 in column Lag2 is 8719
SP0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SP0_mean_sum
Semana,Producto_ID,Unnamed: 2_level_1
5,41,610.166667
5,53,487.333333


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SP0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,SP0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,2.903423
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.128395


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,SP0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,2.882253
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,3.166558


Count of missing numbers after SP0_mean_sum in validation part 1 in column Lag2 is 171361
Count of missing numbers after SP0_mean_sum in validation part 2 in column Lag2 is 7677


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Prod_name_ID,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,709,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,630,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0


In [32]:
configLag0Target1DeleteColumnsFalse = ConfigElements(3,[ ("SPClRACh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK",
                                                              "Agencia_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                        ("SPClRA0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK",
                                                              "Agencia_ID"],
                                                          ["mean"]),
                                                        ("SPClRCh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                        ("SPClACh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Agencia_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                        ("SPClR0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Ruta_SAK"],
                                                          ["mean"]),
                                                        ("SPClA0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Agencia_ID"],
                                                          ["mean"]),
                                                       ("SPClCh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                       ("SPCl0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Cliente_ID"],
                                                          ["mean"]),
                                                       ("SPR0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Ruta_SAK"],
                                                          ["mean"]),
                                                       ("SPA0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Agencia_ID"],
                                                          ["mean"]),
                                                       ("SPCh0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Canal_ID"],
                                                          ["mean"]),
                                                       ("SPT0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "Town_ID"],
                                                          ["mean"]),
                                                       ("SPSt0_mean_sum",["Semana",
                                                              "Producto_ID",
                                                              "State_ID"],
                                                          ["mean"]),
                                                       ("SP0_mean_sum",["Semana",
                                                              "Producto_ID"],
                                                          ["mean"])
                                                       
                                                       
                                                       ], "Lag3", True)
# Dump the configuration object's attributes to verify what was just built.
# The call form of print works on both Python 2 and Python 3; the bare
# statement form is a SyntaxError on Python 3.
print(configLag0Target1DeleteColumnsFalse.__dict__)

{'nameAndGroups': [('SPClRACh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Agencia_ID', 'Canal_ID'], ['mean']), ('SPClRA0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Agencia_ID'], ['mean']), ('SPClRCh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK', 'Canal_ID'], ['mean']), ('SPClACh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Agencia_ID', 'Canal_ID'], ['mean']), ('SPClR0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Ruta_SAK'], ['mean']), ('SPClA0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Agencia_ID'], ['mean']), ('SPClCh0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID', 'Canal_ID'], ['mean']), ('SPCl0_mean_sum', ['Semana', 'Producto_ID', 'Cliente_ID'], ['mean']), ('SPR0_mean_sum', ['Semana', 'Producto_ID', 'Ruta_SAK'], ['mean']), ('SPA0_mean_sum', ['Semana', 'Producto_ID', 'Agencia_ID'], ['mean']), ('SPCh0_mean_sum', ['Semana', 'Producto_ID', 'Canal_ID'], ['mean']), ('SPT0_mean_sum', ['Semana', 'Producto

In [33]:
# Apply the configuration printed by the previous cell to FE.
# NOTE(review): the method body is outside this view; judging by the output
# below, each (name, group-by columns, aggregations) entry is handled in turn
# and the number of values still missing in "Lag3" is reported after each one.
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


SPClRACh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,SPClRACh0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Agencia_ID,Canal_ID,Unnamed: 6_level_1
6,41,681747,3306,2281,7,2064
6,41,684023,3303,2281,7,30


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClRACh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClRACh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,,5.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,,3.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPClRACh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,,2.0


Count of missing numbers after SPClRACh0_mean_sum in validation part 1 in column Lag3 is 5357403
Count of missing numbers after SPClRACh0_mean_sum in validation part 2 in column Lag3 is 5424990
SPClRA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SPClRA0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Agencia_ID,Unnamed: 5_level_1
6,41,681747,3306,2281,2064.0
6,41,684023,3303,2281,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClRA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClRA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,5.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,3.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPClRA0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,2.0


Count of missing numbers after SPClRA0_mean_sum in validation part 1 in column Lag3 is 5357039
Count of missing numbers after SPClRA0_mean_sum in validation part 2 in column Lag3 is 5424511
SPClRCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SPClRCh0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Canal_ID,Unnamed: 5_level_1
6,41,681747,3306,7,2064.0
6,41,684023,3303,7,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClRCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClRCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,5.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,3.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPClRCh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,2.0


Count of missing numbers after SPClRCh0_mean_sum in validation part 1 in column Lag3 is 5355652
Count of missing numbers after SPClRCh0_mean_sum in validation part 2 in column Lag3 is 5423242
SPClACh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SPClACh0_mean_sum
Semana,Producto_ID,Cliente_ID,Agencia_ID,Canal_ID,Unnamed: 5_level_1
6,41,681747,2281,7,2064.0
6,41,684023,2281,7,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClACh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClACh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,5.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,3.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPClACh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,2.0


Count of missing numbers after SPClACh0_mean_sum in validation part 1 in column Lag3 is 5275090
Count of missing numbers after SPClACh0_mean_sum in validation part 2 in column Lag3 is 5384400
SPClR0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClR0_mean_sum
Semana,Producto_ID,Cliente_ID,Ruta_SAK,Unnamed: 4_level_1
6,41,681747,3306,2064.0
6,41,684023,3303,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClR0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClR0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,5.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,3.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPClR0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,2.0


Count of missing numbers after SPClR0_mean_sum in validation part 1 in column Lag3 is 5275090
Count of missing numbers after SPClR0_mean_sum in validation part 2 in column Lag3 is 5384400
SPClA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClA0_mean_sum
Semana,Producto_ID,Cliente_ID,Agencia_ID,Unnamed: 4_level_1
6,41,681747,2281,2064.0
6,41,684023,2281,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,5.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,3.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPClA0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,2.0


Count of missing numbers after SPClA0_mean_sum in validation part 1 in column Lag3 is 5274637
Count of missing numbers after SPClA0_mean_sum in validation part 2 in column Lag3 is 5381000
SPClCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SPClCh0_mean_sum
Semana,Producto_ID,Cliente_ID,Canal_ID,Unnamed: 4_level_1
6,41,681747,7,2064.0
6,41,684023,7,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPClCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,5.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,3.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPClCh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,2.0


Count of missing numbers after SPClCh0_mean_sum in validation part 1 in column Lag3 is 5273666
Count of missing numbers after SPClCh0_mean_sum in validation part 2 in column Lag3 is 5379945
SPCl0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPCl0_mean_sum
Semana,Producto_ID,Cliente_ID,Unnamed: 3_level_1
6,41,681747,2064.0
6,41,684023,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPCl0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPCl0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,5.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,3.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPCl0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,,
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,2.0


Count of missing numbers after SPCl0_mean_sum in validation part 1 in column Lag3 is 5273141
Count of missing numbers after SPCl0_mean_sum in validation part 2 in column Lag3 is 5379306
SPR0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPR0_mean_sum
Semana,Producto_ID,Ruta_SAK,Unnamed: 3_level_1
6,41,3201,107.5
6,41,3303,30.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPR0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPR0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,9.222222
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,9.818182


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPR0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,,5.931034
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,4.515625


Count of missing numbers after SPR0_mean_sum in validation part 1 in column Lag3 is 683120
Count of missing numbers after SPR0_mean_sum in validation part 2 in column Lag3 is 662886
SPA0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPA0_mean_sum
Semana,Producto_ID,Agencia_ID,Unnamed: 3_level_1
6,41,2281,861.5
6,41,23879,107.5


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPA0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPA0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,7.830409
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,4.244898


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPA0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,5.931034,6.372671
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,5.675


Count of missing numbers after SPA0_mean_sum in validation part 1 in column Lag3 is 632345
Count of missing numbers after SPA0_mean_sum in validation part 2 in column Lag3 is 584639
SPCh0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPCh0_mean_sum
Semana,Producto_ID,Canal_ID,Unnamed: 3_level_1
6,41,7,610.166667
6,53,4,487.333333


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPCh0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPCh0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,6.421352
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,6.253252


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPCh0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,5.931034,5.85218
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,6.622421


Count of missing numbers after SPCh0_mean_sum in validation part 1 in column Lag3 is 517716
Count of missing numbers after SPCh0_mean_sum in validation part 2 in column Lag3 is 237153
SPT0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPT0_mean_sum
Semana,Producto_ID,Town_ID,Unnamed: 3_level_1
6,41,2381,861.5
6,41,2387,107.5


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPT0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPT0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,7.021403
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,5.455696


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPT0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,5.931034,5.913687
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,6.23641


Count of missing numbers after SPT0_mean_sum in validation part 1 in column Lag3 is 517603
Count of missing numbers after SPT0_mean_sum in validation part 2 in column Lag3 is 236507
SPSt0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SPSt0_mean_sum
Semana,Producto_ID,State_ID,Unnamed: 3_level_1
6,41,22,610.166667
6,53,20,653.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPSt0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SPSt0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,4.23833
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,3.484234


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SPSt0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,5.931034,3.91231
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,5.023154


Count of missing numbers after SPSt0_mean_sum in validation part 1 in column Lag3 is 516838
Count of missing numbers after SPSt0_mean_sum in validation part 2 in column Lag3 is 232506
SP0_mean_sum is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SP0_mean_sum
Semana,Producto_ID,Unnamed: 2_level_1
6,41,610.166667
6,53,487.333333


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SP0_mean_sum
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,16,7.346896,12.0,2.0,1.386294,1.098612,,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3,SP0_mean_sum
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0,2.936457
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0,3.250157


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3,SP0_mean_sum
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,5.931034,2.903423
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0,3.217699


Count of missing numbers after SP0_mean_sum in validation part 1 in column Lag3 is 516557
Count of missing numbers after SP0_mean_sum in validation part 2 in column Lag3 is 220435


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,...,2008,16,7.346896,12.0,2.0,1.386294,1.098612,,,
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag1,Lag2,Lag3
52449630,8,1110,7,3301,15766,1212,4,33.52,0,0.0,...,2008,16,7.346896,12.0,2.0,1.609438,1.098612,5.864407,5.931034,5.0
52449631,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,...,2008,16,7.346896,12.0,2.0,1.791759,1.098612,2.0,1.0,3.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,...,Brand_ID,Town_ID,State_ID,DemandaGeneralMean,Lag0,Lag0Averaged,Demanda_uni_equilLogged,Lag0AveragedLogged,Lag2,Lag3
62643206,9,1110,7,3301,15766,1212,1,8.38,0,0.0,...,4,2008,16,7.346896,12.0,2.0,0.693147,1.098612,5.864407,5.931034
62643207,9,1110,7,3301,15766,1238,2,19.66,0,0.0,...,4,2008,16,7.346896,12.0,2.0,1.098612,1.098612,2.0,2.0


In [34]:
# NOTE(review): presumably writes the frames held by FE back to HDF storage,
# with 'both' selecting train and test in the same way as ReadHdf's
# 'train'/'both' switch — confirm against SaveDataFrameToHdf.
FE.SaveDataFrameToHdf('both')

In [18]:
# Preview the two columns that the next cell log-transforms (raw values).
FE.train.loc[:, ["Demanda_uni_equil", "Lag0"]].head(2)

Unnamed: 0,Demanda_uni_equil,Lag0
0,3,2.0
1,4,2.0


In [19]:
# Replace both columns with log(1 + x) of their current values, in place.
# WARNING: not idempotent — running this cell a second time applies log1p
# again on the already-logged values.
for logged_column in ("Demanda_uni_equil", "Lag0"):
    FE.train.loc[:, logged_column] = np.log1p(FE.train[logged_column].values)

In [24]:
# Preview the same two columns again to confirm they now hold log1p values.
FE.train.loc[:, ["Demanda_uni_equil", "Lag0"]].head(2)

Unnamed: 0,Demanda_uni_equil,Lag0
0,1.386294,1.098612
1,1.609438,1.098612


In [23]:
# Both columns were log1p-transformed in an earlier cell, so the plain RMSE of
# the logged values equals the RMSLE of the raw values.
# The message is formatted into a single string because, under Python 2,
# `print ('label', value)` prints the repr of a tuple rather than a sentence.
print('RMSLE Score: {}'.format(rmse(FE.train.Demanda_uni_equil, FE.train.Lag0)))

('RMSLE Score:', 0.58242946466416612)


## Add the general demand mean (`DemandaGeneralMean`)

In [26]:
# Count the rows of the training frame whose Lag0 value is missing.
FE.train.Lag0.isnull().sum()

0