In [1]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn import datasets, linear_model
from datetime import datetime
import gc
%matplotlib inline
from IPython.display import display, HTML
from pprint import pprint
import time

In [2]:
#take 1 CSV, then split it to 3..
class FeatureEngineering:

    def __init__(self, ValidationStart, ValidationEnd, trainHdfPath, trainHdfFile, testHdfPath1, testHdfPath2, testHdfFile, 
                 testTypes, trainTypes, trainCsvPath, testCsvPath, maxLag=0):
        self.ValidationStart = ValidationStart
        self.ValidationEnd = ValidationEnd
        self.maxLag = maxLag
        self.trainHdfPath = trainHdfPath
        self.trainHdfFile = trainHdfFile
        self.testHdfPath1 = testHdfPath1
        self.testHdfPath2 = testHdfPath2
        self.testHdfFile = testHdfFile
        self.testTypes = testTypes
        self.trainTypes = trainTypes
        self.trainCsvPath = trainCsvPath
        self.testCsvPath = testCsvPath
        
    @staticmethod
    def __printDataFrameBasics__(data):
        display(data.head(2))
        #print data.dtypes
        gc.collect()
        print(data.info(memory_usage=True))
        
    @staticmethod    
    def changeIndexTypeToLowerMemory(data):
        ##########
        #This is very critical, i accept max number is 2^32. Also, if don't do that, memory gets so much higher..
        ##########
        data.reset_index(inplace=True)
        data.drop("index",axis=1, inplace=True)
        #data.index = data.index.astype('uint32')
        gc.collect()
        
    def ReadHdf(self, trainOrTestOrBoth):
        '''Reads and holds Df in object memory'''            
        if trainOrTestOrBoth == 'train' or trainOrTestOrBoth=='both':
            self.train = pd.read_hdf(self.trainHdfPath,self.trainHdfFile)
            FeatureEngineering.changeIndexTypeToLowerMemory(self.train)
            FeatureEngineering.__printDataFrameBasics__(self.train)
            
        if trainOrTestOrBoth == 'test' or trainOrTestOrBoth=='both':
            self.test1 = pd.read_hdf(self.testHdfPath1,self.testHdfFile)
            self.test2 = pd.read_hdf(self.testHdfPath2,self.testHdfFile)
            FeatureEngineering.changeIndexTypeToLowerMemory(self.test1)
            FeatureEngineering.changeIndexTypeToLowerMemory(self.test2)
            FeatureEngineering.__printDataFrameBasics__(self.test1)
            FeatureEngineering.__printDataFrameBasics__(self.test2)
        
    def ReadCsv(self, trainOrTestOrBoth):
        '''Reads and holds Df in memory'''
        if trainOrTestOrBoth == 'train' or trainOrTestOrBoth == 'both':
            self.train = pd.read_csv(self.trainCsvPath, usecols=self.trainTypes.keys(), dtype=self.trainTypes)
            FeatureEngineering.changeIndexTypeToLowerMemory(self.train)
            FeatureEngineering.__printDataFrameBasics__(self.train)
        if trainOrTestOrBoth == 'test' or trainOrTestOrBoth=='both':
            tempTest = pd.read_csv(self.testCsvPath, usecols=self.testTypes.keys(), dtype=self.testTypes)
            self.test1 = tempTest.loc[tempTest.Semana.values == self.ValidationStart]
            self.test2 = tempTest.loc[tempTest.Semana.values == self.ValidationEnd]
            del tempTest
            FeatureEngineering.changeIndexTypeToLowerMemory(self.test1)
            FeatureEngineering.changeIndexTypeToLowerMemory(self.test2)
            FeatureEngineering.__printDataFrameBasics__(self.test1)
            FeatureEngineering.__printDataFrameBasics__(self.test2)
            
    @staticmethod
    def ConvertCsvToHdf(csvPath, HdfPath, HdfName, ColumnTypeDict ):
        tempDf = pd.read_csv(csvPath, usecols=ColumnTypeDict.keys(), dtype=ColumnTypeDict,index=False)
        tempDf.to_hdf(HdfPath, HdfName, format='t')
        del tempDf
        gc.collect()
        print "ConvertCsvToHdf is done.."

    def Preprocess(self, trainOrTestOrBoth, columnFunctionTypeList):
        '''columnFunctionTypeList = [ ['C1',Func1,Type], ['C2',Func2,Type],..    ]'''
        for column, func, localType in columnFunctionTypeList:
            if trainOrTestOrBoth == 'train' or trainOrTestOrBoth =='both':
                self.train.loc[:,column] =  np.apply_along_axis(func,0,FE.train[column].values).astype(localType)
                #np.apply_along_axis(lambda x: x+1,0,FE.train["Semana"]).astype("int32")
            if trainOrTestOrBoth == 'test' or trainOrTestOrBoth == 'both':
                self.test1.loc[:,column] =  np.apply_along_axis(func,0,FE.test1[column].values).astype(localType)
                self.test2.loc[:,column] =  np.apply_along_axis(func,0,FE.test2[column].values).astype(localType)
        gc.collect()
        
    def SaveDataFrameToHdf(self,trainOrTestOrBoth):
        if trainOrTestOrBoth == 'train' or trainOrTestOrBoth=='both':
            self.train.to_hdf(self.trainHdfPath, self.trainHdfFile, format='t', index="False")
        if trainOrTestOrBoth == 'test' or trainOrTestOrBoth=='both':
            self.test1.to_hdf(self.testHdfPath1, self.testHdfFile, format='t', index="False")
            self.test2.to_hdf(self.testHdfPath2, self.testHdfFile, format='t', index="False")
        
    def AddDemandaGeneralMean(self,trainOrTestOrBoth): 
        #self.train.loc[:,"DemandaGeneralMean"] = self.train["Demanda_uni_equil"].loc[
         #   self.train.loc[:,'Semana'] < 10].mean().astype("float32")
            
        meanOfDemanda = self.train["Demanda_uni_equil"].values.mean().astype("float32")
        
        if trainOrTestOrBoth == 'train' or trainOrTestOrBoth=='both':
            self.train.loc[:,"DemandaGeneralMean"] = meanOfDemanda
        if trainOrTestOrBoth == 'test' or trainOrTestOrBoth=='both':
            self.test1.loc[:,"DemandaGeneralMean"] = meanOfDemanda
            self.test2.loc[:,"DemandaGeneralMean"] = meanOfDemanda
        
        #self.train.loc[:,"DemandaGeneralMean"] = self.train["Demanda_uni_equil"].values[
        #(self.train.loc[:,'Semana'].values < self.ValidationStart).values].mean().astype("float32")
        gc.collect()
        
    '''ConfigElements(0,[ ("A",["Semana","Agencia_ID"],["count","count"]),'''
    def AddConfigurableFeaturesToTrain(self, config):
        if config.lag > self.maxLag:
            self.maxLag = config.lag
        
        tempData = self.train[self.train['Semana'].values <= (self.ValidationEnd - config.lag)]
        #display(tempData)
        if(config.lag != 0):
            tempData.loc[:,'Semana'] = tempData['Semana'].values + config.lag
        #display(tempData)
        
        #Means iterative.. eliminate as long as np.nan exists..If there is already one, don't create, use the existing
        if config.targetVariable != "" and  config.targetVariable not in self.train.columns:
            self.train.loc[:,config.targetVariable] = np.nan
            self.test1.loc[:,config.targetVariable] = np.nan
            
            if config.lag != 1:
                self.test2.loc[:,config.targetVariable] = np.nan
        
        for name,groups,aggregate in config.nameAndGroups:
            if name not in self.train.columns:
                print "{} is not in columns..".format(name)            
                
                groupedDataframe = tempData[groups+['Demanda_uni_equil']].copy().groupby(groups).agg(aggregate[0])
                gc.collect()
                #groupedDataframe.columns = groupedDataframe.columns.droplevel(0)
                groupedDataframe.columns = [name]
                
                #This is means of the counts of the semana-columns tuples!..!!!
                #If no lag and mean, mean of the columns without semana!!..
                #If there is lag and count, count of the columns x weeks before
                #If there is lag and mean, mean of the columns x weeks before
                #if(config.lag == 0 and aggregate == "count"):
                if(len(aggregate)>1):
                    groupedDataframe.reset_index(inplace=True)
                    groupedDataframe.drop("Semana",axis=1, inplace=True)
                    groups = groups[1:]
                    groupedDataframe = groupedDataframe.groupby(groups).agg(aggregate[1])
                    groupedDataframe.columns = [name]
                    gc.collect()
                
                display(groupedDataframe)
                self.train = self.train.merge( groupedDataframe, left_on=groups,
                    right_index=True, how='left', sort=False,copy=False)
                gc.collect()
                self.test1 = self.test1.merge( groupedDataframe, left_on=groups,
                    right_index=True, how='left', sort=False,copy=False)
                gc.collect()
                if config.lag != 1:
                    self.test2 = self.test2.merge( groupedDataframe, left_on=groups,
                        right_index=True, how='left', sort=False,copy=False)
                
                del groupedDataframe
                gc.collect()
            else:
                print "{} is in columns..".format(name)
            
            display(self.train)
            display(self.test1)
            display(self.test2)
            
            #Means iterative..!!!!!
            if config.targetVariable != "":
                self.train.loc[pd.isnull(self.train[config.targetVariable].values), 
                    config.targetVariable] = self.train.loc[pd.isnull(self.train[config.targetVariable].values)
                    , name].values
                self.test1.loc[pd.isnull(self.test1[config.targetVariable].values), 
                    config.targetVariable] = self.test1.loc[pd.isnull(self.test1[config.targetVariable].values),
                    name].values
                if config.lag != 1:
                    self.test2.loc[pd.isnull(self.test2[config.targetVariable].values), 
                        config.targetVariable] = self.test2.loc[pd.isnull(self.test2[config.targetVariable].values)
                        , name].values
                    
                count = self.test1[config.targetVariable].isnull().sum()
                print "Count of missing numbers after {} in validation part 1 in column {} is {}".format(name, 
                    config.targetVariable,str(count))
                if config.lag != 1:
                    count = self.test2.loc[:,config.targetVariable].isnull().sum()
                    print "Count of missing numbers after {} in validation part 2 in column {} is {}".format(name, 
                        config.targetVariable,str(count))
                
                
                #display(self.train)
                #If column is already in Dataframe and we want to fill target variable, this deletes columns!!!
                if(config.deleteColumns):
                    self.train.drop(name, axis=1, inplace=True)
                    self.test1.drop(name, axis=1, inplace=True)
                    if config.lag != 1:
                        self.test2.drop(name, axis=1, inplace=True)
                gc.collect()
                #Only in tesst
                #if count == 0:
                 #   break
        del tempData
        display(self.train)   
        display(self.test1)   
        display(self.test2)
        gc.collect()
        return 
    
    def DeleteLaggedWeeksFromTrain(self):
        self.train = self.train[self.train['Semana'].values >= (3 + self.maxLag)]
        gc.collect()
        display(self.train.head(2))
        
    def ReadFirstNRowsOfACsv(self, nrows, trainOrTestOrBoth) :
        if trainOrTestOrBoth == 'train' or trainOrTestOrBoth=='both':
            self.train = pd.read_csv(self.trainCsvPath, usecols=self.trainTypes.keys(), dtype=self.trainTypes, nrows = nrows)
            FeatureEngineering.changeIndexTypeToLowerMemory(self.train)
            FeatureEngineering.__printDataFrameBasics__(self.train)
        if trainOrTestOrBoth == 'test' or trainOrTestOrBoth=='both':
            tempTest = pd.read_csv(self.testCsvPath, usecols=self.testTypes.keys(), dtype=self.testTypes, nrows = nrows*2)
            self.test1 = tempTest.loc[tempTest.Semana == self.ValidationStart]
            self.test2 = tempTest.loc[tempTest.Semana == self.ValidationEnd]
            del tempTest
            FeatureEngineering.changeIndexTypeToLowerMemory(self.test1)
            FeatureEngineering.changeIndexTypeToLowerMemory(self.test2)
            FeatureEngineering.__printDataFrameBasics__(self.test1)
            FeatureEngineering.__printDataFrameBasics__(self.test2)
    
    #Use when concatanating train and validation before predict test for example..
    def AppendTestToTrain(self,deleteTest = True):
        self.train = self.train.append(self.test1,ignore_index=True)
        gc.collect()
        if(deleteTest):
            del self.test1
            gc.collect()
        try:
            self.train = self.train.append(self.test2,ignore_index=True)
            gc.collect()
            if(deleteTest):
                del self.test2
                gc.collect()
        except:
            pass
        #BAD PERFORMANCE!!
    #Split train data to train and test1 and test2 (validation)
    #def SplitTrainToTestUsingValidationStart(self):
     #   boolCondition = self.train.Semana == self.ValidationStart
      #  self.test1 = self.train.loc[boolCondition]
       # self.train.drop((self.train.loc[boolCondition].index), axis=0,inplace=True)
        
       # boolCondition = self.train.Semana == self.ValidationEnd
       # self.test2 = self.train.loc[boolCondition]
       # self.train.drop((self.train.loc[boolCondition].index), axis=0,inplace=True)
      #  del boolCondition
      #  gc.collect()
    
    #Reaches 3x memory from train, because of test1, test2 and train itself at the end.. GC fixed in the end..
    def SplitTrainToTestUsingValidationStart(self):
        boolCondition = self.train.Semana.values == self.ValidationStart
        self.test1 = self.train[boolCondition]
        boolCondition = self.train.Semana.values == self.ValidationEnd
        self.test2 = self.train[boolCondition]
        FE.train = FE.train[ FE.train.Semana.values < FE.ValidationStart ]
        del boolCondition
        gc.collect()

In [3]:
parameterDict =       {"ValidationStart":10, 
 "ValidationEnd":11,
   "maxLag":2,
    "trainHdfPath":'../../input/train_100.h5',
    "trainHdfFile":"train",
    "testHdfPath1":"../../input/test1_100.h5",
    "testHdfPath2":"../../input/test2_100.h5",
    "testHdfFile":"test", 
    "trainTypes" : {'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16, 
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16,'Venta_uni_hoy':np.uint16, 'Venta_hoy':np.float32,
                    'Dev_uni_proxima': np.uint32, 'Dev_proxima':np.float32,'Demanda_uni_equil':np.uint32}, 
    "testTypes" : {'id':np.uint32,'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16,
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16},
    "trainCsvPath":'../../input/train_100.csv'   ,
    "testCsvPath":'../../input/test_100.csv'}

FE = FeatureEngineering(**parameterDict)
print FE.__dict__

{'trainCsvPath': '../../input/train_100.csv', 'maxLag': 2, 'testTypes': {'Cliente_ID': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Semana': <type 'numpy.uint8'>, 'id': <type 'numpy.uint32'>}, 'testHdfFile': 'test', 'trainTypes': {'Dev_proxima': <type 'numpy.float32'>, 'Venta_uni_hoy': <type 'numpy.uint16'>, 'Cliente_ID': <type 'numpy.uint32'>, 'Demanda_uni_equil': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Venta_hoy': <type 'numpy.float32'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Dev_uni_proxima': <type 'numpy.uint32'>, 'Semana': <type 'numpy.uint8'>}, 'testHdfPath1': '../../input/test1_100.h5', 'ValidationEnd': 11, 'testHdfPath2': '../../input/test2_100.h5', 'testCsvPath': '../../input/test_100.csv', 'ValidationStart': 10, 'trainHdfFile': 'train', 'trainHdfPath': '../../input/t

In [8]:
FE.ReadCsv('test')

Unnamed: 0,id,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID
0,142871,10,1550,1,1212,968466,36287
1,260706,10,23879,1,1223,1134138,6469


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48 entries, 0 to 47
Data columns (total 7 columns):
id             48 non-null uint32
Semana         48 non-null uint8
Agencia_ID     48 non-null uint16
Canal_ID       48 non-null uint8
Ruta_SAK       48 non-null uint16
Cliente_ID     48 non-null uint32
Producto_ID    48 non-null uint16
dtypes: uint16(3), uint32(2), uint8(2)
memory usage: 840.0 bytes
None


Unnamed: 0,id,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID
0,36353,11,1441,1,1034,1710154,34892
1,79676,11,1219,1,1001,324254,46772


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 7 columns):
id             52 non-null uint32
Semana         52 non-null uint8
Agencia_ID     52 non-null uint16
Canal_ID       52 non-null uint8
Ruta_SAK       52 non-null uint16
Cliente_ID     52 non-null uint32
Producto_ID    52 non-null uint16
dtypes: uint16(3), uint32(2), uint8(2)
memory usage: 904.0 bytes
None


## Small File..Read train from csv, split, preprocess, save to hdf and read from hdf again..

In [10]:
FE.ReadCsv('both')
#FE.SplitTrainToTestUsingValidationStart()
FE.Preprocess('train', [["Demanda_uni_equil",np.log1p,'float32']])
FE.SaveDataFrameToHdf('both')

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,2
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,4


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 11 columns):
Semana               28 non-null uint8
Agencia_ID           28 non-null uint16
Canal_ID             28 non-null uint8
Ruta_SAK             28 non-null uint16
Cliente_ID           28 non-null uint32
Producto_ID          28 non-null uint16
Venta_uni_hoy        28 non-null uint16
Venta_hoy            28 non-null float32
Dev_uni_proxima      28 non-null uint32
Dev_proxima          28 non-null float32
Demanda_uni_equil    28 non-null uint32
dtypes: float32(2), uint16(4), uint32(3), uint8(2)
memory usage: 912.0 bytes
None


Unnamed: 0,id,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID
0,142871,10,1550,1,1212,968466,36287
1,260706,10,23879,1,1223,1134138,6469


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48 entries, 0 to 47
Data columns (total 7 columns):
id             48 non-null uint32
Semana         48 non-null uint8
Agencia_ID     48 non-null uint16
Canal_ID       48 non-null uint8
Ruta_SAK       48 non-null uint16
Cliente_ID     48 non-null uint32
Producto_ID    48 non-null uint16
dtypes: uint16(3), uint32(2), uint8(2)
memory usage: 840.0 bytes
None


Unnamed: 0,id,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID
0,36353,11,1441,1,1034,1710154,34892
1,79676,11,1219,1,1001,324254,46772


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 7 columns):
id             52 non-null uint32
Semana         52 non-null uint8
Agencia_ID     52 non-null uint16
Canal_ID       52 non-null uint8
Ruta_SAK       52 non-null uint16
Cliente_ID     52 non-null uint32
Producto_ID    52 non-null uint16
dtypes: uint16(3), uint32(2), uint8(2)
memory usage: 904.0 bytes
None


In [9]:
display(FE.train.head(2))
display(FE.test1.head(2))
display(FE.test2.head(2))

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
20,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612
21,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
24,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
25,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612


In [24]:
FE.ReadHdf('both')

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 11 columns):
Semana               28 non-null uint8
Agencia_ID           28 non-null uint16
Canal_ID             28 non-null uint8
Ruta_SAK             28 non-null uint16
Cliente_ID           28 non-null uint32
Producto_ID          28 non-null uint16
Venta_uni_hoy        28 non-null uint16
Venta_hoy            28 non-null float32
Dev_uni_proxima      28 non-null uint32
Dev_proxima          28 non-null float32
Demanda_uni_equil    28 non-null float32
dtypes: float32(3), uint16(4), uint32(2), uint8(2)
memory usage: 912.0 bytes
None


Unnamed: 0,id,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID
0,142871,10,1550,1,1212,968466,36287
1,260706,10,23879,1,1223,1134138,6469


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48 entries, 0 to 47
Data columns (total 7 columns):
id             48 non-null uint32
Semana         48 non-null uint8
Agencia_ID     48 non-null uint16
Canal_ID       48 non-null uint8
Ruta_SAK       48 non-null uint16
Cliente_ID     48 non-null uint32
Producto_ID    48 non-null uint16
dtypes: uint16(3), uint32(2), uint8(2)
memory usage: 840.0 bytes
None


Unnamed: 0,id,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID
0,36353,11,1441,1,1034,1710154,34892
1,79676,11,1219,1,1001,324254,46772


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 7 columns):
id             52 non-null uint32
Semana         52 non-null uint8
Agencia_ID     52 non-null uint16
Canal_ID       52 non-null uint8
Ruta_SAK       52 non-null uint16
Cliente_ID     52 non-null uint32
Producto_ID    52 non-null uint16
dtypes: uint16(3), uint32(2), uint8(2)
memory usage: 904.0 bytes
None


In [27]:
display(FE.train.head(2))
display(FE.test1.head(2))
display(FE.test2.head(2))

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,DemandaGeneralMean
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,1.307894
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,1.307894


Unnamed: 0,id,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,DemandaGeneralMean
0,142871,10,1550,1,1212,968466,36287,1.307894
1,260706,10,23879,1,1223,1134138,6469,1.307894


Unnamed: 0,id,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,DemandaGeneralMean
0,36353,11,1441,1,1034,1710154,34892,1.307894
1,79676,11,1219,1,1001,324254,46772,1.307894


## AddDemandaGeneralMean test

In [26]:
FE.AddDemandaGeneralMean('both')

## DeleteLaggedWeeksFromTrain

In [5]:
parameterDict =       {"ValidationStart":5, 
 "ValidationEnd":6,
   "maxLag":1,
    "trainHdfPath":'../../input/train.h5',
    "trainHdfFile":"train",
    "testHdfPath1":"../../input/test1.h5",
    "testHdfPath2":"../../input/test2.h5",
    "testHdfFile":"test", 
    "trainTypes" : {'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16, 
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16,'Venta_uni_hoy':np.uint16, 'Venta_hoy':np.float32,
                    'Dev_uni_proxima': np.uint32, 'Dev_proxima':np.float32,'Demanda_uni_equil':np.uint32}, 
    "testTypes" : {'id':np.uint32,'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16,
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16},
    "trainCsvPath":'../../input/train.csv'   ,
    "testCsvPath":'../../input/test.csv'}

FE = FeatureEngineering(**parameterDict)
print FE.__dict__

{'trainCsvPath': '../../input/train.csv', 'maxLag': 1, 'testTypes': {'Cliente_ID': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Semana': <type 'numpy.uint8'>, 'id': <type 'numpy.uint32'>}, 'testHdfFile': 'test', 'trainTypes': {'Dev_proxima': <type 'numpy.float32'>, 'Venta_uni_hoy': <type 'numpy.uint16'>, 'Cliente_ID': <type 'numpy.uint32'>, 'Demanda_uni_equil': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Venta_hoy': <type 'numpy.float32'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Dev_uni_proxima': <type 'numpy.uint32'>, 'Semana': <type 'numpy.uint8'>}, 'testHdfPath1': '../../input/test1.h5', 'ValidationEnd': 6, 'testHdfPath2': '../../input/test2.h5', 'testCsvPath': '../../input/test.csv', 'ValidationStart': 5, 'trainHdfFile': 'train', 'trainHdfPath': '../../input/train.h5'}


In [6]:
FE.ReadFirstNRowsOfACsv(35000000,'train')
FE.SplitTrainToTestUsingValidationStart()

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,3
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,4


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35000000 entries, 0 to 34999999
Data columns (total 11 columns):
Semana               uint8
Agencia_ID           uint16
Canal_ID             uint8
Ruta_SAK             uint16
Cliente_ID           uint32
Producto_ID          uint16
Venta_uni_hoy        uint16
Venta_hoy            float32
Dev_uni_proxima      uint32
Dev_proxima          float32
Demanda_uni_equil    uint32
dtypes: float32(2), uint16(4), uint32(3), uint8(2)
memory usage: 1001.4 MB
None


In [7]:
display(FE.train.head(2))
display(FE.test1.head(2))
display(FE.test2.head(2))

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,3
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,4


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
22174800,5,1110,7,3301,15766,1212,5,41.900002,0,0.0,5
22174801,5,1110,7,3301,15766,1216,3,25.139999,0,0.0,3


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
32790197,6,1110,7,3301,15766,1216,1,8.38,0,0.0,1
32790198,6,1110,7,3301,15766,1238,2,19.66,0,0.0,2


In [10]:
FE.train.info()
FE.test1.info()
FE.test2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11009593 entries, 11165207 to 22174799
Data columns (total 11 columns):
Semana               uint8
Agencia_ID           uint16
Canal_ID             uint8
Ruta_SAK             uint16
Cliente_ID           uint32
Producto_ID          uint16
Venta_uni_hoy        uint16
Venta_hoy            float32
Dev_uni_proxima      uint32
Dev_proxima          float32
Demanda_uni_equil    uint32
dtypes: float32(2), uint16(4), uint32(3), uint8(2)
memory usage: 399.0 MB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 10615397 entries, 22174800 to 32790196
Data columns (total 11 columns):
Semana               uint8
Agencia_ID           uint16
Canal_ID             uint8
Ruta_SAK             uint16
Cliente_ID           uint32
Producto_ID          uint16
Venta_uni_hoy        uint16
Venta_hoy            float32
Dev_uni_proxima      uint32
Dev_proxima          float32
Demanda_uni_equil    uint32
dtypes: float32(2), uint16(4), uint32(3), uint8(2)
memory usage: 3

In [9]:
FE.DeleteLaggedWeeksFromTrain()

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
11165207,4,1110,7,3301,15766,325,1,8.15,0,0.0,1
11165208,4,1110,7,3301,15766,328,1,8.15,0,0.0,1


## AppendTestToTrain

In [None]:
parameterDict =       {"ValidationStart":5, 
 "ValidationEnd":6,
   "maxLag":1,
    "trainHdfPath":'../../input/train.h5',
    "trainHdfFile":"train",
    "testHdfPath1":"../../input/test1.h5",
    "testHdfPath2":"../../input/test2.h5",
    "testHdfFile":"test", 
    "trainTypes" : {'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16, 
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16,'Venta_uni_hoy':np.uint16, 'Venta_hoy':np.float32,
                    'Dev_uni_proxima': np.uint32, 'Dev_proxima':np.float32,'Demanda_uni_equil':np.uint32}, 
    "testTypes" : {'id':np.uint32,'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16,
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16},
    "trainCsvPath":'../../input/train.csv'   ,
    "testCsvPath":'../../input/test.csv'}

FE = FeatureEngineering(**parameterDict)
print FE.__dict__

In [None]:
FE.ReadFirstNRowsOfACsv(35000000,'train')
FE.SplitTrainToTestUsingValidationStart()

# Full File!!!!!!!!!!!!!!!!Read, Convert and Read again..

In [5]:
parameterDict =       {"ValidationStart":8, 
 "ValidationEnd":9,
   "maxLag":3,
    "trainHdfPath":'../../input/train.h5',
    "trainHdfFile":"train",
    "testHdfPath1":"../../input/test1.h5",
    "testHdfPath2":"../../input/test2.h5",
    "testHdfFile":"test", 
    "trainTypes" : {'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16, 
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16,'Venta_uni_hoy':np.uint16, 'Venta_hoy':np.float32,
                    'Dev_uni_proxima': np.uint32, 'Dev_proxima':np.float32,'Demanda_uni_equil':np.uint32}, 
    "testTypes" : {'id':np.uint32,'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16,
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16},
    "trainCsvPath":'../../input/train.csv'   ,
    "testCsvPath":'../../input/test.csv'}

FE = FeatureEngineering(**parameterDict)
print FE.__dict__

{'trainCsvPath': '../../input/train.csv', 'maxLag': 3, 'testTypes': {'Cliente_ID': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Semana': <type 'numpy.uint8'>, 'id': <type 'numpy.uint32'>}, 'testHdfFile': 'test', 'trainTypes': {'Dev_proxima': <type 'numpy.float32'>, 'Venta_uni_hoy': <type 'numpy.uint16'>, 'Cliente_ID': <type 'numpy.uint32'>, 'Demanda_uni_equil': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Venta_hoy': <type 'numpy.float32'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Dev_uni_proxima': <type 'numpy.uint32'>, 'Semana': <type 'numpy.uint8'>}, 'testHdfPath1': '../../input/test1.h5', 'ValidationEnd': 9, 'testHdfPath2': '../../input/test2.h5', 'testCsvPath': '../../input/test.csv', 'ValidationStart': 8, 'trainHdfFile': 'train', 'trainHdfPath': '../../input/train.h5'}


In [6]:
FE.ReadCsv('train')
FE.SplitTrainToTestUsingValidationStart()
FE.Preprocess('both', [["Demanda_uni_equil",np.log1p,'float32']])
FE.SaveDataFrameToHdf('both')

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,3
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,4


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74180464 entries, 0 to 74180463
Data columns (total 11 columns):
Semana               uint8
Agencia_ID           uint16
Canal_ID             uint8
Ruta_SAK             uint16
Cliente_ID           uint32
Producto_ID          uint16
Venta_uni_hoy        uint16
Venta_hoy            float32
Dev_uni_proxima      uint32
Dev_proxima          float32
Demanda_uni_equil    uint32
dtypes: float32(2), uint16(4), uint32(3), uint8(2)
memory usage: 2.1 GB
None


In [8]:
FE.train.info()
FE.test1.info()
FE.test2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 53364883 entries, 0 to 53364882
Data columns (total 11 columns):
Semana               uint8
Agencia_ID           uint16
Canal_ID             uint8
Ruta_SAK             uint16
Cliente_ID           uint32
Producto_ID          uint16
Venta_uni_hoy        uint16
Venta_hoy            float32
Dev_uni_proxima      uint32
Dev_proxima          float32
Demanda_uni_equil    float32
dtypes: float32(3), uint16(4), uint32(2), uint8(2)
memory usage: 1.9 GB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 10406868 entries, 53364883 to 63771750
Data columns (total 11 columns):
Semana               uint8
Agencia_ID           uint16
Canal_ID             uint8
Ruta_SAK             uint16
Cliente_ID           uint32
Producto_ID          uint16
Venta_uni_hoy        uint16
Venta_hoy            float32
Dev_uni_proxima      uint32
Dev_proxima          float32
Demanda_uni_equil    float32
dtypes: float32(3), uint16(4), uint32(2), uint8(2)
memory usage: 377.1 MB

## FULL Read train, test1 and test2 from hdf!..

In [5]:
parameterDict =       {"ValidationStart":8, 
 "ValidationEnd":9,
   "maxLag":3,
    "trainHdfPath":'../../input/train.h5',
    "trainHdfFile":"train",
    "testHdfPath1":"../../input/test1.h5",
    "testHdfPath2":"../../input/test2.h5",
    "testHdfFile":"test", 
    "trainTypes" : {'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16, 
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16,'Venta_uni_hoy':np.uint16, 'Venta_hoy':np.float32,
                    'Dev_uni_proxima': np.uint32, 'Dev_proxima':np.float32,'Demanda_uni_equil':np.uint32}, 
    "testTypes" : {'id':np.uint32,'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16,
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16},
    "trainCsvPath":'../../input/train.csv'   ,
    "testCsvPath":'../../input/test.csv'}

FE = FeatureEngineering(**parameterDict)
print FE.__dict__

{'trainCsvPath': '../../input/train.csv', 'maxLag': 3, 'testTypes': {'Cliente_ID': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Semana': <type 'numpy.uint8'>, 'id': <type 'numpy.uint32'>}, 'testHdfFile': 'test', 'trainTypes': {'Dev_proxima': <type 'numpy.float32'>, 'Venta_uni_hoy': <type 'numpy.uint16'>, 'Cliente_ID': <type 'numpy.uint32'>, 'Demanda_uni_equil': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Venta_hoy': <type 'numpy.float32'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Dev_uni_proxima': <type 'numpy.uint32'>, 'Semana': <type 'numpy.uint8'>}, 'testHdfPath1': '../../input/test1.h5', 'ValidationEnd': 9, 'testHdfPath2': '../../input/test2.h5', 'testCsvPath': '../../input/test.csv', 'ValidationStart': 8, 'trainHdfFile': 'train', 'trainHdfPath': '../../input/train.h5'}


In [6]:
FE.ReadHdf('both')

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,3,1110,7,3301,15766,1212,3,25.139999,0,0.0,1.386294
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,1.609438


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53364883 entries, 0 to 53364882
Data columns (total 11 columns):
Semana               uint8
Agencia_ID           uint16
Canal_ID             uint8
Ruta_SAK             uint16
Cliente_ID           uint32
Producto_ID          uint16
Venta_uni_hoy        uint16
Venta_hoy            float32
Dev_uni_proxima      uint32
Dev_proxima          float32
Demanda_uni_equil    float32
dtypes: float32(3), uint16(4), uint32(2), uint8(2)
memory usage: 1.5 GB
None


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,8,1110,7,3301,15766,1212,4,33.52,0,0.0,1.609438
1,8,1110,7,3301,15766,1216,5,41.900002,0,0.0,1.791759


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10406868 entries, 0 to 10406867
Data columns (total 11 columns):
Semana               uint8
Agencia_ID           uint16
Canal_ID             uint8
Ruta_SAK             uint16
Cliente_ID           uint32
Producto_ID          uint16
Venta_uni_hoy        uint16
Venta_hoy            float32
Dev_uni_proxima      uint32
Dev_proxima          float32
Demanda_uni_equil    float32
dtypes: float32(3), uint16(4), uint32(2), uint8(2)
memory usage: 297.7 MB
None


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1110,7,3301,15766,1212,1,8.38,0,0.0,0.693147
1,9,1110,7,3301,15766,1238,2,19.66,0,0.0,1.098612


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10408713 entries, 0 to 10408712
Data columns (total 11 columns):
Semana               uint8
Agencia_ID           uint16
Canal_ID             uint8
Ruta_SAK             uint16
Cliente_ID           uint32
Producto_ID          uint16
Venta_uni_hoy        uint16
Venta_hoy            float32
Dev_uni_proxima      uint32
Dev_proxima          float32
Demanda_uni_equil    float32
dtypes: float32(3), uint16(4), uint32(2), uint8(2)
memory usage: 297.8 MB
None


# Group By test!!!
# Big Test!!!Very Long!!!

In [3]:
parameterDict =       {"ValidationStart":8, 
 "ValidationEnd":9,
   "maxLag":2,
    "trainHdfPath":'../../input/train_100.h5',
    "trainHdfFile":"train",
    "testHdfPath1":"../../input/test1_100.h5",
    "testHdfPath2":"../../input/test2_100.h5",
    "testHdfFile":"test", 
    "trainTypes" : {'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16, 
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16,'Venta_uni_hoy':np.uint16, 'Venta_hoy':np.float32,
                    'Dev_uni_proxima': np.uint32, 'Dev_proxima':np.float32,'Demanda_uni_equil':np.uint32}, 
    "testTypes" : {'id':np.uint32,'Semana':np.uint8, 'Agencia_ID':np.uint16, 'Canal_ID':np.uint8,'Ruta_SAK':np.uint16,
        'Cliente_ID':np.uint32, 'Producto_ID':np.uint16},
    "trainCsvPath":'../../input/train_100.csv'   ,
    "testCsvPath":'../../input/test_100.csv'}

FE = FeatureEngineering(**parameterDict)
print FE.__dict__

{'trainCsvPath': '../../input/train_100.csv', 'maxLag': 2, 'testTypes': {'Cliente_ID': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Semana': <type 'numpy.uint8'>, 'id': <type 'numpy.uint32'>}, 'testHdfFile': 'test', 'trainTypes': {'Dev_proxima': <type 'numpy.float32'>, 'Venta_uni_hoy': <type 'numpy.uint16'>, 'Cliente_ID': <type 'numpy.uint32'>, 'Demanda_uni_equil': <type 'numpy.uint32'>, 'Ruta_SAK': <type 'numpy.uint16'>, 'Canal_ID': <type 'numpy.uint8'>, 'Venta_hoy': <type 'numpy.float32'>, 'Producto_ID': <type 'numpy.uint16'>, 'Agencia_ID': <type 'numpy.uint16'>, 'Dev_uni_proxima': <type 'numpy.uint32'>, 'Semana': <type 'numpy.uint8'>}, 'testHdfPath1': '../../input/test1_100.h5', 'ValidationEnd': 9, 'testHdfPath2': '../../input/test2_100.h5', 'testCsvPath': '../../input/test_100.csv', 'ValidationStart': 8, 'trainHdfFile': 'train', 'trainHdfPath': '../../input/tra

In [4]:
#FE.ReadCsv('train')
#FE.SplitTrainToTestUsingValidationStart()
#FE.Preprocess('both', [["Demanda_uni_equil",np.log1p,'float32']])
#FE.SaveDataFrameToHdf('both')

In [5]:
class ConfigElements:
    def __init__(self, lag, nameAndGroups, targetVariable="", deleteColumns = False):
        self.lag = lag
        self.nameAndGroups = nameAndGroups
        #If there is target variable, then 5 4 3 2 1, fill the np.nans..Else hold them all in Dataframe..
        self.targetVariable = targetVariable
        self.deleteColumns = deleteColumns

In [6]:
FE.ReadHdf('both')

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 11 columns):
Semana               20 non-null uint8
Agencia_ID           20 non-null uint16
Canal_ID             20 non-null uint8
Ruta_SAK             20 non-null uint16
Cliente_ID           20 non-null uint32
Producto_ID          20 non-null uint16
Venta_uni_hoy        20 non-null uint16
Venta_hoy            20 non-null float32
Dev_uni_proxima      20 non-null uint32
Dev_proxima          20 non-null float32
Demanda_uni_equil    20 non-null float32
dtypes: float32(3), uint16(4), uint32(2), uint8(2)
memory usage: 672.0 bytes
None


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 11 columns):
Semana               4 non-null uint8
Agencia_ID           4 non-null uint16
Canal_ID             4 non-null uint8
Ruta_SAK             4 non-null uint16
Cliente_ID           4 non-null uint32
Producto_ID          4 non-null uint16
Venta_uni_hoy        4 non-null uint16
Venta_hoy            4 non-null float32
Dev_uni_proxima      4 non-null uint32
Dev_proxima          4 non-null float32
Demanda_uni_equil    4 non-null float32
dtypes: float32(3), uint16(4), uint32(2), uint8(2)
memory usage: 192.0 bytes
None


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 11 columns):
Semana               4 non-null uint8
Agencia_ID           4 non-null uint16
Canal_ID             4 non-null uint8
Ruta_SAK             4 non-null uint16
Cliente_ID           4 non-null uint32
Producto_ID          4 non-null uint16
Venta_uni_hoy        4 non-null uint16
Venta_hoy            4 non-null float32
Dev_uni_proxima      4 non-null uint32
Dev_proxima          4 non-null float32
Demanda_uni_equil    4 non-null float32
dtypes: float32(3), uint16(4), uint32(2), uint8(2)
memory usage: 192.0 bytes
None


## Groupby Test Lag0Target1 DeleteColumns false

In [10]:
configLag0Target1DeleteColumnsFalse = ConfigElements(0,[ ("SA0CC",["Semana","Agencia_ID"],["count","count"]),(
    "A0M",["Agencia_ID"],["mean"]), ("SAMM",["Semana","Agencia_ID"],["mean","mean"])], "lag0tar1V1", False)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0CC', ['Semana', 'Agencia_ID'], ['count', 'count']), ('A0M', ['Agencia_ID'], ['mean']), ('SAMM', ['Semana', 'Cliente_ID'], ['mean', 'mean'])], 'lag': 0, 'targetVariable': 'lag0tar1V1', 'deleteColumns': False}


In [11]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0CC is in columns..


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,3,1.381045,1.381045
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,5,1.347412,1.429942
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,4,1.695274,1.80848
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,2,1.194506,1.242453
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,4,1.695274,1.80848
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,4,1.695274,1.80848
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,5,1.347412,1.429942
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1,1.098612,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,5,1.347412,1.429942


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5,1.347412,1.429942
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5.0,1.347412,1.429942
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3.0,1.381045,1.381045
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,


Count of missing numbers after SA0CC in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SA0CC in validation part 2 in column lag0tar1V1 is 2
A0M is in columns..


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,3,1.381045,1.381045
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,5,1.347412,1.429942
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,4,1.695274,1.80848
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,2,1.194506,1.242453
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,4,1.695274,1.80848
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,4,1.695274,1.80848
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,5,1.347412,1.429942
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1,1.098612,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,5,1.347412,1.429942


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5,1.347412,1.429942
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5.0,1.347412,1.429942
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3.0,1.381045,1.381045
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,


Count of missing numbers after A0M in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after A0M in validation part 2 in column lag0tar1V1 is 2
SAMM is in columns..


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,3,1.381045,1.381045
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,5,1.347412,1.429942
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,4,1.695274,1.80848
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,2,1.194506,1.242453
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,4,1.695274,1.80848
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,4,1.695274,1.80848
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,5,1.347412,1.429942
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1,1.098612,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,5,1.347412,1.429942


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5,1.347412,1.429942
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5.0,1.347412,1.429942
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3.0,1.381045,1.381045
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,


Count of missing numbers after SAMM in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SAMM in validation part 2 in column lag0tar1V1 is 2


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,3,1.381045,1.381045
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,5,1.347412,1.429942
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,4,1.695274,1.80848
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,2,1.194506,1.242453
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,4,1.695274,1.80848
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,4,1.695274,1.80848
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,5,1.347412,1.429942
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1,1.098612,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,5,1.347412,1.429942


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5,1.347412,1.429942
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5.0,1.347412,1.429942
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3.0,1.381045,1.381045
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,


## Control if target is in column, or new features are in columns..SAMM is new..

In [13]:
configLag0Target1DeleteColumnsFalse = ConfigElements(0,[ ("SA0CC",["Semana","Agencia_ID"],["count","count"]),(
    "A0M",["Agencia_ID"],["mean"]), ("SCMM",["Semana","Cliente_ID"],["mean","mean"])], "lag0tar1V1", False)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0CC', ['Semana', 'Agencia_ID'], ['count', 'count']), ('A0M', ['Agencia_ID'], ['mean']), ('SCMM', ['Semana', 'Cliente_ID'], ['mean', 'mean'])], 'lag': 0, 'targetVariable': 'lag0tar1V1', 'deleteColumns': False}


In [14]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0CC is in columns..


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,3,1.381045,1.381045
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,5,1.347412,1.429942
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,4,1.695274,1.80848
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,2,1.194506,1.242453
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,4,1.695274,1.80848
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,4,1.695274,1.80848
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,5,1.347412,1.429942
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1,1.098612,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,5,1.347412,1.429942


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5,1.347412,1.429942
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5.0,1.347412,1.429942
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3.0,1.381045,1.381045
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,


Count of missing numbers after SA0CC in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SA0CC in validation part 2 in column lag0tar1V1 is 2
A0M is in columns..


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,3,1.381045,1.381045
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,5,1.347412,1.429942
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,4,1.695274,1.80848
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,2,1.194506,1.242453
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,4,1.695274,1.80848
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,4,1.695274,1.80848
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,5,1.347412,1.429942
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1,1.098612,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,5,1.347412,1.429942


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5,1.347412,1.429942
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5.0,1.347412,1.429942
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3.0,1.381045,1.381045
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,


Count of missing numbers after A0M in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after A0M in validation part 2 in column lag0tar1V1 is 2
SCMM is not in columns..


Unnamed: 0_level_0,SCMM
Cliente_ID,Unnamed: 1_level_1
50789,1.098612
174266,1.386294
349439,1.098612
358759,1.098612
396762,1.098612
399077,2.079442
402789,1.516754
641223,1.791759
660918,1.098612
1209569,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM,SCMM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,3,1.381045,1.381045,1.098612
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,5,1.347412,1.429942,1.609438
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,4,1.695274,1.80848,2.079442
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,2,1.194506,1.242453,1.386294
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,4,1.695274,1.80848,1.791759
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,4,1.695274,1.80848,0.693147
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,5,1.347412,1.429942,0.693147
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1,1.098612,1.098612,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045,1.098612
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,5,1.347412,1.429942,1.516754


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM,SCMM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848,1.098612
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045,1.098612
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5,1.347412,1.429942,1.098612
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM,SCMM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5.0,1.347412,1.429942,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3.0,1.381045,1.381045,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,,1.098612


Count of missing numbers after SCMM in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SCMM in validation part 2 in column lag0tar1V1 is 0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM,SCMM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,3,1.381045,1.381045,1.098612
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,5,1.347412,1.429942,1.609438
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,4,1.695274,1.80848,2.079442
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,2,1.194506,1.242453,1.386294
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,4,1.695274,1.80848,1.791759
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,4,1.695274,1.80848,0.693147
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,5,1.347412,1.429942,0.693147
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1,1.098612,1.098612,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045,1.098612
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,5,1.347412,1.429942,1.516754


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM,SCMM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848,1.098612
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3,1.381045,1.381045,1.098612
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5,1.347412,1.429942,1.098612
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4,1.695274,1.80848,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC,A0M,SAMM,SCMM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5.0,1.347412,1.429942,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3.0,1.381045,1.381045,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,,1.098612


## Reset now..Groupby Test Lag0Target1 DeleteColumns true

In [7]:
configLag0Target1DeleteColumnsFalse = ConfigElements(0,[ ("SA0CC",["Semana","Agencia_ID"],["count","count"]),(
    "A0M",["Agencia_ID"],["mean"]), ("SAMM",["Semana","Agencia_ID"],["mean","mean"])], "lag0tar1V1", True)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0CC', ['Semana', 'Agencia_ID'], ['count', 'count']), ('A0M', ['Agencia_ID'], ['mean']), ('SAMM', ['Semana', 'Agencia_ID'], ['mean', 'mean'])], 'lag': 0, 'targetVariable': 'lag0tar1V1', 'deleteColumns': True}


In [8]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0CC is not in columns..


Unnamed: 0_level_0,SA0CC
Agencia_ID,Unnamed: 1_level_1
1111,3
1112,5
1113,4
1114,2
1115,1
1127,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,3
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,5
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,4
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,2
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,4
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,4
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,5
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,1
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,3
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,,5


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,4
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,3
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,5
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,4


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,5.0
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,3.0
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Count of missing numbers after SA0CC in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SA0CC in validation part 2 in column lag0tar1V1 is 2
A0M is not in columns..


Unnamed: 0_level_0,A0M
Agencia_ID,Unnamed: 1_level_1
1111,1.381045
1112,1.347412
1113,1.695274
1114,1.194506
1115,1.098612
1127,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,A0M
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3,1.381045
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5,1.347412
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4,1.695274
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2,1.194506
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4,1.695274
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4,1.695274
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5,1.347412
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3,1.381045
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5,1.347412


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,A0M
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4,1.695274
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3,1.381045
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5,1.347412
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4,1.695274


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,A0M
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,1.347412
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,1.381045
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Count of missing numbers after A0M in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after A0M in validation part 2 in column lag0tar1V1 is 2
SAMM is not in columns..


Unnamed: 0_level_0,SAMM
Agencia_ID,Unnamed: 1_level_1
1111,1.381045
1112,1.429942
1113,1.80848
1114,1.242453
1115,1.098612
1127,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SAMM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,1.381045
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,1.429942
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,1.80848
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,1.242453
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,1.80848
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,1.80848
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,1.429942
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,1.381045
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,1.429942


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SAMM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,1.80848
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,1.381045
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,1.429942
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,1.80848


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SAMM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,1.429942
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,1.381045
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Count of missing numbers after SAMM in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SAMM in validation part 2 in column lag0tar1V1 is 2


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,


## Control if target is in column, or new features are in columns..SAMM is new..

In [9]:
configLag0Target1DeleteColumnsFalse = ConfigElements(0,[ ("SA0CC",["Semana","Agencia_ID"],["count","count"]),(
    "A0M",["Agencia_ID"],["mean"]), ("SCMM",["Semana","Cliente_ID"],["mean","mean"])], "lag0tar1V1", True)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0CC', ['Semana', 'Agencia_ID'], ['count', 'count']), ('A0M', ['Agencia_ID'], ['mean']), ('SCMM', ['Semana', 'Cliente_ID'], ['mean', 'mean'])], 'lag': 0, 'targetVariable': 'lag0tar1V1', 'deleteColumns': True}


In [10]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0CC is not in columns..


Unnamed: 0_level_0,SA0CC
Agencia_ID,Unnamed: 1_level_1
1111,3
1112,5
1113,4
1114,2
1115,1
1127,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,3
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,5
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,4
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,2
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,4
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,4
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,5
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,3
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,5


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,4


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0CC
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,5.0
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,3.0
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Count of missing numbers after SA0CC in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SA0CC in validation part 2 in column lag0tar1V1 is 2
A0M is not in columns..


Unnamed: 0_level_0,A0M
Agencia_ID,Unnamed: 1_level_1
1111,1.381045
1112,1.347412
1113,1.695274
1114,1.194506
1115,1.098612
1127,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,A0M
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,1.381045
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,1.347412
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,1.695274
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,1.194506
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,1.695274
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,1.695274
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,1.347412
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,1.381045
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,1.347412


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,A0M
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,1.695274
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,1.381045
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,1.347412
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,1.695274


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,A0M
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,1.347412
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,1.381045
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Count of missing numbers after A0M in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after A0M in validation part 2 in column lag0tar1V1 is 2
SCMM is not in columns..


Unnamed: 0_level_0,SCMM
Cliente_ID,Unnamed: 1_level_1
50789,1.098612
174266,1.386294
349439,1.098612
358759,1.098612
396762,1.098612
399077,2.079442
402789,1.516754
641223,1.791759
660918,1.098612
1209569,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SCMM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0,1.098612
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0,1.609438
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0,2.079442
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0,1.386294
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0,1.791759
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0,0.693147
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0,0.693147
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0,1.098612
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0,1.098612
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0,1.516754


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SCMM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,1.098612
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,1.098612
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,1.098612
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SCMM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1.098612


Count of missing numbers after SCMM in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SCMM in validation part 2 in column lag0tar1V1 is 0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,3.0
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,5.0
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,4.0
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,2.0
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,4.0
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,4.0
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,5.0
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,1.0
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,3.0
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,5.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,4.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,5.0
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,3.0
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612


# Reset!!Lag1 TEST!!!!!!!!!!!!!!
# Reset!!Lag1 TEST!!!!!!!!!!!!!!

## Groupby Test Lag1Target1 DeleteColumns false

In [8]:
configLag0Target1DeleteColumnsFalse = ConfigElements(1,[ ("SA0C",["Semana","Agencia_ID"],["count"]),
                                                        ("SAM",["Semana","Agencia_ID"],["mean"])], "lag0tar1V1", False)
print  configLag0Target1DeleteColumnsFalse.__dict__


{'nameAndGroups': [('SA0C', ['Semana', 'Agencia_ID'], ['count']), ('SAM', ['Semana', 'Agencia_ID'], ['mean'])], 'lag': 1, 'targetVariable': 'lag0tar1V1', 'deleteColumns': False}


In [9]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0C is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SA0C
Semana,Agencia_ID,Unnamed: 2_level_1
4,1111,1
4,1112,1
4,1113,1
4,1114,1
5,1112,1
5,1113,2
5,1115,1
6,1111,1
6,1112,2
6,1113,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,1.0
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,1.0
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,1.0
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,2.0
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Count of missing numbers after SA0C in validation part 1 in column lag0tar1V1 is 3
SAM is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SAM
Semana,Agencia_ID,Unnamed: 2_level_1
4,1111,1.098612
4,1112,1.609438
4,1113,2.079442
4,1114,1.386294
5,1112,0.693147
5,1113,1.242453
5,1115,1.098612
6,1111,1.098612
6,1112,0.693147
6,1113,2.302585


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,1.0,2.079442
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,1.0,2.079442
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,1.0,1.609438
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,0.693147


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,1.589027
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Count of missing numbers after SAM in validation part 1 in column lag0tar1V1 is 3


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,1.0,2.079442
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,1.0,2.079442
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,1.0,1.609438
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,0.693147


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,1.589027
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


## Control if target is in column, or new features are in columns..SAMM is new..

In [10]:
configLag0Target1DeleteColumnsFalse = ConfigElements(1,[ ("SA0C",["Semana","Agencia_ID"],["count"]),
                                                ("SCM",["Semana","Cliente_ID"],["mean"])], "lag0tar1V1", False)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0C', ['Semana', 'Agencia_ID'], ['count']), ('SCM', ['Semana', 'Cliente_ID'], ['mean'])], 'lag': 1, 'targetVariable': 'lag0tar1V1', 'deleteColumns': False}


In [11]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0C is in columns..


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,1.0,2.079442
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,1.0,2.079442
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,1.0,1.609438
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,0.693147


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,1.589027
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Count of missing numbers after SA0C in validation part 1 in column lag0tar1V1 is 3
SCM is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SCM
Semana,Cliente_ID,Unnamed: 2_level_1
4,50789,1.098612
4,174266,1.386294
4,399077,2.079442
4,1209569,1.609438
5,358759,1.098612
5,641223,1.791759
5,1649797,0.693147
5,2270968,0.693147
6,402789,0.693147
6,660918,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM,SCM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,1.0,2.079442,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,1.0,2.079442,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,1.0,1.609438,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,0.693147,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM,SCM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,1.098612
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,1.098612
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,1.589027,1.098612
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Count of missing numbers after SCM in validation part 1 in column lag0tar1V1 is 0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM,SCM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,1.0,2.079442,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,1.0,2.079442,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,1.0,1.609438,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,0.693147,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM,SCM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,1.098612
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,1.098612
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,1.589027,1.098612
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


## Reset now..Groupby Test Lag1Target1 DeleteColumns true

In [9]:
configLag0Target1DeleteColumnsFalse = ConfigElements(1,[ ("SA0C",["Semana","Agencia_ID"],["count"]),
                                                ("SAM",["Semana","Agencia_ID"],["mean"])], "lag0tar1V1", True)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0C', ['Semana', 'Agencia_ID'], ['count']), ('SAM', ['Semana', 'Agencia_ID'], ['mean'])], 'lag': 1, 'targetVariable': 'lag0tar1V1', 'deleteColumns': True}


In [10]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0C is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SA0C
Semana,Agencia_ID,Unnamed: 2_level_1
4,1111,1
4,1112,1
4,1113,1
4,1114,1
5,1112,1
5,1113,2
5,1115,1
6,1111,1
6,1112,2
6,1113,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,1.0
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,1.0
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,1.0
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,2.0
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Count of missing numbers after SA0C in validation part 1 in column lag0tar1V1 is 3
SAM is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SAM
Semana,Agencia_ID,Unnamed: 2_level_1
4,1111,1.098612
4,1112,1.609438
4,1113,2.079442
4,1114,1.386294
5,1112,0.693147
5,1113,1.242453
5,1115,1.098612
6,1111,1.098612
6,1112,0.693147
6,1113,2.302585


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SAM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,2.079442
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,2.079442
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,1.609438
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,0.693147


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SAM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,1.589027
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Count of missing numbers after SAM in validation part 1 in column lag0tar1V1 is 3


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


## Control if target is in column, or new features are in columns..SAMM is new..

In [12]:
configLag0Target1DeleteColumnsFalse = ConfigElements(1,[ ("SA0C",["Semana","Agencia_ID"],["count"]),
                                                        ("SCM",["Semana","Cliente_ID"],["mean"])], "lag0tar1V1", True)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0C', ['Semana', 'Agencia_ID'], ['count']), ('SCM', ['Semana', 'Cliente_ID'], ['mean'])], 'lag': 1, 'targetVariable': 'lag0tar1V1', 'deleteColumns': True}


In [13]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0C is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SA0C
Semana,Agencia_ID,Unnamed: 2_level_1
4,1111,1
4,1112,1
4,1113,1
4,1114,1
5,1112,1
5,1113,2
5,1115,1
6,1111,1
6,1112,2
6,1113,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,1.0
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,1.0
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,1.0
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Count of missing numbers after SA0C in validation part 1 in column lag0tar1V1 is 3
SCM is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SCM
Semana,Cliente_ID,Unnamed: 2_level_1
4,50789,1.098612
4,174266,1.386294
4,399077,2.079442
4,1209569,1.609438
5,358759,1.098612
5,641223,1.791759
5,1649797,0.693147
5,2270968,0.693147
6,402789,0.693147
6,660918,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SCM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SCM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1.098612
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1.098612
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,1.098612
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Count of missing numbers after SCM in validation part 1 in column lag0tar1V1 is 0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


## ## Add another target..

In [15]:
configLag0Target1DeleteColumnsFalse = ConfigElements(1,[ ("SA0C",["Semana","Agencia_ID"],["count"]),
                                                        ("SPC",["Semana","Producto_ID"],["count"])], "Lag2NewTarget", False)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0C', ['Semana', 'Agencia_ID'], ['count']), ('SPC', ['Semana', 'Producto_ID'], ['count'])], 'lag': 1, 'targetVariable': 'Lag2NewTarget', 'deleteColumns': False}


In [16]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0C is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SA0C
Semana,Agencia_ID,Unnamed: 2_level_1
4,1111,1
4,1112,1
4,1113,1
4,1114,1
5,1112,1
5,1113,2
5,1115,1
6,1111,1
6,1112,2
6,1113,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,,1.0
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,,1.0
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,,1.0
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,,2.0
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Count of missing numbers after SA0C in validation part 1 in column Lag2NewTarget is 3
SPC is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SPC
Semana,Producto_ID,Unnamed: 2_level_1
4,32846,1
4,35147,1
4,36748,2
5,1109,1
5,1129,1
5,31423,1
5,41938,1
6,972,1
6,1250,1
6,4767,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C,SPC
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,1.0,1.0,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,1.0,1.0,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,1.0,1.0,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,1.0,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C,SPC
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,3
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,3
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,2.0,3
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,3


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


Count of missing numbers after SPC in validation part 1 in column Lag2NewTarget is 0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C,SPC
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,1.0,1.0,1.0,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,1.0,1.0,1.0,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,1.0,1.0,1.0,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,,,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,1.0,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C,SPC
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,3.0,,3
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,3.0,,3
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,2.0,3
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,3.0,,3


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612


# Reset!!Lag2 TEST!!!!!!!!!!!!!!
# Reset!!Lag2 TEST!!!!!!!!!!!!!!

## Groupby Test Lag2Target1 DeleteColumns false

In [8]:
configLag0Target1DeleteColumnsFalse = ConfigElements(2,[ ("SA0C",["Semana","Agencia_ID"],["count"]),
                                                        ("SAM",["Semana","Agencia_ID"],["mean"])], "lag0tar1V1", False)
print  configLag0Target1DeleteColumnsFalse.__dict__


{'nameAndGroups': [('SA0C', ['Semana', 'Agencia_ID'], ['count']), ('SAM', ['Semana', 'Agencia_ID'], ['mean'])], 'lag': 2, 'targetVariable': 'lag0tar1V1', 'deleteColumns': False}


In [9]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0C is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SA0C
Semana,Agencia_ID,Unnamed: 2_level_1
5,1111,1
5,1112,1
5,1113,1
5,1114,1
6,1112,1
6,1113,2
6,1115,1
7,1111,1
7,1112,2
7,1113,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,1.0
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,2.0
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Count of missing numbers after SA0C in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SA0C in validation part 2 in column lag0tar1V1 is 3
SAM is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SAM
Semana,Agencia_ID,Unnamed: 2_level_1
5,1111,1.098612
5,1112,1.609438
5,1113,2.079442
5,1114,1.386294
6,1112,0.693147
6,1113,1.242453
6,1115,1.098612
7,1111,1.098612
7,1112,0.693147
7,1113,2.302585


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1.098612
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1,1,1.609438
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1,1,1.94591
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1,1,2.564949
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1,1,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,1.589027
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,


Count of missing numbers after SAM in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SAM in validation part 2 in column lag0tar1V1 is 3


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1.098612
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.609438
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.94591
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,2.564949
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,1.589027
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,


## Control if target is in column, or new features are in columns..SAMM is new..


In [10]:
configLag0Target1DeleteColumnsFalse = ConfigElements(2,[ ("SA0C",["Semana","Agencia_ID"],["count"]),
                                                    ("SCM",["Semana","Cliente_ID"],["mean"])], "lag0tar1V1", False)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0C', ['Semana', 'Agencia_ID'], ['count']), ('SCM', ['Semana', 'Cliente_ID'], ['mean'])], 'lag': 2, 'targetVariable': 'lag0tar1V1', 'deleteColumns': False}


In [11]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0C is in columns..


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1.098612
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.609438
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.94591
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,2.564949
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,1.589027
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,


Count of missing numbers after SA0C in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SA0C in validation part 2 in column lag0tar1V1 is 3
SCM is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SCM
Semana,Cliente_ID,Unnamed: 2_level_1
5,50789,1.098612
5,174266,1.386294
5,399077,2.079442
5,1209569,1.609438
6,358759,1.098612
6,641223,1.791759
6,1649797,0.693147
6,2270968,0.693147
7,402789,0.693147
7,660918,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM,SCM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1.098612,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,1.609438,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM,SCM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.609438,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.94591,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,2.564949,
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.609438,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM,SCM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,1.589027,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,,,1.098612


Count of missing numbers after SCM in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SCM in validation part 2 in column lag0tar1V1 is 0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM,SCM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1.098612,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,1.609438,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM,SCM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.609438,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.94591,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,2.564949,
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1.609438,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C,SAM,SCM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,1.589027,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,1.098612


## Reset now..Groupby Test Lag1Target1 DeleteColumns true

In [7]:
configLag0Target1DeleteColumnsFalse = ConfigElements(2,[ ("SA0C",["Semana","Agencia_ID"],["count"]),
                                                        ("SAM",["Semana","Agencia_ID"],["mean"])], "lag0tar1V1", True)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0C', ['Semana', 'Agencia_ID'], ['count']), ('SAM', ['Semana', 'Agencia_ID'], ['mean'])], 'lag': 2, 'targetVariable': 'lag0tar1V1', 'deleteColumns': True}


In [8]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0C is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SA0C
Semana,Agencia_ID,Unnamed: 2_level_1
5,1111,1
5,1112,1
5,1113,1
5,1114,1
6,1112,1
6,1113,2
6,1115,1
7,1111,1
7,1112,2
7,1113,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,,1.0
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,2.0
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Count of missing numbers after SA0C in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SA0C in validation part 2 in column lag0tar1V1 is 3
SAM is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SAM
Semana,Agencia_ID,Unnamed: 2_level_1
5,1111,1.098612
5,1112,1.609438
5,1113,2.079442
5,1114,1.386294
6,1112,0.693147
6,1113,1.242453
6,1115,1.098612
7,1111,1.098612
7,1112,0.693147
7,1113,2.302585


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SAM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,1.098612
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SAM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1,1.609438
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1,1.94591
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1,2.564949
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1,1.609438


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SAM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,1.589027
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Count of missing numbers after SAM in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SAM in validation part 2 in column lag0tar1V1 is 3


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,


## Control if target is in column, or new features are in columns..SAMM is new..

In [9]:
configLag0Target1DeleteColumnsFalse = ConfigElements(2,[ ("SA0C",["Semana","Agencia_ID"],["count"]),
                                                        ("SCM",["Semana","Cliente_ID"],["mean"])], "lag0tar1V1", True)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0C', ['Semana', 'Agencia_ID'], ['count']), ('SCM', ['Semana', 'Cliente_ID'], ['mean'])], 'lag': 2, 'targetVariable': 'lag0tar1V1', 'deleteColumns': True}


In [10]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0C is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SA0C
Semana,Agencia_ID,Unnamed: 2_level_1
5,1111,1
5,1112,1
5,1113,1
5,1114,1
6,1112,1
6,1113,2
6,1115,1
7,1111,1
7,1112,2
7,1113,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,1.0
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SA0C
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,


Count of missing numbers after SA0C in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SA0C in validation part 2 in column lag0tar1V1 is 3
SCM is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SCM
Semana,Cliente_ID,Unnamed: 2_level_1
5,50789,1.098612
5,174266,1.386294
5,399077,2.079442
5,1209569,1.609438
6,358759,1.098612
6,641223,1.791759
6,1649797,0.693147
6,2270968,0.693147
7,402789,0.693147
7,660918,1.098612


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SCM
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SCM
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,SCM
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,1.098612
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,,1.098612


Count of missing numbers after SCM in validation part 1 in column lag0tar1V1 is 0
Count of missing numbers after SCM in validation part 2 in column lag0tar1V1 is 0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612


## Add another target..

In [11]:
configLag0Target1DeleteColumnsFalse = ConfigElements(2,[ ("SA0C",["Semana","Agencia_ID"],["count"]),
                                                        ("SPC",["Semana","Producto_ID"],["count"])], "Lag2NewTarget", False)
print  configLag0Target1DeleteColumnsFalse.__dict__

{'nameAndGroups': [('SA0C', ['Semana', 'Agencia_ID'], ['count']), ('SPC', ['Semana', 'Producto_ID'], ['count'])], 'lag': 2, 'targetVariable': 'Lag2NewTarget', 'deleteColumns': False}


In [12]:
FE.AddConfigurableFeaturesToTrain(configLag0Target1DeleteColumnsFalse)

SA0C is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SA0C
Semana,Agencia_ID,Unnamed: 2_level_1
5,1111,1
5,1112,1
5,1113,1
5,1114,1
6,1112,1
6,1113,2
6,1115,1
7,1111,1
7,1112,2
7,1113,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,,1.0
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,,1.0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,,1
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,,1
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,,1
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,,2.0
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,


Count of missing numbers after SA0C in validation part 1 in column Lag2NewTarget is 0
Count of missing numbers after SA0C in validation part 2 in column Lag2NewTarget is 3
SPC is not in columns..


Unnamed: 0_level_0,Unnamed: 1_level_0,SPC
Semana,Producto_ID,Unnamed: 2_level_1
5,32846,1
5,35147,1
5,36748,2
6,1109,1
6,1129,1
6,31423,1
6,41938,1
7,972,1
7,1250,1
7,4767,1


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C,SPC
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1.0,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,1.0,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C,SPC
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1,
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1,1,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C,SPC
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,2.0,3
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,3
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,3
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,,,3


Count of missing numbers after SPC in validation part 1 in column Lag2NewTarget is 0
Count of missing numbers after SPC in validation part 2 in column Lag2NewTarget is 0


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C,SPC
0,3,1111,1,4402,50789,32846,2,10.92,0,0.0,1.098612,,,,
1,3,1112,1,2103,1209569,36748,4,46.119999,0,0.0,1.609438,,,,
2,3,1113,1,1603,399077,35147,3,61.5,0,0.0,2.079442,,,,
3,3,1114,1,2124,174266,36748,3,34.59,0,0.0,1.386294,,,,
4,4,1113,1,1061,641223,1129,5,88.0,0,0.0,1.791759,,,,
5,4,1113,1,1272,1649797,1109,1,15.01,0,0.0,0.693147,,,,
6,4,1112,1,1272,2270968,41938,1,9.91,0,0.0,0.693147,,,,
7,4,1115,1,1281,358759,31423,2,21.379999,0,0.0,1.098612,,,,
8,5,1111,1,1457,660918,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1.0,
9,5,1112,1,1203,402789,972,1,18.98,0,0.0,0.693147,1.0,1.0,1.0,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C,SPC
0,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1,
1,8,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1,
2,8,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1,
3,8,1113,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.0,1.0,1,


Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,lag0tar1V1,Lag2NewTarget,SA0C,SPC
0,9,1112,1,1232,396762,1250,2,15.28,0,0.0,1.098612,2.0,2.0,2.0,3
1,9,1111,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,3.0,,3
2,9,1117,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,3.0,,3
3,9,1130,1,1232,396762,1250,2,15.28,0,0.0,1.098612,1.098612,3.0,,3
