In [1]:
# Reload libraries
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from sklearn.pipeline import Pipeline

In [4]:
from feature_engineering.time_features import TimeFeatures
from feature_engineering.price_features import PriceFeatures

In [5]:
data_raw = pd.read_excel("HackathonData.xlsx")

In [6]:
data_raw.columns = data_raw.columns.str.lower()
data = data_raw.copy()
data.head()

Unnamed: 0,encoded_sku_id,sales_date,subclass_name,class_name,ml_name,category_name,retail_price,promo_price,competitor_price,inventory,daily_units
0,1,2017-09-28,MISC CAMERA ACCES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,89.99,?,?,Fully-Stocked,1
1,2,2017-11-24,CAMERA BATTERIES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,39.99,?,?,Fully-Stocked,18
2,3,2019-11-28,SO PARTS,MOBILE ACCESSORIES,ML - Connected Car,VP - Electrify & Car,49.99,?,46.98,Constrained,7
3,4,2021-04-21,SO RANGEHOODS REG,KITCHEN VENTILATION,ML - Kitchen,VP - Major Appliances,549.99,?,498,Fully-Stocked,6
4,1,2018-10-27,MISC CAMERA ACCES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,89.99,?,?,Fully-Stocked,7


In [10]:
time_transformer = TimeFeatures(time_column="sales_date")
price_transformer = PriceFeatures()


# Individually

In [33]:
data = time_transformer.fit_transform(data)
data.head()

Unnamed: 0,encoded_sku_id,sales_date,subclass_name,class_name,ml_name,category_name,retail_price,promo_price,competitor_price,inventory,...,comp_cleaned,has_comp,Holidays,year,month,day,dayofweek,weekofyear,quarter,is_weekend
0,1,2017-09-28,MISC CAMERA ACCES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,89.99,?,,Fully-Stocked,...,,0,False,2017,9,28,3,39,3,0
1,2,2017-11-24,CAMERA BATTERIES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,39.99,?,,Fully-Stocked,...,,0,False,2017,11,24,4,47,4,0
2,3,2019-11-28,SO PARTS,MOBILE ACCESSORIES,ML - Connected Car,VP - Electrify & Car,49.99,?,46.98,Constrained,...,46.98,1,True,2019,11,28,3,48,4,0
3,4,2021-04-21,SO RANGEHOODS REG,KITCHEN VENTILATION,ML - Kitchen,VP - Major Appliances,549.99,?,498.0,Fully-Stocked,...,498.0,1,False,2021,4,21,2,16,2,0
4,1,2018-10-27,MISC CAMERA ACCES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,89.99,?,,Fully-Stocked,...,,0,False,2018,10,27,5,43,4,1


In [25]:
data = price_transformer.fit_transform(data)
data.head(100)

Unnamed: 0,encoded_sku_id,sales_date,subclass_name,class_name,ml_name,category_name,retail_price,promo_price,competitor_price,inventory,daily_units,real_price,price_com_diff,price_com_diff_pct,has_promo,promo_cleaned,comp_cleaned,has_comp
0,1,2017-09-28,MISC CAMERA ACCES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,89.99,?,,Fully-Stocked,1,89.99,,,0,,,0
1,2,2017-11-24,CAMERA BATTERIES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,39.99,?,,Fully-Stocked,18,39.99,,,0,,,0
2,3,2019-11-28,SO PARTS,MOBILE ACCESSORIES,ML - Connected Car,VP - Electrify & Car,49.99,?,46.98,Constrained,7,49.99,3.01,0.064070,0,,46.98,1
3,4,2021-04-21,SO RANGEHOODS REG,KITCHEN VENTILATION,ML - Kitchen,VP - Major Appliances,549.99,?,498.00,Fully-Stocked,6,549.99,51.99,0.104398,0,,498.00,1
4,1,2018-10-27,MISC CAMERA ACCES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,89.99,?,,Fully-Stocked,7,89.99,,,0,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,91,2021-06-01,MISC CAMERA ACCES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,44.99,?,,Fully-Stocked,3,44.99,,,0,,,0
96,92,2018-07-30,SO CAR ACCESSORIES,MOBILE ACCESSORIES,ML - Connected Car,VP - Electrify & Car,16.99,?,13.17,Fully-Stocked,10,16.99,3.82,0.290053,0,,13.17,1
97,93,2019-05-20,SO EL SING OVEN REG,BUILT IN COOKING,ML - Kitchen,VP - Major Appliances,1529.99,1439.99,,Fully-Stocked,1,1439.99,,,1,1439.99,,0
98,94,2017-09-25,MISC CAMERA ACCES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,39.99,?,,Out-of-Stock,11,39.99,,,0,,,0


# Put into a pipeline

In [34]:
pipe = Pipeline(steps=[
    ("time_transformer", TimeFeatures(time_column="sales_date")),
    ("price_transformer", PriceFeatures())
], verbose=True)
data = pipe.fit_transform(data_raw)
data.head()

[Pipeline] .. (step 1 of 2) Processing time_transformer, total=   2.6s
[Pipeline] . (step 2 of 2) Processing price_transformer, total=   1.0s


Unnamed: 0,encoded_sku_id,sales_date,subclass_name,class_name,ml_name,category_name,retail_price,promo_price,competitor_price,inventory,...,weekofyear,quarter,is_weekend,real_price,price_com_diff,price_com_diff_pct,promo_cleaned,comp_cleaned,has_comp,has_promo
0,1,2017-09-28,MISC CAMERA ACCES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,89.99,?,,Fully-Stocked,...,39,3,0,89.99,,,,,0,0
1,2,2017-11-24,CAMERA BATTERIES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,39.99,?,,Fully-Stocked,...,47,4,0,39.99,,,,,0,0
2,3,2019-11-28,SO PARTS,MOBILE ACCESSORIES,ML - Connected Car,VP - Electrify & Car,49.99,?,46.98,Constrained,...,48,4,0,49.99,3.01,0.06407,,46.98,1,0
3,4,2021-04-21,SO RANGEHOODS REG,KITCHEN VENTILATION,ML - Kitchen,VP - Major Appliances,549.99,?,498.0,Fully-Stocked,...,16,2,0,549.99,51.99,0.104398,,498.0,1,0
4,1,2018-10-27,MISC CAMERA ACCES,DIGITAL CAMERA ACCY,ML - DI Accessories,VP - Digital Imaging,89.99,?,,Fully-Stocked,...,43,4,1,89.99,,,,,0,0
