In [1]:
# import packages
import pandas as pd
# import seaborn as sb
import matplotlib.pyplot as plt
import numpy as np
import datetime
import re
from sklearn.externals import joblib 
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from scipy.interpolate import interpn
from collections import defaultdict
from sklearn.model_selection import GridSearchCV

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)



In [2]:
# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
# load data
items = pd.read_csv('./data/ex_items_all.csv', sep = '|', parse_dates=['dateFirstSell'])
items.head()

Unnamed: 0,itemID,brand,manufacturer,customerRating,category1,category2,category3,recommendedRetailPrice,minSalesPrice,maxSalesPrice,meanSalesPrice,minSalesPriceCounts,maxSalesPriceCounts,unitsSold,revenue,relRevenue,dateFirstSell,dateLastSell,soldDaily,soldWeekly,soldMonthly,recentlySold,soldWeeklyContinuous
0,1,0,1,4.38,1,1,1,8.84,3.11,3.43,3.111661,690.0,3.0,693.0,2156.19,2.418927e-05,2018-01-23,2018-06-26,0.206452,False,1.0,4.0,0.73913
1,2,0,2,3.0,1,2,2,16.92,9.15,9.15,9.15,5.0,5.0,5.0,45.75,5.132475e-07,2018-02-24,2018-05-29,0.042105,False,0.5,32.0,0.266667
2,3,0,3,5.0,1,3,3,15.89,9.89,14.04,12.733253,91.0,240.0,331.0,4269.59,4.78985e-05,2018-01-10,2018-06-29,0.087719,False,0.833333,1.0,0.4
3,4,0,2,4.44,1,2,2,40.17,13.01,14.1,13.798895,53.0,145.0,198.0,2734.03,3.067178e-05,2018-01-18,2018-06-28,0.067901,False,0.833333,2.0,0.416667
4,5,0,2,2.33,1,1,1,17.04,7.48,7.84,7.735556,52.0,128.0,180.0,1392.48,1.562157e-05,2018-01-06,2018-06-23,0.08284,False,0.833333,7.0,0.4


In [4]:
items_simple = items[['itemID', 'customerRating', 'manufacturer', 'brand', 'category1', 'category2', 'category3','recommendedRetailPrice', 'dateFirstSell', 'soldWeekly', 'recentlySold']]

In [5]:
items_simple.category1 = pd.Categorical(items_simple.category1)
items_simple['category1'] = items_simple.category1.cat.codes
items_simple.category2 = pd.Categorical(items_simple.category2)
items_simple['category2'] = items_simple.category2.cat.codes
items_simple.category3 = pd.Categorical(items_simple.category3)
items_simple['category3'] = items_simple.category3.cat.codes
items_simple.head()

Unnamed: 0,itemID,customerRating,manufacturer,brand,category1,category2,category3,recommendedRetailPrice,dateFirstSell,soldWeekly,recentlySold
0,1,4.38,1,0,0,0,0,8.84,2018-01-23,False,4.0
1,2,3.0,2,0,0,1,1,16.92,2018-02-24,False,32.0
2,3,5.0,3,0,0,2,2,15.89,2018-01-10,False,1.0
3,4,4.44,2,0,0,1,1,40.17,2018-01-18,False,2.0
4,5,2.33,2,0,0,0,0,17.04,2018-01-06,False,7.0


In [6]:
# load data
df_train = pd.read_csv('data/orders.csv', sep='|', parse_dates=['time'])
df_train['date'] = [d.date() for d in df_train['time']]
df_items = pd.read_csv('data/items.csv', sep='|')

In [7]:
infos = pd.read_csv('data/infos_promotions_all.csv', sep='|', parse_dates=['predicted_promotion'])
infos['predicted_promotion'] = [d.date() for d in infos['predicted_promotion']]

In [8]:
infos['is_promotion'] = 1

In [9]:
infos[infos['itemID'] == 5010]

Unnamed: 0,index,itemID,simulationPrice,promotion,predicted_promotion,is_promotion
5803,5009,5010,5.95,2018-07-10,2018-04-30,1
5804,5009,5010,5.95,2018-07-10,2018-05-23,1
5805,5009,5010,5.95,2018-07-10,2018-06-10,1
5806,5009,5010,5.95,2018-07-10,2018-06-13,1
5807,5009,5010,5.95,2018-07-10,2018-06-21,1


In [10]:
test = items_simple[items_simple['recentlySold'] < 30][['itemID']]

In [14]:
test['date'] = datetime.date(2018,7,1)

In [15]:
test['order'] = 1
test = pd.merge(left=test, right=infos[['simulationPrice', 'itemID']], on='itemID').rename(columns={'simulationPrice': 'salesPrice'})
test['transactID'] = 1
test['time'] = 1
test = test[df_train.columns]

In [16]:
test.columns

Index(['time', 'transactID', 'itemID', 'order', 'salesPrice', 'date'], dtype='object')

In [17]:
train = df_train
# test = df_test
all_data = train.append(test)

In [18]:
# aggregate sales per day
aggregated = all_data.groupby(['itemID','date']).sum()["order"].to_frame().reset_index().rename(columns={'order':'count'})
aggregated_sp = all_data.groupby(['itemID','date']).mean()["salesPrice"].to_frame().reset_index().rename(columns={'salesPrice':'avg_salesPrice'})
# aggregated_promotion = all_data.merge(infos, on='itemID').groupby(['itemID','date']).count()["predicted_promotion"].to_frame().reset_index()



In [19]:
aggregated = pd.merge(aggregated,aggregated_sp,how='left')
aggregated = pd.merge(aggregated, infos['itemID predicted_promotion'.split()], left_on=['itemID', 'date'], right_on=['itemID', 'predicted_promotion'], how='left')
aggregated['predicted_promotion'] = aggregated['predicted_promotion'].apply(lambda x: isinstance(x, datetime.date))

In [20]:
aggregated.head()

Unnamed: 0,itemID,date,count,avg_salesPrice,predicted_promotion
0,1,2018-01-23,1,3.11,False
1,1,2018-01-25,1,3.11,False
2,1,2018-01-29,307,3.11,True
3,1,2018-01-30,3,3.11,False
4,1,2018-01-31,1,3.11,False


In [21]:
# (currently unused)
def add_datepart(df, fldname, drop=True):
    """Add additional date information to dataframe"""
    fld = df[fldname]
    if not np.issubdtype(fld.dtype, np.datetime64):
        df[fldname] = fld = pd.to_datetime(fld, 
                                     infer_datetime_format=True)
    targ_pre = re.sub('[Dd]ate$', '', fldname)
    for n in ('Year', 'Month', 'Week', 'Day', 'Dayofweek', 
            'Dayofyear', 'Is_month_end', 'Is_month_start', 
            'Is_quarter_end', 'Is_quarter_start', 'Is_year_end', 
            'Is_year_start'):
        df[targ_pre+n] = getattr(fld.dt,n.lower())
        
    df[targ_pre+'Elapsed'] = fld.astype(np.int64) // 10**9 
    if drop: df.drop(fldname, axis=1, inplace=True)

In [22]:
import numpy as np
import re
# add additional date information
add_datepart(aggregated, 'date', drop = False)
add_datepart(aggregated, 'date', drop = False)

In [23]:
aggregated["WeekGroup"] = aggregated.apply(lambda row: (row["Dayofyear"] + 1) // 14 , axis=1)

In [24]:
aggregated = aggregated[aggregated["WeekGroup"] > 0]

In [25]:
aggregated.head()

Unnamed: 0,itemID,date,count,avg_salesPrice,predicted_promotion,Year,Month,Week,Day,Dayofweek,Dayofyear,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,Is_year_end,Is_year_start,Elapsed,WeekGroup
0,1,2018-01-23,1,3.11,False,2018,1,4,23,1,23,False,False,False,False,False,False,1516665600,1
1,1,2018-01-25,1,3.11,False,2018,1,4,25,3,25,False,False,False,False,False,False,1516838400,1
2,1,2018-01-29,307,3.11,True,2018,1,5,29,0,29,False,False,False,False,False,False,1517184000,2
3,1,2018-01-30,3,3.11,False,2018,1,5,30,1,30,False,False,False,False,False,False,1517270400,2
4,1,2018-01-31,1,3.11,False,2018,1,5,31,2,31,True,False,False,False,False,False,1517356800,2


In [27]:
from itertools import product
# Create "grid" with columns
index_cols = ['itemID', 'WeekGroup']

grid = []
for block_num in aggregated['WeekGroup'].unique():
    cur_items = aggregated.loc[aggregated['WeekGroup'] == block_num, 'itemID'].unique()
    grid.append(np.array(list(product(*[cur_items, [block_num]])),dtype='int32'))

grid = pd.DataFrame(np.vstack(grid), columns = index_cols,dtype=np.int32)
grid.head()

Unnamed: 0,itemID,WeekGroup
0,1,1
1,3,1
2,4,1
3,5,1
4,8,1


In [28]:
train_m = aggregated.groupby(['WeekGroup','itemID']).agg({'count': 'sum','avg_salesPrice': np.mean, 'predicted_promotion': 'sum'}).reset_index()

# Merging sales numbers with the grid dataframe
train_m = pd.merge(grid,train_m,on=['WeekGroup','itemID'],how='left').fillna(0)

In [29]:
train_m[train_m['itemID'] == 1]

Unnamed: 0,itemID,WeekGroup,count,avg_salesPrice,predicted_promotion
0,1,1,2,3.11,0.0
1664,1,2,313,3.11,1.0
3278,1,3,35,3.11,0.0
5156,1,4,3,3.11,0.0
7532,1,5,1,3.11,0.0
10446,1,6,1,3.11,0.0
13614,1,7,2,3.11,0.0
16947,1,8,299,3.11,1.0
20689,1,9,3,3.11,0.0
24597,1,10,31,3.11,0.0


In [30]:
### add 0 sales for missing dates per itemID
for prod in train_m.itemID.unique():
    s = train_m.loc[train_m['itemID'] == prod][["WeekGroup","count","avg_salesPrice", "predicted_promotion"]]
    s = s.set_index("WeekGroup")
    idx = range(s.index.min(), s.index.max()+1)
    s = s.reindex(idx)
#     s = s.reindex(idx, fill_value=0)
    s['weekGroup'] = s.index
    if "df_week" not in globals():
        s["itemID"] = prod
        df_week = s
    else:
        s["itemID"] = prod
        df_week = pd.concat([df_week,s], ignore_index=True)
df_week.head(3)

Unnamed: 0,count,avg_salesPrice,predicted_promotion,weekGroup,itemID
0,2.0,3.11,0.0,1,1
1,313.0,3.11,1.0,2,1
2,35.0,3.11,0.0,3,1


In [31]:
df_week.reset_index(inplace=True)
df_week[df_week['itemID']==5]

Unnamed: 0,index,count,avg_salesPrice,predicted_promotion,weekGroup,itemID
39,39,1.0,7.84,0.0,1,5
40,40,,,,2,5
41,41,1.0,7.84,0.0,3,5
42,42,,,,4,5
43,43,,,,5,5
44,44,,,,6,5
45,45,127.0,7.6,1.0,7,5
46,46,4.0,7.48,0.0,8,5
47,47,39.0,7.48,0.0,9,5
48,48,3.0,7.48,0.0,10,5


In [32]:
df_week.sort_values(by=['itemID', 'weekGroup'], inplace=True)
df_week['count'].fillna(value=0, inplace=True)
df_week['predicted_promotion'].fillna(value=0, inplace=True)
# df_week['avg_salesPrice'].fillna(value=df_week.groupby('itemID')['avg_salesPrice'].transform('mean'), inplace=True)
# df_week['avg_salesPrice'].fillna(value=0, inplace=True)
df_week['avg_salesPrice'].fillna(method='ffill', inplace=True)

In [33]:
# adding the category id too from the items table.
df_m = pd.merge(df_week,items_simple,on=['itemID'],how='left')

In [34]:
df_m['dateFirstSell'] = [d.month for d in df_m['dateFirstSell']]

In [35]:
df_m['retail_price_diff'] = df_m.apply(lambda x: (x.avg_salesPrice - x.recommendedRetailPrice)/x.recommendedRetailPrice, axis = 1)
df_m['price_fluctuate'] = df_m.groupby('itemID').avg_salesPrice.shift(1)
df_m['price_fluctuate'] = df_m.groupby('itemID')['price_fluctuate'].fillna(method='bfill')
df_m['price_fluctuate'] = df_m.apply(lambda x: 0 if x.price_fluctuate == x.avg_salesPrice 
                                     else (-1 if x.price_fluctuate > x.avg_salesPrice else 1), axis = 1)

In [36]:
df_m[df_m['itemID']==5]

Unnamed: 0,index,count,avg_salesPrice,predicted_promotion,weekGroup,itemID,customerRating,manufacturer,brand,category1,category2,category3,recommendedRetailPrice,dateFirstSell,soldWeekly,recentlySold,retail_price_diff,price_fluctuate
46,39,1.0,7.84,0.0,1,5,2.33,2,0,0,0,0,17.04,1,False,7.0,-0.539906,0
47,40,0.0,7.84,0.0,2,5,2.33,2,0,0,0,0,17.04,1,False,7.0,-0.539906,0
48,41,1.0,7.84,0.0,3,5,2.33,2,0,0,0,0,17.04,1,False,7.0,-0.539906,0
49,42,0.0,7.84,0.0,4,5,2.33,2,0,0,0,0,17.04,1,False,7.0,-0.539906,0
50,43,0.0,7.84,0.0,5,5,2.33,2,0,0,0,0,17.04,1,False,7.0,-0.539906,0
51,44,0.0,7.84,0.0,6,5,2.33,2,0,0,0,0,17.04,1,False,7.0,-0.539906,0
52,45,127.0,7.6,1.0,7,5,2.33,2,0,0,0,0,17.04,1,False,7.0,-0.553991,-1
53,46,4.0,7.48,0.0,8,5,2.33,2,0,0,0,0,17.04,1,False,7.0,-0.561033,-1
54,47,39.0,7.48,0.0,9,5,2.33,2,0,0,0,0,17.04,1,False,7.0,-0.561033,-1
55,48,3.0,7.48,0.0,10,5,2.33,2,0,0,0,0,17.04,1,False,7.0,-0.561033,1


In [37]:
groupcollist = ['brand','category1','category3']
aggregationlist = [('avg_salesPrice', np.mean, 'avg'),('count', np.sum, 'sum'),('count', np.mean, 'avg')]

for type_id in groupcollist:
    for column_id,aggregator,aggtype in aggregationlist:
        # get numbers from sales data and set column names
        mean_df = df_m.groupby([type_id,'weekGroup']).aggregate(aggregator).reset_index()[[column_id,type_id,'weekGroup']]
        mean_df.columns = [type_id+'_'+aggtype+'_'+column_id,type_id,'weekGroup']
        # merge new columns on sales_m data
        df_m = pd.merge(df_m,mean_df,on=['weekGroup',type_id],how='left')

In [38]:
df_m[df_m['itemID']==1]

Unnamed: 0,index,count,avg_salesPrice,predicted_promotion,weekGroup,itemID,customerRating,manufacturer,brand,category1,category2,category3,recommendedRetailPrice,dateFirstSell,soldWeekly,recentlySold,retail_price_diff,price_fluctuate,brand_avg_avg_salesPrice,brand_sum_count,brand_avg_count,category1_avg_avg_salesPrice,category1_sum_count,category1_avg_count,category3_avg_avg_salesPrice,category3_sum_count,category3_avg_count
0,0,2.0,3.11,0.0,1,1,4.38,1,0,0,0,0,8.84,1,False,4.0,-0.64819,0,80.488057,55881.0,63.429058,138.274918,18579.0,42.710345,116.237975,6196.0,43.027778
1,1,313.0,3.11,1.0,2,1,4.38,1,0,0,0,0,8.84,1,False,4.0,-0.64819,1,79.411863,58626.0,54.841908,126.672261,12467.0,22.708561,97.324982,7530.0,37.277228
2,2,35.0,3.11,0.0,3,1,4.38,1,0,0,0,0,8.84,1,False,4.0,-0.64819,-1,99.114054,55269.0,42.580123,161.402185,15306.0,22.279476,164.694577,8329.0,32.662745
3,3,3.0,3.11,0.0,4,1,4.38,1,0,0,0,0,8.84,1,False,4.0,-0.64819,0,99.941989,133488.0,76.585198,162.040587,17479.0,21.186667,149.429555,6014.0,20.954704
4,4,1.0,3.11,0.0,5,1,4.38,1,0,0,0,0,8.84,1,False,4.0,-0.64819,0,98.063839,166790.0,78.711656,153.780803,17300.0,18.502674,162.54288,6006.0,18.423313
5,5,1.0,3.11,0.0,6,1,4.38,1,0,0,0,0,8.84,1,False,4.0,-0.64819,0,87.401705,105227.0,46.41685,144.235071,16499.0,16.499,151.609726,3800.0,11.143695
6,6,2.0,3.11,0.0,7,1,4.38,1,0,0,0,0,8.84,1,False,4.0,-0.64819,0,86.288317,118255.0,47.721953,147.427073,14736.0,14.007605,153.49006,6581.0,18.07967
7,7,299.0,3.11,1.0,8,1,4.38,1,0,0,0,0,8.84,1,False,4.0,-0.64819,1,99.660895,153578.0,57.155936,152.308299,31313.0,26.269295,191.227146,14797.0,34.17321
8,8,3.0,3.11,0.0,9,1,4.38,1,0,0,0,0,8.84,1,False,4.0,-0.64819,-1,78.084628,130916.0,48.148584,138.7249,17704.0,15.131624,137.877245,6727.0,16.209639
9,9,31.0,3.11,0.0,10,1,4.38,1,0,0,0,0,8.84,1,False,4.0,-0.64819,0,79.734311,125886.0,44.561416,135.78201,14371.0,12.67284,140.250683,4605.0,12.023499


In [39]:
from sklearn.preprocessing import MinMaxScaler
# scale data between -1 & 1
scaler = MinMaxScaler()
scale_col = ['count', 'avg_salesPrice', 'customerRating', 'retail_price_diff', 
             'brand_avg_avg_salesPrice', 'brand_sum_count', 'brand_avg_count', 
             'category1_avg_avg_salesPrice', 'category1_sum_count', 'category1_avg_count', 
             'category3_avg_avg_salesPrice', 'category3_sum_count', 'category3_avg_count']
for c in scale_col:
    scaler.fit(df_m[[c]])
    df_m[c] = pd.DataFrame(scaler.transform(df_m[[c]]))[0]

In [40]:
df_m[df_m['itemID']==10]

Unnamed: 0,index,count,avg_salesPrice,predicted_promotion,weekGroup,itemID,customerRating,manufacturer,brand,category1,category2,category3,recommendedRetailPrice,dateFirstSell,soldWeekly,recentlySold,retail_price_diff,price_fluctuate,brand_avg_avg_salesPrice,brand_sum_count,brand_avg_count,category1_avg_avg_salesPrice,category1_sum_count,category1_avg_count,category3_avg_avg_salesPrice,category3_sum_count,category3_avg_count
107,22740,0.074135,0.002621,1.0,3,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.267945,0,0.024212,0.331369,0.022722,0.321645,0.147985,0.062379,0.124626,0.13021,0.034527
108,22741,0.000207,0.00254,0.0,4,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,-1,0.024417,0.800336,0.040867,0.32306,0.169304,0.059173,0.112982,0.094019,0.022151
109,22742,0.013046,0.00254,0.0,5,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.023952,1.0,0.042002,0.304751,0.167548,0.051298,0.122985,0.093894,0.019475
110,22743,0.000207,0.00254,0.0,6,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.021312,0.630895,0.024769,0.28359,0.15969,0.04542,0.114645,0.059407,0.01178
111,22744,0.000207,0.00254,0.0,7,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.021037,0.709005,0.025465,0.290666,0.142393,0.03811,0.116079,0.102883,0.019112
112,22745,0.000828,0.00254,0.0,8,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.024348,0.920787,0.030499,0.301486,0.305026,0.074085,0.144864,0.231326,0.036124
113,22746,0.0,0.00254,0.0,9,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.019005,0.784915,0.025693,0.271376,0.171512,0.041408,0.10417,0.105165,0.017135
114,22747,0.000414,0.00254,0.0,10,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.019414,0.754757,0.023779,0.264852,0.138812,0.034194,0.105981,0.071991,0.01271
115,22748,0.000207,0.00254,0.0,11,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.016619,0.847677,0.027266,0.218929,0.18971,0.052564,0.095927,0.111309,0.021142
116,22749,0.000414,0.00254,0.0,12,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.017704,0.875982,0.024657,0.242805,0.277016,0.067531,0.118627,0.182159,0.02905


In [41]:
lag_variables  = ['count','avg_salesPrice',
                  'brand_avg_avg_salesPrice','brand_sum_count','brand_avg_count',
#                   'manufacturer_avg_avg_salesPrice','manufacturer_sum_count','manufacturer_avg_count',
                  'category1_avg_avg_salesPrice','category1_sum_count','category1_avg_count',
                  'category3_avg_avg_salesPrice','category3_sum_count','category3_avg_count']
lags = [1 ,2 ,3, 4, 5]
# we will keep the results in thsi dataframe
sales_means = df_m.copy()
for lag in lags:
    sales_new_df = df_m.copy()
    sales_new_df.weekGroup+=lag
    # subset only the lag variables we want
    sales_new_df = sales_new_df[['weekGroup','itemID']+lag_variables]
    sales_new_df.columns = ['weekGroup','itemID']+ [lag_feat+'_lag_'+str(lag) for lag_feat in lag_variables]
    # join with date_block_num,shop_id and item_id
    sales_means = pd.merge(sales_means, sales_new_df,on=['weekGroup','itemID'] ,how='left')

In [42]:
for feat in sales_means.columns:
    if 'count' in feat:
        sales_means[feat]=sales_means[feat].fillna(0)
    elif 'salesPrice' or 'diff' in feat:
#         sales_means[feat]=sales_means[feat].fillna(sales_means[feat].median())
        sales_means[feat]=sales_means[feat].fillna(value=sales_means.groupby('itemID')[feat].transform('mean'))

In [43]:
sales_means[sales_means['itemID']==10]

Unnamed: 0,index,count,avg_salesPrice,predicted_promotion,weekGroup,itemID,customerRating,manufacturer,brand,category1,category2,category3,recommendedRetailPrice,dateFirstSell,soldWeekly,recentlySold,retail_price_diff,price_fluctuate,brand_avg_avg_salesPrice,brand_sum_count,brand_avg_count,category1_avg_avg_salesPrice,category1_sum_count,category1_avg_count,category3_avg_avg_salesPrice,category3_sum_count,category3_avg_count,count_lag_1,avg_salesPrice_lag_1,brand_avg_avg_salesPrice_lag_1,brand_sum_count_lag_1,brand_avg_count_lag_1,category1_avg_avg_salesPrice_lag_1,category1_sum_count_lag_1,category1_avg_count_lag_1,category3_avg_avg_salesPrice_lag_1,category3_sum_count_lag_1,category3_avg_count_lag_1,count_lag_2,avg_salesPrice_lag_2,brand_avg_avg_salesPrice_lag_2,brand_sum_count_lag_2,brand_avg_count_lag_2,category1_avg_avg_salesPrice_lag_2,category1_sum_count_lag_2,category1_avg_count_lag_2,category3_avg_avg_salesPrice_lag_2,category3_sum_count_lag_2,category3_avg_count_lag_2,count_lag_3,avg_salesPrice_lag_3,brand_avg_avg_salesPrice_lag_3,brand_sum_count_lag_3,brand_avg_count_lag_3,category1_avg_avg_salesPrice_lag_3,category1_sum_count_lag_3,category1_avg_count_lag_3,category3_avg_avg_salesPrice_lag_3,category3_sum_count_lag_3,category3_avg_count_lag_3,count_lag_4,avg_salesPrice_lag_4,brand_avg_avg_salesPrice_lag_4,brand_sum_count_lag_4,brand_avg_count_lag_4,category1_avg_avg_salesPrice_lag_4,category1_sum_count_lag_4,category1_avg_count_lag_4,category3_avg_avg_salesPrice_lag_4,category3_sum_count_lag_4,category3_avg_count_lag_4,count_lag_5,avg_salesPrice_lag_5,brand_avg_avg_salesPrice_lag_5,brand_sum_count_lag_5,brand_avg_count_lag_5,category1_avg_avg_salesPrice_lag_5,category1_sum_count_lag_5,category1_avg_count_lag_5,category3_avg_avg_salesPrice_lag_5,category3_sum_count_lag_5,category3_avg_count_lag_5
107,22740,0.074135,0.002621,1.0,3,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.267945,0,0.024212,0.331369,0.022722,0.321645,0.147985,0.062379,0.124626,0.13021,0.034527,0.0,0.002548,0.021202,0.0,0.0,0.282316,0.0,0.0,0.116089,0.0,0.0,0.0,0.002549,0.021591,0.0,0.0,0.286706,0.0,0.0,0.115807,0.0,0.0,0.0,0.00255,0.022212,0.0,0.0,0.295178,0.0,0.0,0.118292,0.0,0.0,0.0,0.002551,0.022612,0.0,0.0,0.299511,0.0,0.0,0.12005,0.0,0.0,0.0,0.002553,0.023213,0.0,0.0,0.3042,0.0,0.0,0.122697,0.0,0.0
108,22741,0.000207,0.00254,0.0,4,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,-1,0.024417,0.800336,0.040867,0.32306,0.169304,0.059173,0.112982,0.094019,0.022151,0.074135,0.002621,0.024212,0.331369,0.022722,0.321645,0.147985,0.062379,0.124626,0.13021,0.034527,0.0,0.002549,0.021591,0.0,0.0,0.286706,0.0,0.0,0.115807,0.0,0.0,0.0,0.00255,0.022212,0.0,0.0,0.295178,0.0,0.0,0.118292,0.0,0.0,0.0,0.002551,0.022612,0.0,0.0,0.299511,0.0,0.0,0.12005,0.0,0.0,0.0,0.002553,0.023213,0.0,0.0,0.3042,0.0,0.0,0.122697,0.0,0.0
109,22742,0.013046,0.00254,0.0,5,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.023952,1.0,0.042002,0.304751,0.167548,0.051298,0.122985,0.093894,0.019475,0.000207,0.00254,0.024417,0.800336,0.040867,0.32306,0.169304,0.059173,0.112982,0.094019,0.022151,0.074135,0.002621,0.024212,0.331369,0.022722,0.321645,0.147985,0.062379,0.124626,0.13021,0.034527,0.0,0.00255,0.022212,0.0,0.0,0.295178,0.0,0.0,0.118292,0.0,0.0,0.0,0.002551,0.022612,0.0,0.0,0.299511,0.0,0.0,0.12005,0.0,0.0,0.0,0.002553,0.023213,0.0,0.0,0.3042,0.0,0.0,0.122697,0.0,0.0
110,22743,0.000207,0.00254,0.0,6,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.021312,0.630895,0.024769,0.28359,0.15969,0.04542,0.114645,0.059407,0.01178,0.013046,0.00254,0.023952,1.0,0.042002,0.304751,0.167548,0.051298,0.122985,0.093894,0.019475,0.000207,0.00254,0.024417,0.800336,0.040867,0.32306,0.169304,0.059173,0.112982,0.094019,0.022151,0.074135,0.002621,0.024212,0.331369,0.022722,0.321645,0.147985,0.062379,0.124626,0.13021,0.034527,0.0,0.002551,0.022612,0.0,0.0,0.299511,0.0,0.0,0.12005,0.0,0.0,0.0,0.002553,0.023213,0.0,0.0,0.3042,0.0,0.0,0.122697,0.0,0.0
111,22744,0.000207,0.00254,0.0,7,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.021037,0.709005,0.025465,0.290666,0.142393,0.03811,0.116079,0.102883,0.019112,0.000207,0.00254,0.021312,0.630895,0.024769,0.28359,0.15969,0.04542,0.114645,0.059407,0.01178,0.013046,0.00254,0.023952,1.0,0.042002,0.304751,0.167548,0.051298,0.122985,0.093894,0.019475,0.000207,0.00254,0.024417,0.800336,0.040867,0.32306,0.169304,0.059173,0.112982,0.094019,0.022151,0.074135,0.002621,0.024212,0.331369,0.022722,0.321645,0.147985,0.062379,0.124626,0.13021,0.034527,0.0,0.002553,0.023213,0.0,0.0,0.3042,0.0,0.0,0.122697,0.0,0.0
112,22745,0.000828,0.00254,0.0,8,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.024348,0.920787,0.030499,0.301486,0.305026,0.074085,0.144864,0.231326,0.036124,0.000207,0.00254,0.021037,0.709005,0.025465,0.290666,0.142393,0.03811,0.116079,0.102883,0.019112,0.000207,0.00254,0.021312,0.630895,0.024769,0.28359,0.15969,0.04542,0.114645,0.059407,0.01178,0.013046,0.00254,0.023952,1.0,0.042002,0.304751,0.167548,0.051298,0.122985,0.093894,0.019475,0.000207,0.00254,0.024417,0.800336,0.040867,0.32306,0.169304,0.059173,0.112982,0.094019,0.022151,0.074135,0.002621,0.024212,0.331369,0.022722,0.321645,0.147985,0.062379,0.124626,0.13021,0.034527
113,22746,0.0,0.00254,0.0,9,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.019005,0.784915,0.025693,0.271376,0.171512,0.041408,0.10417,0.105165,0.017135,0.000828,0.00254,0.024348,0.920787,0.030499,0.301486,0.305026,0.074085,0.144864,0.231326,0.036124,0.000207,0.00254,0.021037,0.709005,0.025465,0.290666,0.142393,0.03811,0.116079,0.102883,0.019112,0.000207,0.00254,0.021312,0.630895,0.024769,0.28359,0.15969,0.04542,0.114645,0.059407,0.01178,0.013046,0.00254,0.023952,1.0,0.042002,0.304751,0.167548,0.051298,0.122985,0.093894,0.019475,0.000207,0.00254,0.024417,0.800336,0.040867,0.32306,0.169304,0.059173,0.112982,0.094019,0.022151
114,22747,0.000414,0.00254,0.0,10,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.019414,0.754757,0.023779,0.264852,0.138812,0.034194,0.105981,0.071991,0.01271,0.0,0.00254,0.019005,0.784915,0.025693,0.271376,0.171512,0.041408,0.10417,0.105165,0.017135,0.000828,0.00254,0.024348,0.920787,0.030499,0.301486,0.305026,0.074085,0.144864,0.231326,0.036124,0.000207,0.00254,0.021037,0.709005,0.025465,0.290666,0.142393,0.03811,0.116079,0.102883,0.019112,0.000207,0.00254,0.021312,0.630895,0.024769,0.28359,0.15969,0.04542,0.114645,0.059407,0.01178,0.013046,0.00254,0.023952,1.0,0.042002,0.304751,0.167548,0.051298,0.122985,0.093894,0.019475
115,22748,0.000207,0.00254,0.0,11,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.016619,0.847677,0.027266,0.218929,0.18971,0.052564,0.095927,0.111309,0.021142,0.000414,0.00254,0.019414,0.754757,0.023779,0.264852,0.138812,0.034194,0.105981,0.071991,0.01271,0.0,0.00254,0.019005,0.784915,0.025693,0.271376,0.171512,0.041408,0.10417,0.105165,0.017135,0.000828,0.00254,0.024348,0.920787,0.030499,0.301486,0.305026,0.074085,0.144864,0.231326,0.036124,0.000207,0.00254,0.021037,0.709005,0.025465,0.290666,0.142393,0.03811,0.116079,0.102883,0.019112,0.000207,0.00254,0.021312,0.630895,0.024769,0.28359,0.15969,0.04542,0.114645,0.059407,0.01178
116,22749,0.000414,0.00254,0.0,12,10,1.0,4,0,0,0,0,19.43,2,False,4.0,0.259647,0,0.017704,0.875982,0.024657,0.242805,0.277016,0.067531,0.118627,0.182159,0.02905,0.000207,0.00254,0.016619,0.847677,0.027266,0.218929,0.18971,0.052564,0.095927,0.111309,0.021142,0.000414,0.00254,0.019414,0.754757,0.023779,0.264852,0.138812,0.034194,0.105981,0.071991,0.01271,0.0,0.00254,0.019005,0.784915,0.025693,0.271376,0.171512,0.041408,0.10417,0.105165,0.017135,0.000828,0.00254,0.024348,0.920787,0.030499,0.301486,0.305026,0.074085,0.144864,0.231326,0.036124,0.000207,0.00254,0.021037,0.709005,0.025465,0.290666,0.142393,0.03811,0.116079,0.102883,0.019112


In [44]:
cols_to_drop = lag_variables[1:] + ['recommendedRetailPrice', 'category2', 'retail_price_diff', 'price_fluctuate']
cols_to_keep = [x for x in list(sales_means.columns) if x not in cols_to_drop]

In [43]:
# for col in cols_to_drop:
#     del sales_means[col]
# sales_means

In [44]:
# sales_means = sales_means[sales_means['WeekGroup']>2]

In [45]:
sales_means = sales_means[cols_to_keep]
sales_means = sales_means.drop(columns=["index"])#,"predicted_promotion"])

In [46]:
sales_means.groupby("weekGroup").count()

Unnamed: 0_level_0,count,predicted_promotion,itemID,customerRating,manufacturer,brand,category1,category3,dateFirstSell,soldWeekly,recentlySold,count_lag_1,avg_salesPrice_lag_1,brand_avg_avg_salesPrice_lag_1,brand_sum_count_lag_1,brand_avg_count_lag_1,category1_avg_avg_salesPrice_lag_1,category1_sum_count_lag_1,category1_avg_count_lag_1,category3_avg_avg_salesPrice_lag_1,category3_sum_count_lag_1,category3_avg_count_lag_1,count_lag_2,avg_salesPrice_lag_2,brand_avg_avg_salesPrice_lag_2,brand_sum_count_lag_2,brand_avg_count_lag_2,category1_avg_avg_salesPrice_lag_2,category1_sum_count_lag_2,category1_avg_count_lag_2,category3_avg_avg_salesPrice_lag_2,category3_sum_count_lag_2,category3_avg_count_lag_2,count_lag_3,avg_salesPrice_lag_3,brand_avg_avg_salesPrice_lag_3,brand_sum_count_lag_3,brand_avg_count_lag_3,category1_avg_avg_salesPrice_lag_3,category1_sum_count_lag_3,category1_avg_count_lag_3,category3_avg_avg_salesPrice_lag_3,category3_sum_count_lag_3,category3_avg_count_lag_3,count_lag_4,avg_salesPrice_lag_4,brand_avg_avg_salesPrice_lag_4,brand_sum_count_lag_4,brand_avg_count_lag_4,category1_avg_avg_salesPrice_lag_4,category1_sum_count_lag_4,category1_avg_count_lag_4,category3_avg_avg_salesPrice_lag_4,category3_sum_count_lag_4,category3_avg_count_lag_4,count_lag_5,avg_salesPrice_lag_5,brand_avg_avg_salesPrice_lag_5,brand_sum_count_lag_5,brand_avg_count_lag_5,category1_avg_avg_salesPrice_lag_5,category1_sum_count_lag_5,category1_avg_count_lag_5,category3_avg_avg_salesPrice_lag_5,category3_sum_count_lag_5,category3_avg_count_lag_5
weekGroup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1
1,1664,1664,1664,1664,1664,1664,1664,1664,1664,1664,1664,1664,1591,1591,1664,1664,1591,1664,1664,1591,1664,1664,1664,1567,1567,1664,1664,1567,1664,1664,1567,1664,1664,1664,1534,1534,1664,1664,1534,1664,1664,1534,1664,1664,1664,1501,1501,1664,1664,1501,1664,1664,1501,1664,1664,1664,1458,1458,1664,1664,1458,1664,1664,1458,1664,1664
2,2152,2152,2152,2152,2152,2152,2152,2152,2152,2152,2152,2152,2102,2102,2152,2152,2102,2152,2152,2102,2152,2152,2152,2065,2065,2152,2152,2065,2152,2152,2065,2152,2152,2152,2015,2015,2152,2152,2015,2152,2152,2015,2152,2152,2152,1965,1965,2152,2152,1965,2152,2152,1965,2152,2152,2152,1890,1890,2152,2152,1890,2152,2152,1890,2152,2152
3,2685,2685,2685,2685,2685,2685,2685,2685,2685,2685,2685,2685,2648,2648,2685,2685,2648,2685,2685,2648,2685,2685,2685,2589,2589,2685,2685,2589,2685,2685,2589,2685,2685,2685,2508,2508,2685,2685,2508,2685,2685,2508,2685,2685,2685,2430,2430,2685,2685,2430,2685,2685,2430,2685,2685,2685,2315,2315,2685,2685,2315,2685,2685,2315,2685,2685
4,3423,3423,3423,3423,3423,3423,3423,3423,3423,3423,3423,3423,3324,3324,3423,3423,3324,3423,3423,3324,3423,3423,3423,3206,3206,3423,3423,3206,3423,3423,3206,3423,3423,3423,3098,3098,3423,3423,3098,3423,3423,3098,3423,3423,3423,2952,2952,3423,3423,2952,3423,3423,2952,3423,3423,3423,2792,2792,3423,3423,2792,3423,3423,2792,3423,3423
5,4166,4166,4166,4166,4166,4166,4166,4166,4166,4166,4166,4166,4025,4025,4166,4166,4025,4166,4166,4025,4166,4166,4166,3895,3895,4166,4166,3895,4166,4166,3895,4166,4166,4166,3747,3747,4166,4166,3747,4166,4166,3747,4166,4166,4166,3575,3575,4166,4166,3575,4166,4166,3575,4166,4166,4166,3360,3360,4166,4166,3360,4166,4166,3360,4166,4166
6,4606,4606,4606,4606,4606,4606,4606,4606,4606,4606,4606,4606,4471,4471,4606,4606,4471,4606,4606,4471,4606,4606,4606,4334,4334,4606,4606,4334,4606,4606,4334,4606,4606,4606,4169,4169,4606,4606,4169,4606,4606,4169,4606,4606,4606,3970,3970,4606,4606,3970,4606,4606,3970,4606,4606,4606,3741,3741,4606,4606,3741,4606,4606,3741,4606,4606
7,4915,4915,4915,4915,4915,4915,4915,4915,4915,4915,4915,4915,4796,4796,4915,4915,4796,4915,4915,4796,4915,4915,4915,4659,4659,4915,4915,4659,4915,4915,4659,4915,4915,4915,4499,4499,4915,4915,4499,4915,4915,4499,4915,4915,4915,4269,4269,4915,4915,4269,4915,4915,4269,4915,4915,4915,4072,4072,4915,4915,4072,4915,4915,4072,4915,4915
8,5269,5269,5269,5269,5269,5269,5269,5269,5269,5269,5269,5269,5149,5149,5269,5269,5149,5269,5269,5149,5269,5269,5269,5027,5027,5269,5269,5027,5269,5269,5027,5269,5269,5269,4844,4844,5269,5269,4844,5269,5269,4844,5269,5269,5269,4682,4682,5269,5269,4682,5269,5269,4682,5269,5269,5269,4525,4525,5269,5269,4525,5269,5269,4525,5269,5269
9,5479,5479,5479,5479,5479,5479,5479,5479,5479,5479,5479,5479,5325,5325,5479,5479,5325,5479,5479,5325,5479,5479,5479,5183,5183,5479,5479,5183,5479,5479,5183,5479,5479,5479,5048,5048,5479,5479,5048,5479,5479,5048,5479,5479,5479,4945,4945,5479,5479,4945,5479,5479,4945,5479,5479,5479,4387,4387,5479,5479,4387,5479,5479,4387,5479,5479
10,5694,5694,5694,5694,5694,5694,5694,5694,5694,5694,5694,5694,5555,5555,5694,5694,5555,5694,5694,5555,5694,5694,5694,5477,5477,5694,5694,5477,5694,5694,5477,5694,5694,5694,5397,5397,5694,5694,5397,5694,5694,5397,5694,5694,5694,4673,4673,5694,5694,4673,5694,5694,4673,5694,5694,5694,4170,4170,5694,5694,4170,5694,5694,4170,5694,5694


In [47]:
sales_means.to_csv('./data/sales_means_all.csv', index=False, sep='|')

In [48]:
sales_means.head()

Unnamed: 0,count,predicted_promotion,weekGroup,itemID,customerRating,manufacturer,brand,category1,category3,dateFirstSell,soldWeekly,recentlySold,count_lag_1,avg_salesPrice_lag_1,brand_avg_avg_salesPrice_lag_1,brand_sum_count_lag_1,brand_avg_count_lag_1,category1_avg_avg_salesPrice_lag_1,category1_sum_count_lag_1,category1_avg_count_lag_1,category3_avg_avg_salesPrice_lag_1,category3_sum_count_lag_1,category3_avg_count_lag_1,count_lag_2,avg_salesPrice_lag_2,brand_avg_avg_salesPrice_lag_2,brand_sum_count_lag_2,brand_avg_count_lag_2,category1_avg_avg_salesPrice_lag_2,category1_sum_count_lag_2,category1_avg_count_lag_2,category3_avg_avg_salesPrice_lag_2,category3_sum_count_lag_2,category3_avg_count_lag_2,count_lag_3,avg_salesPrice_lag_3,brand_avg_avg_salesPrice_lag_3,brand_sum_count_lag_3,brand_avg_count_lag_3,category1_avg_avg_salesPrice_lag_3,category1_sum_count_lag_3,category1_avg_count_lag_3,category3_avg_avg_salesPrice_lag_3,category3_sum_count_lag_3,category3_avg_count_lag_3,count_lag_4,avg_salesPrice_lag_4,brand_avg_avg_salesPrice_lag_4,brand_sum_count_lag_4,brand_avg_count_lag_4,category1_avg_avg_salesPrice_lag_4,category1_sum_count_lag_4,category1_avg_count_lag_4,category3_avg_avg_salesPrice_lag_4,category3_sum_count_lag_4,category3_avg_count_lag_4,count_lag_5,avg_salesPrice_lag_5,brand_avg_avg_salesPrice_lag_5,brand_sum_count_lag_5,brand_avg_count_lag_5,category1_avg_avg_salesPrice_lag_5,category1_sum_count_lag_5,category1_avg_count_lag_5,category3_avg_avg_salesPrice_lag_5,category3_sum_count_lag_5,category3_avg_count_lag_5
0,0.000414,0.0,1,1,0.876,1,0,0,0,1,False,4.0,0.0,0.000299,0.020913,0.0,0.0,0.278183,0.0,0.0,0.110149,0.0,0.0,0.0,0.000296,0.021205,0.0,0.0,0.281399,0.0,0.0,0.109378,0.0,0.0,0.0,0.000296,0.021663,0.0,0.0,0.287646,0.0,0.0,0.110724,0.0,0.0,0.0,0.000296,0.021913,0.0,0.0,0.290179,0.0,0.0,0.111251,0.0,0.0,0.0,0.000296,0.022277,0.0,0.0,0.292529,0.0,0.0,0.112136,0.0,0.0
1,0.064817,1.0,2,1,0.876,1,0,0,0,1,False,4.0,0.000414,0.000296,0.0196,0.335038,0.033847,0.270378,0.180096,0.122322,0.087664,0.096864,0.045484,0.0,0.000296,0.021205,0.0,0.0,0.281399,0.0,0.0,0.109378,0.0,0.0,0.0,0.000296,0.021663,0.0,0.0,0.287646,0.0,0.0,0.110724,0.0,0.0,0.0,0.000296,0.021913,0.0,0.0,0.290179,0.0,0.0,0.111251,0.0,0.0,0.0,0.000296,0.022277,0.0,0.0,0.292529,0.0,0.0,0.112136,0.0,0.0
2,0.007248,0.0,3,1,0.876,1,0,0,0,1,False,4.0,0.064817,0.000296,0.019334,0.351496,0.029265,0.244659,0.120133,0.063638,0.073238,0.117719,0.039405,0.000414,0.000296,0.0196,0.335038,0.033847,0.270378,0.180096,0.122322,0.087664,0.096864,0.045484,0.0,0.000296,0.021663,0.0,0.0,0.287646,0.0,0.0,0.110724,0.0,0.0,0.0,0.000296,0.021913,0.0,0.0,0.290179,0.0,0.0,0.111251,0.0,0.0,0.0,0.000296,0.022277,0.0,0.0,0.292529,0.0,0.0,0.112136,0.0,0.0
3,0.000621,0.0,4,1,0.876,1,0,0,0,1,False,4.0,0.007248,0.000296,0.024212,0.331369,0.022722,0.321645,0.147985,0.062379,0.124626,0.13021,0.034527,0.064817,0.000296,0.019334,0.351496,0.029265,0.244659,0.120133,0.063638,0.073238,0.117719,0.039405,0.000414,0.000296,0.0196,0.335038,0.033847,0.270378,0.180096,0.122322,0.087664,0.096864,0.045484,0.0,0.000296,0.021913,0.0,0.0,0.290179,0.0,0.0,0.111251,0.0,0.0,0.0,0.000296,0.022277,0.0,0.0,0.292529,0.0,0.0,0.112136,0.0,0.0
4,0.000207,0.0,5,1,0.876,1,0,0,0,1,False,4.0,0.000621,0.000296,0.024417,0.800336,0.040867,0.32306,0.169304,0.059173,0.112982,0.094019,0.022151,0.007248,0.000296,0.024212,0.331369,0.022722,0.321645,0.147985,0.062379,0.124626,0.13021,0.034527,0.064817,0.000296,0.019334,0.351496,0.029265,0.244659,0.120133,0.063638,0.073238,0.117719,0.039405,0.000414,0.000296,0.0196,0.335038,0.033847,0.270378,0.180096,0.122322,0.087664,0.096864,0.045484,0.0,0.000296,0.022277,0.0,0.0,0.292529,0.0,0.0,0.112136,0.0,0.0


In [50]:
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']