In [62]:
# import packages
import pandas as pd
# import seaborn as sb
import matplotlib.pyplot as plt
import numpy as np
import datetime
import re
from sklearn.externals import joblib 
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from scipy.interpolate import interpn
from collections import defaultdict
from sklearn.model_selection import GridSearchCV

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
#load data
items = pd.read_csv('./data/ex_items_binary.csv', sep = '|', parse_dates=['dateFirstSell'])#ex_items_binary
recently_sold = pd.read_csv('./data/ex_items.csv', sep = '|', parse_dates=['dateFirstSell'])
items = pd.merge(left=items, right=recently_sold[['itemID', 'recentlySold']], on='itemID')
items.head()

Unnamed: 0,itemID,brand,manufacturer,customerRating,category1,category2,category3,recommendedRetailPrice,minSalesPrice,maxSalesPrice,meanSalesPrice,minSalesPriceCounts,maxSalesPriceCounts,unitsSold,revenue,relRevenue,dateFirstSell,dateLastSell,soldWeekly,soldMonthly,recentlySold
0,1,0,1,4.38,1,1-1,1-1-1,8.84,3.11,3.11,3.11,690.0,690.0,690.0,2145.9,2.949022e-05,2018-01-23,2018-05-24,False,True,9.0
1,2,0,2,3.0,1,1-2,1-2-1,16.92,9.15,9.15,9.15,5.0,5.0,5.0,45.75,6.287235e-07,2018-02-24,2018-05-29,False,False,4.0
2,3,0,3,5.0,1,1-3,1-3-1,15.89,9.89,14.04,11.918371,91.0,99.0,190.0,2289.95,3.146984e-05,2018-01-10,2018-05-16,False,False,17.0
3,4,0,2,4.44,1,1-2,1-2-1,40.17,13.01,13.01,13.01,53.0,53.0,53.0,689.53,9.475928e-06,2018-01-18,2018-05-30,False,False,3.0
4,5,0,2,2.33,1,1-1,1-1-1,17.04,7.48,7.84,7.740377,49.0,128.0,177.0,1370.04,1.88279e-05,2018-01-06,2018-05-30,False,False,3.0


In [4]:
items_simple = items[['itemID', 'customerRating', 'manufacturer', 'brand', 'category1', 'category2', 'category3','recommendedRetailPrice', 'dateFirstSell', 'soldWeekly', 'recentlySold']]

In [5]:
items_simple.category1 = pd.Categorical(items_simple.category1)
items_simple['category1'] = items_simple.category1.cat.codes
items_simple.category2 = pd.Categorical(items_simple.category2)
items_simple['category2'] = items_simple.category2.cat.codes
items_simple.category3 = pd.Categorical(items_simple.category3)
items_simple['category3'] = items_simple.category3.cat.codes
items_simple.head()

Unnamed: 0,itemID,customerRating,manufacturer,brand,category1,category2,category3,recommendedRetailPrice,dateFirstSell,soldWeekly,recentlySold
0,1,4.38,1,0,0,0,0,8.84,2018-01-23,False,9.0
1,2,3.0,2,0,0,1,1,16.92,2018-02-24,False,4.0
2,3,5.0,3,0,0,2,2,15.89,2018-01-10,False,17.0
3,4,4.44,2,0,0,1,1,40.17,2018-01-18,False,3.0
4,5,2.33,2,0,0,0,0,17.04,2018-01-06,False,3.0


In [6]:
# load data
df_train = pd.read_csv('data/orders0206_train.csv', sep='|', parse_dates=['time'])
df_train['date'] = [d.date() for d in df_train['time']]
df_test = pd.read_csv('data/orders0206_test.csv', sep='|', parse_dates=['time'])
df_test['date'] = [d.date() for d in df_test['time']]
df_items = pd.read_csv('data/items.csv', sep='|')

In [7]:
infos = pd.read_csv('data/infos_promotions.csv', sep='|', parse_dates=['predicted_promotion'])
infos['predicted_promotion'] = [d.date() for d in infos['predicted_promotion']]

In [8]:
infos['is_promotion'] = 1

In [9]:
infos[infos['itemID'] == 5010]

Unnamed: 0,index,itemID,simulationPrice,promotion,promotion_train,promotion_test,predicted_promotion,is_promotion
9965,5009,5010,5.95,2018-07-10,"[datetime.date(2018, 4, 30), datetime.date(201...","[datetime.date(2018, 6, 3), datetime.date(2018...",2018-04-30,1
9966,5009,5010,5.95,2018-07-10,"[datetime.date(2018, 4, 30), datetime.date(201...","[datetime.date(2018, 6, 3), datetime.date(2018...",2018-05-23,1
9967,5009,5010,5.95,2018-07-10,"[datetime.date(2018, 4, 30), datetime.date(201...","[datetime.date(2018, 6, 3), datetime.date(2018...",2018-06-03,1
9968,5009,5010,5.95,2018-07-10,"[datetime.date(2018, 4, 30), datetime.date(201...","[datetime.date(2018, 6, 3), datetime.date(2018...",2018-06-10,1
9969,5009,5010,5.95,2018-07-10,"[datetime.date(2018, 4, 30), datetime.date(201...","[datetime.date(2018, 6, 3), datetime.date(2018...",2018-06-13,1
9970,5009,5010,5.95,2018-07-10,"[datetime.date(2018, 4, 30), datetime.date(201...","[datetime.date(2018, 6, 3), datetime.date(2018...",2018-06-15,1


In [22]:
test = items_simple[items_simple['recentlySold'] < 30][['itemID']]

In [23]:
test['date'] = datetime.date(2018,6,2)

In [24]:
test['order'] = 1
test = pd.merge(left=test, right=infos[['simulationPrice', 'itemID']], on='itemID').rename(columns={'simulationPrice': 'salesPrice'})
test['transactID'] = 1
test['time'] = 1
test = test[df_train.columns]

In [25]:
test.columns

Index(['time', 'transactID', 'itemID', 'order', 'salesPrice', 'date'], dtype='object')

In [26]:
train = df_train
# test = df_test
all_data = train.append(test)

In [27]:
# aggregate sales per day
aggregated = all_data.groupby(['itemID','date']).sum()["order"].to_frame().reset_index().rename(columns={'order':'count'})
aggregated_sp = all_data.groupby(['itemID','date']).mean()["salesPrice"].to_frame().reset_index().rename(columns={'salesPrice':'avg_salesPrice'})
# aggregated_promotion = all_data.merge(infos, on='itemID').groupby(['itemID','date']).count()["predicted_promotion"].to_frame().reset_index()



In [12]:
# # aggregate sales per day
# train_aggregated = train.groupby(['itemID','date']).sum()["order"].to_frame().reset_index().rename(columns={'order':'count'})
# test_aggregated = test.groupby(['itemID','date']).sum()["order"].to_frame().reset_index().rename(columns={'order':'count'})
# # aggregate sales per day
# train_aggregated_sp = train.groupby(['itemID','date']).mean()["salesPrice"].to_frame().reset_index().rename(columns={'salesPrice':'avg_salesPrice'})
# test_aggregated_sp = test.groupby(['itemID','date']).mean()["salesPrice"].to_frame().reset_index().rename(columns={'salesPrice':'avg_salesPrice'})

In [28]:
aggregated = pd.merge(aggregated,aggregated_sp,how='left')
aggregated = pd.merge(aggregated, infos['itemID predicted_promotion'.split()], left_on=['itemID', 'date'], right_on=['itemID', 'predicted_promotion'], how='left')
aggregated['predicted_promotion'] = aggregated['predicted_promotion'].apply(lambda x: isinstance(x, datetime.date))

In [29]:
aggregated.head()

Unnamed: 0,itemID,date,count,avg_salesPrice,predicted_promotion
0,1,2018-01-23,1,3.11,False
1,1,2018-01-25,1,3.11,False
2,1,2018-01-29,307,3.11,True
3,1,2018-01-30,3,3.11,False
4,1,2018-01-31,1,3.11,False


In [30]:
# (currently unused)
def add_datepart(df, fldname, drop=True):
#     https://towardsdatascience.com/multivariate-time-series-forecasting-using-random-forest-2372f3ecbad1
    """Add additional date information to dataframe"""
    fld = df[fldname]
    if not np.issubdtype(fld.dtype, np.datetime64):
        df[fldname] = fld = pd.to_datetime(fld, 
                                     infer_datetime_format=True)
    targ_pre = re.sub('[Dd]ate$', '', fldname)
    for n in ('Year', 'Month', 'Week', 'Day', 'Dayofweek', 
            'Dayofyear', 'Is_month_end', 'Is_month_start', 
            'Is_quarter_end', 'Is_quarter_start', 'Is_year_end', 
            'Is_year_start'):
        df[targ_pre+n] = getattr(fld.dt,n.lower())
        
    df[targ_pre+'Elapsed'] = fld.astype(np.int64) // 10**9 
    if drop: df.drop(fldname, axis=1, inplace=True)

In [31]:
import numpy as np
import re
# add additional date information
add_datepart(aggregated, 'date', drop = False)
add_datepart(aggregated, 'date', drop = False)

In [32]:
aggregated["WeekGroup"] = aggregated.apply(lambda row: (row["Dayofyear"] + 1) // 14 , axis=1)

In [33]:
aggregated = aggregated[aggregated["WeekGroup"] > 0]

In [34]:
aggregated.head()

Unnamed: 0,itemID,date,count,avg_salesPrice,predicted_promotion,Year,Month,Week,Day,Dayofweek,Dayofyear,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,Is_year_end,Is_year_start,Elapsed,WeekGroup
0,1,2018-01-23,1,3.11,False,2018,1,4,23,1,23,False,False,False,False,False,False,1516665600,1
1,1,2018-01-25,1,3.11,False,2018,1,4,25,3,25,False,False,False,False,False,False,1516838400,1
2,1,2018-01-29,307,3.11,True,2018,1,5,29,0,29,False,False,False,False,False,False,1517184000,2
3,1,2018-01-30,3,3.11,False,2018,1,5,30,1,30,False,False,False,False,False,False,1517270400,2
4,1,2018-01-31,1,3.11,False,2018,1,5,31,2,31,True,False,False,False,False,False,1517356800,2


In [35]:
from itertools import product
# Create "grid" with columns
index_cols = ['itemID', 'WeekGroup']

grid = []
for block_num in aggregated['WeekGroup'].unique():
    cur_items = aggregated.loc[aggregated['WeekGroup'] == block_num, 'itemID'].unique()
    grid.append(np.array(list(product(*[cur_items, [block_num]])),dtype='int32'))

grid = pd.DataFrame(np.vstack(grid), columns = index_cols,dtype=np.int32)
grid.head()

Unnamed: 0,itemID,WeekGroup
0,1,1
1,3,1
2,4,1
3,5,1
4,8,1


In [36]:
train_m = aggregated.groupby(['WeekGroup','itemID']).agg({'count': 'sum','avg_salesPrice': np.mean, 'predicted_promotion': 'sum'}).reset_index()

# Merging sales numbers with the grid dataframe
train_m = pd.merge(grid,train_m,on=['WeekGroup','itemID'],how='left').fillna(0)

In [37]:
train_m[train_m['itemID'] == 1]

Unnamed: 0,itemID,WeekGroup,count,avg_salesPrice,predicted_promotion
0,1,1,2,3.11,0.0
1664,1,2,313,3.11,1.0
3278,1,3,35,3.11,0.0
5156,1,4,3,3.11,0.0
7532,1,5,1,3.11,0.0
10446,1,6,1,3.11,0.0
13614,1,7,2,3.11,0.0
16947,1,8,299,3.11,1.0
20689,1,9,3,3.11,0.0
24597,1,10,31,3.11,0.0


In [23]:
# del df_week

In [38]:
### add 0 sales for missing dates per itemID
for prod in train_m.itemID.unique():
    s = train_m.loc[train_m['itemID'] == prod][["WeekGroup","count","avg_salesPrice", "predicted_promotion"]]
    s = s.set_index("WeekGroup")
    idx = range(s.index.min(), s.index.max()+1)
    s = s.reindex(idx)
#     s = s.reindex(idx, fill_value=0)
    s['weekGroup'] = s.index
    if "df_week" not in globals():
        s["itemID"] = prod
        df_week = s
    else:
        s["itemID"] = prod
        df_week = pd.concat([df_week,s], ignore_index=True)
df_week.head(3)

Unnamed: 0,count,avg_salesPrice,predicted_promotion,weekGroup,itemID
0,2.0,3.11,0.0,1,1
1,313.0,3.11,1.0,2,1
2,35.0,3.11,0.0,3,1


In [39]:
df_week.reset_index(inplace=True)
df_week[df_week['itemID']==5]

Unnamed: 0,index,count,avg_salesPrice,predicted_promotion,weekGroup,itemID
33,33,1.0,7.84,0.0,1,5
34,34,,,,2,5
35,35,1.0,7.84,0.0,3,5
36,36,,,,4,5
37,37,,,,5,5
38,38,,,,6,5
39,39,127.0,7.6,1.0,7,5
40,40,4.0,7.48,0.0,8,5
41,41,39.0,7.48,0.0,9,5
42,42,3.0,7.48,0.0,10,5


In [40]:
df_week.sort_values(by=['itemID', 'weekGroup'], inplace=True)
df_week['count'].fillna(value=0, inplace=True)
df_week['predicted_promotion'].fillna(value=0, inplace=True)
# df_week['avg_salesPrice'].fillna(value=df_week.groupby('itemID')['avg_salesPrice'].transform('mean'), inplace=True)
# df_week['avg_salesPrice'].fillna(value=0, inplace=True)
df_week['avg_salesPrice'].fillna(method='ffill', inplace=True)

In [41]:
# adding the category id too from the items table.
df_m = pd.merge(df_week,items_simple,on=['itemID'],how='left')

In [42]:
df_m['dateFirstSell'] = [d.month for d in df_m['dateFirstSell']]

In [43]:
df_m['retail_price_diff'] = df_m.apply(lambda x: (x.avg_salesPrice - x.recommendedRetailPrice)/x.recommendedRetailPrice, axis = 1)
df_m['price_fluctuate'] = df_m.groupby('itemID').avg_salesPrice.shift(1)
df_m['price_fluctuate'] = df_m.groupby('itemID')['price_fluctuate'].fillna(method='bfill')
df_m['price_fluctuate'] = df_m.apply(lambda x: 0 if x.price_fluctuate == x.avg_salesPrice 
                                     else (-1 if x.price_fluctuate > x.avg_salesPrice else 1), axis = 1)

In [44]:
df_m[df_m['itemID']==5]

Unnamed: 0,index,count,avg_salesPrice,predicted_promotion,weekGroup,itemID,customerRating,manufacturer,brand,category1,category2,category3,recommendedRetailPrice,dateFirstSell,soldWeekly,recentlySold,retail_price_diff,price_fluctuate
41,33,1.0,7.84,0.0,1,5,2.33,2,0,0,0,0,17.04,1,False,3.0,-0.539906,0
42,34,0.0,7.84,0.0,2,5,2.33,2,0,0,0,0,17.04,1,False,3.0,-0.539906,0
43,35,1.0,7.84,0.0,3,5,2.33,2,0,0,0,0,17.04,1,False,3.0,-0.539906,0
44,36,0.0,7.84,0.0,4,5,2.33,2,0,0,0,0,17.04,1,False,3.0,-0.539906,0
45,37,0.0,7.84,0.0,5,5,2.33,2,0,0,0,0,17.04,1,False,3.0,-0.539906,0
46,38,0.0,7.84,0.0,6,5,2.33,2,0,0,0,0,17.04,1,False,3.0,-0.539906,0
47,39,127.0,7.6,1.0,7,5,2.33,2,0,0,0,0,17.04,1,False,3.0,-0.553991,-1
48,40,4.0,7.48,0.0,8,5,2.33,2,0,0,0,0,17.04,1,False,3.0,-0.561033,-1
49,41,39.0,7.48,0.0,9,5,2.33,2,0,0,0,0,17.04,1,False,3.0,-0.561033,-1
50,42,3.0,7.48,0.0,10,5,2.33,2,0,0,0,0,17.04,1,False,3.0,-0.561033,1


In [45]:
groupcollist = ['brand','category1','category3']
aggregationlist = [('avg_salesPrice', np.mean, 'avg'),('count', np.sum, 'sum'),('count', np.mean, 'avg')]

for type_id in groupcollist:
    for column_id,aggregator,aggtype in aggregationlist:
        # get numbers from sales data and set column names
        mean_df = df_m.groupby([type_id,'weekGroup']).aggregate(aggregator).reset_index()[[column_id,type_id,'weekGroup']]
        mean_df.columns = [type_id+'_'+aggtype+'_'+column_id,type_id,'weekGroup']
        # merge new columns on sales_m data
        df_m = pd.merge(df_m,mean_df,on=['weekGroup',type_id],how='left')

In [46]:
df_m[df_m['itemID']==1]

Unnamed: 0,index,count,avg_salesPrice,predicted_promotion,weekGroup,itemID,customerRating,manufacturer,brand,category1,category2,category3,recommendedRetailPrice,dateFirstSell,soldWeekly,recentlySold,retail_price_diff,price_fluctuate,brand_avg_avg_salesPrice,brand_sum_count,brand_avg_count,category1_avg_avg_salesPrice,category1_sum_count,category1_avg_count,category3_avg_avg_salesPrice,category3_sum_count,category3_avg_count
0,0,2.0,3.11,0.0,1,1,4.38,1,0,0,0,0,8.84,1,False,9.0,-0.64819,0,80.488057,55881.0,63.429058,138.274918,18579.0,42.710345,116.237975,6196.0,43.027778
1,1,313.0,3.11,1.0,2,1,4.38,1,0,0,0,0,8.84,1,False,9.0,-0.64819,1,79.690687,58626.0,55.255419,126.672261,12467.0,22.708561,97.324982,7530.0,37.277228
2,2,35.0,3.11,0.0,3,1,4.38,1,0,0,0,0,8.84,1,False,9.0,-0.64819,-1,99.716895,55269.0,43.010895,161.587334,15306.0,22.311953,164.694577,8329.0,32.662745
3,3,3.0,3.11,0.0,4,1,4.38,1,0,0,0,0,8.84,1,False,9.0,-0.64819,0,100.438951,133488.0,77.971963,162.98943,17479.0,21.394125,150.986403,6014.0,21.250883
4,4,1.0,3.11,0.0,5,1,4.38,1,0,0,0,0,8.84,1,False,9.0,-0.64819,0,98.877822,166790.0,80.574879,156.060538,17300.0,18.948521,165.99476,6006.0,18.946372
5,5,1.0,3.11,0.0,6,1,4.38,1,0,0,0,0,8.84,1,False,9.0,-0.64819,0,86.829203,105227.0,48.092779,148.098693,16499.0,17.132918,156.504821,3800.0,11.620795
6,6,2.0,3.11,0.0,7,1,4.38,1,0,0,0,0,8.84,1,False,9.0,-0.64819,0,86.220636,118255.0,50.129292,152.155096,14736.0,14.780341,159.059194,6581.0,19.075362
7,7,299.0,3.11,1.0,8,1,4.38,1,0,0,0,0,8.84,1,False,9.0,-0.64819,1,100.624115,153578.0,61.4312,157.44415,31313.0,28.235347,201.930435,14797.0,36.9925
8,8,3.0,3.11,0.0,9,1,4.38,1,0,0,0,0,8.84,1,False,9.0,-0.64819,-1,74.920631,130916.0,54.685046,141.094431,17704.0,17.668663,121.320079,6727.0,19.669591
9,9,31.0,3.11,0.0,10,1,4.38,1,0,0,0,0,8.84,1,False,9.0,-0.64819,0,78.73735,125886.0,45.511931,151.374546,14371.0,13.544769,124.493861,4605.0,13.270893


In [47]:
from sklearn.preprocessing import MinMaxScaler
# scale data between -1 & 1
scaler = MinMaxScaler()
scale_col = ['count', 'avg_salesPrice', 'customerRating', 'retail_price_diff', 
             'brand_avg_avg_salesPrice', 'brand_sum_count', 'brand_avg_count', 
             'category1_avg_avg_salesPrice', 'category1_sum_count', 'category1_avg_count', 
             'category3_avg_avg_salesPrice', 'category3_sum_count', 'category3_avg_count']
for c in scale_col:
    scaler.fit(df_m[[c]])
    df_m[c] = pd.DataFrame(scaler.transform(df_m[[c]]))[0]

In [48]:
df_m[df_m['itemID']==10]

Unnamed: 0,index,count,avg_salesPrice,predicted_promotion,weekGroup,itemID,customerRating,manufacturer,brand,category1,category2,category3,recommendedRetailPrice,dateFirstSell,soldWeekly,recentlySold,retail_price_diff,price_fluctuate,brand_avg_avg_salesPrice,brand_sum_count,brand_avg_count,category1_avg_avg_salesPrice,category1_sum_count,category1_avg_count,category3_avg_avg_salesPrice,category3_sum_count,category3_avg_count
80,19048,0.074135,0.002621,1.0,3,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.26454,0,0.024362,0.331369,0.022951,0.318662,0.149213,0.060895,0.124626,0.13021,0.031029
81,19049,0.000207,0.00254,0.0,4,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,-1,0.02454,0.800336,0.041607,0.321737,0.170824,0.058223,0.11417,0.094019,0.020188
82,19050,0.013046,0.00254,0.0,5,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.024154,1.0,0.042996,0.30654,0.169043,0.051104,0.125618,0.093894,0.017998
83,19051,0.000207,0.00254,0.0,6,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.021171,0.630895,0.025663,0.289077,0.161078,0.045819,0.118379,0.059407,0.011039
84,19052,0.000207,0.00254,0.0,7,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.02102,0.709005,0.02675,0.297974,0.143545,0.03897,0.120327,0.102883,0.018121
85,19053,0.000828,0.00254,0.0,8,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.024586,0.920787,0.032781,0.309575,0.308401,0.078139,0.153029,0.231326,0.035142
86,19054,0.0,0.00254,0.0,9,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.018222,0.784915,0.029181,0.273714,0.173061,0.047378,0.091541,0.105165,0.018685
87,19055,0.000414,0.00254,0.0,10,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.019167,0.754757,0.024286,0.296262,0.139915,0.035374,0.093962,0.071991,0.012607
88,19056,0.000414,0.00254,0.0,11,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.018622,0.042251,0.00136,0.289999,0.015902,0.00116,0.092139,0.009943,0.001741


In [49]:
lag_variables  = ['count','avg_salesPrice',
                  'brand_avg_avg_salesPrice','brand_sum_count','brand_avg_count',
#                   'manufacturer_avg_avg_salesPrice','manufacturer_sum_count','manufacturer_avg_count',
                  'category1_avg_avg_salesPrice','category1_sum_count','category1_avg_count',
                  'category3_avg_avg_salesPrice','category3_sum_count','category3_avg_count']
lags = [1 ,2 ,3, 4, 5]
# we will keep the results in thsi dataframe
sales_means = df_m.copy()
for lag in lags:
    sales_new_df = df_m.copy()
    sales_new_df.weekGroup+=lag
    # subset only the lag variables we want
    sales_new_df = sales_new_df[['weekGroup','itemID']+lag_variables]
    sales_new_df.columns = ['weekGroup','itemID']+ [lag_feat+'_lag_'+str(lag) for lag_feat in lag_variables]
    # join with date_block_num,shop_id and item_id
    sales_means = pd.merge(sales_means, sales_new_df,on=['weekGroup','itemID'] ,how='left')

In [50]:
for feat in sales_means.columns:
    if 'count' in feat:
        sales_means[feat]=sales_means[feat].fillna(0)
    elif 'salesPrice' or 'diff' in feat:
#         sales_means[feat]=sales_means[feat].fillna(sales_means[feat].median())
        sales_means[feat]=sales_means[feat].fillna(value=sales_means.groupby('itemID')[feat].transform('mean'))

In [51]:
sales_means[sales_means['itemID']==10]

Unnamed: 0,index,count,avg_salesPrice,predicted_promotion,weekGroup,itemID,customerRating,manufacturer,brand,category1,category2,category3,recommendedRetailPrice,dateFirstSell,soldWeekly,recentlySold,retail_price_diff,price_fluctuate,brand_avg_avg_salesPrice,brand_sum_count,brand_avg_count,category1_avg_avg_salesPrice,category1_sum_count,category1_avg_count,category3_avg_avg_salesPrice,category3_sum_count,category3_avg_count,count_lag_1,avg_salesPrice_lag_1,brand_avg_avg_salesPrice_lag_1,brand_sum_count_lag_1,brand_avg_count_lag_1,category1_avg_avg_salesPrice_lag_1,category1_sum_count_lag_1,category1_avg_count_lag_1,category3_avg_avg_salesPrice_lag_1,category3_sum_count_lag_1,category3_avg_count_lag_1,count_lag_2,avg_salesPrice_lag_2,brand_avg_avg_salesPrice_lag_2,brand_sum_count_lag_2,brand_avg_count_lag_2,category1_avg_avg_salesPrice_lag_2,category1_sum_count_lag_2,category1_avg_count_lag_2,category3_avg_avg_salesPrice_lag_2,category3_sum_count_lag_2,category3_avg_count_lag_2,count_lag_3,avg_salesPrice_lag_3,brand_avg_avg_salesPrice_lag_3,brand_sum_count_lag_3,brand_avg_count_lag_3,category1_avg_avg_salesPrice_lag_3,category1_sum_count_lag_3,category1_avg_count_lag_3,category3_avg_avg_salesPrice_lag_3,category3_sum_count_lag_3,category3_avg_count_lag_3,count_lag_4,avg_salesPrice_lag_4,brand_avg_avg_salesPrice_lag_4,brand_sum_count_lag_4,brand_avg_count_lag_4,category1_avg_avg_salesPrice_lag_4,category1_sum_count_lag_4,category1_avg_count_lag_4,category3_avg_avg_salesPrice_lag_4,category3_sum_count_lag_4,category3_avg_count_lag_4,count_lag_5,avg_salesPrice_lag_5,brand_avg_avg_salesPrice_lag_5,brand_sum_count_lag_5,brand_avg_count_lag_5,category1_avg_avg_salesPrice_lag_5,category1_sum_count_lag_5,category1_avg_count_lag_5,category3_avg_avg_salesPrice_lag_5,category3_sum_count_lag_5,category3_avg_count_lag_5
80,19048,0.074135,0.002621,1.0,3,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.26454,0,0.024362,0.331369,0.022951,0.318662,0.149213,0.060895,0.124626,0.13021,0.031029,0.0,0.00255,0.022153,0.0,0.0,0.301692,0.0,0.0,0.117706,0.0,0.0,0.0,0.002551,0.022579,0.0,0.0,0.302468,0.0,0.0,0.121098,0.0,0.0,0.0,0.002553,0.023305,0.0,0.0,0.307261,0.0,0.0,0.126025,0.0,0.0,0.0,0.002556,0.023049,0.0,0.0,0.306798,0.0,0.0,0.120624,0.0,0.0,0.0,0.00256,0.023557,0.0,0.0,0.309004,0.0,0.0,0.120698,0.0,0.0
81,19049,0.000207,0.00254,0.0,4,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,-1,0.02454,0.800336,0.041607,0.321737,0.170824,0.058223,0.11417,0.094019,0.020188,0.074135,0.002621,0.024362,0.331369,0.022951,0.318662,0.149213,0.060895,0.124626,0.13021,0.031029,0.0,0.002551,0.022579,0.0,0.0,0.302468,0.0,0.0,0.121098,0.0,0.0,0.0,0.002553,0.023305,0.0,0.0,0.307261,0.0,0.0,0.126025,0.0,0.0,0.0,0.002556,0.023049,0.0,0.0,0.306798,0.0,0.0,0.120624,0.0,0.0,0.0,0.00256,0.023557,0.0,0.0,0.309004,0.0,0.0,0.120698,0.0,0.0
82,19050,0.013046,0.00254,0.0,5,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.024154,1.0,0.042996,0.30654,0.169043,0.051104,0.125618,0.093894,0.017998,0.000207,0.00254,0.02454,0.800336,0.041607,0.321737,0.170824,0.058223,0.11417,0.094019,0.020188,0.074135,0.002621,0.024362,0.331369,0.022951,0.318662,0.149213,0.060895,0.124626,0.13021,0.031029,0.0,0.002553,0.023305,0.0,0.0,0.307261,0.0,0.0,0.126025,0.0,0.0,0.0,0.002556,0.023049,0.0,0.0,0.306798,0.0,0.0,0.120624,0.0,0.0,0.0,0.00256,0.023557,0.0,0.0,0.309004,0.0,0.0,0.120698,0.0,0.0
83,19051,0.000207,0.00254,0.0,6,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.021171,0.630895,0.025663,0.289077,0.161078,0.045819,0.118379,0.059407,0.011039,0.013046,0.00254,0.024154,1.0,0.042996,0.30654,0.169043,0.051104,0.125618,0.093894,0.017998,0.000207,0.00254,0.02454,0.800336,0.041607,0.321737,0.170824,0.058223,0.11417,0.094019,0.020188,0.074135,0.002621,0.024362,0.331369,0.022951,0.318662,0.149213,0.060895,0.124626,0.13021,0.031029,0.0,0.002556,0.023049,0.0,0.0,0.306798,0.0,0.0,0.120624,0.0,0.0,0.0,0.00256,0.023557,0.0,0.0,0.309004,0.0,0.0,0.120698,0.0,0.0
84,19052,0.000207,0.00254,0.0,7,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.02102,0.709005,0.02675,0.297974,0.143545,0.03897,0.120327,0.102883,0.018121,0.000207,0.00254,0.021171,0.630895,0.025663,0.289077,0.161078,0.045819,0.118379,0.059407,0.011039,0.013046,0.00254,0.024154,1.0,0.042996,0.30654,0.169043,0.051104,0.125618,0.093894,0.017998,0.000207,0.00254,0.02454,0.800336,0.041607,0.321737,0.170824,0.058223,0.11417,0.094019,0.020188,0.074135,0.002621,0.024362,0.331369,0.022951,0.318662,0.149213,0.060895,0.124626,0.13021,0.031029,0.0,0.00256,0.023557,0.0,0.0,0.309004,0.0,0.0,0.120698,0.0,0.0
85,19053,0.000828,0.00254,0.0,8,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.024586,0.920787,0.032781,0.309575,0.308401,0.078139,0.153029,0.231326,0.035142,0.000207,0.00254,0.02102,0.709005,0.02675,0.297974,0.143545,0.03897,0.120327,0.102883,0.018121,0.000207,0.00254,0.021171,0.630895,0.025663,0.289077,0.161078,0.045819,0.118379,0.059407,0.011039,0.013046,0.00254,0.024154,1.0,0.042996,0.30654,0.169043,0.051104,0.125618,0.093894,0.017998,0.000207,0.00254,0.02454,0.800336,0.041607,0.321737,0.170824,0.058223,0.11417,0.094019,0.020188,0.074135,0.002621,0.024362,0.331369,0.022951,0.318662,0.149213,0.060895,0.124626,0.13021,0.031029
86,19054,0.0,0.00254,0.0,9,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.018222,0.784915,0.029181,0.273714,0.173061,0.047378,0.091541,0.105165,0.018685,0.000828,0.00254,0.024586,0.920787,0.032781,0.309575,0.308401,0.078139,0.153029,0.231326,0.035142,0.000207,0.00254,0.02102,0.709005,0.02675,0.297974,0.143545,0.03897,0.120327,0.102883,0.018121,0.000207,0.00254,0.021171,0.630895,0.025663,0.289077,0.161078,0.045819,0.118379,0.059407,0.011039,0.013046,0.00254,0.024154,1.0,0.042996,0.30654,0.169043,0.051104,0.125618,0.093894,0.017998,0.000207,0.00254,0.02454,0.800336,0.041607,0.321737,0.170824,0.058223,0.11417,0.094019,0.020188
87,19055,0.000414,0.00254,0.0,10,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.019167,0.754757,0.024286,0.296262,0.139915,0.035374,0.093962,0.071991,0.012607,0.0,0.00254,0.018222,0.784915,0.029181,0.273714,0.173061,0.047378,0.091541,0.105165,0.018685,0.000828,0.00254,0.024586,0.920787,0.032781,0.309575,0.308401,0.078139,0.153029,0.231326,0.035142,0.000207,0.00254,0.02102,0.709005,0.02675,0.297974,0.143545,0.03897,0.120327,0.102883,0.018121,0.000207,0.00254,0.021171,0.630895,0.025663,0.289077,0.161078,0.045819,0.118379,0.059407,0.011039,0.013046,0.00254,0.024154,1.0,0.042996,0.30654,0.169043,0.051104,0.125618,0.093894,0.017998
88,19056,0.000414,0.00254,0.0,11,10,1.0,4,0,0,0,0,19.43,2,False,2.0,0.256203,0,0.018622,0.042251,0.00136,0.289999,0.015902,0.00116,0.092139,0.009943,0.001741,0.000414,0.00254,0.019167,0.754757,0.024286,0.296262,0.139915,0.035374,0.093962,0.071991,0.012607,0.0,0.00254,0.018222,0.784915,0.029181,0.273714,0.173061,0.047378,0.091541,0.105165,0.018685,0.000828,0.00254,0.024586,0.920787,0.032781,0.309575,0.308401,0.078139,0.153029,0.231326,0.035142,0.000207,0.00254,0.02102,0.709005,0.02675,0.297974,0.143545,0.03897,0.120327,0.102883,0.018121,0.000207,0.00254,0.021171,0.630895,0.025663,0.289077,0.161078,0.045819,0.118379,0.059407,0.011039


In [52]:
cols_to_drop = lag_variables[1:] + ['recommendedRetailPrice', 'category2', 'retail_price_diff', 'price_fluctuate']
cols_to_keep = [x for x in list(sales_means.columns) if x not in cols_to_drop]

In [53]:
# for col in cols_to_drop:
#     del sales_means[col]
# sales_means

In [54]:
# sales_means = sales_means[sales_means['WeekGroup']>2]

In [55]:
sales_means = sales_means[cols_to_keep]
sales_means = sales_means.drop(columns=["index"])#,"predicted_promotion"])

In [56]:
sales_means.groupby("weekGroup").count()

Unnamed: 0_level_0,count,predicted_promotion,itemID,customerRating,manufacturer,brand,category1,category3,dateFirstSell,soldWeekly,recentlySold,count_lag_1,avg_salesPrice_lag_1,brand_avg_avg_salesPrice_lag_1,brand_sum_count_lag_1,brand_avg_count_lag_1,category1_avg_avg_salesPrice_lag_1,category1_sum_count_lag_1,category1_avg_count_lag_1,category3_avg_avg_salesPrice_lag_1,category3_sum_count_lag_1,category3_avg_count_lag_1,count_lag_2,avg_salesPrice_lag_2,brand_avg_avg_salesPrice_lag_2,brand_sum_count_lag_2,brand_avg_count_lag_2,category1_avg_avg_salesPrice_lag_2,category1_sum_count_lag_2,category1_avg_count_lag_2,category3_avg_avg_salesPrice_lag_2,category3_sum_count_lag_2,category3_avg_count_lag_2,count_lag_3,avg_salesPrice_lag_3,brand_avg_avg_salesPrice_lag_3,brand_sum_count_lag_3,brand_avg_count_lag_3,category1_avg_avg_salesPrice_lag_3,category1_sum_count_lag_3,category1_avg_count_lag_3,category3_avg_avg_salesPrice_lag_3,category3_sum_count_lag_3,category3_avg_count_lag_3,count_lag_4,avg_salesPrice_lag_4,brand_avg_avg_salesPrice_lag_4,brand_sum_count_lag_4,brand_avg_count_lag_4,category1_avg_avg_salesPrice_lag_4,category1_sum_count_lag_4,category1_avg_count_lag_4,category3_avg_avg_salesPrice_lag_4,category3_sum_count_lag_4,category3_avg_count_lag_4,count_lag_5,avg_salesPrice_lag_5,brand_avg_avg_salesPrice_lag_5,brand_sum_count_lag_5,brand_avg_count_lag_5,category1_avg_avg_salesPrice_lag_5,category1_sum_count_lag_5,category1_avg_count_lag_5,category3_avg_avg_salesPrice_lag_5,category3_sum_count_lag_5,category3_avg_count_lag_5
weekGroup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1
1,1664,1664,1664,1664,1664,1664,1664,1664,1664,1664,1664,1664,1578,1578,1664,1664,1578,1664,1664,1578,1664,1664,1664,1545,1545,1664,1664,1545,1664,1664,1545,1664,1664,1664,1493,1493,1664,1664,1493,1664,1664,1493,1664,1664,1664,1447,1447,1664,1664,1447,1664,1664,1447,1664,1664,1664,1390,1390,1664,1664,1390,1664,1664,1390,1664,1664
2,2139,2139,2139,2139,2139,2139,2139,2139,2139,2139,2139,2139,2082,2082,2139,2139,2082,2139,2139,2082,2139,2139,2139,2034,2034,2139,2139,2034,2139,2139,2034,2139,2139,2139,1960,1960,2139,2139,1960,2139,2139,1960,2139,2139,2139,1889,1889,2139,2139,1889,2139,2139,1889,2139,2139,2139,1794,1794,2139,2139,1794,2139,2139,1794,2139,2139
3,2656,2656,2656,2656,2656,2656,2656,2656,2656,2656,2656,2656,2612,2612,2656,2656,2612,2656,2656,2612,2656,2656,2656,2549,2549,2656,2656,2549,2656,2656,2549,2656,2656,2656,2439,2439,2656,2656,2439,2656,2656,2439,2656,2656,2656,2326,2326,2656,2656,2326,2656,2656,2326,2656,2656,2656,2173,2173,2656,2656,2173,2656,2656,2173,2656,2656
4,3366,3366,3366,3366,3366,3366,3366,3366,3366,3366,3366,3366,3254,3254,3366,3366,3254,3366,3366,3254,3366,3366,3366,3129,3129,3366,3366,3129,3366,3366,3129,3366,3366,3366,3002,3002,3366,3366,3002,3366,3366,3002,3366,3366,3366,2815,2815,3366,3366,2815,3366,3366,2815,3366,3366,3366,2600,2600,3366,3366,2600,3366,3366,2600,3366,3366
5,4076,4076,4076,4076,4076,4076,4076,4076,4076,4076,4076,4076,3905,3905,4076,4076,3905,4076,4076,3905,4076,4076,4076,3755,3755,4076,4076,3755,4076,4076,3755,4076,4076,4076,3564,3564,4076,4076,3564,4076,4076,3564,4076,4076,4076,3332,3332,4076,4076,3332,4076,4076,3332,4076,4076,4076,3117,3117,4076,4076,3117,4076,4076,3117,4076,4076
6,4454,4454,4454,4454,4454,4454,4454,4454,4454,4454,4454,4454,4306,4306,4454,4454,4306,4454,4454,4306,4454,4454,4454,4144,4144,4454,4454,4144,4454,4454,4144,4454,4454,4454,3924,3924,4454,4454,3924,4454,4454,3924,4454,4454,4454,3717,3717,4454,4454,3717,4454,4454,3717,4454,4454,4454,3559,3559,4454,4454,3559,4454,4454,3559,4454,4454
7,4674,4674,4674,4674,4674,4674,4674,4674,4674,4674,4674,4674,4530,4530,4674,4674,4530,4674,4674,4530,4674,4674,4674,4360,4360,4674,4674,4360,4674,4674,4360,4674,4674,4674,4209,4209,4674,4674,4209,4674,4674,4209,4674,4674,4674,4044,4044,4674,4674,4044,4674,4674,4044,4674,4674,4674,3468,3468,4674,4674,3468,4674,4674,3468,4674,4674
8,4887,4887,4887,4887,4887,4887,4887,4887,4887,4887,4887,4887,4728,4728,4887,4887,4728,4887,4887,4728,4887,4887,4887,4647,4647,4887,4887,4647,4887,4887,4647,4887,4887,4887,4582,4582,4887,4887,4582,4887,4887,4582,4887,4887,4887,3854,3854,4887,4887,3854,4887,4887,3854,4887,4887,4887,3336,3336,4887,4887,3336,4887,4887,3336,4887,4887
9,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4866,4188,4188,4866,4866,4188,4866,4866,4188,4866,4866,4866,3551,3551,4866,4866,3551,4866,4866,3551,4866,4866,4866,3095,3095,4866,4866,3095,4866,4866,3095,4866,4866
10,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,5670,4866,4866,5670,5670,4866,5670,5670,4866,5670,5670,5670,4188,4188,5670,5670,4188,5670,5670,4188,5670,5670,5670,3551,3551,5670,5670,3551,5670,5670,3551,5670,5670,5670,3095,3095,5670,5670,3095,5670,5670,3095,5670,5670


In [58]:
sales_means.drop('predicted_promotion', inplace=True, axis=1)

In [59]:
sales_means.to_csv('./data/sales_means.csv', index=False, sep='|')

In [60]:
sales_means.head()

Unnamed: 0,count,weekGroup,itemID,customerRating,manufacturer,brand,category1,category3,dateFirstSell,soldWeekly,recentlySold,count_lag_1,avg_salesPrice_lag_1,brand_avg_avg_salesPrice_lag_1,brand_sum_count_lag_1,brand_avg_count_lag_1,category1_avg_avg_salesPrice_lag_1,category1_sum_count_lag_1,category1_avg_count_lag_1,category3_avg_avg_salesPrice_lag_1,category3_sum_count_lag_1,category3_avg_count_lag_1,count_lag_2,avg_salesPrice_lag_2,brand_avg_avg_salesPrice_lag_2,brand_sum_count_lag_2,brand_avg_count_lag_2,category1_avg_avg_salesPrice_lag_2,category1_sum_count_lag_2,category1_avg_count_lag_2,category3_avg_avg_salesPrice_lag_2,category3_sum_count_lag_2,category3_avg_count_lag_2,count_lag_3,avg_salesPrice_lag_3,brand_avg_avg_salesPrice_lag_3,brand_sum_count_lag_3,brand_avg_count_lag_3,category1_avg_avg_salesPrice_lag_3,category1_sum_count_lag_3,category1_avg_count_lag_3,category3_avg_avg_salesPrice_lag_3,category3_sum_count_lag_3,category3_avg_count_lag_3,count_lag_4,avg_salesPrice_lag_4,brand_avg_avg_salesPrice_lag_4,brand_sum_count_lag_4,brand_avg_count_lag_4,category1_avg_avg_salesPrice_lag_4,category1_sum_count_lag_4,category1_avg_count_lag_4,category3_avg_avg_salesPrice_lag_4,category3_sum_count_lag_4,category3_avg_count_lag_4,count_lag_5,avg_salesPrice_lag_5,brand_avg_avg_salesPrice_lag_5,brand_sum_count_lag_5,brand_avg_count_lag_5,category1_avg_avg_salesPrice_lag_5,category1_sum_count_lag_5,category1_avg_count_lag_5,category3_avg_avg_salesPrice_lag_5,category3_sum_count_lag_5,category3_avg_count_lag_5
0,0.000414,1,1,0.876,1,0,0,0,1,False,9.0,0.0,0.000296,0.021623,0.0,0.0,0.292315,0.0,0.0,0.110255,0.0,0.0,0.0,0.000296,0.021895,0.0,0.0,0.291876,0.0,0.0,0.112066,0.0,0.0,0.0,0.000296,0.022355,0.0,0.0,0.294147,0.0,0.0,0.114631,0.0,0.0,0.0,0.000296,0.022036,0.0,0.0,0.291943,0.0,0.0,0.109146,0.0,0.0,0.0,0.000296,0.022205,0.0,0.0,0.290938,0.0,0.0,0.107282,0.0,0.0
1,0.064817,2,1,0.876,1,0,0,0,1,False,9.0,0.000414,0.000296,0.0196,0.335038,0.033847,0.267529,0.181763,0.120276,0.087664,0.096864,0.040875,0.0,0.000296,0.021895,0.0,0.0,0.291876,0.0,0.0,0.112066,0.0,0.0,0.0,0.000296,0.022355,0.0,0.0,0.294147,0.0,0.0,0.114631,0.0,0.0,0.0,0.000296,0.022036,0.0,0.0,0.291943,0.0,0.0,0.109146,0.0,0.0,0.0,0.000296,0.022205,0.0,0.0,0.290938,0.0,0.0,0.107282,0.0,0.0
2,0.007248,3,1,0.876,1,0,0,0,1,False,9.0,0.064817,0.000296,0.019403,0.351496,0.029485,0.242081,0.12098,0.06205,0.073238,0.117719,0.035412,0.000414,0.000296,0.0196,0.335038,0.033847,0.267529,0.181763,0.120276,0.087664,0.096864,0.040875,0.0,0.000296,0.022355,0.0,0.0,0.294147,0.0,0.0,0.114631,0.0,0.0,0.0,0.000296,0.022036,0.0,0.0,0.291943,0.0,0.0,0.109146,0.0,0.0,0.0,0.000296,0.022205,0.0,0.0,0.290938,0.0,0.0,0.107282,0.0,0.0
3,0.000621,4,1,0.876,1,0,0,0,1,False,9.0,0.007248,0.000296,0.024362,0.331369,0.022951,0.318662,0.149213,0.060895,0.124626,0.13021,0.031029,0.064817,0.000296,0.019403,0.351496,0.029485,0.242081,0.12098,0.06205,0.073238,0.117719,0.035412,0.000414,0.000296,0.0196,0.335038,0.033847,0.267529,0.181763,0.120276,0.087664,0.096864,0.040875,0.0,0.000296,0.022036,0.0,0.0,0.291943,0.0,0.0,0.109146,0.0,0.0,0.0,0.000296,0.022205,0.0,0.0,0.290938,0.0,0.0,0.107282,0.0,0.0
4,0.000207,5,1,0.876,1,0,0,0,1,False,9.0,0.000621,0.000296,0.02454,0.800336,0.041607,0.321737,0.170824,0.058223,0.11417,0.094019,0.020188,0.007248,0.000296,0.024362,0.331369,0.022951,0.318662,0.149213,0.060895,0.124626,0.13021,0.031029,0.064817,0.000296,0.019403,0.351496,0.029485,0.242081,0.12098,0.06205,0.073238,0.117719,0.035412,0.000414,0.000296,0.0196,0.335038,0.033847,0.267529,0.181763,0.120276,0.087664,0.096864,0.040875,0.0,0.000296,0.022205,0.0,0.0,0.290938,0.0,0.0,0.107282,0.0,0.0


In [61]:
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']