In [23]:
# Import libraries
import numpy as np
import pandas as pd

import os

#from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso, Ridge

import string

import datetime

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Constants goes here
NMONTHS = 24
pd.set_option('max_columns', 400)

In [3]:
# Import data
DATA_FOLDER = 'Data/'

transactions    = pd.read_csv(os.path.join(DATA_FOLDER, 'sales_train.csv.gz'))
items           = pd.read_csv(os.path.join(DATA_FOLDER, 'items.csv'))
item_categories = pd.read_csv(os.path.join(DATA_FOLDER, 'item_categories.csv'))
shops           = pd.read_csv(os.path.join(DATA_FOLDER, 'shops.csv'))
test            = pd.read_csv(os.path.join(DATA_FOLDER, 'test.csv.gz'))

In [4]:
# For decrease memory consumption
def downcast_dtypes(df):
    '''
    Changes column types in the dataframe: 

            `float64` type to `float32`
            `int64`   type to `int32`
    '''
    
    # Select columns to downcast
    float_cols = [c for c in df if df[c].dtype == "float64"]
    int_cols =   [c for c in df if df[c].dtype == "int64"]
    
    # Downcast
    df[float_cols] = df[float_cols].astype(np.float32)
    df[int_cols]   = df[int_cols].astype(np.int32)
    
    return df

In [5]:
transactions = downcast_dtypes(transactions)
items = downcast_dtypes(items)
item_categories = downcast_dtypes(item_categories)
shops = downcast_dtypes(shops)
test = downcast_dtypes(test)

In [6]:
items.head()

Unnamed: 0,item_name,item_id,item_category_id
0,! ВО ВЛАСТИ НАВАЖДЕНИЯ (ПЛАСТ.) D,0,40
1,!ABBYY FineReader 12 Professional Edition Full...,1,76
2,***В ЛУЧАХ СЛАВЫ (UNV) D,2,40
3,***ГОЛУБАЯ ВОЛНА (Univ) D,3,40
4,***КОРОБКА (СТЕКЛО) D,4,40


In [7]:
# New function to construct base dataframes for train, validation and test
def construct_base_train_val_test(months=NMONTHS):
    """
    months - number of last months to construct base train/test matrix dataset
    This function construct grouped by months sales information for train, validation and test
    """
    # Group transactions by month, shop and item (add price mean here)
    grp_trans = transactions.groupby(['date_block_num', 'shop_id', 'item_id'], as_index=False).agg({'item_cnt_day': 'sum'})
    
    # Split dataset by months
    lst_grp_trans = []
    max_month = grp_trans.date_block_num.max() + 1
    for month in range(max_month-months-2, max_month): # Get only last year data
        month_df = grp_trans[grp_trans.date_block_num == month]
        # Rename target column and price column
        month_df.rename(index=str, columns={"item_cnt_day": "item_cnt_day_" + str(month)}, inplace=True) 
        lst_grp_trans.append(month_df)
        
        
    # Join data with test dataset
    all_data = test[['shop_id', 'item_id']].set_index(['shop_id', 'item_id'])
    for month in range(max_month-months-2, max_month): # [8..34)
        all_data = all_data.join(lst_grp_trans[month+months-max_month+2][['shop_id', 'item_id', 'item_cnt_day_'+str(month)]
                                                                        ].set_index(['shop_id', 'item_id']))
        
    #print(all_data.head())
    
    # Replace NaNs by 0
    all_data.fillna(0, inplace=True)
    
    # Get train target and validation target column
    val_target = all_data.item_cnt_day_33
    train_target = all_data.item_cnt_day_32
    
    # Calculate x_train, x_val, x_test
    x_train = all_data.copy()
    x_train.drop(['item_cnt_day_32', 'item_cnt_day_33'], axis=1, inplace=True)
    
    x_val = all_data.copy()
    first_col = str(max_month-months-2)
    x_val.drop(['item_cnt_day_' + first_col, 'item_cnt_day_33',], 
               axis=1, inplace=True)
    
    x_test = all_data.copy()
    second_col = str(max_month-months-1)
    x_test.drop(['item_cnt_day_' + first_col, 'item_cnt_day_' + second_col], axis=1, inplace=True)
    
    return x_train, x_val, x_test, train_target, val_target

In [8]:
%%time
tr_train, tr_val, tr_test, tr_target, val_target = construct_base_train_val_test()
print(tr_train.shape, tr_val.shape, tr_test.shape)

(214200, 24) (214200, 24) (214200, 24)
Wall time: 9.08 s


In [9]:
# TF-IDF transformation to the item names
tfidf = TfidfVectorizer()
item_name_matrix = tfidf.fit_transform(items.item_name)
item_name_matrix.shape
# (22170, 18222)

(22170, 18222)

In [10]:
%%time
tsvd = TruncatedSVD(n_components=300, random_state=26)
tsvd_item_matrix = tsvd.fit_transform(item_name_matrix)

Wall time: 6.43 s


In [11]:
# Join with the items dataframe
items = items.join(pd.DataFrame(tsvd_item_matrix, index=items.index))
items.head()

Unnamed: 0,item_name,item_id,item_category_id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299
0,! ВО ВЛАСТИ НАВАЖДЕНИЯ (ПЛАСТ.) D,0,40,0.000641,0.00015,0.0006665875,-9.6e-05,0.000303,-0.000537,-0.00026,-0.000361,0.001628417,9.9e-05,5.6e-05,0.00043,0.001018,0.001085,-0.000322,0.000328,0.00013,0.000627,-0.000143,-0.000688,0.000116,0.000966,0.000732,0.000976,0.000311,-9.6e-05,-3.1e-05,0.000353,-0.000332,8.5e-05,-0.000701,0.0021,-0.000286,0.000899,0.000522,0.000392,-0.000219,0.001390099,-0.001162,-0.00048,0.000434,0.000671,-0.000319,0.001602,0.000208,-0.000112,0.000825,-0.000135,4.6e-05,-0.00013,-0.000582,-0.000442,-0.001379,0.000992,3e-05,0.000997,0.000102,0.001513968,-0.000998,-0.000825,-0.001091,-0.00024,0.002661,-0.000658,0.001752,-0.00032,-0.002273,-0.001298,-0.000505,-0.000826,-0.000964,-0.002076,0.000972,0.001193,0.000881,-0.001403,0.000341,0.000471,0.000686,-0.000117,0.001055,0.002024,-0.001867,9.3e-05,0.000649,-0.000272,-0.001606,-0.000111,-0.000499,-0.001416,-0.000739,0.000289,-0.0021,0.001139,-0.000337,-0.000141,-0.000469,-0.000754,0.002121,0.001653,0.002613,0.001338,-0.002171,-0.002578,0.002437,-0.00218,-0.000174,-0.001672,-0.000575,-0.000135,0.001852,0.000132,0.002096,0.000193,-0.00135,0.001015,0.001434,0.000269,-0.000181,-0.002386,0.002269,-0.001062,0.001876,-0.000378,0.000191,0.000332,0.000331,-0.001211,0.002408,0.001586,-0.000629,0.001917,0.002461,-0.001492,-0.002915,0.001704,-0.002238,-0.001447,0.000138,0.002142,0.001178,0.000871,0.001367,0.002531,0.000747,7e-06,0.001785,0.002238,0.000483,-0.000676,0.000595,0.00147,0.001023,0.00078,-0.002417,0.000295,-0.000854,0.003376,-0.001158,0.002055,-0.002853,0.000794,0.000801,-0.002082,0.002438,0.000499,0.003421,0.000175,-0.002071,-0.001182,-0.000732,0.001006,-0.000963,-0.000943,0.001838,-0.002317,-0.000641,-0.00447,-0.002378,-0.000183,-7e-05,-0.002607,-0.005434,0.000276,0.00532,-0.001488,0.000495,0.004292,-0.001266,-0.000966,0.002701,-0.007133,-0.003252,-0.001798,0.005546,-0.002238,-0.003755,0.006275,0.001321,-0.001671,0.004613,-0.004375,-0.008932,0.000599,-0.002665,0.0039,-0.002043,0.002998,0.006054,-0.001779,0.004422,8.9e-05,-0.00142,0.002192,0.004981,6.7e-05,0.001476,-0.00459,-0.000585,0.000607,-0.001207,0.001108,-0.000548,0.001221,-0.0017,-0.000938,-0.002201,0.002105,-0.000432,-0.001797,0.000828,-0.000966,0.001502,-0.00213,-8.2e-05,-0.002346,-0.000823,-0.000978,-0.002553,-0.00061,2.2e-05,0.00098,0.002423,0.000793,-0.000691,-0.000927,-0.000184,0.002126,0.000368,0.000145,-0.000311,-0.00138,-0.00097,0.002174,-0.000228,-2.4e-05,-0.000513,0.000714,-0.000423,-0.001445,0.000936,0.000292,0.001015,-0.002972,-0.004625,-0.003633,-0.000317,-0.001973,0.000128,7.7e-05,-0.001764,-0.001665,-0.00288,-0.000664,0.001539,-0.003229,0.002571,0.000372,-0.002971,-0.00264,0.001137,9e-05,-0.000583,-0.000975,3.7e-05,0.001724,-0.00301,0.001215,-0.001037,0.000481,0.002997,0.001726,0.002455,-0.001184,-0.000464,0.001044,-0.002293,-0.000827,0.00219,-0.00076
1,!ABBYY FineReader 12 Professional Edition Full...,1,76,0.298384,-0.01151,-0.04987161,-0.098047,-0.003741,0.030858,0.004195,0.008521,-0.08459343,-0.010519,0.039641,0.027822,-0.090619,0.014181,0.015476,0.097297,0.076452,0.010247,0.005338,-0.045015,0.027524,-0.039835,0.031161,-0.007417,-0.010267,-0.019308,0.017142,0.03858,0.002962,-0.014835,-0.032812,-0.021327,0.07645,-0.043525,0.016501,-0.011996,-0.028111,0.0317687,-0.023226,-0.000706,-0.017068,-0.004445,-0.038585,0.067433,-0.037746,-0.002445,-0.000951,-0.014575,0.010652,0.008106,0.098434,0.013538,0.043443,-0.067499,0.029519,0.010611,0.007852,-0.004200715,-0.024482,0.035577,0.005648,0.02244,-0.057402,-0.000353,0.01277,0.030241,-0.020691,0.107616,-0.038318,0.005585,0.00938,0.002466,0.024298,-0.089214,0.026898,0.051091,-0.054318,0.013154,0.01658,-0.001817,0.03017,0.04223,0.046394,0.005172,-0.04552,0.011618,0.059522,0.038868,0.001624,-0.015064,-0.008912,0.007991,0.113633,-0.041706,0.04356,-0.020298,0.066936,0.02023,0.077463,0.029236,0.018998,0.065137,0.015633,-0.030162,-0.062857,-0.035873,0.14389,0.115665,-0.078885,-0.090478,0.087421,0.033222,0.176106,-0.143539,0.042416,-0.139721,-0.03964,0.096817,-0.027358,-0.172981,0.090362,0.201508,0.043411,-0.030667,0.028673,-0.021178,0.06081,-0.070457,-0.080754,0.03621,-0.006664,0.012152,-0.004762,-0.004402,0.042337,-0.006315,-0.018676,-0.069513,0.003699,-0.098359,0.058967,-0.024849,-0.013552,-0.058307,-0.016519,0.043552,0.053193,-0.014163,0.005292,0.011784,-0.00593,0.002724,0.007621,0.033157,-0.0114,-0.019388,-0.010306,0.018793,0.023673,-0.072264,-0.036255,0.033611,0.045506,0.0484,-0.048904,0.012851,-0.019248,0.014986,-0.025766,0.005721,-0.013907,0.02473,-0.007843,0.009124,0.010899,0.00165,0.018662,-0.062365,-0.03094,-0.000721,-0.02832,-0.039225,-0.005273,0.016225,0.001435,-0.008131,-0.000909,0.018911,0.058345,-0.051399,0.007472,0.034407,-0.011977,-0.053851,0.014743,-0.001158,-0.033954,0.015461,0.008859,0.025254,0.015465,-0.007737,0.012149,-0.007004,-0.009896,-0.006823,0.009185,-0.000243,-0.004838,-0.003156,-0.011639,-0.005549,0.029343,-0.028318,-0.004108,-0.008257,-0.003521,-0.002559,-0.016126,0.018132,0.015957,0.016032,0.017635,0.007596,-0.008566,-0.004381,0.011576,-0.022654,0.00576,0.002186,-0.003539,-0.020389,0.010977,0.016467,0.013106,-0.019476,0.00439,0.003231,0.00266,0.024151,0.012313,-0.019059,-0.016002,-0.006641,-0.003346,-0.003983,0.007706,0.003996,-0.037331,-0.008719,-0.000862,0.006037,0.006583,-0.02056,-0.000352,0.002026,-0.012361,-0.001055,0.000405,0.014526,-0.002944,0.005746,0.013176,-0.01219,-0.029298,-0.010114,0.022513,0.00867,0.005417,0.004211,-0.007197,-0.013927,0.024285,0.015334,0.016123,-0.010833,-0.007963,-0.012546,0.034626,0.020455,-0.008502,-0.020245,-0.009117,0.014781,-0.027964,-0.014698,0.003759,-0.021213,0.005464,0.035614,-0.02404,0.011326,-0.013554,-0.029464,0.004807,-0.002612,0.029757,0.008931,0.010555,-0.036053
2,***В ЛУЧАХ СЛАВЫ (UNV) D,2,40,3e-06,4e-06,-6.139519e-07,-7e-06,5e-06,1e-06,-2e-06,-8e-06,-5.604289e-07,-3e-06,6e-06,8e-06,-2e-06,2e-06,6e-06,-2e-06,2e-06,-1.6e-05,-6e-06,8e-06,2e-06,2.7e-05,-1.1e-05,-6e-06,-1.2e-05,2e-06,-3e-06,1e-06,-6e-06,-1.2e-05,-1.1e-05,2e-05,-1e-05,-4e-06,5.4e-05,-9e-06,8e-06,9.735683e-07,-5e-06,9e-06,-6e-06,-1.2e-05,7e-06,-1.2e-05,-1.2e-05,2e-06,-1.4e-05,-1.6e-05,-6e-06,-8e-06,1e-05,-6e-06,1.8e-05,-9e-06,-2e-06,-1.2e-05,-9e-06,-4.242091e-07,-3e-06,-9e-06,8e-06,1.7e-05,-1.2e-05,-9e-06,-1.6e-05,-1.1e-05,2.6e-05,1.6e-05,-2.4e-05,1.6e-05,-1.2e-05,-1.5e-05,5e-06,-2.7e-05,1.3e-05,-2.9e-05,-2.5e-05,-4e-05,-2e-06,4e-05,-3.9e-05,7.3e-05,-3.3e-05,-8e-06,2.1e-05,-2.6e-05,1.4e-05,3.1e-05,3.7e-05,6.5e-05,1.1e-05,-6.8e-05,-2.7e-05,-4.4e-05,3.1e-05,4.1e-05,2.1e-05,9.9e-05,1.3e-05,6.7e-05,-2.2e-05,1.2e-05,-3.9e-05,6.9e-05,1.6e-05,3e-05,-1.6e-05,-9.1e-05,4e-06,-1.4e-05,1.4e-05,2.5e-05,-2e-06,1.6e-05,-4.3e-05,5.5e-05,2.7e-05,1.1e-05,9.6e-05,1.4e-05,0.000132,5e-05,-1.6e-05,1.3e-05,5.5e-05,5.1e-05,-7.1e-05,-4.1e-05,5.4e-05,9e-05,-9.6e-05,7.7e-05,9.9e-05,1.8e-05,-6e-06,-8.4e-05,-8.1e-05,0.000111,-2.2e-05,-3.9e-05,-5.8e-05,2.5e-05,0.000167,-6.3e-05,-0.000107,3e-06,-2.3e-05,-4.7e-05,-7.5e-05,-4.5e-05,2.7e-05,-9.1e-05,-5.4e-05,-5e-06,-4.6e-05,0.000164,5.7e-05,5.2e-05,-6.1e-05,-7.6e-05,-7.9e-05,6.1e-05,1e-06,2.4e-05,-0.000114,5.6e-05,6e-06,-0.000196,0.000237,-0.000145,7.4e-05,-2.9e-05,0.000156,0.000297,-7.2e-05,8e-05,-4.1e-05,9e-05,-0.000179,0.00026,0.000118,0.000119,0.000119,2.3e-05,7.9e-05,-0.000198,-0.000132,-4.4e-05,-2.6e-05,-0.000209,-1.3e-05,0.000127,-0.000131,0.000101,-1e-05,4e-06,0.000233,0.000168,0.000165,-0.000189,-0.000137,0.00032,7e-06,-1.1e-05,0.000347,0.000101,1.5e-05,-0.000134,-0.000404,-0.000156,7.2e-05,-0.000293,6.4e-05,-0.00019,-0.000181,0.000273,-6e-06,0.000641,-0.000184,0.00045,-0.000194,-0.000278,0.000269,4.9e-05,-9e-06,-0.000204,-8.1e-05,-0.000268,-0.000121,0.000468,-0.000563,-0.000366,-0.000355,0.000603,3e-06,3.7e-05,-0.000333,0.000196,-0.000261,0.000385,-0.000185,-0.000822,0.000218,-0.000228,-0.000182,0.000467,0.000104,0.000277,-0.000365,0.000236,-0.000468,0.000128,0.000121,0.000452,-0.000302,-0.000551,-0.000263,-0.000989,0.000298,0.000813,-0.000169,-0.000276,-0.000168,-0.00049,0.000707,0.000111,0.000184,-0.000147,-0.000367,0.000371,-0.000257,-0.001299,-0.000393,0.000285,0.000316,-0.000205,0.00023,0.000188,-0.00118,-0.000531,0.000217,0.000224,-0.000233,0.000208,0.000971,0.000946,0.00074,-0.000848,0.000495,-7.2e-05,-0.000881,-0.001112,6.7e-05,0.000391,-0.000413,0.001365,0.000524,0.000173,-0.000476,-0.000823
3,***ГОЛУБАЯ ВОЛНА (Univ) D,3,40,0.000141,0.000577,0.0009436656,-1.1e-05,0.000652,0.000343,0.000444,-0.000324,0.0004831048,0.001831,0.000295,-0.001242,-0.000144,0.000731,0.001084,0.001503,-0.00167,0.000105,-0.002996,-0.000378,0.001857,0.002993,0.004607,-0.004333,-0.001101,0.006599,0.001826,0.002912,0.000266,0.00119,0.000323,-0.000208,-0.000681,0.000313,0.000179,0.000823,-0.001247,-0.00108599,0.000241,0.003438,0.000321,0.002306,-0.0011,-0.001984,-0.003048,-0.002858,-0.001518,-0.003285,-0.00424,0.011884,0.008569,0.000962,0.002149,-0.005463,0.001963,-6.2e-05,0.000291,0.002231716,-0.001239,0.003596,-0.001342,-0.000201,0.000639,0.001181,-0.001407,-0.00256,0.000294,9.4e-05,-9.3e-05,0.000638,-0.001074,-0.000492,0.001551,0.002837,-0.004614,0.003329,-0.002232,-0.000205,-0.000878,-0.00634,0.000489,-0.000233,0.000737,-0.007452,0.004882,0.002109,0.003779,-0.002695,0.004981,0.000408,0.00135,-0.004711,0.004678,0.001286,-0.00385,0.000877,-0.003018,0.003037,0.003688,-0.004534,0.005008,0.007993,0.003552,0.005236,-0.006194,-0.005718,0.007705,0.004285,-0.004712,0.007208,-0.004189,-0.002156,-0.001952,0.004564,-0.004977,-0.002026,-0.001583,0.002798,0.00111,0.002534,-0.012067,-0.007336,-0.004927,0.000931,-0.003177,-0.005327,-0.000627,-0.002225,0.005805,0.0012,-0.000551,9.5e-05,-0.000888,-0.000141,0.003339,0.002674,-0.000627,-0.006383,0.003142,0.000932,-0.000261,0.002692,-0.000106,0.002394,-0.001239,0.000402,0.003907,0.00032,-4.7e-05,-0.001592,0.001364,-0.00148,-0.003288,-0.000509,-0.00097,0.00012,0.00365,-0.000256,-0.000718,0.000229,0.001319,-0.002306,-0.000412,-0.001214,-0.00157,-0.000369,0.000705,0.002182,-0.000594,-0.000814,-2.1e-05,0.003003,-0.00225,-0.00219,0.000633,0.001939,0.001776,-0.00196,0.00144,-0.000765,0.002794,0.003329,0.004969,0.000282,0.000724,0.000553,0.00722,0.003351,0.002872,0.003271,0.00106,0.001867,0.000226,-0.001244,0.001111,-0.002691,-0.003084,-0.00125,-0.001023,-0.003454,-0.002419,0.000808,-0.002185,-9.8e-05,-0.001276,0.004027,0.00133,0.000998,0.003552,-0.000479,-2.3e-05,0.00403,-1e-06,0.004974,-0.003122,-0.001656,-0.002873,0.001925,0.002078,0.000153,-0.000726,0.001401,-0.000151,0.000357,-0.002408,-0.001778,0.003111,0.003655,-0.002753,-0.000694,0.000346,0.001899,-0.004788,-0.002772,0.000152,-0.002126,0.004797,-0.004534,0.00421,0.000738,0.001367,-0.001854,0.000406,0.001776,-0.004658,-0.000532,-0.003503,0.001646,0.000665,0.001178,-0.006822,-0.006971,0.004443,0.002668,0.002886,0.004213,0.000448,0.000725,-0.002257,-0.004798,0.003627,0.002024,-0.002795,0.00211,0.004822,-0.00559,0.000425,-0.002324,-0.000352,-0.003491,-0.000135,0.002037,0.002583,0.001392,-0.003179,-0.004921,-0.00076,-0.004287,-0.002408,0.002521,0.00494,0.001048,0.001829,0.004277,-9.1e-05,0.003737,0.001641,-0.00339,0.00212,0.005542,0.001935,0.002332,-0.002535,-0.000144,-0.00234,0.000407,-0.000345,0.00108,-0.003409,0.001682
4,***КОРОБКА (СТЕКЛО) D,4,40,0.001315,0.000337,0.001050196,4e-06,1.7e-05,0.000594,0.001523,-0.002326,-0.00184512,-0.000206,0.006114,0.005612,-0.000583,0.00117,0.00419,0.001533,0.003192,0.000599,-0.004061,0.005021,0.000649,0.004079,-0.001799,0.000961,0.002706,-0.002363,0.003184,0.007353,0.00313,0.003727,-0.00142,-0.00125,0.00427,-0.005122,0.002751,-0.000749,0.000756,-0.001272249,0.002223,0.000499,-0.004033,-0.00045,-0.00571,0.006392,0.001467,-0.001412,-0.000438,-0.00082,0.007481,0.004086,-0.004851,0.002707,0.005221,0.00037,-0.008136,0.00678,0.002523,-9.388109e-06,-0.002359,0.008064,0.001985,-0.004071,-0.000806,2.4e-05,0.007489,0.001091,-0.008933,0.003086,-0.004435,-0.000379,-0.001213,0.001548,0.001673,-0.010279,0.003243,0.002068,-0.006116,0.008902,0.000884,0.015379,0.002919,0.004876,0.007518,0.007177,-0.004141,0.005828,0.018976,0.010952,0.01136,-0.000942,0.009235,0.005742,0.018278,-0.018571,-0.000948,-0.00186,0.012881,0.009503,0.007419,-0.002524,0.007808,0.002436,0.006984,-0.014288,0.002445,-0.005568,0.028443,0.033273,-0.013169,-0.010432,0.008295,-0.009296,0.038504,-0.034975,0.014663,-0.037073,-0.018059,0.022396,-0.022541,-0.032612,0.031876,0.06316,-0.003779,-0.022462,0.008719,-0.027098,0.023193,-0.032014,-0.027932,0.041657,-0.008983,0.012029,-0.015592,-0.009331,0.044077,0.005454,-0.025524,-0.054258,-0.009161,-0.05273,0.027188,0.003047,0.002667,-0.012409,0.000929,0.011137,0.0181,-0.009168,-0.006445,-0.000273,-0.014188,0.002771,-0.005962,0.004824,-0.001232,-0.0158,-0.005747,0.003972,0.017593,-0.018852,-0.008748,-0.000961,0.012147,0.004304,-0.011865,-0.004635,0.001301,0.000896,-0.003323,0.003301,-0.01296,0.011218,-0.005215,0.002375,0.001367,-0.008689,0.002564,-0.007655,-0.008821,-0.003176,-0.003071,-0.013218,-0.00456,0.008378,0.011413,-0.007118,0.002307,0.004721,0.014071,-0.012133,0.012656,0.008294,-0.007102,-0.001807,-0.001238,-0.000762,-0.017603,0.003257,-0.001288,-0.006236,0.002375,-0.003392,-0.006549,-0.000473,-0.006263,0.003811,0.017863,-0.003918,0.005753,0.007393,0.012715,0.010678,-5.2e-05,0.001925,-8.9e-05,-0.001868,-0.016565,0.003831,-0.011473,0.003709,-0.009845,-0.008939,0.00133,-0.008167,-0.003088,-0.001033,-0.003556,-0.0069,2.4e-05,-0.000254,-0.008039,-0.013229,0.000986,-0.001306,-0.010221,-7.3e-05,-0.009054,0.00367,-0.013605,-0.007757,-0.003928,0.012177,-0.006977,-0.012728,0.0151,0.026285,0.004947,0.002888,-0.003515,-0.005043,-0.010733,-0.001617,0.025141,-0.002485,-0.009011,0.014925,0.005307,0.004645,0.004761,0.005509,0.003643,-0.00303,-0.006755,-0.004527,0.013097,-0.011348,-0.014291,0.010261,-0.012549,0.000849,-0.005128,-0.005612,-0.003347,0.013093,0.006318,0.001392,-0.006956,0.003608,-0.00993,-0.019906,0.005093,0.00966,-0.008438,-0.01328,-0.002472,-0.013667,-0.002042,-0.01243,0.004134,-0.010155,0.003927,0.009936,-0.015988,0.012645,-0.002523,0.001052,0.007305,0.000453,0.005982,0.003222


In [12]:
# Add means of item_cnt_day
def add_means(train, val, test, months=NMONTHS):
    """
    train - base train dataframe
    val - base validation dataframe (train, shifted by 1 month further)
    test - base test dataframe (val, shifted by 1 month further)
    This function add sliding means by last n column values (last n months)
    """
    # Define filters for price and item count features separation
    train_filter_item_cnt_col = [col for col in train if col.startswith('item_cnt_day')]
    val_filter_item_cnt_col = [col for col in val if col.startswith('item_cnt_day')]
    test_filter_item_cnt_col = [col for col in test if col.startswith('item_cnt_day')]
    
    # Get 6 separate dataframes
    train_item_cnt_df = train[train_filter_item_cnt_col]
    val_item_cnt_df = val[val_filter_item_cnt_col]
    test_item_cnt_df = test[test_filter_item_cnt_col]
    
    # Add features to train
    np_train_count = np.array(train_item_cnt_df)
    
    for i in range(1, months+1):
        train['mean_cnt_' + str(i)] = pd.Series(np_train_count[:, -i:].mean(axis=1), index=train.index)
        
    # Add features to validation
    np_val_count = np.array(val_item_cnt_df)
    
    for i in range(1, months+1):
        val['mean_cnt_' + str(i)] = pd.Series(np_val_count[:, -i:].mean(axis=1), index=train.index)
        
    # Add features to test
    np_test_count = np.array(test_item_cnt_df)
    
    for i in range(1, months+1):
        test['mean_cnt_' + str(i)] = pd.Series(np_test_count[:, -i:].mean(axis=1), index=test.index)
        
    return train, val, test

In [13]:
%%time
tr_train, tr_val, tr_test = add_means(tr_train, tr_val, tr_test)
print(tr_train.shape, tr_val.shape, tr_test.shape)

(214200, 48) (214200, 48) (214200, 48)
Wall time: 429 ms


In [14]:
# Add some mean encoded features
def get_month_mean_encodings(train, val, test):
    """
    train - base train dataframe
    val - base validation dataframe (train, shifted by 1 month further)
    test - base test dataframe (val, shifted by 1 month further)
    Function calculate cumsum divided by cumcount for current shop and current item saled quantities,
    also it makes the same transformations for current month price
    We aggregate info by months
    """
    # Drop indexes
    train.reset_index(inplace=True)
    val.reset_index(inplace=True)
    test.reset_index(inplace=True)
    
    # Get dataframes with sold item quantities, grouped by shop and item
    
    # Define item count column filters
    train_filter_cnt_col = [col for col in train if col.startswith('item_cnt_day')]
    val_filter_cnt_col = [col for col in val if col.startswith('item_cnt_day')]
    test_filter_cnt_col = [col for col in test if col.startswith('item_cnt_day')]
    
    # Aggregate train by shop and item
    train_shop_agg_cnt_df = train.groupby(['shop_id'])[train_filter_cnt_col].agg(['sum'])
    train_item_agg_cnt_df = train.groupby(['item_id'])[train_filter_cnt_col].agg(['sum'])
    
    # Aggregate validation set by shop and item
    val_shop_agg_cnt_df = val.groupby(['shop_id'])[val_filter_cnt_col].agg(['sum'])
    val_item_agg_cnt_df = val.groupby(['item_id'])[val_filter_cnt_col].agg(['sum'])
    
    # Aggregate test set by shop and item
    test_shop_agg_cnt_df = test.groupby(['shop_id'])[test_filter_cnt_col].agg(['sum'])
    test_item_agg_cnt_df = test.groupby(['item_id'])[test_filter_cnt_col].agg(['sum'])
    

    # Now we get train cumulative sum of this features on rows
    train_shop_agg_cnt_df = train_shop_agg_cnt_df.cumsum(axis=1)
    train_item_agg_cnt_df = train_item_agg_cnt_df.cumsum(axis=1)
    
    # Get validation cumulative sum of this features on rows
    val_shop_agg_cnt_df = val_shop_agg_cnt_df.cumsum(axis=1)
    val_item_agg_cnt_df = val_item_agg_cnt_df.cumsum(axis=1)

    # Get test cumulative sum of this features on rows
    test_shop_agg_cnt_df = test_shop_agg_cnt_df.cumsum(axis=1)
    test_item_agg_cnt_df = test_item_agg_cnt_df.cumsum(axis=1)

    # Calculate arithmetic progression (1, 2, 3, 4, ...) with months length
    np_to_divide = np.array(train_item_agg_cnt_df.columns.labels[0] + 1, dtype=float)
    
    # Divide train cumulative sum by column number (np.cumcount emulation)
    train_shop_agg_cnt_df = train_shop_agg_cnt_df / np_to_divide
    train_item_agg_cnt_df = train_item_agg_cnt_df / np_to_divide
    
    # Divide validation cumulative sum by column number (np.cumcount emulation)
    val_shop_agg_cnt_df = val_shop_agg_cnt_df / np_to_divide
    val_item_agg_cnt_df = val_item_agg_cnt_df / np_to_divide
    
    # Divide test cumulative sum by column number (np.cumcount emulation)
    test_shop_agg_cnt_df = test_shop_agg_cnt_df / np_to_divide
    test_item_agg_cnt_df = test_item_agg_cnt_df / np_to_divide
    
    
    # Move index to columns of train daraframes
    train_shop_agg_cnt_df.reset_index(inplace=True)
    train_item_agg_cnt_df.reset_index(inplace=True)
    
    # Move index to columns of validation daraframes
    val_shop_agg_cnt_df.reset_index(inplace=True)
    val_item_agg_cnt_df.reset_index(inplace=True)
    
    # Move index to columns of test daraframes
    test_shop_agg_cnt_df.reset_index(inplace=True)
    test_item_agg_cnt_df.reset_index(inplace=True)

    
    # Join all train mean encoding features with initial data
    train_ext = pd.merge(train, train_shop_agg_cnt_df, how='left', left_on='shop_id', right_on='shop_id')
    train_ext = pd.merge(train_ext, train_item_agg_cnt_df, how='left', left_on='item_id', right_on='item_id')

    # Join all validation mean encoding features with initial data
    val_ext = pd.merge(val, val_shop_agg_cnt_df, how='left', left_on='shop_id', right_on='shop_id')
    val_ext = pd.merge(val_ext, val_item_agg_cnt_df, how='left', left_on='item_id', right_on='item_id')
                       
    # Join all test mean encoding features with initial data
    test_ext = pd.merge(test, test_shop_agg_cnt_df, how='left', left_on='shop_id', right_on='shop_id')
    test_ext = pd.merge(test_ext, test_item_agg_cnt_df, how='left', left_on='item_id', right_on='item_id')

    return train_ext, val_ext, test_ext

In [15]:
%%time
tr_train, tr_val, tr_test = get_month_mean_encodings(tr_train, tr_val, tr_test)

# Drop duplicate column
tr_train.drop(['item_id_x'], axis=1, inplace=True)
tr_val.drop(['item_id_x'], axis=1, inplace=True)
tr_test.drop(['item_id_x'], axis=1, inplace=True)

print(tr_train.shape, tr_val.shape, tr_test.shape)

KeyError: "labels ['item_id_x'] not contained in axis"

In [16]:
tr_train.head()

Unnamed: 0,shop_id,item_id,item_cnt_day_8_x,item_cnt_day_9_x,item_cnt_day_10_x,item_cnt_day_11_x,item_cnt_day_12_x,item_cnt_day_13_x,item_cnt_day_14_x,item_cnt_day_15_x,item_cnt_day_16_x,item_cnt_day_17_x,item_cnt_day_18_x,item_cnt_day_19_x,item_cnt_day_20_x,item_cnt_day_21_x,item_cnt_day_22_x,item_cnt_day_23_x,item_cnt_day_24_x,item_cnt_day_25_x,item_cnt_day_26_x,item_cnt_day_27_x,item_cnt_day_28_x,item_cnt_day_29_x,item_cnt_day_30_x,item_cnt_day_31_x,mean_cnt_1,mean_cnt_2,mean_cnt_3,mean_cnt_4,mean_cnt_5,mean_cnt_6,mean_cnt_7,mean_cnt_8,mean_cnt_9,mean_cnt_10,mean_cnt_11,mean_cnt_12,mean_cnt_13,mean_cnt_14,mean_cnt_15,mean_cnt_16,mean_cnt_17,mean_cnt_18,mean_cnt_19,mean_cnt_20,mean_cnt_21,mean_cnt_22,mean_cnt_23,mean_cnt_24,"('item_cnt_day_8', 'sum')_x","('item_cnt_day_9', 'sum')_x","('item_cnt_day_10', 'sum')_x","('item_cnt_day_11', 'sum')_x","('item_cnt_day_12', 'sum')_x","('item_cnt_day_13', 'sum')_x","('item_cnt_day_14', 'sum')_x","('item_cnt_day_15', 'sum')_x","('item_cnt_day_16', 'sum')_x","('item_cnt_day_17', 'sum')_x","('item_cnt_day_18', 'sum')_x","('item_cnt_day_19', 'sum')_x","('item_cnt_day_20', 'sum')_x","('item_cnt_day_21', 'sum')_x","('item_cnt_day_22', 'sum')_x","('item_cnt_day_23', 'sum')_x","('item_cnt_day_24', 'sum')_x","('item_cnt_day_25', 'sum')_x","('item_cnt_day_26', 'sum')_x","('item_cnt_day_27', 'sum')_x","('item_cnt_day_28', 'sum')_x","('item_cnt_day_29', 'sum')_x","('item_cnt_day_30', 'sum')_x","('item_cnt_day_31', 'sum')_x","(item_cnt_day_8_y, sum)","(item_cnt_day_9_y, sum)","(item_cnt_day_10_y, sum)","(item_cnt_day_11_y, sum)","(item_cnt_day_12_y, sum)","(item_cnt_day_13_y, sum)","(item_cnt_day_14_y, sum)","(item_cnt_day_15_y, sum)","(item_cnt_day_16_y, sum)","(item_cnt_day_17_y, sum)","(item_cnt_day_18_y, sum)","(item_cnt_day_19_y, sum)","(item_cnt_day_20_y, sum)","(item_cnt_day_21_y, sum)","(item_cnt_day_22_y, sum)","(item_cnt_day_23_y, sum)","(item_cnt_day_24_y, sum)","(item_cnt_day_25_y, sum)","(item_cnt_day_26_y, sum)","(item_cnt_day_27_y, sum)","(item_cnt_day_28_y, sum)","(item_cnt_day_29_y, sum)","(item_cnt_day_30_y, sum)","(item_cnt_day_31_y, sum)"
0,5,5037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,2.0,0.0,0.0,0.0,1.0,1.0,1.0,3.0,3.0,2.0,1.666667,1.5,1.2,1.0,0.857143,1.0,1.111111,1.1,1.0,1.0,0.923077,0.857143,0.8,0.75,0.705882,0.666667,0.631579,0.6,0.571429,0.545455,0.521739,0.5,446.0,469.0,504.666656,612.75,636.599976,632.666687,653.428589,641.875,645.555542,654.400024,675.0,701.916687,713.692322,722.071411,743.266663,796.3125,812.588257,813.944458,817.473694,821.049988,824.523804,825.954529,827.956543,844.208313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.076923,16.714285,19.133333,30.4375,33.529411,33.444443,33.157894,32.75,35.095238,37.81818,38.47826,41.75
1,5,5320,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,446.0,469.0,504.666656,612.75,636.599976,632.666687,653.428589,641.875,645.555542,654.400024,675.0,701.916687,713.692322,722.071411,743.266663,796.3125,812.588257,813.944458,817.473694,821.049988,824.523804,825.954529,827.956543,844.208313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5,5233,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2.0,0.0,1.0,1.0,0.5,1.0,1.5,1.2,1.0,0.857143,0.75,0.666667,0.6,0.545455,0.5,0.461538,0.428571,0.4,0.375,0.352941,0.333333,0.315789,0.3,0.285714,0.272727,0.26087,0.25,446.0,469.0,504.666656,612.75,636.599976,632.666687,653.428589,641.875,645.555542,654.400024,675.0,701.916687,713.692322,722.071411,743.266663,796.3125,812.588257,813.944458,817.473694,821.049988,824.523804,825.954529,827.956543,844.208313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,3.571429,8.318182,9.434783,15.25
3,5,5232,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.5,0.333333,0.25,0.2,0.166667,0.142857,0.125,0.111111,0.1,0.090909,0.083333,0.076923,0.071429,0.066667,0.0625,0.058824,0.055556,0.052632,0.05,0.047619,0.045455,0.043478,0.041667,446.0,469.0,504.666656,612.75,636.599976,632.666687,653.428589,641.875,645.555542,654.400024,675.0,701.916687,713.692322,722.071411,743.266663,796.3125,812.588257,813.944458,817.473694,821.049988,824.523804,825.954529,827.956543,844.208313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.666667
4,5,5268,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,446.0,469.0,504.666656,612.75,636.599976,632.666687,653.428589,641.875,645.555542,654.400024,675.0,701.916687,713.692322,722.071411,743.266663,796.3125,812.588257,813.944458,817.473694,821.049988,824.523804,825.954529,827.956543,844.208313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Extract city column from shops dataframe
shop_name_split = [el.split(' ') for el in shops.shop_name.values]
shops['city'] = pd.Series([el[0] for el in shop_name_split], index=shops.index)
shops.head(10)

Unnamed: 0,shop_name,shop_id,city
0,"!Якутск Орджоникидзе, 56 фран",0,!Якутск
1,"!Якутск ТЦ ""Центральный"" фран",1,!Якутск
2,"Адыгея ТЦ ""Мега""",2,Адыгея
3,"Балашиха ТРК ""Октябрь-Киномир""",3,Балашиха
4,"Волжский ТЦ ""Волга Молл""",4,Волжский
5,"Вологда ТРЦ ""Мармелад""",5,Вологда
6,"Воронеж (Плехановская, 13)",6,Воронеж
7,"Воронеж ТРЦ ""Максимир""",7,Воронеж
8,"Воронеж ТРЦ Сити-Парк ""Град""",8,Воронеж
9,Выездная Торговля,9,Выездная


In [18]:
# Add city feature
tr_train = pd.merge(tr_train, shops, how='left', left_on='shop_id', right_on='shop_id')
tr_val = pd.merge(tr_val, shops, how='left', left_on='shop_id', right_on='shop_id')
tr_test = pd.merge(tr_test, shops, how='left', left_on='shop_id', right_on='shop_id')
print(tr_train.shape, tr_val.shape, tr_test.shape)

# Drop unnecessary shop_name column
tr_train.drop(['shop_name'], axis=1, inplace=True)
tr_val.drop(['shop_name'], axis=1, inplace=True)
tr_test.drop(['shop_name'], axis=1, inplace=True)
print(tr_train.shape, tr_val.shape, tr_test.shape)

(214200, 100) (214200, 100) (214200, 100)
(214200, 99) (214200, 99) (214200, 99)


In [19]:
# Factorize city name
tr_train['city'] = tr_train['city'].factorize()[0]
tr_val['city'] = tr_val['city'].factorize()[0]
tr_test['city'] = tr_test['city'].factorize()[0]

In [20]:
# Add tf-idf item name features
tr_train = tr_train.join(items, how='left', on='item_id', rsuffix='_itm')
tr_val = tr_val.join(items, how='left', on='item_id', rsuffix='_itm')
tr_test = tr_test.join(items, how='left', on='item_id', rsuffix='_itm')

# Drop useless columns
tr_train.drop(['item_name', 'item_id_itm'], axis=1, inplace=True)
tr_val.drop(['item_name', 'item_id_itm'], axis=1, inplace=True)
tr_test.drop(['item_name', 'item_id_itm'], axis=1, inplace=True)

print(tr_train.shape, tr_val.shape, tr_test.shape)

(214200, 400) (214200, 400) (214200, 400)


In [21]:
tr_train.iloc[:, -305:].head()

Unnamed: 0,"(item_cnt_day_29_y, sum)","(item_cnt_day_30_y, sum)","(item_cnt_day_31_y, sum)",city,item_category_id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299
0,37.81818,38.47826,41.75,0,19,0.079082,0.002221,0.020591,0.19021,0.019453,0.285132,-0.22368,0.280355,0.210752,-0.050954,0.077036,0.080032,-0.163381,0.015022,0.112685,-0.061891,-0.049274,-0.04563,-0.017297,-0.021274,0.027726,0.019591,-0.019144,0.017491,-0.043102,0.006013,-0.015368,-0.023692,0.003968,-0.022937,0.001272,-0.004655,-0.035973,-0.054491,-0.022236,-0.033944,-0.022346,-0.006857,0.005182,-0.012873,0.021376,-0.017513,0.01123,-0.013015,-0.015055,-0.102259,0.022022,-0.04076,0.002774,-0.019,-0.025949,-0.022804,0.005145,-0.026562,-0.013219,0.014977,0.011392,0.011584,0.021308,-0.000551,0.012479,0.015555,0.019622,0.000181,0.005953,-0.003875,-0.014303,0.020001,-0.063006,0.022029,-0.017804,-0.004072,-0.005042,-0.013528,-0.008939,0.043461,-0.04372,-0.022917,0.023542,-0.030945,-0.003825,0.010244,-0.021132,0.02314,-0.022852,0.017399,0.005681,-0.028971,-0.02739,-0.006911,0.021308,0.009292,-0.01735,-0.000147,-0.003343,-0.006062,0.009291,0.005816,-0.041124,0.015491,0.001717,-0.029106,-0.019643,-0.003939,0.016604,-0.002345,-0.002582,-0.043976,0.019029,-0.003705,-0.006136,-0.005814,-0.015199,0.01349,-0.021743,0.010248,-0.00471,-0.010066,0.004367,-0.016443,0.009904,-0.014784,0.013932,-0.009924,0.00712,-0.004698,0.010167,-0.015185,0.039849,0.022425,0.006819,0.021993,-0.038858,-0.015359,0.032628,-0.005043,-0.004266,-0.021465,-0.006647,-0.017699,-0.014367,0.006524,0.005086,0.024749,0.009834,0.012348,-0.018169,-0.006818,-0.014469,-0.007364,-0.025772,0.004186,0.016532,-0.024144,0.009763,0.002107,0.024014,0.01546,0.037792,0.031135,-0.036164,0.037063,0.015953,-0.012918,0.002167,0.006488,0.029965,-0.041069,0.006577,-0.041852,0.004722,0.00337,0.026239,0.009073,0.031113,-0.037213,-0.017332,0.040085,0.037619,-0.040824,0.01066,-0.027082,-0.006413,-0.012178,0.041732,-0.022731,-0.013488,-0.025942,-0.006729,-0.070825,-0.009103,-0.006033,0.007976,-0.005644,-0.003217,0.010186,-0.014129,-0.015376,-0.035695,0.040684,-0.050095,0.024948,-0.05332,-0.042195,0.02861,-0.002905,-0.039563,-0.006819,0.00365,0.003939,0.012286,-0.027344,0.027665,-0.005295,-0.011571,0.020388,-0.005933,0.008467,-0.035216,0.026737,-0.030921,-0.024796,-0.033065,-0.068224,0.016837,-0.038565,0.036673,-0.008856,-0.043477,0.00698,0.010261,0.048092,-0.030301,0.012408,-0.079709,0.035322,0.001409,-2.5e-05,-0.006253,-0.00756,-0.029462,0.034594,0.034168,0.04249,0.016874,0.140981,0.023657,-0.025401,0.016997,0.05294,0.036615,0.003027,0.020136,0.003438,0.03834,0.04519,-0.013041,0.008444,0.11937,-0.01055,-0.010067,-0.006573,0.068689,0.036465,-0.010815,0.00904,0.0145,0.0344,0.050978,0.039984,0.037195,0.037316,0.047825,-0.067076,-0.019021,-0.003786,0.061849,-0.022502,-0.041246,0.031843,0.011609,0.00115,0.023896,0.049366,-0.047553,0.060254,0.04626,-0.025228,-0.011181,0.008685,0.017789,-0.018557,0.000573,0.023761,0.032483,0.004814,-0.028143,0.054562,-0.055948,0.010696
1,0.0,0.0,0.0,0,55,0.035927,0.001655,0.005497,0.055388,0.003611,0.14116,0.115684,-0.042826,0.011119,-0.017594,-0.025579,0.001349,-0.027699,0.004448,-0.118488,-0.015326,0.005925,0.017333,-0.086258,-0.019349,-0.067467,-0.009481,-0.003078,-0.003444,-0.019041,-0.004846,-0.01925,-0.010544,-0.002954,0.034675,-0.051904,-0.024511,0.054976,0.013516,0.008994,0.006282,0.031044,0.014482,-0.001396,0.005816,0.017963,-0.055013,-0.036074,0.017794,0.034291,0.014899,-0.036596,0.022436,-0.00792,0.007018,-0.089615,0.096973,0.117243,-0.072194,0.049058,0.000762,-0.008179,0.226831,0.168226,-0.101559,-0.024694,-0.039228,0.006028,0.035702,0.00092,0.001238,-0.006417,0.011933,0.015311,-0.033765,-0.023292,0.065502,-0.009864,0.02289,0.011172,0.002206,0.044472,-0.058283,0.018583,0.015652,0.041987,0.007069,-0.01887,-0.007144,0.023855,-0.047085,0.007944,0.004217,-0.011086,-0.043698,0.02099,-0.012297,-0.04633,0.010383,0.028151,0.0417,-0.033557,0.009595,-0.040613,-0.02226,0.031006,-0.023048,-0.018403,0.006696,-0.037508,-0.02198,0.016599,0.010424,0.019586,0.019083,-0.007497,0.017611,-0.010388,-0.012519,0.005719,0.000913,-0.003863,0.027941,0.005552,0.030034,-0.000476,0.004736,-0.00786,0.036065,-0.022348,-0.035276,-0.016247,0.037543,-0.01321,0.04863,-0.03555,-0.013909,0.014694,-0.003964,0.022394,-0.008694,-0.010667,-0.048427,-0.007833,0.056489,0.139871,-0.023392,-0.157941,-0.150678,0.040308,-0.054095,-0.108782,0.01683,0.100809,-0.052604,-0.024755,-0.033528,0.058643,0.043714,0.071954,0.011329,-0.015972,0.001898,0.010594,-0.000132,-0.004168,-0.000989,-0.020646,0.028551,0.041694,0.012988,-0.040326,-0.007691,-0.042157,0.014866,0.017407,0.020933,-0.003098,0.011897,0.00424,-0.018438,-0.015597,-0.02021,0.026162,0.00244,0.034637,0.004406,-0.035159,-0.077123,-0.020413,-0.003067,0.030394,0.022985,-0.022887,-0.016111,0.059386,0.022911,0.005508,-0.008863,-0.008447,-0.002155,-0.014633,-0.014153,0.0241,-0.006991,0.001939,-0.02045,0.006266,0.011862,0.007159,-0.003019,-0.019578,0.004398,0.016391,0.016113,-0.002188,0.009887,-0.009145,0.020481,0.013753,-0.003158,0.016745,0.006399,-0.018635,-0.001044,-0.004638,-0.010103,-0.002701,-0.008007,0.002281,0.014895,-0.000336,0.006309,-0.008605,-0.007806,-0.006658,0.00842,-0.009185,0.00135,-0.002886,0.004174,0.013722,-0.007302,-0.000905,0.008302,0.022218,-0.010218,0.003382,0.016338,0.017963,0.015737,0.013873,0.00892,-0.000786,0.006684,-0.020996,0.010122,-0.011989,0.00114,0.005426,-0.023339,0.015429,0.005894,-0.009446,-0.014993,-0.009058,-0.016885,0.001515,-0.020024,-0.008461,-0.016685,0.004308,-0.000639,0.017495,0.004648,-0.016125,0.013247,0.024533,0.007185,0.025337,0.010921,0.004287,-0.013752,0.019848,-0.012371,-0.001633,0.014008,0.004465,0.023953,0.000528,0.001219,0.009501,0.004333,-0.0121,-0.004293,-0.00818,0.012084,0.007222,-0.014114,0.008591,-0.013691,0.015844,-0.005519,0.006485,-0.006548
2,8.318182,9.434783,15.25,0,19,0.23834,-0.005698,-0.035638,0.227311,0.021017,-0.183646,0.078323,0.071073,0.133965,-0.105218,0.030641,0.074185,-0.101883,0.016126,0.012808,-0.029131,0.002813,-0.024078,-0.032239,-0.036647,-0.024738,-0.000505,0.035632,-0.039545,-0.083784,-0.003929,-0.035447,0.125456,0.041329,-0.098174,-0.023917,0.002815,0.064047,-0.112503,0.032417,-0.051438,0.046593,-0.244249,0.220271,-0.249202,0.348367,0.39895,0.187542,-0.090083,0.06882,0.069374,-0.010624,0.043369,-0.005961,-0.00123,-0.043219,-0.005751,0.0145,0.012691,0.016175,-0.012911,0.012512,0.029699,-0.00734,0.020472,-0.047621,-0.017101,0.047695,0.023311,-0.011687,0.009905,-0.004455,-0.005584,-0.011278,0.004316,-0.017368,-0.003503,0.001786,-0.007477,0.001848,0.004532,-0.01055,0.003146,-0.005472,-0.00764,-0.004676,0.008571,-0.007363,0.004743,0.007433,0.006183,0.004602,0.014164,-0.015843,0.001446,0.008448,-0.008247,-0.014658,-0.008757,0.026819,0.013059,0.003597,0.00551,-0.00898,-0.004401,0.000561,0.002469,-0.008903,-0.013167,-0.010465,0.004449,-0.003466,-0.010054,0.001284,-0.012628,0.017155,0.007576,-0.023991,-0.001368,-0.005449,0.004431,-0.004937,-0.013072,-0.007596,-0.011113,-0.004703,-0.015561,0.017737,-0.001533,0.001564,-0.00381,0.002533,0.011329,0.006954,0.002351,-0.0056,0.008784,-0.001608,0.016569,0.002324,0.004258,0.018998,-0.008237,0.01308,-0.010048,-0.014518,-0.002266,-0.005494,0.006578,0.01764,0.00838,-0.018677,0.001413,0.004904,0.016794,-0.008699,0.008353,0.017349,-0.011362,0.015431,-0.006998,0.005637,0.005634,0.009613,0.011757,0.000417,-0.000198,-0.001699,-4.7e-05,-0.031426,-0.006941,0.00521,-0.009769,0.009287,-0.004709,0.011558,0.021913,-0.001601,0.031918,0.020207,-0.006788,-0.007833,0.025812,-0.005893,-0.00243,0.01175,0.041923,0.010472,-0.026628,-0.00843,0.002981,-0.006546,0.021362,0.029477,-0.031425,-0.01607,0.007754,0.017931,-0.03534,0.007389,0.009497,-0.005198,-0.003868,0.00034,-0.014746,0.015414,0.004048,-0.006265,0.006497,0.001137,-0.002135,-0.011674,0.02029,0.012236,0.00664,0.006754,0.000387,0.008554,-0.018857,0.023522,0.007629,-0.025494,-0.015075,-0.020142,0.009551,0.015873,0.021207,0.001728,-0.005631,-0.022725,-0.022423,0.017781,-0.05237,-0.024493,0.01481,0.011747,-0.027043,0.002707,0.023163,-0.001775,-0.037343,-0.00679,-0.019611,-0.015064,-0.000208,0.02071,-0.001181,0.012484,-0.022647,0.01077,0.005539,0.00028,0.004796,-0.045195,-0.00183,-0.013253,-0.013136,-0.003654,-0.015238,0.007748,-0.019288,0.000499,-0.030489,0.016015,-0.000377,-0.011421,-0.001595,0.008805,0.002377,-0.004883,0.002018,0.008173,0.004488,0.004699,-0.013414,-0.002693,-0.000494,-0.033746,-0.009947,0.00094,-0.00341,0.002733,0.015782,0.003479,0.000463,-0.015778,-0.003328,-0.012906,0.011368,-0.008399,-0.015037,0.006317,-0.006347,0.003764,-0.000735,0.000413,0.012644,-0.028473,-0.030405,-0.007984,0.018558,-0.002854,-0.001155,0.014952,-0.011517
3,0.0,0.0,2.666667,0,23,0.247804,-0.006887,-0.04115,0.322485,0.031554,-0.033653,-0.093625,-0.199454,-0.13046,-0.035947,-0.035828,-0.008418,0.058784,0.010328,-0.07129,-0.010181,0.001105,0.006767,0.011765,-0.010937,-0.043522,-0.002509,0.039883,-0.039467,-0.073701,-0.00055,-0.017977,0.082062,0.033198,-0.076113,-0.061805,-0.01028,0.086076,-0.043107,0.048106,-0.019038,0.037992,-0.161122,0.136935,-0.22473,0.300653,0.440018,0.225655,-0.115529,0.076411,-0.008824,0.016288,0.000166,0.007974,0.01271,-0.03777,-0.043097,-0.011655,0.045661,-0.01376,-0.024227,-0.004936,-0.020424,-0.034668,0.018392,0.013277,0.021899,0.042611,0.007983,-0.001041,0.007834,0.005536,-0.013991,-0.028247,0.006035,-0.0125,0.006067,6.2e-05,-0.016039,0.003907,0.005086,-0.011966,0.003541,0.011314,0.015871,0.005344,0.002533,-0.004786,0.009373,-0.021277,0.026556,0.033034,0.002243,-0.00231,-0.011968,-0.009579,0.001774,0.011339,-0.002996,0.003063,0.00054,0.009826,-0.005665,0.003369,-0.004653,-0.001498,0.008211,0.001842,-0.019862,0.008563,2.8e-05,0.004056,-0.0161,-0.015269,-0.013028,0.010721,-0.001501,-0.017924,-0.015721,-0.012963,0.001506,-0.00349,0.008935,0.006028,0.006094,0.001985,-0.025801,0.029957,0.018496,0.000909,0.001869,-0.006912,0.016213,-0.022159,0.008981,-0.017043,0.005174,0.009624,0.013931,0.006248,0.010267,-0.003462,-0.001292,0.010456,0.011875,0.034516,-0.012799,-0.003373,-0.029042,-0.013037,-0.004628,0.001682,-0.003866,0.0087,-0.006029,-0.010887,0.021826,-0.004333,0.031419,0.008606,0.002219,-0.007332,-0.022466,-0.010272,0.006887,0.013644,0.007899,0.004736,-0.034079,-0.003249,0.009544,-0.030374,-0.017638,-0.00474,-0.000134,-0.000243,0.015344,0.000546,0.013967,-0.000225,0.022528,-0.010387,-0.027483,0.007282,-0.001025,0.019578,-0.012058,0.018277,-0.016766,-0.001762,0.005357,0.016497,0.004554,0.014359,-0.022277,-0.003614,-0.012812,0.000618,0.008481,-0.013617,0.010127,0.018623,0.023349,-0.01103,-0.026177,0.027094,0.004866,-0.016218,-0.021307,-0.008698,-0.006159,0.020838,-0.01451,-0.009512,0.003091,-0.016549,-0.021429,0.015052,0.000601,-0.022055,0.032234,-0.023649,-0.030555,0.011425,0.000128,-0.02853,-0.00703,-0.028586,-0.015625,-0.004209,-0.020427,0.005909,-0.006736,0.000443,0.015313,-0.03103,0.031341,-0.027292,-0.019632,0.008231,0.001365,0.043877,0.023977,-0.013732,0.022337,0.027726,0.010903,-0.002961,-0.027177,0.053864,-0.04005,0.000435,0.028374,0.080569,0.002441,-0.008272,-0.02758,-0.024673,-0.023611,-0.031787,-0.04298,-0.00945,0.012626,-0.065,-0.010069,0.010975,0.037097,0.015431,-0.005535,0.039717,-0.014769,0.002192,-0.031764,-0.002687,-0.012023,0.030884,-0.013966,0.006329,0.029294,0.010643,0.015904,-0.002466,0.034663,-0.021732,-0.022537,-0.004006,-0.028191,0.0045,0.019761,-0.032632,-0.021575,-0.014104,-0.001513,0.009753,0.000784,0.012888,0.012114,-0.005169,-0.017907,-0.002717,0.014629,0.001293,-0.013155,-0.006749,-0.023866
4,0.0,0.0,0.0,0,20,0.248577,-0.007308,-0.034032,0.181913,0.01626,-0.141397,0.045264,0.031535,0.030507,-0.051538,0.000951,0.049959,0.000232,0.012322,-0.057142,0.032879,0.014279,-0.026335,0.030722,-0.015273,-0.056095,-0.033212,0.05044,-0.04432,-0.138048,-0.014345,-0.003363,0.112113,0.045593,0.042528,-0.137881,-0.049406,0.204223,-0.008522,0.112235,0.052798,0.068653,-0.278318,0.225039,-0.239364,0.26965,0.472366,0.163887,-0.056556,0.018676,-0.01144,-0.011665,-0.004563,0.064799,0.118672,-0.117262,-0.000445,-0.027707,0.095094,-0.047472,-0.039532,-0.069672,-0.014789,-0.067073,-0.051078,0.210602,0.110922,0.068296,-0.023899,-0.019637,-0.009189,-0.002675,-0.02682,-0.013244,-0.006928,0.00098,-0.009,-0.008487,0.024142,-0.004757,0.003877,0.020473,-0.014544,-0.0209,-0.010605,-0.017681,-0.000177,-0.020358,-0.027737,0.006718,-0.001737,-0.020484,0.013045,-0.044039,0.000169,0.025802,-0.015413,-0.032385,0.002869,-0.004836,0.008223,0.013479,0.010094,0.002201,-0.016075,-0.01895,0.008299,-0.013228,-0.023833,-0.001235,-0.003399,-0.002534,-0.002719,-0.01821,0.002006,0.022078,0.021336,-0.007183,-0.009185,-0.000561,-0.000245,0.009383,0.010845,-0.017489,0.008621,-0.007556,-0.012606,0.034482,0.028747,-0.000742,0.009261,-0.007406,0.0055,-0.020357,0.003098,0.00259,0.013089,-0.006494,0.022342,0.000353,0.006716,-0.000767,-0.004993,0.01301,-0.005605,-0.008012,-0.008571,-0.016421,-0.015465,0.019049,0.002822,-0.001139,0.007766,0.008767,0.01456,-0.001722,0.00168,-0.002878,0.02067,0.001881,-0.018206,-0.010539,-0.004258,0.012225,0.003192,0.00543,0.010159,-0.005186,-0.017968,-0.021036,-0.007632,-0.014311,0.000464,-0.003472,0.004186,0.005173,0.020565,-0.008647,0.009953,0.008354,0.003849,0.010962,0.020373,-0.012678,0.010075,0.022388,0.007608,0.010584,-0.026869,-0.012212,0.006113,-0.006023,0.012149,0.002068,-0.019694,-0.012516,0.008792,0.002004,0.00236,-0.003475,-0.005724,0.001057,-0.007613,0.002875,-0.011096,0.005296,-0.002356,-0.001164,0.00956,0.001118,-0.010613,-0.00693,0.004855,0.011469,0.002689,0.005779,0.00282,0.005615,-0.00279,0.004123,-0.005942,-0.003167,-0.003027,0.00162,-0.00851,0.01342,-0.005649,0.004084,0.008763,0.001535,-0.00627,0.002637,-0.006489,-0.00763,0.004364,0.012935,-0.020978,0.003128,0.004197,0.010203,-0.016055,-0.00212,-0.005788,-0.004107,-0.004241,0.00725,-0.022371,0.00095,0.006488,0.004567,0.001723,0.001113,-0.011104,-0.009353,-0.007819,0.001693,0.001925,-0.001396,0.004186,0.002986,-0.014541,-0.005928,-0.013714,0.002292,0.00279,0.001587,-0.005309,-0.002249,0.000506,-0.016192,-0.006563,0.022283,-0.004649,0.001594,-0.005512,-0.006816,0.000253,-0.011272,-0.00177,0.006049,0.003471,-0.004407,-0.004695,0.003535,0.00798,-0.002035,-0.005475,-0.005105,0.011568,0.003267,0.002879,4.7e-05,-0.005477,-0.001456,0.004835,-0.002899,-0.002566,-0.000134,-0.004104,-0.020138,0.003054,0.007063,0.003258,0.008193,-0.004893


In [24]:
# Prepare and scale data
np_train = np.array(tr_train)
np_val = np.array(tr_val)
np_test = np.array(tr_test)

tr_target_clip = np.clip(np.array(tr_target), 0, 20)
val_target_clip = np.clip(np.array(val_target), 0, 20)

In [33]:
tr_train.describe()

Unnamed: 0,shop_id,item_id,item_cnt_day_8_x,item_cnt_day_9_x,item_cnt_day_10_x,item_cnt_day_11_x,item_cnt_day_12_x,item_cnt_day_13_x,item_cnt_day_14_x,item_cnt_day_15_x,item_cnt_day_16_x,item_cnt_day_17_x,item_cnt_day_18_x,item_cnt_day_19_x,item_cnt_day_20_x,item_cnt_day_21_x,item_cnt_day_22_x,item_cnt_day_23_x,item_cnt_day_24_x,item_cnt_day_25_x,item_cnt_day_26_x,item_cnt_day_27_x,item_cnt_day_28_x,item_cnt_day_29_x,item_cnt_day_30_x,item_cnt_day_31_x,mean_cnt_1,mean_cnt_2,mean_cnt_3,mean_cnt_4,mean_cnt_5,mean_cnt_6,mean_cnt_7,mean_cnt_8,mean_cnt_9,mean_cnt_10,mean_cnt_11,mean_cnt_12,mean_cnt_13,mean_cnt_14,mean_cnt_15,mean_cnt_16,mean_cnt_17,mean_cnt_18,mean_cnt_19,mean_cnt_20,mean_cnt_21,mean_cnt_22,mean_cnt_23,mean_cnt_24,"('item_cnt_day_8', 'sum')_x","('item_cnt_day_9', 'sum')_x","('item_cnt_day_10', 'sum')_x","('item_cnt_day_11', 'sum')_x","('item_cnt_day_12', 'sum')_x","('item_cnt_day_13', 'sum')_x","('item_cnt_day_14', 'sum')_x","('item_cnt_day_15', 'sum')_x","('item_cnt_day_16', 'sum')_x","('item_cnt_day_17', 'sum')_x","('item_cnt_day_18', 'sum')_x","('item_cnt_day_19', 'sum')_x","('item_cnt_day_20', 'sum')_x","('item_cnt_day_21', 'sum')_x","('item_cnt_day_22', 'sum')_x","('item_cnt_day_23', 'sum')_x","('item_cnt_day_24', 'sum')_x","('item_cnt_day_25', 'sum')_x","('item_cnt_day_26', 'sum')_x","('item_cnt_day_27', 'sum')_x","('item_cnt_day_28', 'sum')_x","('item_cnt_day_29', 'sum')_x","('item_cnt_day_30', 'sum')_x","('item_cnt_day_31', 'sum')_x","(item_cnt_day_8_y, sum)","(item_cnt_day_9_y, sum)","(item_cnt_day_10_y, sum)","(item_cnt_day_11_y, sum)","(item_cnt_day_12_y, sum)","(item_cnt_day_13_y, sum)","(item_cnt_day_14_y, sum)","(item_cnt_day_15_y, sum)","(item_cnt_day_16_y, sum)","(item_cnt_day_17_y, sum)","(item_cnt_day_18_y, sum)","(item_cnt_day_19_y, sum)","(item_cnt_day_20_y, sum)","(item_cnt_day_21_y, sum)","(item_cnt_day_22_y, sum)","(item_cnt_day_23_y, sum)","(item_cnt_day_24_y, sum)","(item_cnt_day_25_y, sum)","(item_cnt_day_26_y, sum)","(item_cnt_day_27_y, sum)","(item_cnt_day_28_y, sum)","(item_cnt_day_29_y, sum)","(item_cnt_day_30_y, sum)","(item_cnt_day_31_y, sum)",city,item_category_id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299
count,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0,214200.0
mean,31.642857,11019.398627,0.205892,0.185588,0.207502,0.318478,0.195868,0.200019,0.229104,0.172759,0.199748,0.208492,0.206004,0.247241,0.225037,0.248922,0.3238,0.485752,0.315481,0.254888,0.256083,0.275196,0.263193,0.241839,0.244188,0.278137,0.278137,0.261162,0.254689,0.256839,0.260433,0.259802,0.25914,0.266126,0.290444,0.293914,0.289793,0.284345,0.281523,0.27613,0.271522,0.267125,0.261462,0.259688,0.256525,0.253644,0.256649,0.254498,0.251423,0.249572,1049.959473,998.276001,1018.826233,1169.863647,1135.01062,1116.012085,1124.266113,1094.365967,1085.666138,1083.330078,1080.262329,1095.115479,1097.595825,1111.059448,1146.807007,1230.613281,1253.113525,1256.572266,1257.838867,1265.519287,1268.906494,1266.547241,1266.758545,1273.731445,8.647451,8.221079,8.385884,9.633333,9.352136,9.193261,9.254521,9.004853,8.936614,8.918441,8.894468,9.018391,9.0517,9.152275,9.448846,10.13263,10.315984,10.337748,10.35976,10.419834,10.449942,10.436208,10.428407,10.48089,13.5,46.309608,0.06236285,0.027285,0.01508,0.016582,0.01355,0.016627,0.006135,-0.006313,0.010062,0.006813,0.01383,-0.011715,-0.004865,0.009641,-0.005039,-0.001772,0.002497,-0.000688,0.003488,0.001935,0.003719,0.003821,0.002663,0.000955,-0.00232,0.00266,0.008203,-0.004709,0.00193,0.005241,-0.002733,0.006766,0.002801,-0.000349,-0.000102,0.003506,0.003218,-0.005472,0.00352,0.005287,-0.00224,-0.000336,0.00398,0.00756,0.001315,-0.001873,0.001791,-0.000495,-0.001218,0.002597,-0.00257,0.001188,0.002129,0.00157,0.000202,0.000302,0.000842,0.003617,-0.000498,-0.004133,0.001987,0.004129,0.000348,-0.000493,0.000857,-0.000215,0.001428,-0.006545,0.000243,1.4e-05,0.001999,-0.0003801265,-0.001604,-0.002039059,0.001377,-0.001239,0.000929,0.000559,-0.00199,-0.002132915,-0.002115,-0.000893,0.000319,-0.001,0.001548541,0.000804,0.002821,-0.000229,-0.0009340325,-0.0006074462,-0.001339,-0.000671,-0.001275,0.000665,-0.001669,-0.001309,0.000755,-0.000969,0.002296,0.000128,0.003748,-0.000175,0.000158,-0.001404,0.001973,0.000264,-0.001775,-0.002605,0.002265,-0.001837,-0.000987,0.000459,-0.001626,-0.000951,0.001506,0.000394,-0.000179,-0.000157,-0.000576,-0.001138,0.000601,0.001231,0.001878,-6.9e-05,0.000373,4e-06,-0.000208,-0.000748,2.8e-05,-0.001614,0.000887,-0.000134,0.000704,0.001111754,0.000353,0.001341,0.000762,-4.218676e-05,-0.000346,0.0003715487,0.000243,0.000905,0.000612,-0.000872,-0.001143,-0.000502,-0.000321,0.0001659711,0.001388,-0.000139,-0.00129,-0.00022,-4.8e-05,0.001317,0.0009289046,0.000656,-0.000719,0.000731,-0.001307,0.001808,-0.000391,-0.001037201,0.000708,0.000409,-0.00252,-0.000971,0.000198,-4e-06,-0.000617,0.000866,-0.000725,-4e-06,-2.4e-05,0.000772,0.001289,-0.001004,-0.000487,-0.000632,-0.00026,0.000518,0.00124,-0.000385,4.9e-05,-0.000335,-0.000365,-0.00095,0.000475,0.000946,0.00017,7.8e-05,0.001068,-8.8e-05,0.000233,-0.000912,0.000771,0.0001787039,-0.000718,-0.001072,-0.001167,-0.000955,0.000107,-0.000433,0.0002025018,0.000579,-0.000519,0.000354,0.001137522,-0.0004,0.000638,-0.000296,9.9e-05,-0.000498,0.00057,-5.3324e-07,0.001072,-0.000414,-0.000497,-0.000645,-0.000773,0.000589,8.7e-05,0.00021,-0.000118,0.000108,-0.000472,-8.9e-05,-0.000104,0.00014,0.000624,0.000345,-0.000352,-9.1e-05,0.000379,0.0001004188,0.0009679207,-0.000284,0.001135,0.000637,-0.000462,0.000633,-0.000354,0.000377,0.000393,1.543692e-05,0.000775,0.000714,0.000306,0.00017,0.000327,0.0004,0.0003,2.2e-05,0.00023,0.000922,-0.000466,-0.000167,-0.000429,0.000173,0.000351,-0.00024,-0.000554,-0.00059,-0.000771,-0.000166,0.000463,0.000793,-0.0002,0.00087,-0.000268,-4.4e-05,-0.000181,0.000344,0.00018,1.9e-05,4.1e-05,0.000725,0.001146,-0.000588,0.000386,-8.3e-05,-0.000502,0.000395,0.000509,0.000303,0.000386,-0.000657,-0.000303,0.000309,0.000311,-0.000407,-0.000503,-0.000488,-0.000548,-2.1e-05,-0.000256,-0.000309,0.000403,-0.000587,-0.000329,0.000981
std,17.561933,6252.64459,4.710657,3.827024,4.099517,5.561073,3.108006,3.122812,3.477254,2.460417,2.758149,3.115632,2.651091,2.826971,2.993169,3.034121,4.221527,5.549481,4.070988,1.879012,1.7232,4.113914,3.82317,2.283569,2.143467,2.147027,2.147027,2.063035,2.080431,2.317985,2.443393,2.168737,2.038548,2.162386,2.480726,2.590768,2.595588,2.598966,2.584397,2.560132,2.56785,2.545215,2.515104,2.527154,2.52832,2.536433,2.649615,2.6942,2.718541,2.739775,898.86322,852.496033,857.278809,985.196533,950.874817,925.290833,915.569214,890.959106,871.638062,864.312805,850.656677,854.009033,851.911804,859.271484,880.651123,945.285522,961.912903,961.790222,961.920288,967.571045,969.282166,969.593384,966.891663,966.334656,130.073685,113.770996,106.676331,115.914604,108.761002,102.953781,100.393745,95.700043,92.341171,91.044128,89.00042,87.755341,86.841248,86.038887,85.983406,89.372726,89.67382,86.99028,83.991905,82.682266,81.199722,79.702232,78.270706,77.161003,8.071869,16.716581,0.1021335,0.085095,0.074598,0.087649,0.060158,0.078732,0.072514,0.067037,0.05755,0.066059,0.070989,0.059189,0.051389,0.07012,0.053349,0.053035,0.051558,0.047115,0.046915,0.047435,0.049889,0.046795,0.040751,0.040499,0.047683,0.045653,0.046457,0.043032,0.037606,0.048667,0.043319,0.054581,0.044767,0.037983,0.04722,0.030395,0.053634,0.031052,0.025904,0.037666,0.035776,0.036686,0.050137,0.044421,0.042952,0.036624,0.039718,0.037741,0.034091,0.035334,0.034888,0.036953,0.036748,0.033718,0.032203,0.033489,0.033231,0.042397,0.037114,0.034174,0.041524,0.038361,0.030492,0.031107,0.032708,0.033823,0.029574,0.033025,0.034828,0.038385,0.036645,0.03374377,0.038826,0.03619954,0.023175,0.032934,0.029121,0.029493,0.028888,0.03650422,0.03131,0.030485,0.032162,0.033295,0.03293476,0.029987,0.032587,0.031538,0.03351695,0.0313766,0.029834,0.033633,0.030097,0.030407,0.030734,0.0338,0.033772,0.031872,0.034106,0.029252,0.038073,0.031084,0.034288,0.031202,0.027155,0.030539,0.031511,0.029986,0.030654,0.029192,0.027012,0.027438,0.029944,0.028505,0.029809,0.028615,0.028219,0.030802,0.030845,0.0277,0.028812,0.029856,0.030057,0.028663,0.028036,0.029215,0.028171,0.028666,0.02702,0.026057,0.025895,0.030687,0.026371,0.02958217,0.029208,0.029765,0.027736,0.02995047,0.029526,0.02757172,0.028904,0.026368,0.028259,0.028583,0.028442,0.02706,0.025384,0.0262111,0.02576,0.025837,0.028445,0.032931,0.02748,0.031185,0.03083299,0.026772,0.026079,0.02737,0.027875,0.030852,0.025109,0.03015174,0.027285,0.02763,0.027735,0.025921,0.027381,0.026832,0.025893,0.026123,0.027474,0.026161,0.024788,0.027907,0.0279,0.026629,0.028148,0.027156,0.027755,0.027175,0.025998,0.025816,0.028165,0.026893,0.025582,0.026365,0.025387,0.026006,0.024786,0.027207,0.027606,0.026935,0.027674,0.025179,0.026774,0.02376791,0.024598,0.025243,0.024072,0.02453,0.022732,0.023399,0.02475058,0.025391,0.021847,0.023678,0.02668916,0.023995,0.023916,0.024642,0.0258,0.026045,0.024861,0.02707694,0.026585,0.02551,0.026715,0.02923,0.025436,0.026431,0.025138,0.026523,0.02736,0.024691,0.02441,0.024375,0.024072,0.023679,0.024406,0.02341,0.024996,0.023965,0.024527,0.02317989,0.02437925,0.023694,0.024877,0.023616,0.023959,0.024051,0.022763,0.023612,0.021335,0.02286943,0.023395,0.023517,0.023737,0.022938,0.022805,0.023405,0.02205,0.023302,0.021293,0.022836,0.021917,0.022744,0.02209,0.021741,0.024066,0.02132,0.021209,0.022381,0.022201,0.022707,0.023262,0.021113,0.022651,0.021544,0.021487,0.021401,0.021496,0.021403,0.022,0.020043,0.020552,0.020115,0.021342,0.021674,0.020928,0.020487,0.021258,0.020411,0.019709,0.020827,0.020435,0.021597,0.019656,0.020526,0.020309,0.021559,0.021248,0.020052,0.01985,0.020572,0.020864,0.02063,0.020328,0.020089,0.020778,0.019843
min,2.0,30.0,-1.0,-1.0,-4.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-0.5,-0.333333,-0.25,-0.2,-0.166667,-0.142857,-0.125,0.0,-0.1,-0.090909,-0.083333,-0.076923,-0.071429,-0.066667,-0.0625,-0.058824,-0.055556,-0.052632,-0.05,-0.047619,-0.045455,-0.043478,-0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.912114e-13,-0.022265,-0.083527,-0.264782,-0.097811,-0.284908,-0.414451,-0.368758,-0.24206,-0.142574,-0.168624,-0.349857,-0.247845,-0.110569,-0.283153,-0.31607,-0.163205,-0.255404,-0.228924,-0.32713,-0.244233,-0.230541,-0.203561,-0.268361,-0.210641,-0.137076,-0.120628,-0.22159,-0.279722,-0.232574,-0.15058,-0.093624,-0.265342,-0.196232,-0.295627,-0.129271,-0.198366,-0.278318,-0.156219,-0.257172,-0.243514,-0.185707,-0.201181,-0.19051,-0.1454,-0.270257,-0.127899,-0.27509,-0.15285,-0.149856,-0.219183,-0.176877,-0.172814,-0.233451,-0.27536,-0.23854,-0.118572,-0.231849,-0.228954,-0.219096,-0.193187,-0.22066,-0.196092,-0.189033,-0.221121,-0.206887,-0.241755,-0.233387,-0.259656,-0.305022,-0.24551,-0.193808,-0.323167,-0.3270242,-0.063532,-0.303067,-0.210255,-0.297328,-0.212637,-0.2501189,-0.243767,-0.212384,-0.22703,-0.203298,-0.1098967,-0.190569,-0.200557,-0.255192,-0.1929319,-0.1788609,-0.220277,-0.173911,-0.169579,-0.160781,-0.160985,-0.214383,-0.180713,-0.208545,-0.177562,-0.171499,-0.184639,-0.218102,-0.199449,-0.215427,-0.164463,-0.219402,-0.159684,-0.215296,-0.170348,-0.194875,-0.230352,-0.197382,-0.18223,-0.162741,-0.26947,-0.178206,-0.176519,-0.220253,-0.194011,-0.191412,-0.154488,-0.221289,-0.161505,-0.150858,-0.149774,-0.201629,-0.208745,-0.125965,-0.127681,-0.243109,-0.217596,-0.193565,-0.296007,-0.1905611,-0.246798,-0.287098,-0.182249,-0.1885376,-0.138853,-0.1906006,-0.1527,-0.14355,-0.191409,-0.218093,-0.169344,-0.209815,-0.185921,-0.1746576,-0.150281,-0.238194,-0.253455,-0.1421,-0.179328,-0.140322,-0.2181654,-0.189492,-0.135567,-0.160798,-0.178139,-0.218665,-0.128696,-0.2612485,-0.194569,-0.213368,-0.160708,-0.178498,-0.165015,-0.20341,-0.162662,-0.191237,-0.184907,-0.359281,-0.146631,-0.188593,-0.167064,-0.199886,-0.170239,-0.223957,-0.144302,-0.147099,-0.149231,-0.142227,-0.152926,-0.169573,-0.192225,-0.188906,-0.111433,-0.166496,-0.20509,-0.182261,-0.207714,-0.163156,-0.140531,-0.178416,-0.219794,-0.2118246,-0.134509,-0.142458,-0.178105,-0.180538,-0.205968,-0.151166,-0.2259298,-0.198891,-0.109358,-0.166328,-0.1339165,-0.193576,-0.178294,-0.218585,-0.171839,-0.175737,-0.185253,-0.1367344,-0.199883,-0.173865,-0.204081,-0.282942,-0.311162,-0.226912,-0.124054,-0.179668,-0.193629,-0.230475,-0.125686,-0.162213,-0.151037,-0.166174,-0.248841,-0.145789,-0.201838,-0.189639,-0.243747,-0.1382621,-0.2163302,-0.150489,-0.218287,-0.185389,-0.121048,-0.124696,-0.138747,-0.101852,-0.219216,-0.2036059,-0.123685,-0.151706,-0.162868,-0.174707,-0.173819,-0.160265,-0.193415,-0.148153,-0.207287,-0.122848,-0.155467,-0.128418,-0.163295,-0.143632,-0.152918,-0.100355,-0.143594,-0.136906,-0.152658,-0.118139,-0.133957,-0.115834,-0.162425,-0.177609,-0.119063,-0.158678,-0.172283,-0.125846,-0.208189,-0.119077,-0.16384,-0.114122,-0.15309,-0.128322,-0.179219,-0.14524,-0.176913,-0.172372,-0.101945,-0.143441,-0.10586,-0.127946,-0.125965,-0.122633,-0.11715,-0.179413,-0.135299,-0.130113,-0.120286,-0.15765,-0.115394,-0.12075,-0.120835,-0.125967,-0.131226,-0.101761
25%,16.0,5381.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,469.0,546.0,602.0,672.5,653.200012,642.5,656.285706,641.875,645.555542,654.400024,660.727295,661.083313,670.769226,679.357117,714.06665,766.3125,775.705872,776.055542,775.894714,786.5,789.904785,786.636353,794.347839,811.708313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.173913,0.291667,7.0,37.0,0.002513749,2.3e-05,-0.010407,-0.002936,-0.002513,-0.002699,-0.004959,-0.014388,-0.007261,-0.013443,-0.010228,-0.019964,-0.018797,-0.003229,-0.015363,-0.016595,-0.01607,-0.012283,-0.013885,-0.012151,-0.012981,-0.010315,-0.011145,-0.014109,-0.022171,-0.010286,-0.007297,-0.017441,-0.006906,-0.009639,-0.014856,-0.012286,-0.011007,-0.008797,-0.009056,-0.004791,-0.007358,-0.013699,-0.004398,-0.00674,-0.014028,-0.011701,-0.008596,-0.006872,-0.012796,-0.013036,-0.009267,-0.01142,-0.011698,-0.009484,-0.013664,-0.013754,-0.014921,-0.009443,-0.007199,-0.009058,-0.005428,-0.009453,-0.012306,-0.012704,-0.012004,-0.008875,-0.010066,-0.009854,-0.011415,-0.010281,-0.00581,-0.012167,-0.010122,-0.006196,-0.007822,-0.01164559,-0.008901,-0.007650371,-0.005622,-0.008873,-0.008335,-0.009756,-0.011562,-0.01011538,-0.014764,-0.009587,-0.010424,-0.008699,-0.009831874,-0.008752,-0.005686,-0.011422,-0.01122518,-0.01003744,-0.006212,-0.009065,-0.010152,-0.008997,-0.012529,-0.00955,-0.010465,-0.009627,-0.009379,-0.010079,-0.009409,-0.007778,-0.00937,-0.011176,-0.007523,-0.007114,-0.011883,-0.011664,-0.008446,-0.009407,-0.008337,-0.005606,-0.008976,-0.010393,-0.007403,-0.00685,-0.010472,-0.008582,-0.009739,-0.009942,-0.0108,-0.008305,-0.0078,-0.00991,-0.008547,-0.009662,-0.009218,-0.010111,-0.009034,-0.009581,-0.006473,-0.007596,-0.00844,-0.008790134,-0.009666,-0.009522,-0.00931,-0.008394296,-0.009093,-0.00863523,-0.008913,-0.007473,-0.007901,-0.009662,-0.010826,-0.009345,-0.00813,-0.007809194,-0.007538,-0.007822,-0.009635,-0.008766,-0.008992,-0.009673,-0.006840822,-0.008203,-0.009201,-0.009504,-0.010956,-0.006919,-0.010075,-0.00927562,-0.007772,-0.009794,-0.012955,-0.010526,-0.008171,-0.007031,-0.008558,-0.008403,-0.007524,-0.006644,-0.007789,-0.007059,-0.007705,-0.008996,-0.010258,-0.009502,-0.007692,-0.007105,-0.007901,-0.009372,-0.009136,-0.010336,-0.00738,-0.008817,-0.009617,-0.008516,-0.008104,-0.009819,-0.007509,-0.009085,-0.008818,-0.007924,-0.008129,-0.007780153,-0.010079,-0.010233,-0.007924,-0.009859,-0.006328,-0.008177,-0.007173575,-0.006889,-0.007924,-0.007493,-0.008448326,-0.00813,-0.008701,-0.008528,-0.007606,-0.008441,-0.008493,-0.007760615,-0.007242,-0.007805,-0.009611,-0.007953,-0.008507,-0.007283,-0.008561,-0.007257,-0.009038,-0.007099,-0.009023,-0.008558,-0.008032,-0.007954,-0.00785,-0.007846,-0.010094,-0.00895,-0.007259,-0.008708923,-0.008676386,-0.008956,-0.008014,-0.008881,-0.010573,-0.008747,-0.009418,-0.009486,-0.007752,-0.008349135,-0.00863,-0.007961,-0.008961,-0.009487,-0.007971,-0.008606,-0.0073,-0.009301,-0.007626,-0.008687,-0.007269,-0.009432,-0.008717,-0.007898,-0.007929,-0.008654,-0.009034,-0.00993,-0.008603,-0.008477,-0.008935,-0.007583,-0.008394,-0.00718,-0.009626,-0.009334,-0.008201,-0.008975,-0.008122,-0.008666,-0.007718,-0.007706,-0.007669,-0.008757,-0.007231,-0.007744,-0.008878,-0.007332,-0.007527,-0.008123,-0.008735,-0.009043,-0.00828,-0.007645,-0.008477,-0.008735,-0.008956,-0.008432,-0.00916,-0.008503,-0.008462,-0.008457,-0.007471,-0.008146,-0.008556,-0.007458
50%,34.5,11203.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,897.5,876.75,912.5,1066.5,1018.899994,994.5,988.5,970.0,974.333344,985.5,991.590912,991.5,996.153839,1004.214294,1028.033325,1071.78125,1068.235291,1068.111145,1074.842102,1089.5,1094.833374,1091.090942,1082.152161,1086.666687,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.214286,0.466667,0.875,1.058824,1.222222,1.473684,1.75,1.952381,2.181818,2.391304,2.583333,13.5,43.0,0.009277606,0.001337,0.001313,0.000851,0.000817,0.001793,0.001743,-0.001465,0.003129,-0.000599,0.000712,-0.001937,-0.001538,0.000292,-0.000854,3.7e-05,-0.000367,-0.000275,0.000279,-7e-05,0.000383,-7.8e-05,-0.000494,-0.001867,-0.00057,-7.6e-05,0.000877,-0.00339,-0.000262,-4.9e-05,-0.001277,-0.000853,-4.8e-05,0.000407,-0.000886,0.001596,-9.1e-05,-0.001213,0.000885,3e-06,-0.002043,-0.000476,0.000138,0.000449,-0.000646,-0.000152,-0.001566,-0.000826,-0.001265,-6.9e-05,-0.001377,-0.000325,-0.000756,0.00026,-9e-05,-0.002042,-0.001044,-2.4e-05,-0.001564,-0.00285,-0.00168,-0.000576,-0.000956,7.5e-05,-0.001912,-0.001794,0.000157,-0.001645,-0.000518,7.9e-05,0.000804,-1.75244e-08,8.2e-05,-2.971988e-09,-0.0001,-0.001113,-0.000203,-0.001114,-0.000401,-1.887659e-08,-0.002667,-0.001494,-0.001184,-8.5e-05,4.895654e-08,-0.00042,0.001396,-0.000599,-2.040632e-08,-3.232057e-08,0.000653,0.000452,-0.000778,0.000301,-0.001914,-0.000304,0.000195,5.8e-05,-1.9e-05,-0.00072,-0.000857,-4.7e-05,-5.6e-05,-0.001779,0.000228,-5.6e-05,-0.001748,-0.000564,-0.000107,-4.6e-05,-0.000195,-4.8e-05,5.9e-05,-0.000732,5.8e-05,0.000686,-0.001872,-0.000147,0.000525,-0.001013,-0.000301,0.000896,0.000217,-0.000794,-0.000159,-0.000168,-0.001672,-0.000865,0.000676,-0.000614,0.000216,0.000611,0.000906,-1.079629e-07,-0.000508,-0.000551,0.000335,4.288043e-09,-0.001154,1.360898e-09,-0.000253,-0.00013,0.000521,-1.2e-05,-0.000562,-0.000451,-0.000746,2.107723e-08,-0.000252,-2.2e-05,-0.000641,-5.9e-05,-0.000348,-2.2e-05,2.873389e-07,0.000748,-0.000418,-0.000484,-0.00136,4.8e-05,-0.001051,-4.04955e-08,0.000715,-1.6e-05,-0.001765,-0.001577,-0.000364,0.00044,-0.000741,-0.000819,-0.000573,0.000454,-0.00053,0.000928,6.6e-05,-0.0002,-0.000883,8.8e-05,-0.000302,-6.7e-05,-0.000209,0.000142,-0.000579,-0.000853,-9.3e-05,-0.000264,-0.000694,-0.000225,0.000194,-0.000581,0.000238,1.9e-05,0.000211,-0.000295,0.00019,2.042958e-07,-0.000399,-0.001101,-0.000552,-0.000387,-0.000252,0.000212,-2.118034e-07,0.0004,-0.000521,-0.000243,-1.728316e-07,-0.000772,-0.000752,-0.000536,-0.000536,0.00032,0.000228,-0.0005785649,-5.2e-05,-0.001328,-0.00099,0.000821,-0.00046,-8.2e-05,0.000373,0.000182,0.00051,2.4e-05,0.000187,0.000352,-0.000169,0.000348,-0.000252,0.00042,-0.000636,-0.000394,0.000129,6.520761e-07,1.243876e-07,7.7e-05,0.000464,-0.0001,-0.001503,-0.000298,0.000114,-0.000757,6.6e-05,-6.396389e-07,-1.4e-05,0.0002,-0.000165,-0.000623,-2e-06,6.2e-05,-4.2e-05,0.000131,1.5e-05,-0.000524,0.000922,-6.6e-05,0.00096,0.000606,0.001335,-3.9e-05,-0.000551,-0.000822,-0.000301,0.000506,-0.000858,-5.6e-05,4.4e-05,8.5e-05,-0.000572,-0.000202,0.000326,-9.4e-05,0.000626,-2.6e-05,-0.000503,-0.000302,-1.2e-05,0.000544,1e-06,-0.000191,-0.000429,0.000213,1e-06,1e-06,-0.000422,0.000104,-0.000445,-0.000447,-5.6e-05,0.000344,-2e-06,8.4e-05,-0.000345,-0.000495,-0.000149,-3.3e-05,0.000356,-6.2e-05,0.000105,-0.000265
75%,47.0,16071.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.2,0.166667,0.142857,0.125,0.222222,0.2,0.181818,0.166667,0.153846,0.214286,0.2,0.1875,0.176471,0.166667,0.157895,0.15,0.142857,0.181818,0.173913,0.166667,1253.0,1158.0,1149.666626,1318.5,1298.800049,1262.166626,1283.428589,1218.75,1188.111084,1169.400024,1156.272705,1171.5,1164.769287,1161.071411,1194.133301,1274.625,1313.0,1308.555542,1300.157837,1295.099976,1289.619019,1283.136353,1279.739136,1289.25,0.0,1.0,1.333333,2.25,2.4,2.666667,3.0,3.375,3.666667,4.1,4.363636,4.833333,5.153846,5.642857,6.2,7.125,7.485294,7.680555,8.0,8.2625,8.428572,8.556818,8.695652,8.885417,21.0,58.0,0.06024861,0.007156,0.013332,0.011199,0.006919,0.018649,0.023581,0.005691,0.019206,0.005397,0.014499,0.00442,0.005575,0.004887,0.009775,0.008913,0.008757,0.007074,0.023952,0.013582,0.013567,0.010761,0.010733,0.009125,0.004587,0.007276,0.010655,0.004003,0.006632,0.010189,0.006304,0.006938,0.012076,0.010069,0.007373,0.009943,0.006317,0.004758,0.008958,0.00772,0.009733,0.007702,0.006275,0.011867,0.011383,0.011097,0.0079,0.007244,0.007623,0.008348,0.006575,0.011135,0.009684,0.014458,0.009342,0.004943,0.003609,0.008959,0.007174,0.007199,0.007041,0.010996,0.009014,0.007804,0.007996,0.004261,0.008089,0.004385,0.007364,0.006492,0.009301,0.007195955,0.007189,0.008795834,0.003523,0.006958,0.008997,0.00792,0.006274,0.007755399,0.004844,0.006876,0.008681,0.008414,0.009079687,0.009269,0.014839,0.007872,0.009279847,0.007514261,0.009452,0.008169,0.007495,0.009309,0.006295,0.007523,0.010139,0.011033,0.01245,0.007595,0.006747,0.009598,0.006343,0.006401,0.009403,0.007012,0.006155,0.00627,0.010713,0.007157,0.008575,0.008724,0.008045,0.006661,0.011216,0.009423,0.007183,0.007828,0.00804,0.006751,0.00728,0.010027,0.009219,0.007207,0.010099,0.009123,0.006753,0.006504,0.007919,0.008092,0.008424,0.00854,0.009309,0.009570601,0.007979,0.008361,0.00996,0.008470662,0.006079,0.01019304,0.007746,0.006907,0.008743,0.009101,0.007736,0.008669,0.007932,0.007537116,0.009118,0.007477,0.007491,0.00647,0.008613,0.007576,0.007547469,0.010189,0.007337,0.009836,0.008809,0.008935,0.007604,0.009177827,0.010647,0.009169,0.008015,0.006448,0.007549,0.009297,0.007217,0.00776,0.006108,0.007356,0.006452,0.008018,0.008694,0.007255,0.006495,0.007123,0.007076,0.008569,0.00911,0.007967,0.007794,0.008178,0.007295,0.008263,0.008509,0.008695,0.008314,0.008456,0.008046,0.008669,0.008888,0.008046,0.008106,0.007486541,0.007591,0.005907,0.006729,0.008337,0.007651,0.007843,0.008527042,0.008145,0.007056,0.007657,0.008665218,0.008115,0.0086,0.007333,0.007021,0.009738,0.008419,0.006608394,0.007998,0.006572,0.007282,0.008768,0.006983,0.007573,0.008624,0.0092,0.008451,0.00844,0.007935,0.009183,0.007944,0.007853,0.008166,0.00932,0.00828,0.00843,0.007997,0.009425097,0.009526388,0.007691,0.01003,0.008016,0.006988,0.008572,0.008523,0.009765,0.008294,0.008373956,0.010157,0.008986,0.008538,0.008285,0.007642,0.008262,0.007958,0.007686,0.007252,0.008781,0.008245,0.00874,0.008948,0.008429,0.011465,0.008108,0.007145,0.007431,0.007777,0.009518,0.00853,0.009212,0.008869,0.009216,0.008297,0.008139,0.008695,0.008831,0.008179,0.007858,0.007567,0.0081,0.009035,0.008499,0.008465,0.007209,0.00761,0.008502,0.009059,0.008678,0.007989,0.0083,0.006992,0.0076,0.009533,0.008032,0.008013,0.007925,0.00819,0.008024,0.007541,0.009062,0.008527,0.007501,0.008346,0.009126
max,59.0,22167.0,950.0,978.0,989.0,1305.0,899.0,941.0,776.0,597.0,602.0,771.0,563.0,591.0,639.0,634.0,772.0,1209.0,1000.0,257.0,174.0,813.0,742.0,444.0,482.0,436.0,436.0,459.0,450.666656,453.0,458.600006,382.166656,353.285706,402.0,491.666656,519.700012,530.090881,539.166687,543.15387,542.071411,547.599976,551.0,553.705872,566.055542,585.78949,601.450012,634.952393,651.045471,665.260864,677.125,4596.0,4474.5,4542.666504,5188.0,5039.600098,4940.5,4923.285645,4828.0,4737.888672,4677.799805,4596.36377,4617.416504,4641.846191,4694.143066,4828.200195,5182.375,5258.706055,5256.277832,5248.263184,5278.75,5273.0,5264.59082,5253.913086,5248.041504,6233.0,6216.0,6413.0,7354.5,6984.399902,6655.166504,6561.0,6290.875,6098.888672,6046.399902,5928.818359,5857.166504,5817.692383,5782.856934,5789.133301,6035.1875,6063.82373,5876.444336,5664.473633,5572.799805,5472.571289,5372.727051,5278.695801,5205.958496,27.0,83.0,0.5194656,0.9393,0.671426,0.508574,0.433406,0.46733,0.483864,0.41151,0.331279,0.620031,0.501418,0.339843,0.322409,0.576689,0.445069,0.407029,0.405914,0.494606,0.366774,0.360423,0.315222,0.391118,0.338613,0.365446,0.317082,0.406244,0.390945,0.267507,0.515324,0.505596,0.500769,0.54442,0.389996,0.35623,0.427338,0.264605,0.628837,0.347109,0.432026,0.458826,0.353915,0.48032,0.385503,0.324304,0.4209,0.298662,0.394892,0.446718,0.359529,0.438214,0.295252,0.240809,0.326487,0.285234,0.412878,0.389221,0.526711,0.315183,0.350245,0.37368,0.341108,0.297417,0.180244,0.318132,0.397066,0.301614,0.285616,0.3938,0.342578,0.559022,0.458124,0.3126087,0.404596,0.1863368,0.323352,0.248603,0.294947,0.200756,0.266079,0.3368303,0.226705,0.335219,0.256866,0.246929,0.3031232,0.217368,0.238525,0.233949,0.2642569,0.1933803,0.223732,0.268949,0.28892,0.305372,0.245448,0.332231,0.245863,0.200075,0.260222,0.271909,0.382508,0.266055,0.299135,0.221357,0.286524,0.536586,0.334615,0.164725,0.23228,0.245286,0.226831,0.29073,0.184224,0.329508,0.209585,0.223966,0.241003,0.229037,0.243602,0.255117,0.237201,0.252283,0.293417,0.417586,0.340458,0.324459,0.301867,0.338576,0.222729,0.335321,0.498164,0.354723,0.284955,0.1909822,0.251182,0.410183,0.240343,0.4158529,0.320804,0.2227246,0.222529,0.49678,0.189708,0.169312,0.227467,0.241769,0.205678,0.2271364,0.242405,0.300437,0.238889,0.364209,0.302856,0.357256,0.4306874,0.163808,0.369081,0.178244,0.208783,0.318642,0.352827,0.2235273,0.257082,0.233785,0.175788,0.303044,0.289831,0.278601,0.472701,0.380148,0.642135,0.489445,0.522377,0.431319,0.222427,0.336549,0.317146,0.341333,0.523396,0.326221,0.207049,0.205503,0.187045,0.192324,0.21013,0.26864,0.355062,0.176774,0.167651,0.20438,0.247916,0.179889,0.239175,0.203701,0.282516,0.2628181,0.170921,0.198616,0.301123,0.211715,0.186119,0.165263,0.2346242,0.230493,0.275734,0.192262,0.2153724,0.151485,0.252471,0.183477,0.208657,0.236225,0.206049,0.3670191,0.324765,0.40089,0.350212,0.290633,0.327314,0.261503,0.194905,0.241418,0.203209,0.242898,0.248898,0.206767,0.20408,0.216066,0.249006,0.186868,0.206267,0.30341,0.336939,0.1828709,0.2799688,0.244238,0.25667,0.175199,0.211757,0.20133,0.169726,0.173755,0.173015,0.2111876,0.149826,0.166086,0.226978,0.178143,0.212368,0.126469,0.214786,0.19621,0.126093,0.237609,0.131128,0.178428,0.192587,0.208148,0.155626,0.195051,0.234973,0.158408,0.1762,0.17093,0.252043,0.181846,0.329837,0.22866,0.146926,0.160406,0.164493,0.161022,0.21386,0.195949,0.1653,0.181864,0.206984,0.117036,0.129561,0.156964,0.15335,0.152944,0.193638,0.144381,0.211967,0.149766,0.187924,0.147007,0.10615,0.169828,0.202824,0.112587,0.117921,0.200056,0.166945,0.111669,0.215543,0.133241,0.133338,0.147305


In [34]:
# Apply log transformation to the arrays
np_train[:,:100] = np.log1p(np_train[:,:100])
np_val[:,:100] = np.log1p(np_val[:,:100])
np_test[:,:100] = np.log1p(np_test[:,:100])

In [43]:
# Convert nans to numbers
np_train = np.nan_to_num(np_train)
np_val = np.nan_to_num(np_val)
np_test = np.nan_to_num(np_test)

In [49]:
pd.DataFrame(np_train).to_csv('np_train.csv')

In [50]:
np_train.shape

(214200, 400)

In [51]:
# Scale feature matrices
scaler = StandardScaler()
scaler.fit(np_train)

np_train = scaler.transform(np_train)
np_val = scaler.transform(np_val)
np_test = scaler.transform(np_test)

In [52]:
np_train[:5, :]

array([[-1.83583904, -0.54229122,         nan, ...,  2.74524398,
        -2.67678114,  0.48958367],
       [-1.83583904, -0.4833661 ,         nan, ..., -0.24552048,
         0.32794862, -0.37943567],
       [-1.83583904, -0.50114049,         nan, ..., -0.02828303,
         0.73543028, -0.62988265],
       [-1.83583904, -0.5013465 ,         nan, ..., -0.62564011,
        -0.30898175, -1.25221431],
       [-1.83583904, -0.49395461,         nan, ...,  0.1913787 ,
         0.41013463, -0.29602646]])