In [70]:
from  datetime import datetime, timedelta
import gc
import numpy as np, pandas as pd
import lightgbm as lgb

In [71]:
# Render up to 50 columns when a DataFrame is displayed.
pd.set_option("display.max_columns", 50)

In [72]:
# h:        number of days to forecast (28 days are predicted further below).
# max_lags: days of history kept before a forecast day so the lag/rolling
#           features can be computed at prediction time.
# tr_last:  number of the last day column (d_1913) available for training.
h, max_lags, tr_last = 28, 57, 1913
# First day to forecast.
fday = datetime(year=2016, month=4, day=25)

In [73]:
def reduce_mem_usage(df, verbose=True):
    """Downcast the numeric columns of ``df`` to the narrowest dtype that fits.

    Iterates over every column whose dtype is one of int16/int32/int64/
    float16/float32/float64 and replaces it (the frame is modified in place)
    with the smallest integer or float dtype whose range contains the
    column's minimum and maximum.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is mutated and also returned.
    verbose : bool, default True
        When True, print the memory footprint after conversion and the
        relative reduction.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with downcast columns.

    Notes
    -----
    * Range checks are inclusive, so a column spanning exactly -128..127 is
      stored as int8.
    * Float columns whose range fits are converted to float16, which only
      checks the value range, not precision: values may be rounded.
    * Columns whose min/max are NaN (empty or all-NaN) fail every range check;
      integer columns are then left untouched and float columns become float64.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    # memory_usage() returns bytes per column; the sum divided by 1024**2 is MB.
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        # The first three characters of the dtype name separate 'int..' from 'flo..'.
        if str(col_type)[:3] == 'int':
            target = next(
                (t for t in int_candidates
                 if np.iinfo(t).min <= c_min and c_max <= np.iinfo(t).max),
                None,
            )
        else:
            target = next(
                (t for t in float_candidates
                 if np.finfo(t).min <= c_min and c_max <= np.finfo(t).max),
                np.float64,
            )
        if target is not None:
            df[col] = df[col].astype(target)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the percentage against a zero-sized starting footprint.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

In [74]:
# Column dtypes passed to pd.read_csv for calendar.csv.
CAL_DTYPES = {
    **dict.fromkeys(
        ["event_name_1", "event_name_2", "event_type_1", "event_type_2", "weekday"],
        "category",
    ),
    **dict.fromkeys(["wm_yr_wk", "wday", "month", "year"], "int16"),
    **dict.fromkeys(["snap_CA", "snap_TX", "snap_WI"], "float32"),
}

# Column dtypes passed to pd.read_csv for sell_prices.csv.
PRICE_DTYPES = {
    "store_id": "category",
    "item_id": "category",
    "wm_yr_wk": "int16",
    "sell_price": "float32",
}

In [75]:
def create_dt(is_train=True, nrows=None, first_day=1200,
              input_dir='/Users/yanzeliu/Downloads/m5-forecasting-accuracy/'):
    """Load the M5 CSV files and build one long-format sales frame.

    Reads ``sell_prices.csv``, ``calendar.csv`` and
    ``sales_train_validation.csv`` from ``input_dir``, integer-encodes the
    categorical columns, melts the wide ``d_<n>`` sales columns into one row
    per (id, day) and merges calendar and price information onto each row.

    Parameters
    ----------
    is_train : bool, default True
        True: load days ``first_day`` .. ``tr_last``.
        False: load only the most recent history (from
        ``max(tr_last - max_lags, first_day)``) and append ``h`` future day
        columns filled with NaN, to be filled by the forecasting loop.
    nrows : int or None, default None
        Forwarded to ``pd.read_csv`` for the sales file (limits the number of
        series read).
    first_day : int, default 1200
        Earliest day number to load.
    input_dir : str
        Directory holding the three CSV files. The file name is appended by
        plain string concatenation, so the value must end with a path
        separator. NOTE(review): the default is a machine-specific absolute
        path kept only for backward compatibility -- pass your own location.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with the id/category codes, ``d``, ``sales``, the
        calendar columns and ``sell_price``. The merges use pandas' default
        join type, so rows without a matching calendar or price entry are
        not kept.

    Notes
    -----
    ``store_id`` and ``item_id`` are join keys against the price table, so the
    sales frame is encoded with the *price table's* category list and offset.
    Encoding each file independently only agrees when both files contain
    exactly the same set of values, which is no longer true once ``nrows``
    truncates the sales file.
    """
    prices = pd.read_csv(input_dir + 'sell_prices.csv', dtype=PRICE_DTYPES)
    # col -> (category list, code offset) used for the price table, reused below
    # so that the join keys of the sales frame share the same code space.
    price_encoding = {}
    for col, col_dtype in PRICE_DTYPES.items():
        if col_dtype == "category":
            codes = prices[col].cat.codes.astype("int16")
            # Missing values get code -1; subtracting the minimum makes codes start at 0.
            offset = codes.min()
            price_encoding[col] = (prices[col].cat.categories, offset)
            prices[col] = codes - offset

    cal = pd.read_csv(input_dir + 'calendar.csv', dtype=CAL_DTYPES)
    cal["date"] = pd.to_datetime(cal["date"])
    for col, col_dtype in CAL_DTYPES.items():
        if col_dtype == "category":
            cal[col] = cal[col].cat.codes.astype("int16")
            # Same shift as above: a missing event (-1) becomes 0.
            cal[col] -= cal[col].min()

    # Training: start at first_day (never below day 1).
    # Prediction: only the last max_lags days of history are needed, unless
    # first_day is even later.
    start_day = max(1 if is_train else tr_last - max_lags, first_day)
    numcols = [f"d_{day}" for day in range(start_day, tr_last + 1)]
    catcols = ['id', 'item_id', 'dept_id', 'store_id', 'cat_id', 'state_id']
    dtype = {numcol: "float32" for numcol in numcols}
    dtype.update({col: "category" for col in catcols if col != "id"})

    dt = pd.read_csv(input_dir + 'sales_train_validation.csv',
                     nrows=nrows, usecols=catcols + numcols, dtype=dtype)

    for col in catcols:
        if col == "id":
            continue
        if col in price_encoding:
            # Join key: reuse the price table's categories and offset.
            categories, offset = price_encoding[col]
            dt[col] = dt[col].cat.set_categories(categories).cat.codes.astype("int16") - offset
        else:
            dt[col] = dt[col].cat.codes.astype("int16")
            dt[col] -= dt[col].min()

    if not is_train:
        # Placeholder columns for the h days to forecast.
        for day in range(tr_last + 1, tr_last + h + 1):
            dt[f"d_{day}"] = np.nan

    dt = pd.melt(dt,
                 id_vars=catcols,
                 value_vars=[col for col in dt.columns if col.startswith("d_")],
                 var_name="d",
                 value_name="sales")

    # `copy=False` was dropped from these merges: the keyword is deprecated in
    # recent pandas and does not affect the merged values.
    dt = dt.merge(cal, on="d")
    dt = dt.merge(prices, on=["store_id", "item_id", "wm_yr_wk"])

    return dt

In [76]:
def create_fea(dt):
    """Add lag, rolling-mean and calendar features to ``dt`` in place.

    Parameters
    ----------
    dt : pandas.DataFrame
        Long-format frame with at least the columns ``id``, ``sales`` and
        ``date`` (datetime). Lags and rolling windows are computed on row
        order within each ``id``, so rows must already be in chronological
        order per series.

    Returns
    -------
    pandas.DataFrame
        The same ``dt`` object with these columns added or recast:

        * ``lag_7``, ``lag_28`` -- sales shifted by 7 / 28 rows per ``id``.
        * ``rmean_<lag>_<win>`` -- rolling mean over ``win`` rows (7, 28) of
          ``lag_<lag>``, per ``id``.
        * ``wday``, ``week``, ``month``, ``quarter``, ``year``, ``mday`` --
          int16 calendar features. A column that already exists is only cast
          to int16; a missing one is derived from ``date``.

    Notes
    -----
    ``Series.dt.weekofyear`` no longer exists in pandas 2.x, so the week
    number is taken from ``Series.dt.isocalendar().week`` when available
    (same ISO week number), falling back to ``weekofyear`` on old pandas.
    """
    lags = [7, 28]
    lag_cols = [f"lag_{lag}" for lag in lags]
    for lag, lag_col in zip(lags, lag_cols):
        dt[lag_col] = dt[["id", "sales"]].groupby("id")["sales"].shift(lag)

    wins = [7, 28]
    for win in wins:
        for lag, lag_col in zip(lags, lag_cols):
            dt[f"rmean_{lag}_{win}"] = (
                dt[["id", lag_col]]
                .groupby("id")[lag_col]
                .transform(lambda x: x.rolling(win).mean())
            )

    # feature name -> attribute of the ``.dt`` accessor used when the column is missing
    date_features = {
        "wday": "weekday",
        "week": "weekofyear",
        "month": "month",
        "quarter": "quarter",
        "year": "year",
        "mday": "day",
    }

    for date_feat_name, date_feat_func in date_features.items():
        if date_feat_name in dt.columns:
            dt[date_feat_name] = dt[date_feat_name].astype("int16")
            continue
        accessor = dt["date"].dt
        if date_feat_func == "weekofyear" and hasattr(accessor, "isocalendar"):
            values = accessor.isocalendar().week
        else:
            values = getattr(accessor, date_feat_func)
        dt[date_feat_name] = values.astype("int16")

    return dt

In [77]:
# Earliest day number loaded for training (passed to create_dt as first_day).
# If you want to load all the data set it to '1' -->  Great  memory overflow  risk !
FIRST_DAY = 350

In [78]:
%%time

# Build the long-format training frame starting at day FIRST_DAY, then show its shape.
df = create_dt(is_train=True, first_day= FIRST_DAY)
df.shape

CPU times: user 26.2 s, sys: 13 s, total: 39.2 s
Wall time: 41.3 s


(40718219, 22)

In [79]:
# Preview the first rows of the merged sales/calendar/price frame.
df.head()

Unnamed: 0,id,item_id,dept_id,store_id,cat_id,state_id,d,sales,date,wm_yr_wk,weekday,wday,month,year,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI,sell_price
0,HOBBIES_1_002_CA_1_validation,1,0,0,0,0,d_350,0.0,2012-01-13,11150,0,7,1,2012,0,0,0,0,0.0,1.0,0.0,3.97
1,HOBBIES_1_004_CA_1_validation,3,0,0,0,0,d_350,2.0,2012-01-13,11150,0,7,1,2012,0,0,0,0,0.0,1.0,0.0,4.34
2,HOBBIES_1_005_CA_1_validation,4,0,0,0,0,d_350,0.0,2012-01-13,11150,0,7,1,2012,0,0,0,0,0.0,1.0,0.0,2.48
3,HOBBIES_1_008_CA_1_validation,7,0,0,0,0,d_350,0.0,2012-01-13,11150,0,7,1,2012,0,0,0,0,0.0,1.0,0.0,0.5
4,HOBBIES_1_009_CA_1_validation,8,0,0,0,0,d_350,2.0,2012-01-13,11150,0,7,1,2012,0,0,0,0,0.0,1.0,0.0,1.77


In [80]:
%%time

# create_fea adds the feature columns to df in place; its return value is not needed here.
create_fea(df)
df.shape

CPU times: user 2min 17s, sys: 33.3 s, total: 2min 51s
Wall time: 2min 57s


(40718219, 31)

In [81]:
# Preview the frame with the new lag / rolling / date columns
# (the earliest rows of each series have NaN lags).
df.head()

Unnamed: 0,id,item_id,dept_id,store_id,cat_id,state_id,d,sales,date,wm_yr_wk,weekday,wday,month,year,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI,sell_price,lag_7,lag_28,rmean_7_7,rmean_28_7,rmean_7_28,rmean_28_28,week,quarter,mday
0,HOBBIES_1_002_CA_1_validation,1,0,0,0,0,d_350,0.0,2012-01-13,11150,0,7,1,2012,0,0,0,0,0.0,1.0,0.0,3.97,,,,,,,2,1,13
1,HOBBIES_1_004_CA_1_validation,3,0,0,0,0,d_350,2.0,2012-01-13,11150,0,7,1,2012,0,0,0,0,0.0,1.0,0.0,4.34,,,,,,,2,1,13
2,HOBBIES_1_005_CA_1_validation,4,0,0,0,0,d_350,0.0,2012-01-13,11150,0,7,1,2012,0,0,0,0,0.0,1.0,0.0,2.48,,,,,,,2,1,13
3,HOBBIES_1_008_CA_1_validation,7,0,0,0,0,d_350,0.0,2012-01-13,11150,0,7,1,2012,0,0,0,0,0.0,1.0,0.0,0.5,,,,,,,2,1,13
4,HOBBIES_1_009_CA_1_validation,8,0,0,0,0,d_350,2.0,2012-01-13,11150,0,7,1,2012,0,0,0,0,0.0,1.0,0.0,1.77,,,,,,,2,1,13


In [82]:
# Drop, in place, every row that has a NaN in any column (for example rows
# whose lag / rolling features could not be computed), then show the new shape.
df.dropna(inplace = True)
df.shape

(39041269, 31)

In [83]:
# Integer-coded columns that are declared to LightGBM as categorical features.
cat_feats = ['item_id', 'dept_id','store_id', 'cat_id', 'state_id'] + ["event_name_1", "event_name_2", "event_type_1", "event_type_2"]
# Columns excluded from the feature matrix (identifiers, raw dates and the target itself).
useless_cols = ["id", "date", "sales","d", "wm_yr_wk", "weekday"]
# Every remaining column of df is used as a model feature.
train_cols = df.columns[~df.columns.isin(useless_cols)]
X_train = df[train_cols]
y_train = df["sales"]

In [84]:
%%time

# Fix the seed so the random hold-out below is reproducible.
np.random.seed(777)

# Draw 2,000,000 distinct row labels as a pseudo-validation set.
fake_valid_inds = np.random.choice(X_train.index.values, 2_000_000, replace = False)
# np.setdiff1d returns the values that are in the first array but not in the second,
# i.e. every row label that was not drawn for validation.
train_inds = np.setdiff1d(X_train.index.values, fake_valid_inds)
train_data = lgb.Dataset(X_train.loc[train_inds] , label = y_train.loc[train_inds], 
                         categorical_feature=cat_feats, free_raw_data=False)
fake_valid_data = lgb.Dataset(X_train.loc[fake_valid_inds], label = y_train.loc[fake_valid_inds],
                              categorical_feature=cat_feats,
                 free_raw_data=False)# This is a random sample, we're not gonna apply any time series train-test-split tricks here!

CPU times: user 15.9 s, sys: 10.5 s, total: 26.3 s
Wall time: 28.3 s


In [85]:
# Release the large intermediate objects now that the LightGBM datasets exist.
# NOTE: train_cols is kept on purpose -- the prediction loop below still uses it.
del df, X_train, y_train, fake_valid_inds,train_inds ; gc.collect()

135

In [86]:
# LightGBM training parameters.
# The original literal listed "metric" twice ("rmse" and ["rmse"]); a later
# duplicate key silently overrides the earlier one, so only the effective
# value is kept here. The resulting dict is unchanged.
params = {
    "objective": "poisson",
    "metric": ["rmse"],
    "force_row_wise": True,
    "learning_rate": 0.075,
    # "sub_feature": 0.8,
    "sub_row": 0.75,
    "bagging_freq": 1,
    "lambda_l2": 0.1,
    # "nthread": 4,
    "verbosity": 1,
    "num_iterations": 1200,
    "num_leaves": 128,
    "min_data_in_leaf": 100,
}

In [87]:
%%time

# Train the booster and log the validation metric every 20 iterations.
# The `verbose_eval` argument of lgb.train was removed in LightGBM 4.0, so the
# logging is requested through a callback instead. Old releases that lack
# `log_evaluation` expose the same callback as `print_evaluation`.
_make_log_callback = getattr(lgb, "log_evaluation", None) or lgb.print_evaluation
m_lgb = lgb.train(
    params,
    train_data,
    valid_sets=[fake_valid_data],
    callbacks=[_make_log_callback(20)],
)



[20]	valid_0's rmse: 2.95654
[40]	valid_0's rmse: 2.59891
[60]	valid_0's rmse: 2.5085
[80]	valid_0's rmse: 2.48401
[100]	valid_0's rmse: 2.47146
[120]	valid_0's rmse: 2.4607
[140]	valid_0's rmse: 2.45398
[160]	valid_0's rmse: 2.44701
[180]	valid_0's rmse: 2.44
[200]	valid_0's rmse: 2.43421
[220]	valid_0's rmse: 2.42943
[240]	valid_0's rmse: 2.42568
[260]	valid_0's rmse: 2.42101
[280]	valid_0's rmse: 2.41667
[300]	valid_0's rmse: 2.41108
[320]	valid_0's rmse: 2.40705
[340]	valid_0's rmse: 2.40376
[360]	valid_0's rmse: 2.4007
[380]	valid_0's rmse: 2.39734
[400]	valid_0's rmse: 2.39318
[420]	valid_0's rmse: 2.38941
[440]	valid_0's rmse: 2.38647
[460]	valid_0's rmse: 2.38332
[480]	valid_0's rmse: 2.3801
[500]	valid_0's rmse: 2.37759
[520]	valid_0's rmse: 2.37508
[540]	valid_0's rmse: 2.37286
[560]	valid_0's rmse: 2.37033
[580]	valid_0's rmse: 2.36795
[600]	valid_0's rmse: 2.36529
[620]	valid_0's rmse: 2.36374
[640]	valid_0's rmse: 2.36084
[660]	valid_0's rmse: 2.35985
[680]	valid_0's rmse:

In [89]:
# Persist the trained booster to "model.lgb" in the current working directory.
m_lgb.save_model("model.lgb")

<lightgbm.basic.Booster at 0x1309610d0>

In [88]:
%%time

# Recursive day-by-day forecast, repeated once per multiplier and then averaged.
# Each alpha scales the daily predictions ("magic multiplier by kyakovlev").
# The scaled values are written back into `te`, so they feed the lag features
# of later days -- the runs therefore cannot be derived from one another.
alphas = [1.028, 1.023, 1.018]
weights = [1/len(alphas)]*len(alphas)
sub = 0.
# Submission columns F1 .. F<h>.
cols = [f"F{i}" for i in range(1, h + 1)]

for icount, (alpha, weight) in enumerate(zip(alphas, weights)):

    # Fresh prediction frame: recent history plus NaN placeholders for the future days.
    te = create_dt(False)

    for tdelta in range(0, h):
        day = fday + timedelta(days=tdelta)
        print(tdelta, day)
        # Only the last max_lags days are needed to rebuild the lag/rolling features.
        tst = te[(te.date >= day - timedelta(days=max_lags)) & (te.date <= day)].copy()
        create_fea(tst)
        tst = tst.loc[tst.date == day , train_cols]
        # Rows of `tst` keep the order of `te`, so the predictions line up positionally.
        te.loc[te.date == day, "sales"] = alpha*m_lgb.predict(tst) # magic multiplier by kyakovlev

    # Reshape the forecast days from long format to one row per id with columns F1..F<h>.
    te_sub = te.loc[te.date >= fday, ["id", "sales"]].copy()
    te_sub["F"] = [f"F{rank}" for rank in te_sub.groupby("id")["id"].cumcount()+1]
    te_sub = te_sub.set_index(["id", "F" ]).unstack()["sales"][cols].reset_index()
    te_sub.fillna(0., inplace = True)
    te_sub.sort_values("id", inplace = True)
    te_sub.reset_index(drop=True, inplace = True)
    te_sub.to_csv(f"submission_{icount}.csv",index=False)
    if icount == 0 :
        # Copy so that scaling the blend does not also modify this run's frame.
        sub = te_sub.copy()
        sub[cols] *= weight
    else:
        # Every te_sub is sorted by id with a fresh RangeIndex, so rows align.
        sub[cols] += te_sub[cols]*weight
    print(icount, alpha, weight)


# Duplicate the blended forecast under the "evaluation" ids.
# regex=True is required: "validation$" is a regular expression anchored at the
# end of the id, and pandas >= 2.0 treats the pattern literally by default
# (which would leave every id unchanged).
sub2 = sub.copy()
sub2["id"] = sub2["id"].str.replace("validation$", "evaluation", regex=True)
sub = pd.concat([sub, sub2], axis=0, sort=False)

0 2016-04-25 00:00:00
1 2016-04-26 00:00:00
2 2016-04-27 00:00:00
3 2016-04-28 00:00:00
4 2016-04-29 00:00:00
5 2016-04-30 00:00:00
6 2016-05-01 00:00:00
7 2016-05-02 00:00:00
8 2016-05-03 00:00:00
9 2016-05-04 00:00:00
10 2016-05-05 00:00:00
11 2016-05-06 00:00:00
12 2016-05-07 00:00:00
13 2016-05-08 00:00:00
14 2016-05-09 00:00:00
15 2016-05-10 00:00:00
16 2016-05-11 00:00:00
17 2016-05-12 00:00:00
18 2016-05-13 00:00:00
19 2016-05-14 00:00:00
20 2016-05-15 00:00:00
21 2016-05-16 00:00:00
22 2016-05-17 00:00:00
23 2016-05-18 00:00:00
24 2016-05-19 00:00:00
25 2016-05-20 00:00:00
26 2016-05-21 00:00:00
27 2016-05-22 00:00:00
0 1.028 0.3333333333333333
0 2016-04-25 00:00:00
1 2016-04-26 00:00:00
2 2016-04-27 00:00:00
3 2016-04-28 00:00:00
4 2016-04-29 00:00:00
5 2016-04-30 00:00:00
6 2016-05-01 00:00:00
7 2016-05-02 00:00:00
8 2016-05-03 00:00:00
9 2016-05-04 00:00:00
10 2016-05-05 00:00:00
11 2016-05-06 00:00:00
12 2016-05-07 00:00:00
13 2016-05-08 00:00:00
14 2016-05-09 00:00:00
15 2

In [138]:
# Display the blended forecast carrying the "evaluation" ids.
sub2

F,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_evaluation,0.905853,0.843855,0.820961,0.806487,1.067748,1.259153,1.188165,0.974507,0.970208,0.943856,0.955855,1.065013,1.294324,1.184291,0.974664,0.910966,0.932386,0.937727,1.107358,1.360487,1.226383,0.945862,0.866742,0.842662,0.850956,1.023295,1.275725,1.245933
1,FOODS_1_001_CA_2_evaluation,0.874720,0.986451,0.896156,1.079659,1.194492,1.407000,1.450760,0.906453,0.959294,0.928870,0.934563,1.104757,1.425282,1.258413,0.987821,0.965133,0.963860,0.979240,1.174562,1.526722,1.451883,1.020054,0.945700,0.942331,0.928125,1.121182,1.539689,1.312344
2,FOODS_1_001_CA_3_evaluation,1.070036,1.019156,0.931146,0.921529,0.996673,1.135927,1.397485,1.049176,1.067223,0.934157,1.095728,1.053879,1.412200,1.413113,1.111579,1.101744,0.999138,1.010534,1.086551,1.525843,1.609219,1.085067,1.040640,0.942493,0.936957,1.035578,1.326760,1.275991
3,FOODS_1_001_CA_4_evaluation,0.401014,0.373246,0.374996,0.379155,0.446966,0.488383,0.526043,0.395390,0.434815,0.420106,0.451994,0.400438,0.433242,0.390287,0.357482,0.368359,0.397145,0.411933,0.431641,0.483172,0.479042,0.362900,0.367957,0.369824,0.374720,0.430194,0.461675,0.474385
4,FOODS_1_001_TX_1_evaluation,0.192134,0.189172,0.181880,0.183893,0.178469,0.192458,0.233864,0.478940,0.446638,0.445022,0.461438,0.487279,0.515598,0.460810,0.406697,0.422296,0.354706,0.350296,0.394136,0.387713,0.371079,0.289855,0.288209,0.289875,0.288226,0.321571,0.363940,0.351393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30485,HOUSEHOLD_2_516_TX_2_evaluation,0.235053,0.224667,0.258993,0.243270,0.308324,0.377202,0.339179,0.229460,0.228717,0.227609,0.263239,0.334694,0.367231,0.308942,0.235158,0.235558,0.228139,0.234672,0.284451,0.378817,0.344366,0.240527,0.229531,0.231397,0.225035,0.287833,0.371935,0.338046
30486,HOUSEHOLD_2_516_TX_3_evaluation,0.168356,0.156442,0.166828,0.156655,0.194795,0.225448,0.172207,0.137060,0.126791,0.114776,0.130797,0.153228,0.193661,0.162202,0.134983,0.129626,0.130381,0.132415,0.163282,0.199080,0.176770,0.146277,0.139693,0.145108,0.148463,0.183980,0.213247,0.193197
30487,HOUSEHOLD_2_516_WI_1_evaluation,0.085846,0.082399,0.083037,0.088839,0.107103,0.109216,0.103450,0.095191,0.092430,0.087470,0.101378,0.143281,0.151900,0.117990,0.091066,0.084774,0.087434,0.092320,0.131923,0.142546,0.133372,0.087889,0.084602,0.085425,0.091157,0.140819,0.151829,0.143345
30488,HOUSEHOLD_2_516_WI_2_evaluation,0.039014,0.038651,0.037561,0.092555,0.103876,0.098261,0.094695,0.089236,0.086790,0.081505,0.112414,0.130581,0.127964,0.102449,0.093536,0.090715,0.087783,0.091939,0.125104,0.127545,0.113601,0.096186,0.092910,0.097341,0.082900,0.106945,0.110571,0.100681


In [139]:
# Build the "validation" half of the submission from the "evaluation" frame.
# The copy has to come first: in the original cell order `sub4` was assigned
# to before it was created (NameError on a fresh kernel), and the later
# `sub2.copy()` would have overwritten the rewritten ids again.
sub4 = sub2.copy()
# Plain substring replacement, no regular expression needed.
sub4["id"] = sub4["id"].str.replace("evaluation", "validation", regex=False)

In [140]:
# Stack the two frames row-wise: the sub4 rows first, then the sub2 rows.
sub_new = pd.concat([sub4, sub2], axis=0, sort=False)

In [141]:
# Preview the combined submission frame.
sub_new.head()

F,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_validation,0.905853,0.843855,0.820961,0.806487,1.067748,1.259153,1.188165,0.974507,0.970208,0.943856,0.955855,1.065013,1.294324,1.184291,0.974664,0.910966,0.932386,0.937727,1.107358,1.360487,1.226383,0.945862,0.866742,0.842662,0.850956,1.023295,1.275725,1.245933
1,FOODS_1_001_CA_2_validation,0.87472,0.986451,0.896156,1.079659,1.194492,1.407,1.45076,0.906453,0.959294,0.92887,0.934563,1.104757,1.425282,1.258413,0.987821,0.965133,0.96386,0.97924,1.174562,1.526722,1.451883,1.020054,0.9457,0.942331,0.928125,1.121182,1.539689,1.312344
2,FOODS_1_001_CA_3_validation,1.070036,1.019156,0.931146,0.921529,0.996673,1.135927,1.397485,1.049176,1.067223,0.934157,1.095728,1.053879,1.4122,1.413113,1.111579,1.101744,0.999138,1.010534,1.086551,1.525843,1.609219,1.085067,1.04064,0.942493,0.936957,1.035578,1.32676,1.275991
3,FOODS_1_001_CA_4_validation,0.401014,0.373246,0.374996,0.379155,0.446966,0.488383,0.526043,0.39539,0.434815,0.420106,0.451994,0.400438,0.433242,0.390287,0.357482,0.368359,0.397145,0.411933,0.431641,0.483172,0.479042,0.3629,0.367957,0.369824,0.37472,0.430194,0.461675,0.474385
4,FOODS_1_001_TX_1_validation,0.192134,0.189172,0.18188,0.183893,0.178469,0.192458,0.233864,0.47894,0.446638,0.445022,0.461438,0.487279,0.515598,0.46081,0.406697,0.422296,0.354706,0.350296,0.394136,0.387713,0.371079,0.289855,0.288209,0.289875,0.288226,0.321571,0.36394,0.351393


In [142]:
# Move `id` into the index (in place) so that only the F-columns remain as data columns.
sub_new.set_index(["id"], inplace=True)

In [143]:
# Preview the frame now indexed by id.
sub_new.head()

F,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
FOODS_1_001_CA_1_validation,0.905853,0.843855,0.820961,0.806487,1.067748,1.259153,1.188165,0.974507,0.970208,0.943856,0.955855,1.065013,1.294324,1.184291,0.974664,0.910966,0.932386,0.937727,1.107358,1.360487,1.226383,0.945862,0.866742,0.842662,0.850956,1.023295,1.275725,1.245933
FOODS_1_001_CA_2_validation,0.87472,0.986451,0.896156,1.079659,1.194492,1.407,1.45076,0.906453,0.959294,0.92887,0.934563,1.104757,1.425282,1.258413,0.987821,0.965133,0.96386,0.97924,1.174562,1.526722,1.451883,1.020054,0.9457,0.942331,0.928125,1.121182,1.539689,1.312344
FOODS_1_001_CA_3_validation,1.070036,1.019156,0.931146,0.921529,0.996673,1.135927,1.397485,1.049176,1.067223,0.934157,1.095728,1.053879,1.4122,1.413113,1.111579,1.101744,0.999138,1.010534,1.086551,1.525843,1.609219,1.085067,1.04064,0.942493,0.936957,1.035578,1.32676,1.275991
FOODS_1_001_CA_4_validation,0.401014,0.373246,0.374996,0.379155,0.446966,0.488383,0.526043,0.39539,0.434815,0.420106,0.451994,0.400438,0.433242,0.390287,0.357482,0.368359,0.397145,0.411933,0.431641,0.483172,0.479042,0.3629,0.367957,0.369824,0.37472,0.430194,0.461675,0.474385
FOODS_1_001_TX_1_validation,0.192134,0.189172,0.18188,0.183893,0.178469,0.192458,0.233864,0.47894,0.446638,0.445022,0.461438,0.487279,0.515598,0.46081,0.406697,0.422296,0.354706,0.350296,0.394136,0.387713,0.371079,0.289855,0.288209,0.289875,0.288226,0.321571,0.36394,0.351393


In [144]:
# Set every forecast value that is <= 0.01 to exactly 0.
# `id` is in the index at this point, so the comparison only touches the F-columns.
sub_new[sub_new <= 0.01] = 0

In [148]:
# Turn `id` back into a regular column before writing the file.
sub_new = sub_new.reset_index()

In [150]:
# Write the final submission without the row index.
# NOTE(review): the target is a machine-specific absolute path -- adjust before running elsewhere.
sub_new.to_csv("/Users/yanzeliu/Desktop/submission_lgb.csv",index=False)

In [151]:
# Final look at the frame that was written to disk.
sub_new.head()

F,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_validation,0.905853,0.843855,0.820961,0.806487,1.067748,1.259153,1.188165,0.974507,0.970208,0.943856,0.955855,1.065013,1.294324,1.184291,0.974664,0.910966,0.932386,0.937727,1.107358,1.360487,1.226383,0.945862,0.866742,0.842662,0.850956,1.023295,1.275725,1.245933
1,FOODS_1_001_CA_2_validation,0.87472,0.986451,0.896156,1.079659,1.194492,1.407,1.45076,0.906453,0.959294,0.92887,0.934563,1.104757,1.425282,1.258413,0.987821,0.965133,0.96386,0.97924,1.174562,1.526722,1.451883,1.020054,0.9457,0.942331,0.928125,1.121182,1.539689,1.312344
2,FOODS_1_001_CA_3_validation,1.070036,1.019156,0.931146,0.921529,0.996673,1.135927,1.397485,1.049176,1.067223,0.934157,1.095728,1.053879,1.4122,1.413113,1.111579,1.101744,0.999138,1.010534,1.086551,1.525843,1.609219,1.085067,1.04064,0.942493,0.936957,1.035578,1.32676,1.275991
3,FOODS_1_001_CA_4_validation,0.401014,0.373246,0.374996,0.379155,0.446966,0.488383,0.526043,0.39539,0.434815,0.420106,0.451994,0.400438,0.433242,0.390287,0.357482,0.368359,0.397145,0.411933,0.431641,0.483172,0.479042,0.3629,0.367957,0.369824,0.37472,0.430194,0.461675,0.474385
4,FOODS_1_001_TX_1_validation,0.192134,0.189172,0.18188,0.183893,0.178469,0.192458,0.233864,0.47894,0.446638,0.445022,0.461438,0.487279,0.515598,0.46081,0.406697,0.422296,0.354706,0.350296,0.394136,0.387713,0.371079,0.289855,0.288209,0.289875,0.288226,0.321571,0.36394,0.351393
