# Step 0.0. Install LightAutoML

# Step 0.1. Import necessary libraries 

In [3]:
# Standard python libraries
import logging
import os
import time
import requests
logging.basicConfig(format='[%(asctime)s] (%(levelname)s): %(message)s', level=logging.INFO)

# Installed libraries
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
import torch

# Imports from our package
from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
from lightautoml.tasks import Task

import multiprocessing as mp

# show up to 200 rows and 200 columns when a DataFrame is displayed
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)




# Step 0.2. Parameters 

In [4]:
# Worker threads handed to the LightGBM and linear models (one per detected CPU).
N_THREADS = mp.cpu_count()

# Number of cross-validation folds used by AutoML.
N_FOLDS = 10

# Seed shared by every component that accepts a random state.
RANDOM_STATE = 42

# AutoML time budget in seconds (three hours).
TIMEOUT = 3 * 60 * 60

# Name of the column the model has to predict.
TARGET_NAME = 'final_price'

# Step 0.3. Data load 

In [5]:
%%time

# Load the training table. The CSV's unnamed first column (the saved row index)
# is read as an ordinary column called 'Unnamed: 0'.
train_data = pd.read_csv('train_data.csv')
# Preview the first rows.
train_data.head()

Wall time: 69.3 ms


Unnamed: 0,row_ID,vehicle_manufacturer,vehicle_model,vehicle_category,current_mileage,vehicle_year,vehicle_gearbox_type,doors_cnt,wheels,vehicle_color,vehicle_interior_color,car_vin,car_leather_interior,deal_type,final_price
0,0,TOYOTA,Aqua s,Sedan,133000,2014,Automatic,4/5,Right-hand drive,Silver,Black,,0,For Sale,3650.0
1,1,MERCEDES-BENZ,C 220,Sedan,24500,2010,Manual,4/5,Left wheel,Silver,Black,,0,For Sale,6800.0
2,2,HYUNDAI,Veloster,Hatchback,31000,2016,Tiptronic,2/3,Left wheel,Silver,Black,KMHTC6AE3GU293912,1,For Sale,6300.0
3,3,HYUNDAI,Santa FE,Jeep,115459,2015,Automatic,4/5,Left wheel,Blue,Black,,1,For Sale,14488.0
4,4,TOYOTA,CHR,Jeep,18950,2019,Automatic,4/5,Left wheel,Black,,JTNKHMBX7K1030253,1,For Sale,5000.0


In [6]:
# Load the test table; it has the same columns as train except for the target 'final_price'.
test_data = pd.read_csv('test_data.csv')
# Preview the first rows.
test_data.head()

Unnamed: 0,row_ID,vehicle_manufacturer,vehicle_model,vehicle_category,current_mileage,vehicle_year,vehicle_gearbox_type,doors_cnt,wheels,vehicle_color,vehicle_interior_color,car_vin,car_leather_interior,deal_type
0,35000,TOYOTA,Prius,Hatchback,323733,2012,Automatic,4/5,Left wheel,Grey,Black,JTDKN3DU6C5439638,1,For Sale
1,35001,HYUNDAI,Elantra,Sedan,112000,2013,Tiptronic,4/5,Left wheel,Grey,Black,SURATSHIA,1,For Sale
2,35002,LEXUS,NX 300,Jeep,16920,2018,Automatic,,Left wheel,Brown,,JTJYARBZ5J2104521,1,For Sale
3,35003,LEXUS,CT 200h,Hatchback,302742,2012,Automatic,4/5,Left wheel,White,,JTHKD5BH4C2070945,1,For Sale
4,35004,TOYOTA,RAV 4,Jeep,1800,2002,Manual,4/5,Left wheel,Silver,Black,,0,For Sale


In [8]:
# Load the submission template: one row per test row_ID with a 'final_price' column
# (0 in the previewed rows) to be filled with predictions.
submission = pd.read_csv('sample_submission.csv')
# Preview the first rows.
submission.head()

Unnamed: 0,row_ID,final_price
0,35000,0
1,35001,0
2,35002,0
3,35003,0
4,35004,0


# Step 0.4. Some user feature preparation 

Be creative :)

First, let's check the correctness of the data in the train and test tables. Let's start with *current_mileage*.

train_data

In [9]:
train_data['current_mileage'].sort_values()

32771             0
3136              0
11941             0
1193              0
15936             0
            ...    
10885    2147483647
28857    2147483647
316      2147483647
26303    2147483647
13926    2147483647
Name: current_mileage, Length: 35000, dtype: int64

test_data

In [10]:
test_data['current_mileage'].sort_values()

5195             0
3936             0
2552             0
2550             0
3944             0
           ...    
1052    2147483647
9805    2147483647
7876    2147483647
1072    2147483647
1619    2147483647
Name: current_mileage, Length: 10697, dtype: int64

As we can see, some mileage values are extremely high. The world record for car mileage is around 4,890,000 kilometers, so let's treat any mileage above 4,890,000 kilometers as wrong data and add a feature that flags such rows to our train and test datasets.

*final_price*

In [11]:
train_data.head()

Unnamed: 0,row_ID,vehicle_manufacturer,vehicle_model,vehicle_category,current_mileage,vehicle_year,vehicle_gearbox_type,doors_cnt,wheels,vehicle_color,vehicle_interior_color,car_vin,car_leather_interior,deal_type,final_price
0,0,TOYOTA,Aqua s,Sedan,133000,2014,Automatic,4/5,Right-hand drive,Silver,Black,,0,For Sale,3650.0
1,1,MERCEDES-BENZ,C 220,Sedan,24500,2010,Manual,4/5,Left wheel,Silver,Black,,0,For Sale,6800.0
2,2,HYUNDAI,Veloster,Hatchback,31000,2016,Tiptronic,2/3,Left wheel,Silver,Black,KMHTC6AE3GU293912,1,For Sale,6300.0
3,3,HYUNDAI,Santa FE,Jeep,115459,2015,Automatic,4/5,Left wheel,Blue,Black,,1,For Sale,14488.0
4,4,TOYOTA,CHR,Jeep,18950,2019,Automatic,4/5,Left wheel,Black,,JTNKHMBX7K1030253,1,For Sale,5000.0


In [12]:
train_data[train_data['final_price'] > 100000].sort_values('final_price')

Unnamed: 0,row_ID,vehicle_manufacturer,vehicle_model,vehicle_category,current_mileage,vehicle_year,vehicle_gearbox_type,doors_cnt,wheels,vehicle_color,vehicle_interior_color,car_vin,car_leather_interior,deal_type,final_price
20577,20577,BMW,520,Sedan,536688566,1992,Manual,4/5,Left wheel,Grey,Black,,1,For Sale,111111.0
19820,19820,PORSCHE,Panamera Turbo S E-Hyb,Sedan,16000,2017,Tiptronic,4/5,Left wheel,Carnelian red,White,,1,For Sale,135000.0
32937,32937,LAMBORGHINI,Gallardo,Coupe,26000,2013,Tiptronic,2/3,Left wheel,Yellow,Black,,1,For Sale,135000.0
15291,15291,MERCEDES-BENZ,G 65 AMG 63AMG,Jeep,0,2020,Tiptronic,4/5,Left wheel,Black,Brown,,1,For Sale,200000.0
12136,12136,BMW,328,Sedan,246000,2008,Automatic,4/5,Left wheel,Sky blue,Yellow,,1,For Sale,1111111.0
24980,24980,SUBARU,Legacy,Sedan,97000,2003,Manual,4/5,Right-hand drive,Silver,Black,,1,For Sale,1111111.0
34052,34052,MERCEDES-BENZ,ML 320,Jeep,111111111,2002,Tiptronic,4/5,Left wheel,White,Black,,1,For Sale,6666666.0
3877,3877,VOLKSWAGEN,Golf,Sedan,7676,2007,Tiptronic,4/5,Left wheel,Red,Golden,,0,For Sale,8388607.0
32442,32442,OPEL,Combo,Goods wagon,99999,1999,Manual,2/3,Left wheel,Blue,Black,,0,For Sale,8388607.0


As we can see, the prices of some cars are too high. For example, a Volkswagen Golf can't cost over 8 million dollars. So let's drop all cars with a price of more than $200,000.

In [13]:
# Rows whose price is above the 200000 sanity cap are treated as data errors.
price_too_high = train_data['final_price'].gt(200000)
drop_index = train_data.index[price_too_high.to_numpy()]
# Remove them from the training frame in place.
train_data.drop(index=drop_index, inplace=True)

Add new features

In [14]:
%%time

# flag odometer readings that are too large to be real
def wrong_milage(col):
    """Return 1 when the mileage value is above 4890000, otherwise 0."""
    return int(col > 4890000)

# define mileage of car per year
def milage_per_year(row, current_year=2021):
    """Return the average mileage per year of age for one vehicle row.

    Parameters
    ----------
    row : mapping
        Must provide 'vehicle_year' and 'current_mileage'.
    current_year : int, default 2021
        Reference year used to compute the vehicle's age.

    Returns
    -------
    number
        ``current_mileage / (current_year - vehicle_year)``, or 0 when the
        ratio is undefined: the year is the placeholder 0, or the vehicle was
        built in ``current_year`` (age of zero years).
    """
    production_year = row['vehicle_year']
    # a year of 0 is a placeholder for "unknown", not a real production year
    if production_year == 0:
        return 0
    age = current_year - production_year
    # a vehicle from the reference year would otherwise divide by zero
    if age == 0:
        return 0
    return row['current_mileage'] / age

# add manufacturer country for each car
usa     = ['FORD', 'CHEVROLET', 'JEEP', 'DODGE', 'MERCURY', 'CADILLAC', 'CHRYSLER', 
           'TESLA', 'SCION', 'GMC', 'HUMMER', 'LINCOLN', 'BUICK', 'SATURN', 'PONTIAC']
japan   = ['TOYOTA', 'MITSUBISHI', 'NISSAN', 'LEXUS', 'HONDA', 'SUBARU', 'MAZDA', 
           'SUZUKI', 'ACURA', 'INFINITI', 'ISUZU', 'DAIHATSU']
england = ['MINI', 'JAGUAR', 'LAND ROVER', 'ASTON MARTIN', 'ROVER', 'MG', 'BENTLEY', 
           'ROLLS-ROYCE']
italy   = ['FIAT', 'ALFA ROMEO', 'FERRARI', 'MASERATI', 'LANCIA', 'LAMBORGHINI']
germany = ['MERCEDES-BENZ', 'BMW', 'AUDI', 'VOLKSWAGEN', 'OPEL', 'PORSCHE']
korea   = ['HYUNDAI', 'DAEWOO', 'SSANGYONG', 'KIA']
# NOTE(review): ZAZ is a Ukrainian make; it stays in this group to avoid a
# one-member category - confirm whether a separate label is wanted.
russia  = ['VAZ', 'ZAZ', 'UAZ', 'GAZ', 'MOSKVICH']
france  = ['RENAULT', 'PEUGEOT', 'CITROEN']
# FOTON is a Chinese manufacturer (it was previously listed under `usa`)
china   = ['GREATWALL', 'JAC', 'FOTON']
sweden  = ['VOLVO', 'SAAB']
spain   = ['SEAT']
india   = ['TATA']
others  = ['სხვა']

# (label, member list) pairs, checked in this order by manufacturer_country
_COUNTRY_GROUPS = (
    ('USA', usa),
    ('Japan', japan),
    ('England', england),
    ('Italy', italy),
    ('Germany', germany),
    ('Korea', korea),
    ('Russia', russia),
    ('France', france),
    ('China', china),
    ('Sweden', sweden),
    ('Spain', spain),
    ('India', india),
    ('Others', others),
)

def manufacturer_country(col):
    """Map a manufacturer name to the label of its country group.

    Parameters
    ----------
    col : str
        Manufacturer name as it appears in 'vehicle_manufacturer'
        (upper-case in the lists above).

    Returns
    -------
    str or float
        The country label, or ``np.nan`` when the name is in none of the
        lists (including missing values).
    """
    for country, makes in _COUNTRY_GROUPS:
        if col in makes:
            return country
    return np.nan

def create_extra_features(data):
    """Add the hand-crafted feature columns to `data` in place.

    New columns, in creation order: 'wrong_mileage', 'manufacturer_country',
    'current_mileage_sq', 'NANs_cnt', 'mileage_per_year', 'mileage_per_year_sq'.
    Nothing is returned.
    """
    mileage = data['current_mileage']

    # 1/0 flag for cars whose mileage is too big to be real
    data['wrong_mileage'] = mileage.apply(wrong_milage)

    # country group of the manufacturer
    makes = data['vehicle_manufacturer']
    data['manufacturer_country'] = makes.apply(manufacturer_country)

    # squared mileage
    data['current_mileage_sq'] = mileage.pow(2)

    # number of missing values per row, counted over the columns present at this point
    missing_mask = data.isna()
    data['NANs_cnt'] = missing_mask.sum(axis=1)

    # mileage per year of age, computed row by row, and its square
    data['mileage_per_year'] = data.apply(milage_per_year, axis=1)
    data['mileage_per_year_sq'] = data['mileage_per_year'].pow(2)

create_extra_features(train_data)
create_extra_features(test_data)

Wall time: 484 ms


Features from strong baseline

In [15]:
def create_col_with_min_freq(data, col, min_freq = 10):
    """Add `col + '_fixed'`: a copy of `col` with rare values merged together.

    The column is converted to strings, every value that occurs in fewer than
    `min_freq` rows is replaced by "RARE_VALUE", and the literal 'nan' strings
    produced by the conversion are turned back into real missing values.

    Only the new '<col>_fixed' column is written; all other columns of `data`
    are left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to modify in place.
    col : str
        Name of the source column.
    min_freq : int, default 10
        Minimum number of rows a value needs in order to be kept as is.
    """
    # replace rare values (less than min_freq rows) in feature by RARE_VALUE
    as_str = data[col].astype(str)
    row_counts = as_str.map(as_str.value_counts())
    fixed = as_str.mask(row_counts < min_freq, "RARE_VALUE")
    # restore missing values, but only inside the column created here
    data[col + '_fixed'] = fixed.mask(fixed == 'nan', np.nan)
    
def create_gr_feats(data):
    """Add group-statistic and frequency features to `data` in place.

    For every categorical column a rare-value-merged '<col>_fixed' column is
    created (threshold 15 rows), and for each numeric column the per-group
    mean, min and max are stored as
    'FIXED_<stat>_<num_col>_by_<cat_col>'. Afterwards a '<col>_cnt' column
    with the number of rows sharing the same value (missing values counted
    too) is added for the listed columns. Nothing is returned.
    """
    # create aggregation feats for numeric features based on categorical ones
    for cat_col in ['vehicle_manufacturer', 'vehicle_model', 'vehicle_category',
                   'vehicle_gearbox_type', 'doors_cnt', 'wheels', 'vehicle_color', 
                   'vehicle_interior_color', 'deal_type']:
        create_col_with_min_freq(data, cat_col, 15)
        for num_col in ['current_mileage', 'vehicle_year', 'car_leather_interior']:
            # group once per (categorical, numeric) pair and reuse it for all statistics
            grouped = data.groupby(cat_col + '_fixed')[num_col]
            # Reductions are requested by name: newer pandas deprecates passing
            # numpy callables such as np.mean / np.nanmin to groupby.transform.
            for stat in ('mean', 'min', 'max'):
                data['FIXED_' + stat + '_' + num_col + '_by_' + cat_col] = grouped.transform(stat)
                
    # create features with counts
    for col in ['vehicle_manufacturer', 'vehicle_model', 'vehicle_category', 'mileage_per_year',
               'current_mileage', 'vehicle_year', 'vehicle_gearbox_type', 'doors_cnt',
               'wheels', 'vehicle_color', 'vehicle_interior_color', 'car_vin', 'deal_type']:
        data[col + '_cnt'] = data[col].map(data[col].value_counts(dropna = False))
        

# Compute the group/count features on train and test stacked together, so both
# parts are encoded with the same statistics.
n_train = len(train_data)
all_df = pd.concat([train_data, test_data]).reset_index(drop = True)
create_gr_feats(all_df)
# Split back by position. .copy() makes each part an independent frame, so later
# edits neither touch all_df nor raise SettingWithCopyWarning.
# Note: the test part keeps the target column from the concat, filled with NaN.
train_data, test_data = all_df.iloc[:n_train].copy(), all_df.iloc[n_train:].copy()

In [16]:
train_data.head(10)

Unnamed: 0,row_ID,vehicle_manufacturer,vehicle_model,vehicle_category,current_mileage,vehicle_year,vehicle_gearbox_type,doors_cnt,wheels,vehicle_color,vehicle_interior_color,car_vin,car_leather_interior,deal_type,final_price,wrong_mileage,manufacturer_country,current_mileage_sq,NANs_cnt,mileage_per_year,mileage_per_year_sq,vehicle_manufacturer_fixed,FIXED_mean_current_mileage_by_vehicle_manufacturer,FIXED_min_current_mileage_by_vehicle_manufacturer,FIXED_max_current_mileage_by_vehicle_manufacturer,FIXED_mean_vehicle_year_by_vehicle_manufacturer,FIXED_min_vehicle_year_by_vehicle_manufacturer,FIXED_max_vehicle_year_by_vehicle_manufacturer,FIXED_mean_car_leather_interior_by_vehicle_manufacturer,FIXED_min_car_leather_interior_by_vehicle_manufacturer,FIXED_max_car_leather_interior_by_vehicle_manufacturer,vehicle_model_fixed,FIXED_mean_current_mileage_by_vehicle_model,FIXED_min_current_mileage_by_vehicle_model,FIXED_max_current_mileage_by_vehicle_model,FIXED_mean_vehicle_year_by_vehicle_model,FIXED_min_vehicle_year_by_vehicle_model,FIXED_max_vehicle_year_by_vehicle_model,FIXED_mean_car_leather_interior_by_vehicle_model,FIXED_min_car_leather_interior_by_vehicle_model,FIXED_max_car_leather_interior_by_vehicle_model,vehicle_category_fixed,FIXED_mean_current_mileage_by_vehicle_category,FIXED_min_current_mileage_by_vehicle_category,FIXED_max_current_mileage_by_vehicle_category,FIXED_mean_vehicle_year_by_vehicle_category,FIXED_min_vehicle_year_by_vehicle_category,FIXED_max_vehicle_year_by_vehicle_category,FIXED_mean_car_leather_interior_by_vehicle_category,FIXED_min_car_leather_interior_by_vehicle_category,FIXED_max_car_leather_interior_by_vehicle_category,vehicle_gearbox_type_fixed,FIXED_mean_current_mileage_by_vehicle_gearbox_type,FIXED_min_current_mileage_by_vehicle_gearbox_type,FIXED_max_current_mileage_by_vehicle_gearbox_type,FIXED_mean_vehicle_year_by_vehicle_gearbox_type,FIXED_min_vehicle_year_by_vehicle_gearbox_type,FIXED_max_vehicle_year_by_vehicle_gearbox_type,FIXED_m
ean_car_leather_interior_by_vehicle_gearbox_type,FIXED_min_car_leather_interior_by_vehicle_gearbox_type,FIXED_max_car_leather_interior_by_vehicle_gearbox_type,doors_cnt_fixed,FIXED_mean_current_mileage_by_doors_cnt,FIXED_min_current_mileage_by_doors_cnt,FIXED_max_current_mileage_by_doors_cnt,FIXED_mean_vehicle_year_by_doors_cnt,FIXED_min_vehicle_year_by_doors_cnt,FIXED_max_vehicle_year_by_doors_cnt,FIXED_mean_car_leather_interior_by_doors_cnt,FIXED_min_car_leather_interior_by_doors_cnt,FIXED_max_car_leather_interior_by_doors_cnt,wheels_fixed,FIXED_mean_current_mileage_by_wheels,FIXED_min_current_mileage_by_wheels,FIXED_max_current_mileage_by_wheels,FIXED_mean_vehicle_year_by_wheels,FIXED_min_vehicle_year_by_wheels,FIXED_max_vehicle_year_by_wheels,FIXED_mean_car_leather_interior_by_wheels,FIXED_min_car_leather_interior_by_wheels,FIXED_max_car_leather_interior_by_wheels,vehicle_color_fixed,FIXED_mean_current_mileage_by_vehicle_color,FIXED_min_current_mileage_by_vehicle_color,FIXED_max_current_mileage_by_vehicle_color,FIXED_mean_vehicle_year_by_vehicle_color,FIXED_min_vehicle_year_by_vehicle_color,FIXED_max_vehicle_year_by_vehicle_color,FIXED_mean_car_leather_interior_by_vehicle_color,FIXED_min_car_leather_interior_by_vehicle_color,FIXED_max_car_leather_interior_by_vehicle_color,vehicle_interior_color_fixed,FIXED_mean_current_mileage_by_vehicle_interior_color,FIXED_min_current_mileage_by_vehicle_interior_color,FIXED_max_current_mileage_by_vehicle_interior_color,FIXED_mean_vehicle_year_by_vehicle_interior_color,FIXED_min_vehicle_year_by_vehicle_interior_color,FIXED_max_vehicle_year_by_vehicle_interior_color,FIXED_mean_car_leather_interior_by_vehicle_interior_color,FIXED_min_car_leather_interior_by_vehicle_interior_color,FIXED_max_car_leather_interior_by_vehicle_interior_color,deal_type_fixed,FIXED_mean_current_mileage_by_deal_type,FIXED_min_current_mileage_by_deal_type,FIXED_max_current_mileage_by_deal_type,FIXED_mean_vehicle_year_by_deal_type,FIXED_min_vehicle_year_by_
deal_type,FIXED_max_vehicle_year_by_deal_type,FIXED_mean_car_leather_interior_by_deal_type,FIXED_min_car_leather_interior_by_deal_type,FIXED_max_car_leather_interior_by_deal_type,vehicle_manufacturer_cnt,vehicle_model_cnt,vehicle_category_cnt,mileage_per_year_cnt,current_mileage_cnt,vehicle_year_cnt,vehicle_gearbox_type_cnt,doors_cnt_cnt,wheels_cnt,vehicle_color_cnt,vehicle_interior_color_cnt,car_vin_cnt,deal_type_cnt
0,0,TOYOTA,Aqua s,Sedan,133000,2014,Automatic,4/5,Right-hand drive,Silver,Black,,0,For Sale,3650.0,0,Japan,17689000000,1,19000.0,361000000.0,TOYOTA,766933.6,0,2147483647,2011.910682,1993,2020,0.632855,0,1,RARE_VALUE,1984069.0,0,2147483647,2007.293281,0,2020,0.57001,0,1,Sedan,2256581.0,0,2147483647,2011.172982,1940,2020,0.756826,0,1,Automatic,878461.7,0,2147483647,2012.276778,1939,2020,0.827144,0,1,4/5,1840599.0,0.0,2147484000.0,2010.87328,1939.0,2020.0,0.722633,0.0,1.0,Right-hand drive,4341377.0,0,2147483647,2006.19779,1985,2020,0.188674,0,1,Silver,2224805.0,0.0,2147484000.0,2009.843149,1939.0,2020.0,0.674566,0.0,1.0,Black,2124782.0,0.0,2147484000.0,2010.883452,1940.0,2020.0,0.734389,0.0,1.0,For Sale,2093565.0,0,2147483647,2010.678836,0,2020,0.721315,0,1,8912,4,20216,55,49,4727,31668,42377,3620,8524,27465,30638,44893
1,1,MERCEDES-BENZ,C 220,Sedan,24500,2010,Manual,4/5,Left wheel,Silver,Black,,0,For Sale,6800.0,0,Germany,600250000,1,2227.272727,4960744.0,MERCEDES-BENZ,3053960.0,0,2147483647,2008.610528,1939,2020,0.802785,0,1,C 220,192087.2,0,514000,2000.0,1993,2015,0.183673,0,1,Sedan,2256581.0,0,2147483647,2011.172982,1940,2020,0.756826,0,1,Manual,9467176.0,0,2147483647,2001.557482,1939,2020,0.189474,0,1,4/5,1840599.0,0.0,2147484000.0,2010.87328,1939.0,2020.0,0.722633,0.0,1.0,Left wheel,1940978.0,0,2147483647,2011.107274,1939,2020,0.759995,0,1,Silver,2224805.0,0.0,2147484000.0,2009.843149,1939.0,2020.0,0.674566,0.0,1.0,Black,2124782.0,0.0,2147484000.0,2010.883452,1940.0,2020.0,0.734389,0.0,1.0,For Sale,2093565.0,0,2147483647,2010.678836,0,2020,0.721315,0,1,5243,49,20216,1,1,3260,4845,42377,42070,8524,27465,30638,44893
2,2,HYUNDAI,Veloster,Hatchback,31000,2016,Tiptronic,2/3,Left wheel,Silver,Black,KMHTC6AE3GU293912,1,For Sale,6300.0,0,Korea,961000000,0,6200.0,38440000.0,HYUNDAI,662544.8,0,2147483647,2013.831083,1995,2020,0.910631,0,1,Veloster,90254.09,0,322389,2013.715596,2011,2019,0.889908,0,1,Hatchback,1314011.0,0,2147483647,2010.141874,1953,2020,0.478504,0,1,Tiptronic,2758090.0,0,2147483647,2010.196563,1995,2020,0.676579,0,1,2/3,7476186.0,0.0,2147484000.0,2004.806841,1941.0,2020.0,0.460262,0.0,1.0,Left wheel,1940978.0,0,2147483647,2011.107274,1939,2020,0.759995,0,1,Silver,2224805.0,0.0,2147484000.0,2009.843149,1939.0,2020.0,0.674566,0.0,1.0,Black,2124782.0,0.0,2147484000.0,2010.883452,1940.0,2020.0,0.734389,0.0,1.0,For Sale,2093565.0,0,2147483647,2010.678836,0,2020,0.721315,0,1,8146,218,6978,13,20,3278,7331,1988,42070,8524,27465,1,44893
3,3,HYUNDAI,Santa FE,Jeep,115459,2015,Automatic,4/5,Left wheel,Blue,Black,,1,For Sale,14488.0,0,Korea,13330780681,1,19243.166667,370299500.0,HYUNDAI,662544.8,0,2147483647,2013.831083,1995,2020,0.910631,0,1,Santa FE,114330.6,0,372623,2014.295848,2001,2020,0.987889,0,1,Jeep,1608005.0,0,2147483647,2011.7031,1953,2020,0.86423,0,1,Automatic,878461.7,0,2147483647,2012.276778,1939,2020,0.827144,0,1,4/5,1840599.0,0.0,2147484000.0,2010.87328,1939.0,2020.0,0.722633,0.0,1.0,Left wheel,1940978.0,0,2147483647,2011.107274,1939,2020,0.759995,0,1,Blue,2205309.0,0.0,2147484000.0,2009.105088,1951.0,2020.0,0.588734,0.0,1.0,Black,2124782.0,0.0,2147484000.0,2010.883452,1940.0,2020.0,0.734389,0.0,1.0,For Sale,2093565.0,0,2147483647,2010.678836,0,2020,0.721315,0,1,8146,1156,13486,2,2,3451,31668,42377,42070,3302,27465,30638,44893
4,4,TOYOTA,CHR,Jeep,18950,2019,Automatic,4/5,Left wheel,Black,,JTNKHMBX7K1030253,1,For Sale,5000.0,0,Japan,359102500,1,9475.0,89775620.0,TOYOTA,766933.6,0,2147483647,2011.910682,1993,2020,0.632855,0,1,CHR,30726.56,0,85214,2018.37415,2017,2019,0.829932,0,1,Jeep,1608005.0,0,2147483647,2011.7031,1953,2020,0.86423,0,1,Automatic,878461.7,0,2147483647,2012.276778,1939,2020,0.827144,0,1,4/5,1840599.0,0.0,2147484000.0,2010.87328,1939.0,2020.0,0.722633,0.0,1.0,Left wheel,1940978.0,0,2147483647,2011.107274,1939,2020,0.759995,0,1,Black,2496681.0,0.0,2147484000.0,2011.467738,1940.0,2020.0,0.809162,0.0,1.0,,,,,,,,,,,For Sale,2093565.0,0,2147483647,2010.678836,0,2020,0.721315,0,1,8912,147,13486,4,4,853,31668,42377,42070,11701,8796,4,44893
5,5,MITSUBISHI,Delica,Jeep,149000,2003,Automatic,4/5,Right-hand drive,Silver,Black,,0,For Sale,20.0,0,Japan,22201000000,1,8277.777778,68521600.0,MITSUBISHI,185368.1,0,18065445,2004.55144,1985,2020,0.182442,0,1,Delica,176839.7,0,371500,2000.928571,1985,2007,0.285714,0,1,Jeep,1608005.0,0,2147483647,2011.7031,1953,2020,0.86423,0,1,Automatic,878461.7,0,2147483647,2012.276778,1939,2020,0.827144,0,1,4/5,1840599.0,0.0,2147484000.0,2010.87328,1939.0,2020.0,0.722633,0.0,1.0,Right-hand drive,4341377.0,0,2147483647,2006.19779,1985,2020,0.188674,0,1,Silver,2224805.0,0.0,2147484000.0,2009.843149,1939.0,2020.0,0.674566,0.0,1.0,Black,2124782.0,0.0,2147484000.0,2010.883452,1940.0,2020.0,0.734389,0.0,1.0,For Sale,2093565.0,0,2147483647,2010.678836,0,2020,0.721315,0,1,729,42,13486,6,45,935,31668,42377,3620,8524,27465,30638,44893
6,6,HYUNDAI,Tucson,Jeep,173600,2013,Automatic,4/5,Left wheel,Silver,Black,,1,For Sale,7632.0,0,Korea,30136960000,1,21700.0,470890000.0,HYUNDAI,662544.8,0,2147483647,2013.831083,1995,2020,0.910631,0,1,Tucson,108361.6,0,332450,2014.007903,2004,2020,0.982979,0,1,Jeep,1608005.0,0,2147483647,2011.7031,1953,2020,0.86423,0,1,Automatic,878461.7,0,2147483647,2012.276778,1939,2020,0.827144,0,1,4/5,1840599.0,0.0,2147484000.0,2010.87328,1939.0,2020.0,0.722633,0.0,1.0,Left wheel,1940978.0,0,2147483647,2011.107274,1939,2020,0.759995,0,1,Silver,2224805.0,0.0,2147484000.0,2009.843149,1939.0,2020.0,0.674566,0.0,1.0,Black,2124782.0,0.0,2147484000.0,2010.883452,1940.0,2020.0,0.734389,0.0,1.0,For Sale,2093565.0,0,2147483647,2010.678836,0,2020,0.721315,0,1,8146,1645,13486,5,2,4894,31668,42377,42070,8524,27465,30638,44893
7,7,BMW,X5,Jeep,264000,2006,Tiptronic,4/5,Left wheel,Silver,Black,,1,For Sale,6500.0,0,Germany,69696000000,1,17600.0,309760000.0,BMW,3058740.0,0,2147483647,2007.908285,1947,2020,0.859991,0,1,X5,700528.3,0,333333333,2009.933029,1999,2020,0.986301,0,1,Jeep,1608005.0,0,2147483647,2011.7031,1953,2020,0.86423,0,1,Tiptronic,2758090.0,0,2147483647,2010.196563,1995,2020,0.676579,0,1,4/5,1840599.0,0.0,2147484000.0,2010.87328,1939.0,2020.0,0.722633,0.0,1.0,Left wheel,1940978.0,0,2147483647,2011.107274,1939,2020,0.759995,0,1,Silver,2224805.0,0.0,2147484000.0,2009.843149,1939.0,2020.0,0.674566,0.0,1.0,Black,2124782.0,0.0,2147484000.0,2010.883452,1940.0,2020.0,0.734389,0.0,1.0,For Sale,2093565.0,0,2147483647,2010.678836,0,2020,0.721315,0,1,2257,657,13486,46,24,843,7331,42377,42070,8524,27465,30638,44893
8,8,TOYOTA,Aqua,Hatchback,86658,2017,Automatic,4/5,Left wheel,White,Black,,1,For Sale,4870.0,0,Japan,7509608964,1,21664.5,469350600.0,TOYOTA,766933.6,0,2147483647,2011.910682,1993,2020,0.632855,0,1,Aqua,101650.3,0,970000,2013.181481,2011,2019,0.444444,0,1,Hatchback,1314011.0,0,2147483647,2010.141874,1953,2020,0.478504,0,1,Automatic,878461.7,0,2147483647,2012.276778,1939,2020,0.827144,0,1,4/5,1840599.0,0.0,2147484000.0,2010.87328,1939.0,2020.0,0.722633,0.0,1.0,Left wheel,1940978.0,0,2147483647,2011.107274,1939,2020,0.759995,0,1,White,1237439.0,0.0,2147484000.0,2011.387576,1939.0,2020.0,0.716381,0.0,1.0,Black,2124782.0,0.0,2147484000.0,2010.883452,1940.0,2020.0,0.734389,0.0,1.0,For Sale,2093565.0,0,2147483647,2010.678836,0,2020,0.721315,0,1,8912,540,6978,1,1,2186,31668,42377,42070,11043,27465,30638,44893
9,9,BMW,640,Sedan,190000,2012,Tiptronic,4/5,Left wheel,Grey,Black,,1,For Sale,16500.0,0,Germany,36100000000,1,21111.111111,445679000.0,BMW,3058740.0,0,2147483647,2007.908285,1947,2020,0.859991,0,1,RARE_VALUE,1984069.0,0,2147483647,2007.293281,0,2020,0.57001,0,1,Sedan,2256581.0,0,2147483647,2011.172982,1940,2020,0.756826,0,1,Tiptronic,2758090.0,0,2147483647,2010.196563,1995,2020,0.676579,0,1,4/5,1840599.0,0.0,2147484000.0,2010.87328,1939.0,2020.0,0.722633,0.0,1.0,Left wheel,1940978.0,0,2147483647,2011.107274,1939,2020,0.759995,0,1,Grey,476840.6,0.0,1111111000.0,2011.99927,1963.0,2020.0,0.77946,0.0,1.0,Black,2124782.0,0.0,2147484000.0,2010.883452,1940.0,2020.0,0.734389,0.0,1.0,For Sale,2093565.0,0,2147483647,2010.678836,0,2020,0.721315,0,1,2257,3,20216,7,184,5112,7331,42377,42070,5482,27465,30638,44893


# ========= AutoML preset usage =========


## Step 1. Create Task

In [17]:
%%time

# Regression task ('reg') that uses MAE both as the training loss and as the evaluation metric.
task = Task('reg', loss='mae', metric='mae')

sklearn doesn't support in general case mae and will not be used.


Wall time: 2.52 ms


## Step 2. Setup columns roles

The roles set here specify the target column and the columns to drop, i.e. identifier columns that must not be used as features:

In [18]:
%%time

# Column roles for AutoML: the target plus the columns excluded from the features.
# 'Unnamed: 0' is the row index saved in the CSVs; in the train table it duplicates
# row_ID, so it is dropped together with it instead of being fed to the models.
roles = {'target': TARGET_NAME,
         'drop': ['row_ID', 'Unnamed: 0']
         }

Wall time: 0 ns


## Step 3. Create AutoML from preset

To create the AutoML model we use the `TabularUtilizedAutoML` preset, which is built on top of the `TabularAutoML` preset; the `TabularAutoML` pipeline looks like:

![TabularAutoML preset pipeline](https://github.com/sberbank-ai-lab/LightAutoML/raw/master/imgs/tutorial_2_pipeline.png)

All the parameters we set above can be passed to the preset to change its configuration:

In [19]:
#### %%time 
# Configure the preset with the task, the time budget (seconds), the CPU limit,
# the algorithms to use and the reader settings (parallel jobs, CV folds, seed).
automl = TabularUtilizedAutoML(task = task, 
                       timeout = TIMEOUT,
                       cpu_limit = N_THREADS,
                       general_params = {'use_algos': [['linear_l2', 'lgb', 'lgb_tuned']]},
                       reader_params = {'n_jobs': N_THREADS, 'cv': N_FOLDS, 'random_state': RANDOM_STATE},
                      )
# Fit on the training frame; the returned predictions for the training rows are kept as oof_pred.
oof_pred = automl.fit_predict(train_data, roles = roles)
# Log the predictions and their shape.
logging.info('oof_pred:\n{}\nShape = {}'.format(oof_pred, oof_pred.shape))

Current random state: {'reader_params': {'random_state': 42}, 'general_params': {'return_all_predictions': False}}
Found reader_params in kwargs, need to combine
Merged variant for reader_params = {'n_jobs': 8, 'cv': 10, 'random_state': 42}
Found general_params in kwargs, need to combine
Merged variant for general_params = {'use_algos': [['linear_l2', 'lgb', 'lgb_tuned']], 'return_all_predictions': False}
Start automl preset with listed constraints:
- time: 10799.997000217438 seconds
- cpus: 8 cores
- memory: 16 gb

Train data shape: (34995, 124)
Feats was rejected during automatic roles guess: []


Layer 1 ...
Train process start. Time left 10789.108889579773 secs


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)



Start fitting Lvl_0_Pipe_0_Mod_0_LinearL2 ...

===== Start working with fold 0 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = -3611.4078763950893
Linear model: C = 5e-05 score = -3602.1437322823663
Linear model: C = 0.0001 score = -3590.31772593471
Linear model: C = 0.0005 score = -3501.52542905971
Linear model: C = 0.001 score = -3400.464418736049
Linear model: C = 0.005 score = -2868.888834524972
Linear model: C = 0.01 score = -2604.6120405829292
Linear model: C = 0.05 score = -2311.8759727232796
Linear model: C = 0.1 score = -2247.043958149501
Linear model: C = 0.5 score = -2160.126519141061
Linear model: C = 1 score = -2160.1264875368392
Linear model: C = 5 score = -2115.0022644391743
Linear model: C = 10 score = -2115.0023634272984
Linear model: C = 50 score = -2115.00235206604

===== Start working with fold 1 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = -3615.91643031529
Linear model: C = 5e-05 score = -3606.7017613699777
Linear m

Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer
[2021-09-26 19:17:51,162] (INFO): A new study created in memory with name: no-name-6da3b376-6822-41e1-962d-48da63e7626a


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1518.88
[200]	valid's l1: 1382.67
[300]	valid's l1: 1356.21
[400]	valid's l1: 1345.81
[500]	valid's l1: 1340.3
[600]	valid's l1: 1333.27
[700]	valid's l1: 1328.47
[800]	valid's l1: 1325.28
[900]	valid's l1: 1321.03
[1000]	valid's l1: 1316.86
[1100]	valid's l1: 1314.75
[1200]	valid's l1: 1312.53
Did not meet early stopping. Best iteration is:
[1199]	valid's l1: 1312.52
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:18:17,933] (INFO): Trial 0 finished with value: -1312.5160231816428 and parameters: {'feature_fraction': 0.6872700594236812, 'num_leaves': 244, 'bagging_fraction': 0.8659969709057025, 'min_sum_hessian_in_leaf': 0.24810409748678125, 'reg_alpha': 2.5361081166471375e-07, 'reg_lambda': 2.5348407664333426e-07}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1559.21
[200]	valid's l1: 1419.92
[300]	valid's l1: 1389.92
[400]	valid's l1: 1376.41
[500]	valid's l1: 1365.11
[600]	valid's l1: 1358.71
[700]	valid's l1: 1353.9
[800]	valid's l1: 1350.37
[900]	valid's l1: 1346.64
[1000]	valid's l1: 1343.93
[1100]	valid's l1: 1341.72
[1200]	valid's l1: 1339.47
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1339.47
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:18:40,538] (INFO): Trial 1 finished with value: -1339.467126487302 and parameters: {'feature_fraction': 0.5290418060840998, 'num_leaves': 223, 'bagging_fraction': 0.8005575058716043, 'min_sum_hessian_in_leaf': 0.679657809075816, 'reg_alpha': 1.5320059381854043e-08, 'reg_lambda': 5.360294728728285}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1644.4
[200]	valid's l1: 1513.93
[300]	valid's l1: 1481.54
[400]	valid's l1: 1463.08
[500]	valid's l1: 1451.38
[600]	valid's l1: 1441.09
[700]	valid's l1: 1432.55
[800]	valid's l1: 1425.93
[900]	valid's l1: 1421.59
[1000]	valid's l1: 1416.57
[1100]	valid's l1: 1413.14
[1200]	valid's l1: 1408.72
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1408.72
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:18:52,727] (INFO): Trial 2 finished with value: -1408.7209240723337 and parameters: {'feature_fraction': 0.9162213204002109, 'num_leaves': 66, 'bagging_fraction': 0.5909124836035503, 'min_sum_hessian_in_leaf': 0.00541524411940254, 'reg_alpha': 5.472429642032198e-06, 'reg_lambda': 0.00052821153945323}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1615.39
[200]	valid's l1: 1480.99
[300]	valid's l1: 1457.13
[400]	valid's l1: 1440.34
[500]	valid's l1: 1428.84
[600]	valid's l1: 1420.95
[700]	valid's l1: 1412.95
[800]	valid's l1: 1406.34
[900]	valid's l1: 1400.51
[1000]	valid's l1: 1397.56
[1100]	valid's l1: 1393.23
[1200]	valid's l1: 1390.35
Did not meet early stopping. Best iteration is:
[1198]	valid's l1: 1390.32
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:19:06,446] (INFO): Trial 3 finished with value: -1390.318653514862 and parameters: {'feature_fraction': 0.7159725093210578, 'num_leaves': 85, 'bagging_fraction': 0.8059264473611898, 'min_sum_hessian_in_leaf': 0.003613894271216527, 'reg_alpha': 4.258943089524393e-06, 'reg_lambda': 1.9826980964985924e-05}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1562.75
[200]	valid's l1: 1422.6
[300]	valid's l1: 1395.43
[400]	valid's l1: 1381.04
[500]	valid's l1: 1370.09
[600]	valid's l1: 1363.44
[700]	valid's l1: 1357.61
[800]	valid's l1: 1353.16
[900]	valid's l1: 1349.94
[1000]	valid's l1: 1347.43
[1100]	valid's l1: 1344
[1200]	valid's l1: 1341.74
Did not meet early stopping. Best iteration is:
[1199]	valid's l1: 1341.73
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:19:31,923] (INFO): Trial 4 finished with value: -1341.7264780449186 and parameters: {'feature_fraction': 0.728034992108518, 'num_leaves': 204, 'bagging_fraction': 0.5998368910791798, 'min_sum_hessian_in_leaf': 0.11400863701127326, 'reg_alpha': 0.0021465011216654484, 'reg_lambda': 2.6185068507773707e-08}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1680.15
[200]	valid's l1: 1542.7
[300]	valid's l1: 1505.07
[400]	valid's l1: 1481.57
[500]	valid's l1: 1468.59
[600]	valid's l1: 1456.25
[700]	valid's l1: 1447.96
[800]	valid's l1: 1441.27
[900]	valid's l1: 1434.97
[1000]	valid's l1: 1429.82
[1100]	valid's l1: 1426.09
[1200]	valid's l1: 1421.5
Did not meet early stopping. Best iteration is:
[1198]	valid's l1: 1421.46
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:19:43,545] (INFO): Trial 5 finished with value: -1421.4648058663777 and parameters: {'feature_fraction': 0.8037724259507192, 'num_leaves': 56, 'bagging_fraction': 0.5325257964926398, 'min_sum_hessian_in_leaf': 6.245139574743075, 'reg_alpha': 4.905556676028774, 'reg_lambda': 0.18861495878553936}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1738.47
[200]	valid's l1: 1600.74
[300]	valid's l1: 1563.66
[400]	valid's l1: 1545.12
[500]	valid's l1: 1523.35
[600]	valid's l1: 1512.49
[700]	valid's l1: 1498.58
[800]	valid's l1: 1489.98
[900]	valid's l1: 1486.54
[1000]	valid's l1: 1479.89
[1100]	valid's l1: 1477.91
[1200]	valid's l1: 1474.28
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1474.28
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:19:53,792] (INFO): Trial 6 finished with value: -1474.2751297198704 and parameters: {'feature_fraction': 0.6523068845866853, 'num_leaves': 39, 'bagging_fraction': 0.8421165132560784, 'min_sum_hessian_in_leaf': 0.057624872164786026, 'reg_alpha': 1.254134495897175e-07, 'reg_lambda': 0.00028614897264046574}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1565.13
[200]	valid's l1: 1424.88
[300]	valid's l1: 1396.02
[400]	valid's l1: 1380.86
[500]	valid's l1: 1369.95
[600]	valid's l1: 1362.99
[700]	valid's l1: 1357.76
[800]	valid's l1: 1352.51
[900]	valid's l1: 1348.74
[1000]	valid's l1: 1346.31
[1100]	valid's l1: 1343.06
[1200]	valid's l1: 1340.96
Did not meet early stopping. Best iteration is:
[1196]	valid's l1: 1340.87
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:20:16,618] (INFO): Trial 7 finished with value: -1340.8703079289028 and parameters: {'feature_fraction': 0.5171942605576092, 'num_leaves': 234, 'bagging_fraction': 0.6293899908000085, 'min_sum_hessian_in_leaf': 0.4467752817973907, 'reg_alpha': 6.388511557344611e-06, 'reg_lambda': 0.0004793052550782129}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1664.19
[200]	valid's l1: 1527.16
[300]	valid's l1: 1501.47
[400]	valid's l1: 1485.03
[500]	valid's l1: 1471.32
[600]	valid's l1: 1465.64
[700]	valid's l1: 1459.97
[800]	valid's l1: 1454.11
[900]	valid's l1: 1453.14
[1000]	valid's l1: 1450.47
[1100]	valid's l1: 1446.67
[1200]	valid's l1: 1444.78
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1444.78
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:20:30,065] (INFO): Trial 8 finished with value: -1444.7765902200426 and parameters: {'feature_fraction': 0.7733551396716398, 'num_leaves': 60, 'bagging_fraction': 0.9847923138822793, 'min_sum_hessian_in_leaf': 1.2604664585649468, 'reg_alpha': 2.854239907497756, 'reg_lambda': 1.1309571585271483}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1552.05
[200]	valid's l1: 1422.05
[300]	valid's l1: 1396.54
[400]	valid's l1: 1381.99
[500]	valid's l1: 1372.37
[600]	valid's l1: 1365.15
[700]	valid's l1: 1358.02
[800]	valid's l1: 1353.28
[900]	valid's l1: 1350.2
[1000]	valid's l1: 1345.74
[1100]	valid's l1: 1342.73
[1200]	valid's l1: 1340.98
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1340.98
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:20:58,426] (INFO): Trial 9 finished with value: -1340.9828104547773 and parameters: {'feature_fraction': 0.7989499894055425, 'num_leaves': 237, 'bagging_fraction': 0.5442462510259598, 'min_sum_hessian_in_leaf': 0.006080390190296602, 'reg_alpha': 2.5529693461039728e-08, 'reg_lambda': 8.471746987003668e-06}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1542.87
[200]	valid's l1: 1407.86
[300]	valid's l1: 1388.22
[400]	valid's l1: 1375.15
[500]	valid's l1: 1367.86
[600]	valid's l1: 1366.6
[700]	valid's l1: 1364.8
[800]	valid's l1: 1361.95
[900]	valid's l1: 1357.82
[1000]	valid's l1: 1352.59
[1100]	valid's l1: 1346.01
[1200]	valid's l1: 1343.28
Did not meet early stopping. Best iteration is:
[1199]	valid's l1: 1343.19
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:21:24,498] (INFO): Trial 10 finished with value: -1343.1935723400456 and parameters: {'feature_fraction': 0.9725682721151934, 'num_leaves': 150, 'bagging_fraction': 0.9576199864322108, 'min_sum_hessian_in_leaf': 0.03293669426393944, 'reg_alpha': 0.005764962972197511, 'reg_lambda': 2.2311398834761413e-08}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1573.36
[200]	valid's l1: 1430.59
[300]	valid's l1: 1400.02
[400]	valid's l1: 1384.18
[500]	valid's l1: 1373.37
[600]	valid's l1: 1364.78
[700]	valid's l1: 1359.99
[800]	valid's l1: 1355.75
[900]	valid's l1: 1352.46
[1000]	valid's l1: 1347.64
[1100]	valid's l1: 1344.1
[1200]	valid's l1: 1341.43
Did not meet early stopping. Best iteration is:
[1198]	valid's l1: 1341.41
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:21:43,816] (INFO): Trial 11 finished with value: -1341.4127447768449 and parameters: {'feature_fraction': 0.5077973998292281, 'num_leaves': 177, 'bagging_fraction': 0.7287104201226786, 'min_sum_hessian_in_leaf': 0.5414118040417358, 'reg_alpha': 1.2005946274475368e-08, 'reg_lambda': 0.05327797205857794}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1542.15
[200]	valid's l1: 1402.67
[300]	valid's l1: 1375.25
[400]	valid's l1: 1363.68
[500]	valid's l1: 1355.98
[600]	valid's l1: 1350.06
[700]	valid's l1: 1345.06
[800]	valid's l1: 1339.84
[900]	valid's l1: 1337.65
[1000]	valid's l1: 1335.7
[1100]	valid's l1: 1332.62
[1200]	valid's l1: 1328.59
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1328.59
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:22:13,666] (INFO): Trial 12 finished with value: -1328.5871546216692 and parameters: {'feature_fraction': 0.6033130267906559, 'num_leaves': 254, 'bagging_fraction': 0.8734617880586331, 'min_sum_hessian_in_leaf': 3.740864118748723, 'reg_alpha': 4.5489208310505676e-07, 'reg_lambda': 9.620555086196925}. Best is trial 0 with value: -1312.5160231816428.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1520.67
[200]	valid's l1: 1379.53
[300]	valid's l1: 1356.14
[400]	valid's l1: 1343.15
[500]	valid's l1: 1335.34
[600]	valid's l1: 1329.08
[700]	valid's l1: 1323.99
[800]	valid's l1: 1320.9
[900]	valid's l1: 1317.03
[1000]	valid's l1: 1313.55
[1100]	valid's l1: 1312.03
[1200]	valid's l1: 1310.96
Did not meet early stopping. Best iteration is:
[1184]	valid's l1: 1310.89
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:22:40,949] (INFO): Trial 13 finished with value: -1310.8939857127325 and parameters: {'feature_fraction': 0.6272233742556023, 'num_leaves': 249, 'bagging_fraction': 0.8965895639414211, 'min_sum_hessian_in_leaf': 9.690219938278112, 'reg_alpha': 7.477334705747086e-07, 'reg_lambda': 2.4711229986370194e-07}. Best is trial 13 with value: -1310.8939857127325.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1582.23
[200]	valid's l1: 1445.85
[300]	valid's l1: 1420.09
[400]	valid's l1: 1410.35
[500]	valid's l1: 1397.16
[600]	valid's l1: 1391.39
[700]	valid's l1: 1384.2
[800]	valid's l1: 1379.67
[900]	valid's l1: 1377.21
[1000]	valid's l1: 1373.67
[1100]	valid's l1: 1370.91
[1200]	valid's l1: 1368.93
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1368.93
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:22:58,055] (INFO): Trial 14 finished with value: -1368.9268008367505 and parameters: {'feature_fraction': 0.6377427086334254, 'num_leaves': 115, 'bagging_fraction': 0.9152578965219585, 'min_sum_hessian_in_leaf': 9.639757903159516, 'reg_alpha': 0.00021173064504670472, 'reg_lambda': 4.3935331429128053e-07}. Best is trial 13 with value: -1310.8939857127325.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 100 rounds
[100]	valid's l1: 1412.69
[200]	valid's l1: 1364.09
[300]	valid's l1: 1344.82
[400]	valid's l1: 1334.38
[500]	valid's l1: 1327.52
[600]	valid's l1: 1321.88
[700]	valid's l1: 1320.02
[800]	valid's l1: 1318.7
[900]	valid's l1: 1317.21
[1000]	valid's l1: 1316.05
[1100]	valid's l1: 1314.51
[1200]	valid's l1: 1312.34
[1300]	valid's l1: 1311.28
[1400]	valid's l1: 1310.85
[1500]	valid's l1: 1310.84
[1600]	valid's l1: 1310.19
[1700]	valid's l1: 1309.71
Early stopping, best iteration is:
[1677]	valid's l1: 1309.53

===== Start working with fold 1 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 100 rounds
[100]	valid's l1: 1392.44
[200]	valid's l1: 1356.58
[300]	valid's l1: 1346.5
[400]	valid's l1: 1335.61
[500]	valid's l1: 1331.59
[600]	valid's l1: 1328.86
[700]	valid's l1: 13

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)



Start fitting Lvl_0_Pipe_0_Mod_0_LinearL2 ...

===== Start working with fold 0 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = -3792.307486188616
Linear model: C = 5e-05 score = -3782.813213239397
Linear model: C = 0.0001 score = -3770.899846191406
Linear model: C = 0.0005 score = -3679.5392332589286
Linear model: C = 0.001 score = -3574.748364467076
Linear model: C = 0.005 score = -3012.567970282418
Linear model: C = 0.01 score = -2728.522736999512
Linear model: C = 0.05 score = -2398.6584154379707
Linear model: C = 0.1 score = -2320.213474986485
Linear model: C = 0.5 score = -2218.425082851955
Linear model: C = 1 score = -2218.424901092529
Linear model: C = 5 score = -2158.2164266422815
Linear model: C = 10 score = -2158.2165777696882
Linear model: C = 50 score = -2158.216657536098

===== Start working with fold 1 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = -3617.604906389509
Linear model: C = 5e-05 score = -3608.5007586495535
Linear 

[2021-09-26 19:34:26,766] (INFO): A new study created in memory with name: no-name-e1d0bc46-d75e-4f4d-92a3-14ff31d4ec20


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1550.73
[200]	valid's l1: 1417.48
[300]	valid's l1: 1395.82
[400]	valid's l1: 1386.94
[500]	valid's l1: 1379.67
[600]	valid's l1: 1376.22
[700]	valid's l1: 1374.22
[800]	valid's l1: 1370.57
[900]	valid's l1: 1367.7
[1000]	valid's l1: 1364.44
[1100]	valid's l1: 1363.01
[1200]	valid's l1: 1362.01
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1362.01
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:35:05,449] (INFO): Trial 0 finished with value: -1362.0087845934459 and parameters: {'feature_fraction': 0.6872700594236812, 'num_leaves': 244, 'bagging_fraction': 0.8659969709057025, 'min_sum_hessian_in_leaf': 0.24810409748678125, 'reg_alpha': 2.5361081166471375e-07, 'reg_lambda': 2.5348407664333426e-07}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1578.6
[200]	valid's l1: 1435.3
[300]	valid's l1: 1407.07
[400]	valid's l1: 1393.71
[500]	valid's l1: 1385.51
[600]	valid's l1: 1381.24
[700]	valid's l1: 1376.84
[800]	valid's l1: 1372.79
[900]	valid's l1: 1369.4
[1000]	valid's l1: 1366.75
[1100]	valid's l1: 1364.8
[1200]	valid's l1: 1362.99
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1362.99
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:35:38,963] (INFO): Trial 1 finished with value: -1362.9863887961592 and parameters: {'feature_fraction': 0.5290418060840998, 'num_leaves': 223, 'bagging_fraction': 0.8005575058716043, 'min_sum_hessian_in_leaf': 0.679657809075816, 'reg_alpha': 1.5320059381854043e-08, 'reg_lambda': 5.360294728728285}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1685.21
[200]	valid's l1: 1539.21
[300]	valid's l1: 1507.61
[400]	valid's l1: 1489.87
[500]	valid's l1: 1475.1
[600]	valid's l1: 1465.41
[700]	valid's l1: 1457.88
[800]	valid's l1: 1452.56
[900]	valid's l1: 1448.26
[1000]	valid's l1: 1444.16
[1100]	valid's l1: 1441.7
[1200]	valid's l1: 1439.1
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1439.1
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:35:56,123] (INFO): Trial 2 finished with value: -1439.1009431636674 and parameters: {'feature_fraction': 0.9162213204002109, 'num_leaves': 66, 'bagging_fraction': 0.5909124836035503, 'min_sum_hessian_in_leaf': 0.00541524411940254, 'reg_alpha': 5.472429642032198e-06, 'reg_lambda': 0.00052821153945323}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1658.56
[200]	valid's l1: 1510.01
[300]	valid's l1: 1475.47
[400]	valid's l1: 1457.21
[500]	valid's l1: 1444.58
[600]	valid's l1: 1436.82
[700]	valid's l1: 1428.22
[800]	valid's l1: 1424.47
[900]	valid's l1: 1417.36
[1000]	valid's l1: 1414.22
[1100]	valid's l1: 1412.13
[1200]	valid's l1: 1408.8
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1408.8
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:36:09,340] (INFO): Trial 3 finished with value: -1408.7987781168051 and parameters: {'feature_fraction': 0.7159725093210578, 'num_leaves': 85, 'bagging_fraction': 0.8059264473611898, 'min_sum_hessian_in_leaf': 0.003613894271216527, 'reg_alpha': 4.258943089524393e-06, 'reg_lambda': 1.9826980964985924e-05}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1583.75
[200]	valid's l1: 1460.01
[300]	valid's l1: 1431.88
[400]	valid's l1: 1414.3
[500]	valid's l1: 1404.87
[600]	valid's l1: 1396.19
[700]	valid's l1: 1390.55
[800]	valid's l1: 1387.75
[900]	valid's l1: 1384.65
[1000]	valid's l1: 1380.42
[1100]	valid's l1: 1377.3
[1200]	valid's l1: 1375.7
Did not meet early stopping. Best iteration is:
[1194]	valid's l1: 1375.57
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:36:32,702] (INFO): Trial 4 finished with value: -1375.5716194866725 and parameters: {'feature_fraction': 0.728034992108518, 'num_leaves': 204, 'bagging_fraction': 0.5998368910791798, 'min_sum_hessian_in_leaf': 0.11400863701127326, 'reg_alpha': 0.0021465011216654484, 'reg_lambda': 2.6185068507773707e-08}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1721.48
[200]	valid's l1: 1570.17
[300]	valid's l1: 1532.41
[400]	valid's l1: 1513.4
[500]	valid's l1: 1500.83
[600]	valid's l1: 1491.07
[700]	valid's l1: 1483.58
[800]	valid's l1: 1477.02
[900]	valid's l1: 1471.92
[1000]	valid's l1: 1466.49
[1100]	valid's l1: 1461.8
[1200]	valid's l1: 1457.76
Did not meet early stopping. Best iteration is:
[1198]	valid's l1: 1457.73
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:36:45,361] (INFO): Trial 5 finished with value: -1457.727897186143 and parameters: {'feature_fraction': 0.8037724259507192, 'num_leaves': 56, 'bagging_fraction': 0.5325257964926398, 'min_sum_hessian_in_leaf': 6.245139574743075, 'reg_alpha': 4.905556676028774, 'reg_lambda': 0.18861495878553936}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1769.07
[200]	valid's l1: 1625.18
[300]	valid's l1: 1585.11
[400]	valid's l1: 1565.33
[500]	valid's l1: 1539.14
[600]	valid's l1: 1527.16
[700]	valid's l1: 1518.32
[800]	valid's l1: 1513.5
[900]	valid's l1: 1509.27
[1000]	valid's l1: 1504.61
[1100]	valid's l1: 1502.21
[1200]	valid's l1: 1497.14
Did not meet early stopping. Best iteration is:
[1196]	valid's l1: 1497.12
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:36:52,531] (INFO): Trial 6 finished with value: -1497.1170447690147 and parameters: {'feature_fraction': 0.6523068845866853, 'num_leaves': 39, 'bagging_fraction': 0.8421165132560784, 'min_sum_hessian_in_leaf': 0.057624872164786026, 'reg_alpha': 1.254134495897175e-07, 'reg_lambda': 0.00028614897264046574}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1577.49
[200]	valid's l1: 1440.72
[300]	valid's l1: 1417.81
[400]	valid's l1: 1402.64
[500]	valid's l1: 1391.88
[600]	valid's l1: 1387.57
[700]	valid's l1: 1383.07
[800]	valid's l1: 1380.1
[900]	valid's l1: 1376.18
[1000]	valid's l1: 1373.24
[1100]	valid's l1: 1370.53
[1200]	valid's l1: 1369.43
Did not meet early stopping. Best iteration is:
[1195]	valid's l1: 1369.34
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:37:12,264] (INFO): Trial 7 finished with value: -1369.3386295160567 and parameters: {'feature_fraction': 0.5171942605576092, 'num_leaves': 234, 'bagging_fraction': 0.6293899908000085, 'min_sum_hessian_in_leaf': 0.4467752817973907, 'reg_alpha': 6.388511557344611e-06, 'reg_lambda': 0.0004793052550782129}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1706.67
[200]	valid's l1: 1563.12
[300]	valid's l1: 1533.22
[400]	valid's l1: 1521.91
[500]	valid's l1: 1508.71
[600]	valid's l1: 1496.19
[700]	valid's l1: 1492.26
[800]	valid's l1: 1491.32
[900]	valid's l1: 1484.95
[1000]	valid's l1: 1476.68
[1100]	valid's l1: 1473.76
[1200]	valid's l1: 1469.82
Did not meet early stopping. Best iteration is:
[1197]	valid's l1: 1469.81
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:37:22,653] (INFO): Trial 8 finished with value: -1469.8145641400474 and parameters: {'feature_fraction': 0.7733551396716398, 'num_leaves': 60, 'bagging_fraction': 0.9847923138822793, 'min_sum_hessian_in_leaf': 1.2604664585649468, 'reg_alpha': 2.854239907497756, 'reg_lambda': 1.1309571585271483}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1585.96
[200]	valid's l1: 1453.85
[300]	valid's l1: 1434.91
[400]	valid's l1: 1421.02
[500]	valid's l1: 1411.52
[600]	valid's l1: 1405.41
[700]	valid's l1: 1402.69
[800]	valid's l1: 1398.75
[900]	valid's l1: 1397.13
[1000]	valid's l1: 1394.45
[1100]	valid's l1: 1391.1
[1200]	valid's l1: 1389.55
Did not meet early stopping. Best iteration is:
[1198]	valid's l1: 1389.53
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:37:49,162] (INFO): Trial 9 finished with value: -1389.5337105413164 and parameters: {'feature_fraction': 0.7989499894055425, 'num_leaves': 237, 'bagging_fraction': 0.5442462510259598, 'min_sum_hessian_in_leaf': 0.006080390190296602, 'reg_alpha': 2.5529693461039728e-08, 'reg_lambda': 8.471746987003668e-06}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1587.25
[200]	valid's l1: 1451.04
[300]	valid's l1: 1432.63
[400]	valid's l1: 1422.09
[500]	valid's l1: 1414.91
[600]	valid's l1: 1408.22
[700]	valid's l1: 1402.69
[800]	valid's l1: 1398.35
[900]	valid's l1: 1396.65
[1000]	valid's l1: 1394.25
[1100]	valid's l1: 1393.09
[1200]	valid's l1: 1392.54
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1392.54
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:38:07,556] (INFO): Trial 10 finished with value: -1392.5371351362637 and parameters: {'feature_fraction': 0.9725682721151934, 'num_leaves': 150, 'bagging_fraction': 0.9576199864322108, 'min_sum_hessian_in_leaf': 0.03293669426393944, 'reg_alpha': 0.005764962972197511, 'reg_lambda': 2.2311398834761413e-08}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1594.07
[200]	valid's l1: 1454.97
[300]	valid's l1: 1428.53
[400]	valid's l1: 1413.6
[500]	valid's l1: 1404.6
[600]	valid's l1: 1397.08
[700]	valid's l1: 1393.69
[800]	valid's l1: 1390.55
[900]	valid's l1: 1386.63
[1000]	valid's l1: 1382.41
[1100]	valid's l1: 1379.76
[1200]	valid's l1: 1378.14
Did not meet early stopping. Best iteration is:
[1199]	valid's l1: 1378.11
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:38:22,174] (INFO): Trial 11 finished with value: -1378.1089352938106 and parameters: {'feature_fraction': 0.5077973998292281, 'num_leaves': 177, 'bagging_fraction': 0.7287104201226786, 'min_sum_hessian_in_leaf': 0.5414118040417358, 'reg_alpha': 1.2005946274475368e-08, 'reg_lambda': 0.05327797205857794}. Best is trial 0 with value: -1362.0087845934459.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1574.09
[200]	valid's l1: 1437.25
[300]	valid's l1: 1406.83
[400]	valid's l1: 1395.72
[500]	valid's l1: 1387.67
[600]	valid's l1: 1382.25
[700]	valid's l1: 1377.07
[800]	valid's l1: 1374.32
[900]	valid's l1: 1369.29
[1000]	valid's l1: 1366.46
[1100]	valid's l1: 1363.68
[1200]	valid's l1: 1361.5
Did not meet early stopping. Best iteration is:
[1199]	valid's l1: 1361.49
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:38:44,976] (INFO): Trial 12 finished with value: -1361.4943052062647 and parameters: {'feature_fraction': 0.6033130267906559, 'num_leaves': 254, 'bagging_fraction': 0.8734617880586331, 'min_sum_hessian_in_leaf': 3.740864118748723, 'reg_alpha': 4.5489208310505676e-07, 'reg_lambda': 9.620555086196925}. Best is trial 12 with value: -1361.4943052062647.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1551.59
[200]	valid's l1: 1423.35
[300]	valid's l1: 1402.68
[400]	valid's l1: 1391.23
[500]	valid's l1: 1384.49
[600]	valid's l1: 1380.55
[700]	valid's l1: 1374.11
[800]	valid's l1: 1371.75
[900]	valid's l1: 1368.57
[1000]	valid's l1: 1366.76
[1100]	valid's l1: 1364.48
[1200]	valid's l1: 1363.21
Did not meet early stopping. Best iteration is:
[1196]	valid's l1: 1363.13
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:39:06,193] (INFO): Trial 13 finished with value: -1363.1343821084158 and parameters: {'feature_fraction': 0.6272233742556023, 'num_leaves': 249, 'bagging_fraction': 0.8965895639414211, 'min_sum_hessian_in_leaf': 9.690219938278112, 'reg_alpha': 7.477334705747086e-07, 'reg_lambda': 2.4711229986370194e-07}. Best is trial 12 with value: -1361.4943052062647.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1631.32
[200]	valid's l1: 1491.38
[300]	valid's l1: 1463.88
[400]	valid's l1: 1445.79
[500]	valid's l1: 1437.05
[600]	valid's l1: 1430.75
[700]	valid's l1: 1422.01
[800]	valid's l1: 1418.63
[900]	valid's l1: 1414.73
[1000]	valid's l1: 1409.58
[1100]	valid's l1: 1406.23
[1200]	valid's l1: 1403.84
Did not meet early stopping. Best iteration is:
[1200]	valid's l1: 1403.84
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:39:18,314] (INFO): Trial 14 finished with value: -1403.840301894537 and parameters: {'feature_fraction': 0.6377427086334254, 'num_leaves': 115, 'bagging_fraction': 0.7222977058252433, 'min_sum_hessian_in_leaf': 2.3401332856511408, 'reg_alpha': 0.0002538111812992266, 'reg_lambda': 0.011295678228455511}. Best is trial 12 with value: -1361.4943052062647.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's l1: 1573.79
[200]	valid's l1: 1440.55
[300]	valid's l1: 1414.94
[400]	valid's l1: 1404.31
[500]	valid's l1: 1393.57
[600]	valid's l1: 1386.39
[700]	valid's l1: 1380.83
[800]	valid's l1: 1377.43
[900]	valid's l1: 1374.78
[1000]	valid's l1: 1372.93
[1100]	valid's l1: 1371.39
[1200]	valid's l1: 1370.5
Did not meet early stopping. Best iteration is:
[1196]	valid's l1: 1370.46
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed


[2021-09-26 19:39:35,329] (INFO): Trial 15 finished with value: -1370.4551700190136 and parameters: {'feature_fraction': 0.5914831747534568, 'num_leaves': 194, 'bagging_fraction': 0.8936277429653271, 'min_sum_hessian_in_leaf': 0.166712291358107, 'reg_alpha': 4.9497914616273814e-05, 'reg_lambda': 1.036963650861359e-06}. Best is trial 12 with value: -1361.4943052062647.


Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 100 rounds
[100]	valid's l1: 1455.74
[200]	valid's l1: 1410.87
[300]	valid's l1: 1393.92
[400]	valid's l1: 1381.19
[500]	valid's l1: 1376.39
[600]	valid's l1: 1372.29
[700]	valid's l1: 1368.16
[800]	valid's l1: 1364.95
[900]	valid's l1: 1362.54
[1000]	valid's l1: 1359.71
[1100]	valid's l1: 1356.7
[1200]	valid's l1: 1355.69
[1300]	valid's l1: 1354.69
Early stopping, best iteration is:
[1259]	valid's l1: 1354.42

===== Start working with fold 1 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 100 rounds
[100]	valid's l1: 1339.5
[200]	valid's l1: 1297.19
[300]	valid's l1: 1283.85
[400]	valid's l1: 1276.98
[500]	valid's l1: 1272.61
[600]	valid's l1: 1267.31
[700]	valid's l1: 1264.66
[800]	valid's l1: 1262.48
[900]	valid's l1: 1260.28
[1000]	valid's l1: 1259.25
[1100]	valid's l1: 1258

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)



Start fitting Lvl_0_Pipe_0_Mod_0_LinearL2 ...

===== Start working with fold 0 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = -3822.1566178152902
Linear model: C = 5e-05 score = -3817.2438549804688
Linear model: C = 0.0001 score = -3810.884972795759
Linear model: C = 0.0005 score = -3762.898037248884
Linear model: C = 0.001 score = -3707.2527440011163
Linear model: C = 0.005 score = -3392.2609402138846
Linear model: C = 0.01 score = -3172.8490260445733
Linear model: C = 0.05 score = -2691.937402368818
Linear model: C = 0.1 score = -2557.981369875227
Linear model: C = 0.5 score = -2347.4993522513255
Linear model: C = 1 score = -2305.5848647989546
Linear model: C = 5 score = -2263.3669815684725
Linear model: C = 10 score = -2263.3669848038808
Linear model: C = 50 score = -2263.3669331692286
Linear model: C = 100 score = -2263.3668795318604
Linear model: C = 500 score = -2263.3667425842286
Linear model: C = 1000 score = -2263.366698425838
Linear model: C = 5000 sco

KeyboardInterrupt: 

## Step 4. Predict on test data and check scores

In [None]:
%%time

# Run the AutoML model on the test set and log the raw prediction object
# together with its shape.
test_pred = automl.predict(test_data)
prediction_report = 'Prediction for test data:\n{}\nShape = {}'.format(
    test_pred, test_pred.shape
)
logging.info(prediction_report)

logging.info('Check scores...')
# MAE between the training targets and the first column of `oof_pred`
# (presumably the out-of-fold predictions produced by the fit cell -- confirm).
train_targets = train_data[TARGET_NAME].values
oof_values = oof_pred.data[:, 0]
oof_mae = mean_absolute_error(train_targets, oof_values)
logging.info('OOF score: {}'.format(oof_mae))

CPU times: user 8min 17s, sys: 221 ms, total: 8min 17s
Wall time: 2min 11s


In [None]:
# Take the first column of the prediction array as the predicted target
# and store it in the submission frame.
predicted_target = test_pred.data[:, 0]
submission[TARGET_NAME] = predicted_target

# Preview the first rows of the filled-in submission.
submission.head()

Unnamed: 0,row_ID,final_price
0,35000,2856.394287
1,35001,5529.273438
2,35002,2622.005859
3,35003,6810.166504
4,35004,4497.337891


## Step 5. Generate submission

In [None]:
# Known-VIN override: a test car whose VIN also appears in the training data
# gets the most frequent (modal) training price for that VIN instead of the
# model prediction.

# Row IDs must be unique for the row_ID -> VIN lookup below to be unambiguous;
# fail loudly rather than write prices onto the wrong rows.
assert test_data['row_ID'].is_unique, 'test_data has duplicate row_ID values'

# Modal training price per VIN. Rows lacking a VIN or a price are dropped so
# every group has at least one value for value_counts() to rank.
vin_to_price = (
    train_data
    .dropna(subset=['car_vin', TARGET_NAME])
    .groupby('car_vin')[TARGET_NAME]
    .agg(lambda prices: prices.value_counts().index[0])
)

# VIN of every submission row, looked up by row_ID. The mapped Series keeps
# submission's own index, so the assignment below aligns by label and does not
# depend on row order or on submission having a default 0..n-1 index.
row_to_vin = test_data.set_index('row_ID')['car_vin']
known_price = submission['row_ID'].map(row_to_vin).map(vin_to_price)

# Overwrite predictions only where a training price exists for the VIN;
# rows with a missing or unseen VIN keep the model prediction.
has_known_price = known_price.notna()
submission.loc[has_known_price, TARGET_NAME] = known_price[has_known_price]

In [None]:
# Write the final submission to disk; index=False leaves the DataFrame index
# out of the file.
submission.to_csv(path_or_buf='submission.csv', index=False)