In [1]:
!pip install -U --user LightAutoML-0.3.7.4-py3-none-any.whl

Processing ./LightAutoML-0.3.7.4-py3-none-any.whl
Collecting scikit-learn<=0.24.2,>=0.22
  Downloading scikit_learn-0.24.2-cp38-cp38-manylinux2010_x86_64.whl (24.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.9/24.9 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
Collecting lightgbm<=3.2.1,>=2.3
  Downloading lightgbm-3.2.1-py3-none-manylinux1_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting poetry-core<2.0.0,>=1.0.0
  Downloading poetry_core-1.5.2-py3-none-any.whl (465 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m465.2/465.2 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting json2html
  Downloading json2html-1.3.0.tar.gz (7.0 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting cmaes
  Downloading cmaes-0.9.1-py3-none-any.whl (21 kB)
Collecting numpy<1.24.0
  D

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm.notebook import tqdm
import copy
import gc
import os
import time
import lightautoml

In [2]:
# Print the NVIDIA driver / GPU status table (name, memory usage, utilisation)
# for the machine this kernel runs on.
!nvidia-smi

Mon Mar 27 09:10:36 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0  On |                  Off |
|  0%   47C    P8    38W / 450W |   1017MiB / 24564MiB |     15%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
def reduce_mem_usage(df, use_float16=True):
    """Downcast the columns of ``df`` in place to reduce its memory footprint.

    Per column:

    * ``object`` columns are converted to ``category``.
    * signed integer columns are cast to the narrowest of int8/int16/int32
      whose range contains the column's min and max (bounds inclusive).
    * unsigned integer columns are cast to the narrowest of
      uint8/uint16/uint32 in the same way (they are no longer routed through
      the float branch, which could silently lose precision).
    * float columns are cast to float16 (if ``use_float16``) or float32 when
      the column's min and max lie inside that type's finite range.
    * every other dtype (bool, datetime, timedelta, complex, categorical and
      other pandas extension dtypes) is left untouched instead of being cast
      to float or raising ``TypeError`` on the range comparison.

    A column is never cast to a wider type than it already has, and columns
    whose min/max is NaN (empty or all-missing) are left as they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to optimise. Its columns are reassigned, so the caller's object
        is modified.
    use_float16 : bool, default True
        Allow float16 as a target type. float16 keeps only about three
        significant decimal digits, so pass ``False`` when that rounding is
        not acceptable; float32 is then the narrowest float type used.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after downcasting.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object:
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy integer / unsigned / float columns are downcast.
        # Extension dtypes are not np.dtype instances and are skipped here.
        if not isinstance(col_type, np.dtype) or col_type.kind not in 'iuf':
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        # min()/max() give NaN for empty or all-missing columns; there is no
        # range to test against, so leave the column alone.
        if pd.isna(c_min) or pd.isna(c_max):
            continue

        if col_type.kind == 'f':
            limits = np.finfo
            candidates = [np.float16, np.float32] if use_float16 else [np.float32]
        elif col_type.kind == 'i':
            limits = np.iinfo
            candidates = [np.int8, np.int16, np.int32]
        else:
            limits = np.iinfo
            candidates = [np.uint8, np.uint16, np.uint32]

        # Candidates are ordered narrowest first; take the first that fits.
        for candidate in candidates:
            if np.dtype(candidate).itemsize >= col_type.itemsize:
                # Remaining candidates would not save memory.
                break
            info = limits(candidate)
            if info.min <= c_min and c_max <= info.max:
                df[col] = df[col].astype(candidate)
                break

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard the ratio so a frame reporting zero bytes does not divide by zero.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(decrease))

    return df

In [4]:
# Load the prepared feature tables; the test frame is ordered by user_id
# (original row index labels are kept).
train = pd.read_csv("train_ml_num_v2.csv")
test = pd.read_csv("test_ml_num_v2.csv").sort_values("user_id")

# Flag set to False in this run.
cold = False

print(len(train))

270000


In [5]:
warm_features = ['social_media last', 'social_media first', 'sum_text last', 'sum_text first', 'urls_topics last', 'urls_topics first', 'url_host_mc', 'url_host_2_mc', 'url_host_3_mc', 'urls_topics_mc', 'url_host_day_mc', 'url_host_pod_mc', 'request_cnt max', 'request_cnt mean', 'request_cnt std', 'social_media sum', '339 last', '339 first', '68 last', '68 first', '215 last', '215 first', '503 last', '503 first', '251 last', '251 first', '316 last', '316 first', '314 last', '314 first', '418 last', '418 first', '284 last', '284 first', '289 last', '289 first', '415 last', '415 first', '765 last', '765 first', '165 last', '165 first', '556 last', '556 first', '391 last', '391 first', '535 last', '535 first', '385 last', '385 first', '147 last', '147 first', '25 last', '25 first', '526 last', '526 first', '40 last', '40 first', '199 last', '199 first', '583 last', '583 first', '273 last', '273 first', '169 last', '169 first', '15 last', '15 first', '592 last', '592 first', '268 last', '268 first', '437 last', '437 first', '315 last', '315 first', '893 last', '893 first', '493 last', '493 first', '465 last', '465 first', '1450 last', '1450 first', '794 last', '794 first', '1819 last', '1819 first','als_emb_url_host_day_0', 'als_emb_url_host_day_1', 'als_emb_url_host_day_2', 'als_emb_url_host_day_3', 'als_emb_url_host_day_4', 'als_emb_url_host_day_5', 'als_emb_url_host_day_6', 'als_emb_url_host_day_7', 'als_emb_url_host_day_8', 'als_emb_url_host_day_9', 'als_emb_url_host_day_10', 'als_emb_url_host_day_11', 'als_emb_url_host_day_12', 'als_emb_url_host_day_13', 'als_emb_url_host_day_14', 'als_emb_url_host_day_15', 'als_emb_url_host_day_16', 'als_emb_url_host_day_17', 'als_emb_url_host_day_18', 'als_emb_url_host_day_19', 'als_emb_url_host_day_20', 'als_emb_url_host_day_21', 'als_emb_url_host_day_22', 'als_emb_url_host_day_23', 'als_emb_url_host_day_24', 'als_emb_url_host_day_25', 'als_emb_url_host_day_26', 'als_emb_url_host_day_27', 'als_emb_url_host_day_28', 
'als_emb_url_host_day_29', 'als_emb_url_host_day_30', 'als_emb_url_host_day_31', 'url_host_day_clusters_age', 'url_host_day_clusters_ismale', 'als_emb_url_host_pod_0', 'als_emb_url_host_pod_1', 'als_emb_url_host_pod_2', 'als_emb_url_host_pod_3', 'als_emb_url_host_pod_4', 'als_emb_url_host_pod_5', 'als_emb_url_host_pod_6', 'als_emb_url_host_pod_7', 'als_emb_url_host_pod_8', 'als_emb_url_host_pod_9', 'als_emb_url_host_pod_10', 'als_emb_url_host_pod_11', 'als_emb_url_host_pod_12', 'als_emb_url_host_pod_13', 'als_emb_url_host_pod_14', 'als_emb_url_host_pod_15', 'url_host_pod_clusters_age', 'url_host_pod_clusters_ismale', 'mean_req_cnt_mor', 'mean_req_cnt_day', 'mean_req_cnt_eve', 'mean_req_cnt_nig', 'mean_req_cnt_holiday', 'mean_req_cnt_noholiday', 'mean_req_cnt_weekend', 'mean_req_cnt_noweekend', 'mean_req_cnt_date', 'emb_coles_0', 'emb_coles_1', 'emb_coles_2', 'emb_coles_3', 'emb_coles_4', 'emb_coles_5', 'emb_coles_6', 'emb_coles_7', 'emb_coles_8', 'emb_coles_9', 'emb_coles_10', 'emb_coles_11', 'emb_coles_12', 'emb_coles_13', 'emb_coles_14', 'emb_coles_15', 'emb_coles_16', 'emb_coles_17', 'emb_coles_18', 'emb_coles_19', 'emb_coles_20', 'emb_coles_21', 'emb_coles_22', 'emb_coles_23', 'emb_coles_24', 'emb_coles_25', 'emb_coles_26', 'emb_coles_27', 'emb_coles_28', 'emb_coles_29', 'emb_coles_30', 'emb_coles_31', 'emb_coles_32', 'emb_coles_33', 'emb_coles_34', 'emb_coles_35', 'emb_coles_36', 'emb_coles_37', 'emb_coles_38', 'emb_coles_39', 'emb_coles_40', 'emb_coles_41', 'emb_coles_42', 'emb_coles_43', 'emb_coles_44', 'emb_coles_45', 'emb_coles_46', 'emb_coles_47', 'emb_coles_48', 'emb_coles_49', 'emb_coles_50', 'emb_coles_51', 'emb_coles_52', 'emb_coles_53', 'emb_coles_54', 'emb_coles_55', 'emb_coles_56', 'emb_coles_57', 'emb_coles_58', 'emb_coles_59', 'emb_coles_60', 'emb_coles_61', 'emb_coles_62', 'emb_coles_63', '0_ut', '1_ut', '2_ut', '3_ut', '4_ut', '5_ut', '6_ut', '7_ut', '8_ut', '9_ut', '10_ut', '11_ut', '12_ut', '13_ut', '14_ut', '15_ut', '16_ut', '17_ut', 
'18_ut', '19_ut', '20_ut', '21_ut', '22_ut', '23_ut', '24_ut', '25_ut', '26_ut', '27_ut', '28_ut', '29_ut', '30_ut', '31_ut', '32_ut', '33_ut', '34_ut', '35_ut', '36_ut', '37_ut', '38_ut', '39_ut', '40_ut', '41_ut', '42_ut', '43_ut', '44_ut', '45_ut', '46_ut', '47_ut', '48_ut', '49_ut', '50_ut', '51_ut', '52_ut', '53_ut', '54_ut', '55_ut', '56_ut', '57_ut', '58_ut', '59_ut', '60_ut', '61_ut', '62_ut', '63_ut', '64_ut', '65_ut', '66_ut', '67_ut', '68_ut', '69_ut', '70_ut', '71_ut', '72_ut', '73_ut', '74_ut', '75_ut', '76_ut', '77_ut', '78_ut', '79_ut', '80_ut', '81_ut', '82_ut', '83_ut', '84_ut', '85_ut', '86_ut', '87_ut', '88_ut', '89_ut', '90_ut', '91_ut', '92_ut', '93_ut', '94_ut', '95_ut', '96_ut', '97_ut', '98_ut', '99_ut', '100_ut', '101_ut', '102_ut', '103_ut', '104_ut', '105_ut', '106_ut', '107_ut', '108_ut', '109_ut', '110_ut', '111_ut', '112_ut', '113_ut', '114_ut', '115_ut', '116_ut', '117_ut', '118_ut', '119_ut', '120_ut', '121_ut', '122_ut', '123_ut', '124_ut', '125_ut', '126_ut', '127_ut', '128_ut', '129_ut', '130_ut', '131_ut', '132_ut', '133_ut', '134_ut', '135_ut', '136_ut', '137_ut', '138_ut', '139_ut', '140_ut', '141_ut', '142_ut', '143_ut', '144_ut', '145_ut', '146_ut', '147_ut', '148_ut', '149_ut', '150_ut', '151_ut', '152_ut', '153_ut', '154_ut', '155_ut', '156_ut', '157_ut', '158_ut', '159_ut', '160_ut', '161_ut', '162_ut', '163_ut', '164_ut', '165_ut', '166_ut', '167_ut', '168_ut', '169_ut', '170_ut', '171_ut', '172_ut', '173_ut', '174_ut', '175_ut', '176_ut', '177_ut', '178_ut', '179_ut', '180_ut', '181_ut', '182_ut', '183_ut', '184_ut', '185_ut', '186_ut', '187_ut', '188_ut', '189_ut', '190_ut', '191_ut', '192_ut', '193_ut', '194_ut', '195_ut', '196_ut', '197_ut', '198_ut', '199_ut', '200_ut', '201_ut', '202_ut', '203_ut', '204_ut', '205_ut', '206_ut', '207_ut', '208_ut', '209_ut', '210_ut', '211_ut', '212_ut', '213_ut', '214_ut', '215_ut', '216_ut', '217_ut', '218_ut', '219_ut', '220_ut', '221_ut', '222_ut', '223_ut', '224_ut', '225_ut', 
'226_ut', '227_ut', '228_ut', '229_ut', '230_ut', '231_ut', '232_ut', '233_ut', '234_ut', '235_ut', '236_ut', '237_ut', '238_ut', '239_ut', '240_ut', '241_ut', '242_ut', '243_ut', '244_ut', '245_ut', '246_ut', '247_ut', '248_ut', '249_ut', '250_ut', '251_ut', '252_ut', '253_ut', '254_ut', '255_ut', '256_ut', '257_ut', '258_ut', '259_ut', '260_ut', '261_ut', '262_ut', '263_ut', '264_ut', '265_ut', '266_ut', '267_ut', '268_ut', '269_ut', '270_ut', '271_ut', '272_ut', '273_ut', '274_ut', '275_ut', '276_ut', '277_ut', '278_ut', '279_ut', '280_ut', '281_ut', '282_ut', '283_ut', '284_ut', '285_ut', '286_ut', '287_ut', '288_ut', '289_ut', '290_ut', '291_ut', '292_ut', '293_ut', '294_ut', '295_ut', '296_ut', '297_ut', '298_ut', '299_ut', '0_pd', '1_pd', '2_pd', '3_pd', '4_pd', '5_pd', '6_pd', '7_pd', '8_pd', '9_pd', '10_pd', '11_pd', '12_pd', '13_pd', '14_pd', '15_pd', '16_pd', '17_pd', '18_pd', '19_pd', '20_pd', '21_pd', '22_pd', '23_pd', '24_pd', '25_pd', '26_pd', '27_pd', '0_dof', '1_dof', '2_dof', '3_dof', '4_dof', '5_dof', '6_dof', '0_pd.1', '1_pd.1', '2_pd.1', '3_pd.1', 'fe_0', 'fe_1', 'fe_2', 'fe_3', 'fe_4', 'fe_5', 'fe_6', 'fe_7', 'fe_8', 'fe_9', 'fe_10', 'fe_11', 'fe_12', 'fe_13', 'fe_14', 'fe_15', 'fe_16', 'fe_17', 'fe_18', 'fe_19', 'fe_20', 'fe_21', 'fe_22', 'fe_23', 'fe_24', 'fe_25', 'fe_26', 'fe_27', 'fe_28', 'fe_29', 'fe_30', 'fe_31', 'fe_32', 'fe_33', 'fe_34', 'fe_35', 'fe_36', 'fe_37', 'fe_38', 'fe_39', 'fe_40', 'fe_41', 'fe_42', 'fe_43', 'fe_44', 'fe_45', 'fe_46', 'fe_47', 'fe_48', 'fe_49', 'fe_50', 'fe_51', 'fe_52', 'fe_53', 'fe_54', 'fe_55', 'fe_56', 'fe_57', 'fe_58', 'fe_59', 'fe_60', 'fe_61', 'fe_62', 'fe_63', 'fe_64', 'fe_65', 'fe_66', 'fe_67', 'fe_68', 'fe_69', 'fe_70', 'fe_71', 'fe_72', 'fe_73', 'fe_74', 'fe_75', 'fe_76', 'fe_77', 'fe_78', 'fe_79', 'fe_80', 'fe_81', 'fe_82', 'fe_83', 'fe_84', 'fe_85', 'fe_86', 'fe_87', 'fe_88', 'fe_89', 'fe_90', 'fe_91', 'fe_92', 'fe_93', 'fe_94', 'fe_95', 'fe_96', 'fe_97', 'fe_98', 'fe_99', 'fe_100', 'fe_101', 
'fe_102', 'fe_103', 'fe_104', 'fe_105', 'fe_106', 'fe_107', 'fe_108', 'fe_109', 'fe_110', 'fe_111', 'fe_112', 'fe_113', 'fe_114', 'fe_115', 'fe_116', 'fe_117', 'fe_118', 'fe_119', 'fe_120', 'fe_121', 'fe_122', 'fe_123', 'fe_124', 'fe_125', 'fe_126', 'fe_127', 'fe_128', 'fe_129', 'fe_130', 'fe_131', 'fe_132', 'fe_133', 'fe_134', 'fe_135', 'fe_136', 'fe_137', 'fe_138', 'fe_139', 'fe_140', 'fe_141', 'fe_142', 'fe_143', 'fe_144', 'fe_145', 'fe_146', 'fe_147', 'fe_148', 'fe_149', 'fe_150', 'fe_151', 'fe_152', 'fe_153', 'fe_154', 'fe_155', 'fe_156', 'fe_157', 'fe_158', 'fe_159', 'fe_160', 'fe_161', 'fe_162', 'fe_163', 'fe_164', 'fe_165', 'fe_166', 'fe_167', 'fe_168', 'fe_169', 'fe_170', 'fe_171', 'fe_172', 'fe_173', 'fe_174', 'fe_175', 'fe_176', 'fe_177', 'fe_178', 'fe_179', 'fe_180', 'fe_181', 'fe_182', 'fe_183', 'fe_184', 'fe_185', 'fe_186', 'fe_187', 'fe_188', 'fe_189', 'fe_190', 'fe_191', 'fe_192', 'fe_193', 'fe_194', 'fe_195', 'fe_196', 'fe_197', 'fe_198', 'fe_199', 'fe_200', 'fe_201', 'fe_202', 'fe_203', 'fe_204', 'fe_205', 'fe_206', 'fe_207', 'fe_208', 'fe_209', 'fe_210', 'fe_211', 'fe_212', 'fe_213', 'fe_214', 'fe_215', 'fe_216', 'fe_217', 'fe_218', 'fe_219', 'fe_220', 'fe_221', 'fe_222', 'fe_223', 'fe_224', 'fe_225', 'fe_226', 'fe_227', 'fe_228', 'fe_229', 'fe_230', 'fe_231', 'fe_232', 'fe_233', 'fe_234', 'fe_235', 'fe_236', 'fe_237', 'fe_238', 'fe_239', 'fe_240', 'fe_241', 'fe_242', 'fe_243', 'fe_244', 'fe_245', 'fe_246', 'fe_247', 'fe_248', 'fe_249', 'fe_250', 'fe_251', 'fe_252', 'fe_253', 'fe_254', 'fe_255', 'fe_256', 'fe_257', 'fe_258', 'fe_259', 'fe_260', 'fe_261', 'fe_262', 'fe_263', 'fe_264', 'fe_265', 'fe_266', 'fe_267', 'fe_268', 'fe_269', 'fe_270', 'fe_271', 'fe_272', 'fe_273', 'fe_274', 'fe_275', 'fe_276', 'fe_277', 'fe_278', 'fe_279', 'fe_280', 'fe_281', 'fe_282', 'fe_283', 'fe_284', 'fe_285', 'fe_286', 'fe_287', 'fe_288', 'fe_289', 'fe_290', 'fe_291', 'fe_292', 'fe_293', 'fe_294', 'fe_295', 'fe_296', 'fe_297', 'fe_298', 'fe_299', '339 max', '339 
mean', '339 std', '68 max', '68 mean', '68 std', '215 max', '215 mean', '215 std', '503 max', '503 mean', '503 std', '251 max', '251 mean', '251 std', '316 max', '316 mean', '316 std', '314 max', '314 mean', '314 std', '418 max', '418 mean', '418 std', '284 max', '284 mean', '284 std', '289 max', '289 mean', '289 std', '415 max', '415 mean', '415 std', '765 max', '765 mean', '765 std', '165 max', '165 mean', '165 std', '556 max', '556 mean', '556 std', '391 max', '391 mean', '391 std', '535 max', '535 mean', '535 std', '385 max', '385 mean', '385 std', '147 max', '147 mean', '147 std', '25 max', '25 mean', '25 std', '526 max', '526 mean', '526 std', '40 max', '40 mean', '40 std', '199 max', '199 mean', '199 std', '583 max', '583 mean', '583 std', '273 max', '273 mean', '273 std', '169 max', '169 mean', '169 std', '15 max', '15 mean', '15 std', '592 max', '592 mean', '592 std', '268 max', '268 mean', '268 std', '437 max', '437 mean', '437 std', '315 max', '315 mean', '315 std', '893 max', '893 mean', '893 std', '493 max', '493 mean', '493 std', '465 max', '465 mean', '465 std', '1450 max', '1450 mean', '1450 std', '794 max', '794 mean', '794 std', '1819 max', '1819 mean', '1819 std', 'social_media max', 'social_media mean', 'social_media std', 'region_name_fq', 'city_name_fq', 'url_host_fq', 'region_name_un', 'city_name_un', 'cpe_type_cd_un', 'cpe_model_os_type_un', '339 sum', '68 sum', '215 sum', '503 sum', '251 sum', '316 sum', '314 sum', '418 sum', '284 sum', '289 sum', '415 sum', '765 sum', '165 sum', '556 sum', '391 sum', '535 sum', '385 sum', '147 sum', '25 sum', '526 sum', '40 sum', '199 sum', '583 sum', '273 sum', '169 sum', '15 sum', '592 sum', '268 sum', '437 sum', '315 sum', '893 sum', '493 sum', '465 sum', '1450 sum', '794 sum', '1819 sum', '0_u2', '1_u2', '2_u2', '3_u2', '4_u2', '5_u2', '6_u2', '7_u2', '8_u2', '9_u2', '10_u2', '11_u2', '12_u2', '13_u2', '14_u2', '15_u2', '16_u2', '17_u2', '18_u2', '19_u2', '20_u2', '21_u2', '22_u2', '23_u2', '24_u2', 
'25_u2', '26_u2', '27_u2', '28_u2', '29_u2', '30_u2', '31_u2', '32_u2', '33_u2', '34_u2', '35_u2', '36_u2', '37_u2', '38_u2', '39_u2', '40_u2', '41_u2', '42_u2', '43_u2', '44_u2', '45_u2', '46_u2', '47_u2', '48_u2', '49_u2', '50_u2', '51_u2', '52_u2', '53_u2', '54_u2', '55_u2', '56_u2', '57_u2', '58_u2', '59_u2', '60_u2', '61_u2', '62_u2', '63_u2', '64_u2', '65_u2', '66_u2', '67_u2', '68_u2', '69_u2', '70_u2', '71_u2', '72_u2', '73_u2', '74_u2', '75_u2', '76_u2', '77_u2', '78_u2', '79_u2', '80_u2', '81_u2', '82_u2', '83_u2', '84_u2', '85_u2', '86_u2', '87_u2', '88_u2', '89_u2', '90_u2', '91_u2', '92_u2', '93_u2', '94_u2', '95_u2', '96_u2', '97_u2', '98_u2', '99_u2', '100_u2', '101_u2', '102_u2', '103_u2', '104_u2', '105_u2', '106_u2', '107_u2', '108_u2', '109_u2', '110_u2', '111_u2', '112_u2', '113_u2', '114_u2', '115_u2', '116_u2', '117_u2', '118_u2', '119_u2', '120_u2', '121_u2', '122_u2', '123_u2', '124_u2', '125_u2', '126_u2', '127_u2', '128_u2', '129_u2', '130_u2', '131_u2', '132_u2', '133_u2', '134_u2', '135_u2', '136_u2', '137_u2', '138_u2', '139_u2', '140_u2', '141_u2', '142_u2', '143_u2', '144_u2', '145_u2', '146_u2', '147_u2', '148_u2', '149_u2', '150_u2', '151_u2', '152_u2', '153_u2', '154_u2', '155_u2', '156_u2', '157_u2', '158_u2', '159_u2', '160_u2', '161_u2', '162_u2', '163_u2', '164_u2', '165_u2', '166_u2', '167_u2', '168_u2', '169_u2', '170_u2', '171_u2', '172_u2', '173_u2', '174_u2', '175_u2', '176_u2', '177_u2', '178_u2', '179_u2', '180_u2', '181_u2', '182_u2', '183_u2', '184_u2', '185_u2', '186_u2', '187_u2', '188_u2', '189_u2', '190_u2', '191_u2', '192_u2', '193_u2', '194_u2', '195_u2', '196_u2', '197_u2', '198_u2', '199_u2', '200_u2', '201_u2', '202_u2', '203_u2', '204_u2', '205_u2', '206_u2', '207_u2', '208_u2', '209_u2', '210_u2', '211_u2', '212_u2', '213_u2', '214_u2', '215_u2', '216_u2', '217_u2', '218_u2', '219_u2', '220_u2', '221_u2', '222_u2', '223_u2', '224_u2', '225_u2', '226_u2', '227_u2', '228_u2', '229_u2', '230_u2', '231_u2', 
'232_u2', '233_u2', '234_u2', '235_u2', '236_u2', '237_u2', '238_u2', '239_u2', '240_u2', '241_u2', '242_u2', '243_u2', '244_u2', '245_u2', '246_u2', '247_u2', '248_u2', '249_u2', '250_u2', '251_u2', '252_u2', '253_u2', '254_u2', '255_u2', '256_u2', '257_u2', '258_u2', '259_u2', '260_u2', '261_u2', '262_u2', '263_u2', '264_u2', '265_u2', '266_u2', '267_u2', '268_u2', '269_u2', '270_u2', '271_u2', '272_u2', '273_u2', '274_u2', '275_u2', '276_u2', '277_u2', '278_u2', '279_u2', '280_u2', '281_u2', '282_u2', '283_u2', '284_u2', '285_u2', '286_u2', '287_u2', '288_u2', '289_u2', '290_u2', '291_u2', '292_u2', '293_u2', '294_u2', '295_u2', '296_u2', '297_u2', '298_u2', '299_u2', '300_u2', '301_u2', '302_u2', '303_u2', '304_u2', '305_u2', '306_u2', '307_u2', '308_u2', '309_u2', '310_u2', '311_u2', '312_u2', '313_u2', '314_u2', '315_u2', '316_u2', '317_u2', '318_u2', '319_u2', '320_u2', '321_u2', '322_u2', '323_u2', '324_u2', '325_u2', '326_u2', '327_u2', '328_u2', '329_u2', '330_u2', '331_u2', '332_u2', '333_u2', '334_u2', '335_u2', '336_u2', '337_u2', '338_u2', '339_u2', '340_u2', '341_u2', '342_u2', '343_u2', '344_u2', '345_u2', '346_u2', '347_u2', '348_u2', '349_u2', '350_u2', '351_u2', '352_u2', '353_u2', '354_u2', '355_u2', '356_u2', '357_u2', '358_u2', '359_u2', '360_u2', '361_u2', '362_u2', '363_u2', '364_u2', '365_u2', '366_u2', '367_u2', '368_u2', '369_u2', '370_u2', '371_u2', '372_u2', '373_u2', '374_u2', '375_u2', '376_u2', '377_u2', '378_u2', '379_u2', '380_u2', '381_u2', '382_u2', '383_u2', '384_u2', '385_u2', '386_u2', '387_u2', '388_u2', '389_u2', '390_u2', '391_u2', '392_u2', '393_u2', '394_u2', '395_u2', '396_u2', '397_u2', '398_u2', '399_u2', '400_u2', '401_u2', '402_u2', '403_u2', '404_u2', '405_u2', '406_u2', '407_u2', '408_u2', '409_u2', '410_u2', '411_u2', '412_u2', '413_u2', '414_u2', '415_u2', '416_u2', '417_u2', '418_u2', '419_u2', '420_u2', '421_u2', '422_u2', '423_u2', '424_u2', '425_u2', '426_u2', '427_u2', '428_u2', '429_u2', '430_u2', '431_u2', 
'432_u2', '433_u2', '434_u2', '435_u2', '436_u2', '437_u2', '438_u2', '439_u2', '440_u2', '441_u2', '442_u2', '443_u2', '444_u2', '445_u2', '446_u2', '447_u2', '448_u2', '449_u2', '450_u2', '451_u2', '452_u2', '453_u2', '454_u2', '455_u2', '456_u2', '457_u2', '458_u2', '459_u2', '460_u2', '461_u2', '462_u2', '463_u2', '464_u2', '465_u2', '466_u2', '467_u2', '468_u2', '469_u2', '470_u2', '471_u2', '472_u2', '473_u2', '474_u2', '475_u2', '476_u2', '477_u2', '478_u2', '479_u2', '480_u2', '481_u2', '482_u2', '483_u2', '484_u2', '485_u2', '486_u2', '487_u2', '488_u2', '489_u2', '490_u2', '491_u2', '492_u2', '493_u2', '494_u2', '495_u2', '496_u2', '497_u2', '498_u2', '499_u2', '0_u3', '1_u3', '2_u3', '3_u3', '4_u3', '5_u3', '6_u3', '7_u3', '8_u3', '9_u3', '10_u3', '11_u3', '12_u3', '13_u3', '14_u3', '15_u3', '16_u3', '17_u3', '18_u3', '19_u3', '20_u3', '21_u3', '22_u3', '23_u3', '24_u3', '25_u3', '26_u3', '27_u3', '28_u3', '29_u3', '30_u3', '31_u3', '32_u3', '33_u3', '34_u3', '35_u3', '36_u3', '37_u3', '38_u3', '39_u3', '40_u3', '41_u3', '42_u3', '43_u3', '44_u3', '45_u3', '46_u3', '47_u3', '48_u3', '49_u3', '50_u3', '51_u3', '52_u3', '53_u3', '54_u3', '55_u3', '56_u3', '57_u3', '58_u3', '59_u3', '60_u3', '61_u3', '62_u3', '63_u3', '64_u3', '65_u3', '66_u3', '67_u3', '68_u3', '69_u3', '70_u3', '71_u3', '72_u3', '73_u3', '74_u3', '75_u3', '76_u3', '77_u3', '78_u3', '79_u3', '80_u3', '81_u3', '82_u3', '83_u3', '84_u3', '85_u3', '86_u3', '87_u3', '88_u3', '89_u3', '90_u3', '91_u3', '92_u3', '93_u3', '94_u3', '95_u3', '96_u3', '97_u3', '98_u3', '99_u3', '100_u3', '101_u3', '102_u3', '103_u3', '104_u3', '105_u3', '106_u3', '107_u3', '108_u3', '109_u3', '110_u3', '111_u3', '112_u3', '113_u3', '114_u3', '115_u3', '116_u3', '117_u3', '118_u3', '119_u3', '120_u3', '121_u3', '122_u3', '123_u3', '124_u3', '125_u3', '126_u3', '127_u3', '128_u3', '129_u3', '130_u3', '131_u3', '132_u3', '133_u3', '134_u3', '135_u3', '136_u3', '137_u3', '138_u3', '139_u3', '140_u3', '141_u3', '142_u3', 
'143_u3', '144_u3', '145_u3', '146_u3', '147_u3', '148_u3', '149_u3', '150_u3', '151_u3', '152_u3', '153_u3', '154_u3', '155_u3', '156_u3', '157_u3', '158_u3', '159_u3', '160_u3', '161_u3', '162_u3', '163_u3', '164_u3', '165_u3', '166_u3', '167_u3', '168_u3', '169_u3', '170_u3', '171_u3', '172_u3', '173_u3', '174_u3', '175_u3', '176_u3', '177_u3', '178_u3', '179_u3', '180_u3', '181_u3', '182_u3', '183_u3', '184_u3', '185_u3', '186_u3', '187_u3', '188_u3', '189_u3', '190_u3', '191_u3', '192_u3', '193_u3', '194_u3', '195_u3', '196_u3', '197_u3', '198_u3', '199_u3', '200_u3', '201_u3', '202_u3', '203_u3', '204_u3', '205_u3', '206_u3', '207_u3', '208_u3', '209_u3', '210_u3', '211_u3', '212_u3', '213_u3', '214_u3', '215_u3', '216_u3', '217_u3', '218_u3', '219_u3', '220_u3', '221_u3', '222_u3', '223_u3', '224_u3', '225_u3', '226_u3', '227_u3', '228_u3', '229_u3', '230_u3', '231_u3', '232_u3', '233_u3', '234_u3', '235_u3', '236_u3', '237_u3', '238_u3', '239_u3', '240_u3', '241_u3', '242_u3', '243_u3', '244_u3', '245_u3', '246_u3', '247_u3', '248_u3', '249_u3', '250_u3', '251_u3', '252_u3', '253_u3', '254_u3', '255_u3', '256_u3', '257_u3', '258_u3', '259_u3', '260_u3', '261_u3', '262_u3', '263_u3', '264_u3', '265_u3', '266_u3', '267_u3', '268_u3', '269_u3', '270_u3', '271_u3', '272_u3', '273_u3', '274_u3', '275_u3', '276_u3', '277_u3', '278_u3', '279_u3', '280_u3', '281_u3', '282_u3', '283_u3', '284_u3', '285_u3', '286_u3', '287_u3', '288_u3', '289_u3', '290_u3', '291_u3', '292_u3', '293_u3', '294_u3', '295_u3', '296_u3', '297_u3', '298_u3', '299_u3', '300_u3', '301_u3', '302_u3', '303_u3', '304_u3', '305_u3', '306_u3', '307_u3', '308_u3', '309_u3', '310_u3', '311_u3', '312_u3', '313_u3', '314_u3', '315_u3', '316_u3', '317_u3', '318_u3', '319_u3', '320_u3', '321_u3', '322_u3', '323_u3', '324_u3', '325_u3', '326_u3', '327_u3', '328_u3', '329_u3', '330_u3', '331_u3', '332_u3', '333_u3', '334_u3', '335_u3', '336_u3', '337_u3', '338_u3', '339_u3', '340_u3', '341_u3', '342_u3', 
'343_u3', '344_u3', '345_u3', '346_u3', '347_u3', '348_u3', '349_u3', '350_u3', '351_u3', '352_u3', '353_u3', '354_u3', '355_u3', '356_u3', '357_u3', '358_u3', '359_u3', '360_u3', '361_u3', '362_u3', '363_u3', '364_u3', '365_u3', '366_u3', '367_u3', '368_u3', '369_u3', '370_u3', '371_u3', '372_u3', '373_u3', '374_u3', '375_u3', '376_u3', '377_u3', '378_u3', '379_u3', '380_u3', '381_u3', '382_u3', '383_u3', '384_u3', '385_u3', '386_u3', '387_u3', '388_u3', '389_u3', '390_u3', '391_u3', '392_u3', '393_u3', '394_u3', '395_u3', '396_u3', '397_u3', '398_u3', '399_u3', '400_u3', '401_u3', '402_u3', '403_u3', '404_u3', '405_u3', '406_u3', '407_u3', '408_u3', '409_u3', '410_u3', '411_u3', '412_u3', '413_u3', '414_u3', '415_u3', '416_u3', '417_u3', '418_u3', '419_u3', '420_u3', '421_u3', '422_u3', '423_u3', '424_u3', '425_u3', '426_u3', '427_u3', '428_u3', '429_u3', '430_u3', '431_u3', '432_u3', '433_u3', '434_u3', '435_u3', '436_u3', '437_u3', '438_u3', '439_u3', '440_u3', '441_u3', '442_u3', '443_u3', '444_u3', '445_u3', '446_u3', '447_u3', '448_u3', '449_u3', '450_u3', '451_u3', '452_u3', '453_u3', '454_u3', '455_u3', '456_u3', '457_u3', '458_u3', '459_u3', '460_u3', '461_u3', '462_u3', '463_u3', '464_u3', '465_u3', '466_u3', '467_u3', '468_u3', '469_u3', '470_u3', '471_u3', '472_u3', '473_u3', '474_u3', '475_u3', '476_u3', '477_u3', '478_u3', '479_u3', '480_u3', '481_u3', '482_u3', '483_u3', '484_u3', '485_u3', '486_u3', '487_u3', '488_u3', '489_u3', '490_u3', '491_u3', '492_u3', '493_u3', '494_u3', '495_u3', '496_u3', '497_u3', '498_u3', '499_u3', '500_u3', '501_u3', '502_u3', '503_u3', '504_u3', '505_u3', '506_u3', '507_u3', '508_u3', '509_u3', '510_u3', '511_u3', '512_u3', '513_u3', '514_u3', '515_u3', '516_u3', '517_u3', '518_u3', '519_u3', '520_u3', '521_u3', '522_u3', '523_u3', '524_u3', '525_u3', '526_u3', '527_u3', '528_u3', '529_u3', '530_u3', '531_u3', '532_u3', '533_u3', '534_u3', '535_u3', '536_u3', '537_u3', '538_u3', '539_u3', '540_u3', '541_u3', '542_u3', 
'543_u3', '544_u3', '545_u3', '546_u3', '547_u3', '548_u3', '549_u3', '550_u3', '551_u3', '552_u3', '553_u3', '554_u3', '555_u3', '556_u3', '557_u3', '558_u3', '559_u3', '560_u3', '561_u3', '562_u3', '563_u3', '564_u3', '565_u3', '566_u3', '567_u3', '568_u3', '569_u3', '570_u3', '571_u3', '572_u3', '573_u3', '574_u3', '575_u3', '576_u3', '577_u3', '578_u3', '579_u3', '580_u3', '581_u3', '582_u3', '583_u3', '584_u3', '585_u3', '586_u3', '587_u3', '588_u3', '589_u3', '590_u3', '591_u3', '592_u3', '593_u3', '594_u3', '595_u3', '596_u3', '597_u3', '598_u3', '599_u3', '600_u3', '601_u3', '602_u3', '603_u3', '604_u3', '605_u3', '606_u3', '607_u3', '608_u3', '609_u3', '610_u3', '611_u3', '612_u3', '613_u3', '614_u3', '615_u3', '616_u3', '617_u3', '618_u3', '619_u3', '620_u3', '621_u3', '622_u3', '623_u3', '624_u3', '625_u3', '626_u3', '627_u3', '628_u3', '629_u3', '630_u3', '631_u3', '632_u3', '633_u3', '634_u3', '635_u3', '636_u3', '637_u3', '638_u3', '639_u3', '640_u3', '641_u3', '642_u3', '643_u3', '644_u3', '645_u3', '646_u3', '647_u3', '648_u3', '649_u3', '650_u3', '651_u3', '652_u3', '653_u3', '654_u3', '655_u3', '656_u3', '657_u3', '658_u3', '659_u3', '660_u3', '661_u3', '662_u3', '663_u3', '664_u3', '665_u3', '666_u3', '667_u3', '668_u3', '669_u3', '670_u3', '671_u3', '672_u3', '673_u3', '674_u3', '675_u3', '676_u3', '677_u3', '678_u3', '679_u3', '680_u3', '681_u3', '682_u3', '683_u3', '684_u3', '685_u3', '686_u3', '687_u3', '688_u3', '689_u3', '690_u3', '691_u3', '692_u3', '693_u3', '694_u3', '695_u3', '696_u3', '697_u3', '698_u3', '699_u3', '700_u3', '701_u3', '702_u3', '703_u3', '704_u3', '705_u3', '706_u3', '707_u3', '708_u3', '709_u3', '710_u3', '711_u3', '712_u3', '713_u3', '714_u3', '715_u3', '716_u3', '717_u3', '718_u3', '719_u3', '720_u3', '721_u3', '722_u3', '723_u3', '724_u3', '725_u3', '726_u3', '727_u3', '728_u3', '729_u3', '730_u3', '731_u3', '732_u3', '733_u3', '734_u3', '735_u3', '736_u3', '737_u3', '738_u3', '739_u3', '740_u3', '741_u3', '742_u3', 
'743_u3', '744_u3', '745_u3', '746_u3', '747_u3', '748_u3', '749_u3', '750_u3', '751_u3', '752_u3', '753_u3', '754_u3', '755_u3', '756_u3', '757_u3', '758_u3', '759_u3', '760_u3', '761_u3', '762_u3', '763_u3', '764_u3', '765_u3', '766_u3', '767_u3', '768_u3', '769_u3', '770_u3', '771_u3', '772_u3', '773_u3', '774_u3', '775_u3', '776_u3', '777_u3', '778_u3', '779_u3', '780_u3', '781_u3', '782_u3', '783_u3', '784_u3', '785_u3', '786_u3', '787_u3', '788_u3', '789_u3', '790_u3', '791_u3', '792_u3', '793_u3', '794_u3', '795_u3', '796_u3', '797_u3', '798_u3', '799_u3', '800_u3', '801_u3', '802_u3', '803_u3', '804_u3', '805_u3', '806_u3', '807_u3', '808_u3', '809_u3', '810_u3', '811_u3', '812_u3', '813_u3', '814_u3', '815_u3', '816_u3', '817_u3', '818_u3', '819_u3', '820_u3', '821_u3', '822_u3', '823_u3', '824_u3', '825_u3', '826_u3', '827_u3', '828_u3', '829_u3', '830_u3', '831_u3', '832_u3', '833_u3', '834_u3', '835_u3', '836_u3', '837_u3', '838_u3', '839_u3', '840_u3', '841_u3', '842_u3', '843_u3', '844_u3', '845_u3', '846_u3', '847_u3', '848_u3', '849_u3', '850_u3', '851_u3', '852_u3', '853_u3', '854_u3', '855_u3', '856_u3', '857_u3', '858_u3', '859_u3', '860_u3', '861_u3', '862_u3', '863_u3', '864_u3', '865_u3', '866_u3', '867_u3', '868_u3', '869_u3', '870_u3', '871_u3', '872_u3', '873_u3', '874_u3', '875_u3', '876_u3', '877_u3', '878_u3', '879_u3', '880_u3', '881_u3', '882_u3', '883_u3', '884_u3', '885_u3', '886_u3', '887_u3', '888_u3', '889_u3', '890_u3', '891_u3', '892_u3', '893_u3', '894_u3', '895_u3', '896_u3', '897_u3', '898_u3', '899_u3', '900_u3', '901_u3', '902_u3', '903_u3', '904_u3', '905_u3', '906_u3', '907_u3', '908_u3', '909_u3', '910_u3', '911_u3', '912_u3', '913_u3', '914_u3', '915_u3', '916_u3', '917_u3', '918_u3', '919_u3', '920_u3', '921_u3', '922_u3', '923_u3', '924_u3', '925_u3', '926_u3', '927_u3', '928_u3', '929_u3', '930_u3', '931_u3', '932_u3', '933_u3', '934_u3', '935_u3', '936_u3', '937_u3', '938_u3', '939_u3', '940_u3', '941_u3', '942_u3', 
'943_u3', '944_u3', '945_u3', '946_u3', '947_u3', '948_u3', '949_u3', '950_u3', '951_u3', '952_u3', '953_u3', '954_u3', '955_u3', '956_u3', '957_u3', '958_u3', '959_u3', '960_u3', '961_u3', '962_u3', '963_u3', '964_u3', '965_u3', '966_u3', '967_u3', '968_u3', '969_u3', '970_u3', '971_u3', '972_u3', '973_u3', '974_u3', '975_u3', '976_u3', '977_u3', '978_u3', '979_u3', '980_u3', '981_u3', '982_u3', '983_u3', '984_u3', '985_u3', '986_u3', '987_u3', '988_u3', '989_u3', '990_u3', '991_u3', '992_u3', '993_u3', '994_u3', '995_u3', '996_u3', '997_u3', '998_u3', '999_u3', '1000_u3', '1001_u3', '1002_u3', '1003_u3', '1004_u3', '1005_u3', '1006_u3', '1007_u3', '1008_u3', '1009_u3', '1010_u3', '1011_u3', '1012_u3', '1013_u3', '1014_u3', '1015_u3', '1016_u3', '1017_u3', '1018_u3', '1019_u3', '1020_u3', '1021_u3', '1022_u3', '1023_u3', '1024_u3', '1025_u3', '1026_u3', '1027_u3', '1028_u3', '1029_u3', '1030_u3', '1031_u3', '1032_u3', '1033_u3', '1034_u3', '1035_u3', '1036_u3', '1037_u3', '1038_u3', '1039_u3', '1040_u3', '1041_u3', '1042_u3', '1043_u3', '1044_u3', '1045_u3', '1046_u3', '1047_u3', '1048_u3', '1049_u3', '1050_u3', '1051_u3', '1052_u3', '1053_u3', '1054_u3', '1055_u3', '1056_u3', '1057_u3', '1058_u3', '1059_u3', '1060_u3', '1061_u3', '1062_u3', '1063_u3', '1064_u3', '1065_u3', '1066_u3', '1067_u3', '1068_u3', '1069_u3', '1070_u3', '1071_u3', '1072_u3', '1073_u3', '1074_u3', '1075_u3', '1076_u3', '1077_u3', '1078_u3', '1079_u3', '1080_u3', '1081_u3', '1082_u3', '1083_u3', '1084_u3', '1085_u3', '1086_u3', '1087_u3', '1088_u3', '1089_u3', '1090_u3', '1091_u3', '1092_u3', '1093_u3', '1094_u3', '1095_u3', '1096_u3', '1097_u3', '1098_u3', '1099_u3', '1100_u3', '1101_u3', '1102_u3', '1103_u3', '1104_u3', '1105_u3', '1106_u3', '1107_u3', '1108_u3', '1109_u3', '1110_u3', '1111_u3', '1112_u3', '1113_u3', '1114_u3', '1115_u3', '1116_u3', '1117_u3', '1118_u3', '1119_u3', '1120_u3', '1121_u3', '1122_u3', '1123_u3', '1124_u3', '1125_u3', '1126_u3', '1127_u3', '1128_u3', '1129_u3', 
'1130_u3', '1131_u3', '1132_u3', '1133_u3', '1134_u3', '1135_u3', '1136_u3', '1137_u3', '1138_u3', '1139_u3', '1140_u3', '1141_u3', '1142_u3', '1143_u3', '1144_u3', '1145_u3', '1146_u3', '1147_u3', '1148_u3', '1149_u3', '1150_u3', '1151_u3', '1152_u3', '1153_u3', '1154_u3', '1155_u3', '1156_u3', '1157_u3', '1158_u3', '1159_u3', '1160_u3', '1161_u3', '1162_u3', '1163_u3', '1164_u3', '1165_u3', '1166_u3', '1167_u3', '1168_u3', '1169_u3', '1170_u3', '1171_u3', '1172_u3', '1173_u3', '1174_u3', '1175_u3', '1176_u3', '1177_u3', '1178_u3', '1179_u3', '1180_u3', '1181_u3', '1182_u3', '1183_u3', '1184_u3', '1185_u3', '1186_u3', '1187_u3', '1188_u3', '1189_u3', '1190_u3', '1191_u3', '1192_u3', '1193_u3', '1194_u3', '1195_u3', '1196_u3', '1197_u3', '1198_u3', '1199_u3', '1200_u3', '1201_u3', '1202_u3', '1203_u3', '1204_u3', '1205_u3', '1206_u3', '1207_u3', '1208_u3', '1209_u3', '1210_u3', '1211_u3', '1212_u3', '1213_u3', '1214_u3', '1215_u3', '1216_u3', '1217_u3', '1218_u3', '1219_u3', '1220_u3', '1221_u3', '1222_u3', '1223_u3', '1224_u3', '1225_u3', '1226_u3', '1227_u3', '1228_u3', '1229_u3', '1230_u3', '1231_u3', '1232_u3', '1233_u3', '1234_u3', '1235_u3', '1236_u3', '1237_u3', '1238_u3', '1239_u3', '1240_u3', '1241_u3', '1242_u3', '1243_u3', '1244_u3', '1245_u3', '1246_u3', '1247_u3', '1248_u3', '1249_u3', '1250_u3', '1251_u3', '1252_u3', '1253_u3', '1254_u3', '1255_u3', '1256_u3', '1257_u3', '1258_u3', '1259_u3', '1260_u3', '1261_u3', '1262_u3', '1263_u3', '1264_u3', '1265_u3', '1266_u3', '1267_u3', '1268_u3', '1269_u3', '1270_u3', '1271_u3', '1272_u3', '1273_u3', '1274_u3', '1275_u3', '1276_u3', '1277_u3', '1278_u3', '1279_u3', '1280_u3', '1281_u3', '1282_u3', '1283_u3', '1284_u3', '1285_u3', '1286_u3', '1287_u3', '1288_u3', '1289_u3', '1290_u3', '1291_u3', '1292_u3', '1293_u3', '1294_u3', '1295_u3', '1296_u3', '1297_u3', '1298_u3', '1299_u3', '1300_u3', '1301_u3', '1302_u3', '1303_u3', '1304_u3', '1305_u3', '1306_u3', '1307_u3', '1308_u3', '1309_u3', '1310_u3', 
'1311_u3', '1312_u3', '1313_u3', '1314_u3', '1315_u3', '1316_u3', '1317_u3', '1318_u3', '1319_u3', '1320_u3', '1321_u3', '1322_u3', '1323_u3', '1324_u3', '1325_u3', '1326_u3', '1327_u3', '1328_u3', '1329_u3', '1330_u3', '1331_u3', '1332_u3', '1333_u3', '1334_u3', '1335_u3', '1336_u3', '1337_u3', '1338_u3', '1339_u3', '1340_u3', '1341_u3', '1342_u3', '1343_u3', '1344_u3', '1345_u3', '1346_u3', '1347_u3', '1348_u3', '1349_u3', '1350_u3', '1351_u3', '1352_u3', '1353_u3', '1354_u3', '1355_u3', '1356_u3', '1357_u3', '1358_u3', '1359_u3', '1360_u3', '1361_u3', '1362_u3', '1363_u3', '1364_u3', '1365_u3', '1366_u3', '1367_u3', '1368_u3', '1369_u3', '1370_u3', '1371_u3', '1372_u3', '1373_u3', '1374_u3', '1375_u3', '1376_u3', '1377_u3', '1378_u3', '1379_u3', '1380_u3', '1381_u3', '1382_u3', '1383_u3', '1384_u3', '1385_u3', '1386_u3', '1387_u3', '1388_u3', '1389_u3', '1390_u3', '1391_u3', '1392_u3', '1393_u3', '1394_u3', '1395_u3', '1396_u3', '1397_u3', '1398_u3', '1399_u3', '1400_u3', '1401_u3', '1402_u3', '1403_u3', '1404_u3', '1405_u3', '1406_u3', '1407_u3', '1408_u3', '1409_u3', '1410_u3', '1411_u3', '1412_u3', '1413_u3', '1414_u3', '1415_u3', '1416_u3', '1417_u3', '1418_u3', '1419_u3', '1420_u3', '1421_u3', '1422_u3', '1423_u3', '1424_u3', '1425_u3', '1426_u3', '1427_u3', '1428_u3', '1429_u3', '1430_u3', '1431_u3', '1432_u3', '1433_u3', '1434_u3', '1435_u3', '1436_u3', '1437_u3', '1438_u3', '1439_u3', '1440_u3', '1441_u3', '1442_u3', '1443_u3', '1444_u3', '1445_u3', '1446_u3', '1447_u3', '1448_u3', '1449_u3', '1450_u3', '1451_u3', '1452_u3', '1453_u3', '1454_u3', '1455_u3', '1456_u3', '1457_u3', '1458_u3', '1459_u3', '1460_u3', '1461_u3', '1462_u3', '1463_u3', '1464_u3', '1465_u3', '1466_u3', '1467_u3', '1468_u3', '1469_u3', '1470_u3', '1471_u3', '1472_u3', '1473_u3', '1474_u3', '1475_u3', '1476_u3', '1477_u3', '1478_u3', '1479_u3', '1480_u3', '1481_u3', '1482_u3', '1483_u3', '1484_u3', '1485_u3', '1486_u3', '1487_u3', '1488_u3', '1489_u3', '1490_u3', '1491_u3', 
'1492_u3', '1493_u3', '1494_u3', '1495_u3', '1496_u3', '1497_u3', '1498_u3', '1499_u3','als_emb_url_host_rc_0', 'als_emb_url_host_rc_1', 'als_emb_url_host_rc_2', 'als_emb_url_host_rc_3', 'als_emb_url_host_rc_4', 'als_emb_url_host_rc_5', 'als_emb_url_host_rc_6', 'als_emb_url_host_rc_7', 'als_emb_url_host_rc_8', 'als_emb_url_host_rc_9', 'als_emb_url_host_rc_10', 'als_emb_url_host_rc_11', 'als_emb_url_host_rc_12', 'als_emb_url_host_rc_13', 'als_emb_url_host_rc_14', 'als_emb_url_host_rc_15', 'als_emb_url_host_rc_16', 'als_emb_url_host_rc_17', 'als_emb_url_host_rc_18', 'als_emb_url_host_rc_19', 'als_emb_url_host_rc_20', 'als_emb_url_host_rc_21', 'als_emb_url_host_rc_22', 'als_emb_url_host_rc_23', 'als_emb_url_host_rc_24', 'als_emb_url_host_rc_25', 'als_emb_url_host_rc_26', 'als_emb_url_host_rc_27', 'als_emb_url_host_rc_28', 'als_emb_url_host_rc_29', 'als_emb_url_host_rc_30', 'als_emb_url_host_rc_31', 'als_emb_url_host_wh0', 'als_emb_url_host_wh1', 'als_emb_url_host_wh2', 'als_emb_url_host_wh3', 'als_emb_url_host_wh4', 'als_emb_url_host_wh5', 'als_emb_url_host_wh6', 'als_emb_url_host_wh7', 'als_emb_url_host_wh8', 'als_emb_url_host_wh9', 'als_emb_url_host_wh10', 'als_emb_url_host_wh11', 'als_emb_url_host_wh12', 'als_emb_url_host_wh13', 'als_emb_url_host_wh14', 'als_emb_url_host_wh15', 'als_emb_url_host_wh16', 'als_emb_url_host_wh17', 'als_emb_url_host_wh18', 'als_emb_url_host_wh19', 'als_emb_url_host_wh20', 'als_emb_url_host_wh21', 'als_emb_url_host_wh22', 'als_emb_url_host_wh23', 'als_emb_url_host_wh24', 'als_emb_url_host_wh25', 'als_emb_url_host_wh26', 'als_emb_url_host_wh27', 'als_emb_url_host_wh28', 'als_emb_url_host_wh29', 'als_emb_url_host_wh30', 'als_emb_url_host_wh31', 'als_emb_url_host_wh32', 'als_emb_url_host_wh33', 'als_emb_url_host_wh34', 'als_emb_url_host_wh35', 'als_emb_url_host_wh36', 'als_emb_url_host_wh37', 'als_emb_url_host_wh38', 'als_emb_url_host_wh39', 'als_emb_url_host_wh40', 'als_emb_url_host_wh41', 'als_emb_url_host_wh42', 'als_emb_url_host_wh43', 
'als_emb_url_host_wh44', 'als_emb_url_host_wh45', 'als_emb_url_host_wh46', 'als_emb_url_host_wh47', 'als_emb_url_host_wh48', 'als_emb_url_host_wh49', 'als_emb_url_host_wh50', 'als_emb_url_host_wh51', 'als_emb_url_host_wh52', 'als_emb_url_host_wh53', 'als_emb_url_host_wh54', 'als_emb_url_host_wh55', 'als_emb_url_host_wh56', 'als_emb_url_host_wh57', 'als_emb_url_host_wh58', 'als_emb_url_host_wh59', 'als_emb_url_host_wh60', 'als_emb_url_host_wh61', 'als_emb_url_host_wh62', 'als_emb_url_host_wh63', 'als_emb_url_host_um0', 'als_emb_url_host_um1', 'als_emb_url_host_um2', 'als_emb_url_host_um3', 'als_emb_url_host_um4', 'als_emb_url_host_um5', 'als_emb_url_host_um6', 'als_emb_url_host_um7', 'als_emb_url_host_um8', 'als_emb_url_host_um9', 'als_emb_url_host_um10', 'als_emb_url_host_um11', 'als_emb_url_host_um12', 'als_emb_url_host_um13', 'als_emb_url_host_um14', 'als_emb_url_host_um15', 'als_emb_url_host_um16', 'als_emb_url_host_um17', 'als_emb_url_host_um18', 'als_emb_url_host_um19', 'als_emb_url_host_um20', 'als_emb_url_host_um21', 'als_emb_url_host_um22', 'als_emb_url_host_um23', 'als_emb_url_host_um24', 'als_emb_url_host_um25', 'als_emb_url_host_um26', 'als_emb_url_host_um27', 'als_emb_url_host_um28', 'als_emb_url_host_um29', 'als_emb_url_host_um30', 'als_emb_url_host_um31', 'als_emb_url_host_um32', 'als_emb_url_host_um33', 'als_emb_url_host_um34', 'als_emb_url_host_um35', 'als_emb_url_host_um36', 'als_emb_url_host_um37', 'als_emb_url_host_um38', 'als_emb_url_host_um39', 'als_emb_url_host_um40', 'als_emb_url_host_um41', 'als_emb_url_host_um42', 'als_emb_url_host_um43', 'als_emb_url_host_um44', 'als_emb_url_host_um45', 'als_emb_url_host_um46', 'als_emb_url_host_um47', 'als_emb_url_host_um48', 'als_emb_url_host_um49', 'als_emb_url_host_um50', 'als_emb_url_host_um51', 'als_emb_url_host_um52', 'als_emb_url_host_um53', 'als_emb_url_host_um54', 'als_emb_url_host_um55', 'als_emb_url_host_um56', 'als_emb_url_host_um57', 'als_emb_url_host_um58', 'als_emb_url_host_um59', 
'als_emb_url_host_um60', 'als_emb_url_host_um61', 'als_emb_url_host_um62', 'als_emb_url_host_um63', 'als_emb_url_host_um64', 'als_emb_url_host_um65', 'als_emb_url_host_um66', 'als_emb_url_host_um67', 'als_emb_url_host_um68', 'als_emb_url_host_um69', 'als_emb_url_host_um70', 'als_emb_url_host_um71', 'als_emb_url_host_um72', 'als_emb_url_host_um73', 'als_emb_url_host_um74', 'als_emb_url_host_um75', 'als_emb_url_host_um76', 'als_emb_url_host_um77', 'als_emb_url_host_um78', 'als_emb_url_host_um79', 'als_emb_url_host_um80', 'als_emb_url_host_um81', 'als_emb_url_host_um82', 'als_emb_url_host_um83', 'als_emb_url_host_um84', 'als_emb_url_host_um85', 'als_emb_url_host_um86', 'als_emb_url_host_um87', 'als_emb_url_host_um88', 'als_emb_url_host_um89', 'als_emb_url_host_um90', 'als_emb_url_host_um91', 'als_emb_url_host_um92', 'als_emb_url_host_um93', 'als_emb_url_host_um94', 'als_emb_url_host_um95', 'als_emb_url_host_um96', 'als_emb_url_host_um97', 'als_emb_url_host_um98', 'als_emb_url_host_um99', 'als_emb_url_host_um100', 'als_emb_url_host_um101', 'als_emb_url_host_um102', 'als_emb_url_host_um103', 'als_emb_url_host_um104', 'als_emb_url_host_um105', 'als_emb_url_host_um106', 'als_emb_url_host_um107', 'als_emb_url_host_um108', 'als_emb_url_host_um109', 'als_emb_url_host_um110', 'als_emb_url_host_um111', 'als_emb_url_host_um112', 'als_emb_url_host_um113', 'als_emb_url_host_um114', 'als_emb_url_host_um115', 'als_emb_url_host_um116', 'als_emb_url_host_um117', 'als_emb_url_host_um118', 'als_emb_url_host_um119', 'als_emb_url_host_um120', 'als_emb_url_host_um121', 'als_emb_url_host_um122', 'als_emb_url_host_um123', 'als_emb_url_host_um124', 'als_emb_url_host_um125', 'als_emb_url_host_um126', 'als_emb_url_host_um127', 'als_emb_url_host_dc_0', 'als_emb_url_host_dc_1', 'als_emb_url_host_dc_2', 'als_emb_url_host_dc_3', 'als_emb_url_host_dc_4', 'als_emb_url_host_dc_5', 'als_emb_url_host_dc_6', 'als_emb_url_host_dc_7', 'als_emb_url_host_dc_8', 'als_emb_url_host_dc_9', 
'als_emb_url_host_dc_10', 'als_emb_url_host_dc_11', 'als_emb_url_host_dc_12', 'als_emb_url_host_dc_13', 'als_emb_url_host_dc_14', 'als_emb_url_host_dc_15', 'als_emb_url_host_dc_16', 'als_emb_url_host_dc_17', 'als_emb_url_host_dc_18', 'als_emb_url_host_dc_19', 'als_emb_url_host_dc_20', 'als_emb_url_host_dc_21', 'als_emb_url_host_dc_22', 'als_emb_url_host_dc_23', 'als_emb_url_host_dc_24', 'als_emb_url_host_dc_25', 'als_emb_url_host_dc_26', 'als_emb_url_host_dc_27', 'als_emb_url_host_dc_28', 'als_emb_url_host_dc_29', 'als_emb_url_host_dc_30', 'als_emb_url_host_dc_31', 'als_emb_url_host_dc_32', 'als_emb_url_host_dc_33', 'als_emb_url_host_dc_34', 'als_emb_url_host_dc_35', 'als_emb_url_host_dc_36', 'als_emb_url_host_dc_37', 'als_emb_url_host_dc_38', 'als_emb_url_host_dc_39', 'als_emb_url_host_dc_40', 'als_emb_url_host_dc_41', 'als_emb_url_host_dc_42', 'als_emb_url_host_dc_43', 'als_emb_url_host_dc_44', 'als_emb_url_host_dc_45', 'als_emb_url_host_dc_46', 'als_emb_url_host_dc_47', 'als_emb_url_host_dc_48', 'als_emb_url_host_dc_49', 'als_emb_url_host_dc_50', 'als_emb_url_host_dc_51', 'als_emb_url_host_dc_52', 'als_emb_url_host_dc_53', 'als_emb_url_host_dc_54', 'als_emb_url_host_dc_55', 'als_emb_url_host_dc_56', 'als_emb_url_host_dc_57', 'als_emb_url_host_dc_58', 'als_emb_url_host_dc_59', 'als_emb_url_host_dc_60', 'als_emb_url_host_dc_61', 'als_emb_url_host_dc_62', 'als_emb_url_host_dc_63', 'url_host_dc_clusters_age', 'url_host_dc_clusters_ismale']

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load cold_users.pickle when it comes from a trusted source.
with open('cold_users.pickle', 'rb') as handle:
    cold_users = pickle.load(handle)

if not cold:
    # Warm setting: keep only the training rows whose user_id is not in cold_users.
    train = train[~train['user_id'].isin(cold_users)]
else:
    # Cold setting: remove the warm_features columns from both frames.
    train = train.drop(columns=warm_features)
    test = test.drop(columns=warm_features)

print(len(train))

263065


In [7]:
# Keep only rows with a usable label: drop real NaNs as well as the literal
# string 'NA' (presumably a textual missing-value marker - confirm upstream).
# The comparison is vectorised instead of a per-row Python lambda.
has_label = train['is_male'].notna() & (train['is_male'] != 'NA')
# .copy() detaches the filtered frame from its parent, so the column
# assignment below cannot raise SettingWithCopyWarning.
train = train[has_label].copy()
train['is_male'] = train['is_male'].astype('int')
print(len(train))

257535


In [8]:
from torch import nn
from torch.autograd import Function
import torch.nn.functional as F
import torch
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from scipy.special import softmax
from torch.utils.data import DataLoader
from torch.nn.functional import cross_entropy, mse_loss


def _make_ix_like(input, dim=0):
    d = input.size(dim)
    rho = torch.arange(1, d + 1, device=input.device, dtype=input.dtype)
    view = [1] * input.dim()
    view[0] = -1
    return rho.view(view).transpose(0, dim)


class SparsemaxFunction(Function):
    """Custom autograd function implementing sparsemax along one axis.

    The forward pass shifts the input by its maximum, finds a threshold ``tau``
    from the sorted values and returns ``clamp(input - tau, min=0)``. The
    backward pass propagates gradients only through the non-zero outputs.
    """

    @staticmethod
    def forward(ctx, input, dim=-1):
        """Compute sparsemax of ``input`` along ``dim``.

        Args:
            ctx: autograd context; receives ``dim`` and the tensors needed by
                ``backward``.
            input: tensor of scores. It is NOT modified.
            dim: axis along which the threshold is computed.

        Returns:
            Tensor with the same shape as ``input``.
        """
        ctx.dim = dim
        max_val, _ = input.max(dim=dim, keepdim=True)
        # Shift out of place: an in-place `input -= max_val` would overwrite the
        # caller's tensor (e.g. a module parameter) on every forward call.
        input = input - max_val  # same numerical stability trick as for softmax
        tau, supp_size = SparsemaxFunction._threshold_and_support(input, dim=dim)
        output = torch.clamp(input - tau, min=0)
        ctx.save_for_backward(supp_size, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input`` (and ``None`` for ``dim``)."""
        supp_size, output = ctx.saved_tensors
        dim = ctx.dim
        grad_input = grad_output.clone()
        grad_input[output == 0] = 0

        # Squeeze only the reduced axis. A bare squeeze() would also drop any
        # other size-1 axis (batch of one, shape (B, 1, D), ...) and make the
        # division broadcast to the wrong shape.
        v_hat = grad_input.sum(dim=dim) / supp_size.to(output.dtype).squeeze(dim)
        v_hat = v_hat.unsqueeze(dim)
        grad_input = torch.where(output != 0, grad_input - v_hat, grad_input)
        return grad_input, None

    @staticmethod
    def _threshold_and_support(input, dim=-1):
        """Compute the sparsemax threshold and the support size along ``dim``.

        Returns:
            ``(tau, support_size)``, both keeping ``dim`` as a size-1 axis.
        """
        input_srt, _ = torch.sort(input, descending=True, dim=dim)
        input_cumsum = input_srt.cumsum(dim) - 1
        rhos = _make_ix_like(input, dim)
        support = rhos * input_srt > input_cumsum

        support_size = support.sum(dim=dim).unsqueeze(dim)
        # support_size is 1-based, hence the -1 when used as a gather index.
        tau = input_cumsum.gather(dim, support_size - 1)
        tau /= support_size.to(input.dtype)
        return tau, support_size


# Functional entry point: sparsemax(x, dim) runs SparsemaxFunction through autograd.
sparsemax = SparsemaxFunction.apply


class Sparsemax(nn.Module):
    """Module wrapper that applies ``sparsemax`` along a fixed axis."""

    def __init__(self, dim=-1):
        """Remember ``dim``, the axis passed to ``sparsemax`` in ``forward``."""
        super().__init__()
        self.dim = dim

    def forward(self, input):
        """Return ``sparsemax(input, self.dim)``."""
        axis = self.dim
        return sparsemax(input, axis)


class Entmax15Function(Function):
    """Custom autograd function for the 1.5-entmax mapping along one axis."""

    @staticmethod
    def forward(ctx, input, dim=-1):
        """Compute the mapping of ``input`` along ``dim`` and save it for backward."""
        ctx.dim = dim

        peak, _ = input.max(dim=dim, keepdim=True)
        # Shift by the max (same numerical stability trick as for softmax),
        # then divide by 2 to solve the actual Entmax problem.
        scaled = (input - peak) / 2

        threshold, _ = Entmax15Function._threshold_and_support(scaled, dim)
        probs = torch.clamp(scaled - threshold, min=0) ** 2
        ctx.save_for_backward(probs)
        return probs

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input`` (and ``None`` for ``dim``)."""
        (probs,) = ctx.saved_tensors
        root = probs.sqrt()  # = 1 / g'' (Y)
        weighted = grad_output * root
        correction = (weighted.sum(ctx.dim) / root.sum(ctx.dim)).unsqueeze(ctx.dim)
        return weighted - correction * root, None

    @staticmethod
    def _threshold_and_support(input, dim=-1):
        """Return ``(tau_star, support_size)`` along ``dim``, both keeping ``dim`` as a size-1 axis."""
        sorted_vals, _ = torch.sort(input, descending=True, dim=dim)

        ranks = _make_ix_like(input, dim)
        running_mean = sorted_vals.cumsum(dim) / ranks
        running_mean_sq = (sorted_vals ** 2).cumsum(dim) / ranks
        spread = ranks * (running_mean_sq - running_mean ** 2)

        # NOTE this is not exactly the same as in reference algo
        # Fortunately it seems the clamped values never wrongly
        # get selected by tau <= sorted_z. Prove this!
        discriminant = torch.clamp((1 - spread) / ranks, 0)
        candidates = running_mean - torch.sqrt(discriminant)

        support_size = (candidates <= sorted_vals).sum(dim).unsqueeze(dim)
        # support_size is 1-based, hence the -1 when used as a gather index.
        tau_star = candidates.gather(dim, support_size - 1)
        return tau_star, support_size


class Entmoid15(Function):
    """ A highly optimized equivalent of lambda x: Entmax15([x, 0]) """

    @staticmethod
    def forward(ctx, input):
        """Evaluate the function elementwise and keep the result for backward."""
        result = Entmoid15._forward(input)
        ctx.save_for_backward(result)
        return result

    @staticmethod
    def _forward(input):
        """Elementwise forward computation; the output has the shape of ``input``."""
        nonneg = input >= 0
        magnitude = input.abs()
        tau = (magnitude + torch.sqrt(F.relu(8 - magnitude ** 2))) / 2
        # Where tau does not exceed |x| it is replaced by the constant 2.0.
        tau = tau.masked_fill(tau <= magnitude, 2.0)
        lower = 0.25 * F.relu(tau - magnitude) ** 2
        # Non-negative inputs get the complement of the value computed from |x|.
        return torch.where(nonneg, 1 - lower, lower)

    @staticmethod
    def backward(ctx, grad_output):
        """Gradient w.r.t. ``input``, computed from the saved forward output."""
        (saved,) = ctx.saved_tensors
        return Entmoid15._backward(saved, grad_output)

    @staticmethod
    def _backward(output, grad_output):
        """Gradient formula expressed through the forward ``output``."""
        root_y = output.sqrt()
        root_not_y = (1 - output).sqrt()
        scaled = grad_output * root_y
        shared = scaled / (root_y + root_not_y)
        return scaled - shared * root_y


# Functional entry points: calling these runs the corresponding autograd
# Function defined above (entmax15 takes (input, dim); entmoid15 takes input).
entmax15 = Entmax15Function.apply
entmoid15 = Entmoid15.apply


class Entmax15(nn.Module):
    """Module wrapper that applies ``entmax15`` along a fixed axis."""

    def __init__(self, dim=-1):
        """Remember ``dim``, the axis passed to ``entmax15`` in ``forward``."""
        super().__init__()
        self.dim = dim

    def forward(self, input):
        """Return ``entmax15(input, self.dim)``."""
        axis = self.dim
        return entmax15(input, axis)
    
    
def initialize_glu(module, input_dim, output_dim):
    """Re-initialise ``module.weight`` in place with Xavier-normal values.

    The gain is ``sqrt((input_dim + output_dim) / sqrt(input_dim))``.
    Returns ``None``.
    """
    fan_total = input_dim + output_dim
    gain_value = np.sqrt(fan_total / np.sqrt(input_dim))
    torch.nn.init.xavier_normal_(module.weight, gain=gain_value)

class GBN(torch.nn.Module):
    """Batch normalisation applied separately to virtual mini-batches.

    In training mode the batch is split along axis 0 into
    ``ceil(batch / virtual_batch_size)`` chunks and each chunk is normalised on
    its own by the shared ``BatchNorm1d``. In eval mode the whole batch goes
    through the layer in one call.
    """

    def __init__(self, input_dim, virtual_batch_size=512):
        super().__init__()
        self.input_dim = input_dim
        self.virtual_batch_size = virtual_batch_size
        self.bn = nn.BatchNorm1d(self.input_dim)

    def forward(self, x):
        """Normalise ``x`` chunk by chunk (training) or as a whole (eval)."""
        if not self.training:
            return self.bn(x)
        n_chunks = int(np.ceil(x.shape[0] / self.virtual_batch_size))
        normalised = [self.bn(part) for part in x.chunk(n_chunks, 0)]
        return torch.cat(normalised, dim=0)

class LearnableLocality(nn.Module):
    """Learns ``k`` feature masks and applies each of them to the input.

    ``weight`` has shape ``[k, input_dim]`` and is initialised with
    ``torch.rand``. In ``forward`` it is turned into masks by ``Entmax15`` over
    the last axis.
    """

    def __init__(self, input_dim, k):
        super().__init__()
        self.weight = nn.Parameter(torch.rand(k, input_dim))
        self.smax = Entmax15(dim=-1)

    def forward(self, x):
        """Multiply every input row by every mask: [B, D] -> [B, k, D]."""
        return torch.einsum('nd,bd->bnd', self.smax(self.weight), x)

class AbstractLayer(nn.Module):
    """Masks the input ``k`` ways, projects each masked copy with a grouped
    1x1 convolution, normalises, gates and sums the ``k`` groups."""

    def __init__(self, base_input_dim, base_output_dim, k, virtual_batch_size, bias=True):
        super().__init__()
        in_channels = base_input_dim * k
        # Each group emits 2 * base_output_dim channels: one half feeds the
        # sigmoid gate, the other half is the value being gated.
        out_channels = 2 * k * base_output_dim
        self.masker = LearnableLocality(input_dim=base_input_dim, k=k)
        self.fc = nn.Conv1d(in_channels, out_channels, kernel_size=1, groups=k, bias=bias)
        initialize_glu(self.fc, input_dim=in_channels, output_dim=out_channels)
        self.bn = GBN(out_channels, virtual_batch_size)
        self.k = k
        self.base_output_dim = base_output_dim

    def forward(self, x):
        """Map [B, D] to [B, D'] where D' is ``base_output_dim``."""
        batch = x.size(0)
        masked = self.masker(x)  # [B, D] -> [B, k, D]
        # [B, k, D] -> [B, k * D, 1] -> [B, k * (2 * D'), 1]
        projected = self.bn(self.fc(masked.view(batch, -1, 1)))
        half = self.base_output_dim
        gated = [
            F.relu(torch.sigmoid(group[:, :half, :]) * group[:, half:, :])
            for group in projected.chunk(self.k, 1)  # k * [B, 2 * D', 1]
        ]
        # k * [B, D', 1] -> [B, D', 1] -> [B, D']
        return sum(gated).squeeze(-1)


class BasicBlock(nn.Module):
    """Residual block: two stacked ``AbstractLayer``s plus a shortcut branch.

    The main path runs ``conv1`` then ``conv2``. The shortcut (``downsample``)
    applies dropout followed by an ``AbstractLayer`` to ``x``. Both results are
    summed and passed through a leaky ReLU.

    Args:
        input_dim: feature size expected by ``conv1``.
        base_outdim: feature size produced by the block.
        k: number of masks used by each ``AbstractLayer``.
        virtual_batch_size: chunk size forwarded to each ``AbstractLayer``.
        fix_input_dim: feature size of ``x`` as seen by the shortcut branch.
        drop_rate: dropout probability on the shortcut branch.
    """

    def __init__(self, input_dim, base_outdim, k, virtual_batch_size, fix_input_dim, drop_rate):
        super(BasicBlock, self).__init__()
        self.conv1 = AbstractLayer(input_dim, base_outdim // 2, k, virtual_batch_size)
        self.conv2 = AbstractLayer(base_outdim // 2, base_outdim, k, virtual_batch_size)

        self.downsample = nn.Sequential(
            nn.Dropout(drop_rate),
            AbstractLayer(fix_input_dim, base_outdim, k, virtual_batch_size)
        )

    def forward(self, x, pre_out=None):
        """Run the block.

        Args:
            x: tensor fed to the shortcut branch.
            pre_out: tensor fed to the main path; falls back to ``x`` when omitted.

        Returns:
            ``leaky_relu(conv2(conv1(pre_out)) + downsample(x), 0.01)``.
        """
        # Identity check: `pre_out == None` would go through Tensor.__eq__ and
        # only works because of how torch happens to compare a tensor to None.
        if pre_out is None:
            pre_out = x
        out = self.conv1(pre_out)
        out = self.conv2(out)
        identity = self.downsample(x)
        out += identity
        return F.leaky_relu(out, 0.01)


class DANet(nn.Module):
    """Stack of ``BasicBlock``s followed by a three-layer MLP head.

    Args:
        n_in: number of input features.
        n_out: size of the output produced by the head.
        layer_num: ``layer_num // 2`` blocks are built in total (one initial
            block plus ``layer_num // 2 - 1`` follow-up blocks).
        base_outdim: feature size produced by every block.
        k: number of masks per ``AbstractLayer``.
        virtual_batch_size: chunk size forwarded to the blocks.
        drop_rate: dropout probability on each block's shortcut branch.
        **kwargs: accepted and ignored.
    """

    def __init__(
        self,
        n_in,
        n_out,
        layer_num=20,
        base_outdim=64,
        k=5,
        virtual_batch_size=256,
        drop_rate=0.1,
        **kwargs,
    ):
        super().__init__()
        block_kwargs = dict(
            base_outdim=base_outdim,
            k=k,
            virtual_batch_size=virtual_batch_size,
            fix_input_dim=n_in,
            drop_rate=drop_rate,
        )
        self.init_layer = BasicBlock(n_in, **block_kwargs)
        self.lay_num = layer_num
        self.layer = nn.ModuleList(
            [BasicBlock(base_outdim, **block_kwargs) for _ in range(layer_num // 2 - 1)]
        )
        # Dropout before the head uses a fixed 0.1, independent of drop_rate.
        self.drop = nn.Dropout(0.1)

        head = [
            nn.Linear(base_outdim, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, n_out),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Feed ``x`` through all blocks, then dropout and the head."""
        out = self.init_layer(x)
        # Every follow-up block receives the raw input together with the
        # previous block's output.
        for block in self.layer:
            out = block(x, out)
        return self.fc(self.drop(out))

In [9]:
from collections import OrderedDict


class SimpleNet(nn.Module):
    """Small MLP: batch norm -> linear -> SiLU -> dropout -> linear.

    Args:
        n_in: number of input features.
        n_out: number of outputs.
        hidden_size: width of the hidden linear layer.
        drop_rate: dropout probability after the activation.
        **kwargs: accepted so that unrelated parameters can be passed; ignored.
    """

    def __init__(self, n_in, n_out, hidden_size=256, drop_rate=0.2, **kwargs):
        super().__init__()
        stages = OrderedDict([
            ("norm", nn.BatchNorm1d(n_in)),
            ("dense1", nn.Linear(n_in, hidden_size)),
            ("act", nn.SiLU()),
            ("dropout", nn.Dropout(p=drop_rate)),
            ("dense2", nn.Linear(hidden_size, n_out)),
        ])
        self.features = nn.Sequential(stages)

    def forward(self, x):
        """Apply the registered stages one after another."""
        out = x
        for stage in self.features.children():
            out = stage(out)
        return out

In [10]:
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
from lightautoml.automl.presets.text_presets import TabularNLPAutoML
from lightautoml.tasks import Task
import time


def fxn():
    """Emit a DeprecationWarning carrying the message "deprecated"."""
    warnings.warn("deprecated", category=DeprecationWarning)


# Call fxn() once inside a scope where every warning is ignored; the previous
# filter state is restored when the with-block exits.
with warnings.catch_warnings():
    warnings.simplefilter(action="ignore")
    fxn()

# Column roles handed to fit_predict: the target column and the columns to drop.
roles = {
    'target': 'is_male',
    'drop': ['user_id', 'index', "age"],
}

# Epoch budgets referenced from nn_params below.
default_nn_params = {"n_epochs": 200}
default_nn_params_2 = {"n_epochs": 100}

automl = TabularAutoML(
    task=Task(name='binary'),
    cpu_limit=24,
    timeout=3600 * 15000,
    gpu_ids='0',
    debug=True,
    tuning_params={'max_tuning_iter': 100},
    general_params={
        # One level of models; the custom DANet class is passed alongside
        # the built-in algorithm names.
        "use_algos": [[
            DANet, "resnet_tuned", "resnet", "dense", "denselight",
            "linear_l2", "lgb", "cb", "cb_tuned", "denselight_tuned",
        ]],
        'return_all_predictions': True,
        'weighted_blender_max_nonzero_coef': 0.0,
    },
    nn_pipeline_params={"use_te": True, "max_intersection_depth": 0},
    # NOTE(review): keys "0"/"1" presumably select the neural nets by their
    # position - confirm against the LightAutoML documentation.
    nn_params={
        "0": {**default_nn_params},
        "1": {**default_nn_params_2},
    },
    reader_params={'cv': 10, 'random_state': 42},
)

# Wall-clock timing of the whole fit; the elapsed time is reported in a later cell.
start_time = time.time()
oof_pred = automl.fit_predict(train, roles=roles, verbose=1)
end_time = time.time()

[20:21:06] Stdout logging level is INFO.
[20:21:06] Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer
[20:21:06] Task: binary

[20:21:06] Start automl preset with listed constraints:
[20:21:06] - time: 54000000.00 seconds
[20:21:06] - CPU: 24 cores
[20:21:06] - memory: 16 GB

[20:21:06] [1mTrain data shape: (257535, 3431)[0m

[20:22:22] Layer [1m1[0m train process start. Time left 53999924.06 secs
[20:22:55] Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...
[20:35:13] Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m0.8821279032716562[0m
[20:35:13] [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed
[20:35:13] Time left 53999152.44 secs

[20:56:14] [1mSelector_LightGBM[0m fitting and predicting completed
[20:56:37] Start fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m ...
[23:16:12] Fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m finished. score = [1m0.88855800722462[0m
[23:16:12] [1mLvl_0_Pipe_1_M

Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU


[23:19:45] Fitting [1mLvl_0_Pipe_1_Mod_1_CatBoost[0m finished. score = [1m0.8838861669990338[0m
[23:19:45] [1mLvl_0_Pipe_1_Mod_1_CatBoost[0m fitting and predicting completed
[23:19:45] Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_2_Tuned_CatBoost[0m ... Time budget is 300.00 secs


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU


[23:25:11] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_2_Tuned_CatBoost[0m completed
[23:25:11] Start fitting [1mLvl_0_Pipe_1_Mod_2_Tuned_CatBoost[0m ...


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU


[23:33:00] Fitting [1mLvl_0_Pipe_1_Mod_2_Tuned_CatBoost[0m finished. score = [1m0.8856357817061904[0m
[23:33:00] [1mLvl_0_Pipe_1_Mod_2_Tuned_CatBoost[0m fitting and predicting completed
[23:33:00] Time left 53988485.37 secs

[23:33:23] Start fitting [1mLvl_0_Pipe_2_Mod_0_TorchNN_0[0m ...
[01:34:49] Fitting [1mLvl_0_Pipe_2_Mod_0_TorchNN_0[0m finished. score = [1m0.8863459801514103[0m
[01:34:49] [1mLvl_0_Pipe_2_Mod_0_TorchNN_0[0m fitting and predicting completed
[01:34:49] Start hyperparameters optimization for [1mLvl_0_Pipe_2_Mod_1_Tuned_TorchNN_resnet_tuned_1[0m ... Time budget is 3600.00 secs
[02:37:53] Hyperparameters optimization for [1mLvl_0_Pipe_2_Mod_1_Tuned_TorchNN_resnet_tuned_1[0m completed
[02:37:53] Start fitting [1mLvl_0_Pipe_2_Mod_1_Tuned_TorchNN_resnet_tuned_1[0m ...
[03:16:50] Fitting [1mLvl_0_Pipe_2_Mod_1_Tuned_TorchNN_resnet_tuned_1[0m finished. score = [1m0.8918195829070951[0m
[03:16:50] [1mLvl_0_Pipe_2_Mod_1_Tuned_TorchNN_resnet_tuned_1[0m f

In [12]:
print(f"spend time: {end_time - start_time}")
# The preset was built with return_all_predictions=True, so oof_pred.data
# holds one column per model; only column 0 is scored here.
# .copy() keeps the NaN fill below from writing through the slice into
# oof_pred.data, which a later cell saves to CSV.
pred_ismale = oof_pred.data[:, 0].copy()
pred_ismale[np.isnan(pred_ismale)] = 0.5
# Label fixed: the score is computed against 'is_male', not age.
print(f"is_male roc auc (model column 0): {roc_auc_score(train['is_male'], pred_ismale)}")
print(automl.create_model_str_desc())

spend time: 33425.671926021576
age ca roc auc: 0.8821279032716562
Final prediction for new objects (level 0) = 
	 0.01658 * (10 averaged models Lvl_0_Pipe_0_Mod_0_LinearL2) +
	 0.25218 * (10 averaged models Lvl_0_Pipe_1_Mod_0_LightGBM) +
	 0.01771 * (10 averaged models Lvl_0_Pipe_1_Mod_1_CatBoost) +
	 0.01650 * (10 averaged models Lvl_0_Pipe_1_Mod_2_Tuned_CatBoost) +
	 0.07345 * (10 averaged models Lvl_0_Pipe_2_Mod_0_TorchNN_0) +
	 0.22431 * (10 averaged models Lvl_0_Pipe_2_Mod_1_Tuned_TorchNN_resnet_tuned_1) +
	 0.01749 * (10 averaged models Lvl_0_Pipe_2_Mod_2_TorchNN_resnet_2) +
	 0.10915 * (10 averaged models Lvl_0_Pipe_2_Mod_3_TorchNN_dense_3) +
	 0.09542 * (10 averaged models Lvl_0_Pipe_2_Mod_4_TorchNN_denselight_4) +
	 0.17722 * (10 averaged models Lvl_0_Pipe_2_Mod_5_Tuned_TorchNN_denselight_tuned_5) 


In [13]:
# Predict the test set and store one column per model next to user_id.
test_pred_ismale = automl.predict(test)
model_names = list(automl.collect_model_stats().keys())

sub = pd.DataFrame({'user_id': test['user_id']})
sub[model_names] = test_pred_ismale.data
sub = sub.sort_values("user_id")
sub.to_csv("test_warm_male_big_automl.csv", index=False)
sub.head()

Unnamed: 0,user_id
72084,6
14368,7
130234,9
100995,10
14369,11


In [17]:
# Store the out-of-fold predictions, one column per model, next to user_id.
oof = pd.DataFrame()
oof['user_id'] = train.user_id
oof[list(automl.collect_model_stats().keys())] = oof_pred.data
oof.to_csv("oof_warm_male_big_automl.csv", index=False)
# The preview must be the last expression of the cell, otherwise the notebook
# displays nothing for it.
oof.head()

Unnamed: 0,user_id,Lvl_0_Pipe_0_Mod_0_LinearL2,Lvl_0_Pipe_1_Mod_0_LightGBM,Lvl_0_Pipe_1_Mod_1_CatBoost,Lvl_0_Pipe_1_Mod_2_Tuned_CatBoost,Lvl_0_Pipe_2_Mod_0_TorchNN_0,Lvl_0_Pipe_2_Mod_1_Tuned_TorchNN_resnet_tuned_1,Lvl_0_Pipe_2_Mod_2_TorchNN_resnet_2,Lvl_0_Pipe_2_Mod_3_TorchNN_dense_3,Lvl_0_Pipe_2_Mod_4_TorchNN_denselight_4,Lvl_0_Pipe_2_Mod_5_Tuned_TorchNN_denselight_tuned_5
0,4,0.056811,0.048226,0.031019,0.03215,0.066619,0.031741,0.040632,0.025944,0.021272,0.037604
1,16,0.584372,0.858543,0.837406,0.854719,0.626704,0.701507,0.666221,0.674068,0.720842,0.708428
2,18,0.997506,0.994687,0.995327,0.995208,0.985653,0.984499,0.99121,0.992509,0.993675,0.991972
3,26,0.124199,0.211962,0.140921,0.119063,0.038975,0.116071,0.070326,0.102647,0.111554,0.089488
4,29,0.245797,0.418034,0.354164,0.424152,0.043008,0.126105,0.142034,0.07195,0.081098,0.180981
