In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Switch matplotlib figures to seaborn's default styling.
sns.set()

# Do not truncate wide DataFrames when they are displayed: show every column.
pd.set_option('display.max_columns', None)

# Reload imported modules automatically before each cell runs, so edits to
# local project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Training targets and the sample submission file.
target, sample = map(pd.read_csv, ("../data/target_train.csv", "../data/sample_submission.csv"))

# Every source table is shipped as a train/test pair of CSV files;
# each pair is read in (train, test) order.
plavki_train, plavki_test = map(pd.read_csv, ("../data/plavki_train.csv", "../data/plavki_test.csv"))
gas_train, gas_test = map(pd.read_csv, ("../data/gas_train.csv", "../data/gas_test.csv"))
chugun_train, chugun_test = map(pd.read_csv, ("../data/chugun_train.csv", "../data/chugun_test.csv"))
lom_train, lom_test = map(pd.read_csv, ("../data/lom_train.csv", "../data/lom_test.csv"))
produv_train, produv_test = map(pd.read_csv, ("../data/produv_train.csv", "../data/produv_test.csv"))
chronom_train, chronom_test = map(pd.read_csv, ("../data/chronom_train.csv", "../data/chronom_test.csv"))
sip_train, sip_test = map(pd.read_csv, ("../data/sip_train.csv", "../data/sip_test.csv"))

from pipeline import merge_data

# Options forwarded to merge_data as its last argument.
params = {"chugun": {}, "plavki": {"bow_count": 10}, "vector_size": 10}

# merge_data takes the frames positionally: submission/target first, then
# the train/test pair of every source table, then the options.
train, test, y, num_features, cat_features = merge_data(
    sample, target,
    plavki_train, plavki_test,
    gas_train, gas_test,
    chugun_train, chugun_test,
    lom_train, lom_test,
    produv_train, produv_test,
    chronom_train, chronom_test,
    sip_train, sip_test,
    params,
)

  lom_train_transformed['ves_loma/ves_chuguna'] = lom_train_transformed['ves_loma'].values/chugun_train['VES'].values
  lom_test_transformed['ves_loma/ves_chuguna'] = lom_test_transformed['ves_loma'].values/chugun_test['VES'].values


  0%|          | 0/2063 [00:00<?, ?it/s]

  0%|          | 0/780 [00:00<?, ?it/s]

In [3]:
from sklearn.model_selection import KFold, TimeSeriesSplit
from tqdm.auto import tqdm, trange

def metric(c_true, tst_true, c_pred, tst_pred, pwc=None, pwt=None, c_tol=0.02, t_tol=20):
    """Hit-rate scores for the carbon ("C") and temperature ("TST") predictions.

    A prediction counts as a hit when its absolute error is strictly below the
    tolerance for that target. Truth and prediction are compared element by
    element in positional order; pandas indices are deliberately ignored, so
    passing two Series with different indices cannot trigger label alignment.

    Parameters
    ----------
    c_true, tst_true : array-like
        True carbon / temperature values on the original scale.
    c_pred, tst_pred : array-like
        Predicted values. When ``pwc`` / ``pwt`` is given, the corresponding
        predictions are mapped through its ``inverse_transform`` first.
    pwc, pwt : object or None
        Optional transformers exposing ``inverse_transform`` on an ``(n, 1)``
        array.
    c_tol, t_tol : float
        Absolute-error tolerances for carbon and temperature
        (defaults 0.02 and 20).

    Returns
    -------
    tuple
        ``(carbon hit rate, temperature hit rate, mean of the two)``, each
        divided by the number of carbon predictions.
    """
    # Strip any pandas index so the arithmetic below is purely positional.
    c_true = np.asarray(c_true).reshape(-1)
    tst_true = np.asarray(tst_true).reshape(-1)
    c_pred = np.asarray(c_pred).reshape(-1)
    tst_pred = np.asarray(tst_pred).reshape(-1)

    # Bring predictions back to the original target scale if they were
    # produced in a transformed space.
    if pwc is not None:
        c_pred = np.asarray(pwc.inverse_transform(c_pred.reshape(-1, 1))).reshape(-1)
    if pwt is not None:
        tst_pred = np.asarray(pwt.inverse_transform(tst_pred.reshape(-1, 1))).reshape(-1)

    hit_rate_c = (np.abs(c_true - c_pred) < c_tol).astype(np.int64)
    hit_rate_t = (np.abs(tst_true - tst_pred) < t_tol).astype(np.int64)

    N = c_pred.shape[0]
    return np.sum(hit_rate_c) / N, np.sum(hit_rate_t) / N, np.sum(hit_rate_c + hit_rate_t) / 2 / N

def pipeline(model_c, model_tst, train_c, test_c, train_tst, test_tst, y, sample, n_splits=10, pwc=None, pwt=None,
             random_state=None):
    """Cross-validate the carbon and temperature models and average their test predictions.

    For each fold of a shuffled K-fold split, both models are re-fitted on the
    training part, scored on the held-out part with ``metric``, and used to
    predict the test matrices. Test predictions are averaged over the folds.
    Mean and standard deviation of the per-fold scores are printed.

    Parameters
    ----------
    model_c, model_tst : estimator
        Objects exposing ``fit(X, y)`` and ``predict(X)``; re-fitted on every fold.
    train_c, train_tst : array
        Training feature matrices for the carbon / temperature model, indexed
        with integer index arrays (``train_c[train_idx]``).
    test_c, test_tst : array
        Test feature matrices passed to ``predict``.
    y : pandas.DataFrame
        Targets with columns ``"C"`` and ``"TST"``, positionally aligned with
        the rows of the training matrices.
    sample : pandas.DataFrame
        Frame that receives the ``"C"`` and ``"TST"`` predictions. It is NOT
        modified; a filled-in copy is returned.
    n_splits : int
        Number of folds.
    pwc, pwt : object or None
        Optional transformers exposing ``transform`` / ``inverse_transform``
        on ``(n, 1)`` arrays. When given, the model is trained on the
        transformed target, and the averaged test predictions are mapped back
        at the end.
    random_state : int or None
        Seed forwarded to ``KFold`` so the shuffled split is reproducible.
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        Copy of ``sample`` with fold-averaged ``"C"`` (clipped to [0, 1]) and
        ``"TST"`` predictions.
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Work on a copy: the caller's frame must not be overwritten as a side
    # effect of running cross-validation.
    sample = sample.copy()
    sample["C"] = 0.0
    sample["TST"] = 0.0

    res_c, res_t, res = [], [], []
    for train_idx, test_idx in tqdm(kf.split(train_c), total=n_splits):
        cur_train_c = train_c[train_idx]
        cur_eval_c = train_c[test_idx]

        cur_train_tst = train_tst[train_idx]
        cur_eval_tst = train_tst[test_idx]

        cur_train_y = y.iloc[train_idx]
        cur_eval_y = y.iloc[test_idx]

        # Train in the transformed target space when a transformer is supplied.
        if pwc is not None:
            model_c.fit(cur_train_c, pwc.transform(cur_train_y["C"].values.reshape(-1, 1)).reshape(-1))
        else:
            model_c.fit(cur_train_c, cur_train_y["C"])

        if pwt is not None:
            model_tst.fit(cur_train_tst, pwt.transform(cur_train_y["TST"].values.reshape(-1, 1)).reshape(-1))
        else:
            model_tst.fit(cur_train_tst, cur_train_y["TST"])

        eval_pred_c = model_c.predict(cur_eval_c)
        eval_pred_tst = model_tst.predict(cur_eval_tst)

        # Accumulate the fold average of the test predictions. They are still
        # in the transformed space if pwc / pwt is set.
        sample["C"] += model_c.predict(test_c) / n_splits
        sample["TST"] += model_tst.predict(test_tst) / n_splits

        # metric() maps the held-out predictions back to the original scale itself.
        hit_rate_c, hit_rate_t, hit_rate = metric(cur_eval_y["C"], cur_eval_y["TST"], eval_pred_c, eval_pred_tst, pwc, pwt)
        res_c.append(hit_rate_c)
        res_t.append(hit_rate_t)
        res.append(hit_rate)

    res_c = np.array(res_c)
    res_t = np.array(res_t)
    res = np.array(res)
    print(f"Carbon score: {res_c.mean()} ± {res_c.std()}")
    print(f"Temperature score: {res_t.mean()} ± {res_t.std()}")
    print(f"Overall score: {res.mean()} ± {res.std()}")

    # Map the averaged test predictions back to the original target scale.
    if pwt is not None:
        sample["TST"] = pwt.inverse_transform(sample["TST"].values.reshape(-1, 1)).reshape(-1)
    if pwc is not None:
        sample["C"] = pwc.inverse_transform(sample["C"].values.reshape(-1, 1)).reshape(-1)

    sample["C"] = sample["C"].clip(0, 1)

    return sample

In [17]:
from sklearn.preprocessing import PowerTransformer

# Box-Cox transformers, one per target; fitted on the full target columns below.
pwrC, pwrT = PowerTransformer('box-cox'), PowerTransformer('box-cox')

# A second pair using PowerTransformer's default method.
pwrC2, pwrT2 = PowerTransformer(), PowerTransformer()

# Transformed copy of the targets; `y` itself stays on the original scale.
y_exp = y.copy()
y_exp["C"] = pwrC.fit_transform(y[["C"]].values)
y_exp["TST"] = pwrT.fit_transform(y[["TST"]].values)

# The default-method transformers are only fitted here, not applied.
pwrC2.fit(y[["C"]].values)
pwrT2.fit(y[["TST"]].values)

PowerTransformer()

In [42]:
# Snapshot the merged frames so later cells can restore them.
train_backup, test_backup = train.copy(), test.copy()

In [48]:
# Reset the working frames from the snapshots (fresh copies, so the
# snapshots themselves stay untouched).
train, test = train_backup.copy(), test_backup.copy()

In [8]:
# Gas feature tables filtered separately for each target, indexed by NPLV.
gas_filtered_for_C = pd.read_csv("../gas/gas_filtered_for_C.csv").set_index(keys="NPLV")
gas_filtered_for_TST = pd.read_csv("../gas/gas_filtered_for_TST.csv").set_index(keys="NPLV")

# Gas feature table for the test set, indexed the same way.
gas_test_autofeatures = pd.read_csv("../gas/gas_test_autofeatures.csv").set_index(keys="NPLV")

In [9]:
# Training-side gas features are the filtered tables as loaded (aliases, no copy).
train_gas_C, train_gas_TST = gas_filtered_for_C, gas_filtered_for_TST

# Keep, for the test set, exactly the columns present in the matching
# filtered training table.
test_gas_C = gas_test_autofeatures.loc[:, gas_filtered_for_C.columns]
test_gas_TST = gas_test_autofeatures.loc[:, gas_filtered_for_TST.columns]

In [49]:
# Attach the target-specific gas features to the merged frames, matching the
# "NPLV" column against the gas tables' index.
train_C, train_TST = (train.join(gas, on="NPLV") for gas in (train_gas_C, train_gas_TST))
test_C, test_TST = (test.join(gas, on="NPLV") for gas in (test_gas_C, test_gas_TST))

In [46]:
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor

# One independent booster per target, identically configured.
model_c, model_tst = (XGBRegressor(n_estimators=40, n_jobs=-1) for _ in range(2))

# Alternative tried here: RandomForestRegressor(n_estimators=50, n_jobs=-1)
# for both targets.

# 50-fold cross-validation; targets are modelled in Box-Cox space via pwrC / pwrT.
res = pipeline(
    model_c, model_tst,
    train_C.values, test_C.values,
    train_TST.values, test_TST.values,
    y, sample,
    n_splits=50, pwc=pwrC, pwt=pwrT,
)

  0%|          | 0/50 [00:00<?, ?it/s]

Carbon score: 0.7371660859465738 ± 0.06580878338775255
Temperature score: 0.6718118466898956 ± 0.07481081467917064
Overall score: 0.7044889663182348 ± 0.05393976342579064


In [50]:
# Fresh boosters fitted on the whole training set with untransformed targets.
model_c = XGBRegressor(n_jobs=-1, n_estimators=50)
model_tst = XGBRegressor(n_jobs=-1, n_estimators=50)

model_c.fit(X=train_C, y=y["C"])
# Left as the last expression so the cell still displays the fitted estimator.
model_tst.fit(X=train_TST, y=y["TST"])

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=50, n_jobs=-1, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [52]:
# Dump the per-target training matrices and the targets to the working
# directory, without the index column.
train_C.to_csv(path_or_buf="train_C.csv", index=False)
train_TST.to_csv(path_or_buf="train_TST.csv", index=False)

y.to_csv(path_or_buf="y.csv", index=False)

Unnamed: 0_level_0,gas_duration,gas_mean_V,gas_mean_T,gas_mean_O2,gas_mean_N2,gas_mean_H2,gas_mean_CO2,gas_mean_CO,gas_mean_AR,gas_mean_T фурмы 1,gas_mean_T фурмы 2,gas_mean_O2_pressure,gas_std_V,gas_std_T,gas_std_O2,gas_std_N2,gas_std_H2,gas_std_CO2,gas_std_CO,gas_std_AR,gas_std_T фурмы 1,gas_std_T фурмы 2,gas_std_O2_pressure,gas_sum_volume_O2,gas_sum_volume_N2,gas_sum_volume_H2,gas_sum_volume_CO2,gas_sum_volume_CO,gas_sum_volume_AR,gas_mean_volume_O2,gas_mean_volume_N2,gas_mean_volume_H2,gas_mean_volume_CO2,gas_mean_volume_CO,gas_mean_volume_AR,gas_std_volume_O2,gas_std_volume_N2,gas_std_volume_H2,gas_std_volume_CO2,gas_std_volume_CO,gas_std_volume_AR,dayofmonth,hour,duration,truncated_NMZ,st_diff_is_zero,dayofweek,plavka_TIPE_GOL,plavka_TIPE_FUR,plavka_NAPR_ZAD,bow_.z0,bow_/Э,bow_/ЭТ,bow_3пс,bow_SC2,bow_Ст3,bow_ЭТ,bow_пс/,bow_с/Э,bow_т3п,VES,T,SI,MN,S,P,CR,NI,CU,V,TI,si_portion,mn_portion,s_portion,p_portion,cr_portion,ni_portion,cu_portion,v_portion,ti_portion,ves_loma,ves_loma/ves_chuguna,durationproduv_,RAS_mean,POL_mean,0,1,2,3,4,5,6,7,8,9,O2_вн.пл.прост._0,O2_межпл.прост._0,O2_межпл.прост._1,O2_опер_0,total_duration_вн.пл.прост._0,total_duration_межпл.прост._0,total_duration_межпл.прост._1,total_duration_опер_0,min_duration_вн.пл.прост._0,min_duration_межпл.прост._0,min_duration_межпл.прост._1,min_duration_опер_0,max_duration_вн.пл.прост._0,max_duration_межпл.прост._0,max_duration_межпл.прост._1,max_duration_опер_0,total_operations_вн.пл.прост._0,total_operations_межпл.прост._0,total_operations_межпл.прост._1,total_operations_опер_0,min_mass,max_mass,total_count,unique_count,min_ratio,max_ratio,unique_ratio,w2v_0,w2v_1,w2v_2,w2v_3,w2v_4,w2v_5,w2v_6,w2v_7,w2v_8,w2v_9,w2v_10,w2v_11,w2v_12,w2v_13,w2v_14,w2v_15,w2v_16,w2v_17,w2v_18,w2v_19,w2v_20,w2v_sip_0,w2v_sip_1,w2v_sip_2,w2v_sip_3,w2v_sip_4,w2v_sip_5,w2v_sip_6,w2v_sip_7,w2v_sip_8,w2v_sip_9,w2v_sip_10,w2v_sip_11,w2v_sip_12,w2v_sip_13,w2v_sip_14,w2v_sip_15,w2v_sip_16,w2v_sip_17,w2v_sip_18,w2v_sip_19,w2v_sip_20
,"CO__fft_coefficient__attr_""imag""__coeff_1","T__fft_coefficient__attr_""imag""__coeff_1","T фурмы 2__change_quantiles__f_agg_""var""__isabs_True__qh_1.0__ql_0.2","CO2__change_quantiles__f_agg_""mean""__isabs_True__qh_0.4__ql_0.0","T__agg_linear_trend__attr_""stderr""__chunk_len_5__f_agg_""min""","CO2__fft_coefficient__attr_""abs""__coeff_25",AR__energy_ratio_by_chunks__num_segments_10__segment_focus_7,"T фурмы 2__change_quantiles__f_agg_""var""__isabs_True__qh_1.0__ql_0.4",N2__quantile__q_0.9,CO2__sum_values,CO__energy_ratio_by_chunks__num_segments_10__segment_focus_6,CO2__energy_ratio_by_chunks__num_segments_10__segment_focus_9,T__index_mass_quantile__q_0.6,"CO2__fft_coefficient__attr_""abs""__coeff_57","O2__fft_coefficient__attr_""abs""__coeff_68","CO__fft_coefficient__attr_""imag""__coeff_2",CO2__index_mass_quantile__q_0.8,"T фурмы 2__change_quantiles__f_agg_""mean""__isabs_True__qh_1.0__ql_0.6","N2__fft_coefficient__attr_""abs""__coeff_65","CO2__change_quantiles__f_agg_""var""__isabs_False__qh_0.4__ql_0.0","T фурмы 2__change_quantiles__f_agg_""var""__isabs_False__qh_1.0__ql_0.6","CO2__fft_coefficient__attr_""abs""__coeff_16",T__absolute_sum_of_changes,"T__agg_linear_trend__attr_""stderr""__chunk_len_10__f_agg_""mean""","O2__fft_coefficient__attr_""abs""__coeff_52",N2__quantile__q_0.7,"T__agg_linear_trend__attr_""stderr""__chunk_len_10__f_agg_""max""",CO2__index_mass_quantile__q_0.9,"V__fft_coefficient__attr_""abs""__coeff_13",CO__index_mass_quantile__q_0.6,T__last_location_of_maximum,AR__sum_of_reoccurring_values,CO__energy_ratio_by_chunks__num_segments_10__segment_focus_8,AR__quantile__q_0.9,"CO2__fft_coefficient__attr_""abs""__coeff_27","CO2__fft_coefficient__attr_""real""__coeff_2","CO2__fft_coefficient__attr_""imag""__coeff_2",AR__autocorrelation__lag_2,"N2__fft_aggregated__aggtype_""variance""","T фурмы 2__fft_coefficient__attr_""abs""__coeff_13",AR__autocorrelation__lag_7,"T фурмы 
2__fft_coefficient__attr_""imag""__coeff_2","O2_pressure__fft_coefficient__attr_""real""__coeff_8","AR__augmented_dickey_fuller__attr_""teststat""__autolag_""AIC""","N2__fft_coefficient__attr_""abs""__coeff_66",O2__count_below_mean,AR__autocorrelation__lag_8,AR__cid_ce__normalize_True,"T фурмы 2__fft_coefficient__attr_""abs""__coeff_9","N2__fft_coefficient__attr_""abs""__coeff_16"
NPLV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1
510008,2560,216789.292999,506.912198,8.080398,60.971011,0.300348,13.646908,16.236455,0.801776,0.000000,0.000000,13.356058,5369.811888,273.329244,8.601033,19.843291,0.798206,7.340789,20.627802,0.189599,0.000000,0.000000,0.292023,4.502321e+07,3.391881e+08,1.624003e+06,7.575220e+07,8.913852e+07,4.458630e+06,17587.192761,132495.366168,634.376020,29590.704477,34819.734247,1741.652526,18701.426346,43818.901201,1655.726824,15946.086198,44086.505490,425.909341,1,3,2579.0,17.0,1,4,0,1.0,1.0,0,0,0,0,0,0,0,0,0,0,263700.0,1396.0,0.44,0.22,0.023,0.097,0.03,0.01,0.03,0.103,0.084,0.000002,8.342814e-07,8.722033e-08,3.678422e-07,1.137656e-07,3.792188e-08,1.137656e-07,3.905954e-07,3.185438e-07,76200,0.288965,1168.0,833.024315,1.145962,-1.156259,8.058909,7.624763,6.477648,4.481481,5.410652,6.022574,5.195676,5.381444,5.631197,0.0,0.0,0.0,2909.0,246.0,0.0,840.0,2489.0,246.0,0.0,46.0,16.0,246.0,0.0,424.0,1170.0,1.0,0.0,5.0,6.0,220,7300,12,4,0.000834,0.027683,0.333333,0.055480,0.044956,0.181583,0.068003,0.222904,0.044101,0.447012,0.412328,-0.289287,-0.125010,0.209294,0.165494,0.242884,0.189409,0.256750,0.192854,0.238297,0.191987,0.161226,0.218433,4.000000,0.037566,-0.177224,0.081650,0.287576,0.095227,-0.250882,0.429995,-0.530720,-0.426937,-0.219830,0.179243,0.107382,0.051450,0.103938,0.115363,0.113439,0.076093,0.051423,0.053506,0.118572,3.464102,-8605.983138,-35146.443133,0.000000,0.032945,0.081186,411.369958,0.141490,0.000000,78.139999,34936.083312,0.070156,0.050924,0.550391,282.453301,199.167171,8076.023524,0.755469,0.000000,386.011768,0.031513,0.000000,298.243939,2072.569473,0.230205,606.298966,78.139999,0.230518,0.876953,1.088954e+06,0.486328,0.651563,43.160,0.000054,0.980000,154.108548,721.767360,2509.633109,0.994717,32503.751532,0.000000,0.971516,0.000000,22.364470,-1.799046,110.091100,1154.0,0.966172,3.273249,0.000000,916.630280
510009,3949,217054.421867,375.840941,10.985339,64.784377,0.163313,11.761720,11.466482,0.838266,0.000000,0.000000,13.950163,4250.571350,291.238099,10.616286,16.819827,0.538952,8.095644,18.080320,0.167612,0.000000,0.000000,1.003299,9.485353e+07,5.562788e+08,1.364358e+06,1.004032e+08,9.705018e+07,7.193678e+06,24019.631951,140865.726324,345.494449,25424.967838,24575.886359,1821.645526,23278.991109,37231.830524,1123.023713,17501.298735,38641.706115,373.312144,1,4,4004.0,17.0,1,4,0,1.0,1.0,0,0,0,0,0,0,0,0,0,0,264500.0,1419.0,0.68,0.20,0.017,0.087,0.02,0.01,0.03,0.084,0.096,0.000003,7.561437e-07,6.427221e-08,3.289225e-07,7.561437e-08,3.780718e-08,1.134216e-07,3.175803e-07,3.629490e-07,78600,0.297164,1226.0,832.179153,1.346743,-0.674527,12.779936,5.824023,2.713871,4.521441,5.085553,6.196332,5.750713,4.999381,5.749108,0.0,0.0,0.0,2182.0,922.0,0.0,1140.0,2987.0,302.0,0.0,54.0,31.0,620.0,0.0,574.0,1230.0,2.0,0.0,6.0,6.0,10,9950,15,4,0.000038,0.037618,0.266667,0.012371,0.023209,0.195905,0.080198,0.166644,0.002629,0.485046,0.397635,-0.303890,-0.125385,0.217216,0.161063,0.237895,0.181988,0.255135,0.188436,0.246306,0.193136,0.144435,0.207359,4.242640,-0.024514,-0.163379,0.068872,0.265739,0.126926,-0.260764,0.428321,-0.540686,-0.400133,-0.155253,0.183156,0.100941,0.062302,0.111288,0.133587,0.125069,0.107017,0.074302,0.103236,0.179183,3.872983,12615.968082,335435.057618,0.000000,0.021750,0.044691,492.526373,0.134152,0.000000,77.190002,46447.033896,0.269381,0.116896,0.609521,233.351330,535.508247,-15569.025202,0.848063,0.000000,311.423357,0.016360,0.000000,1099.100337,2177.430191,0.126849,835.746951,77.190002,0.127498,0.924031,5.178729e+05,0.575336,0.694859,40.710,0.000210,0.990000,861.500083,5777.297835,1464.518369,0.995089,82312.947393,0.000000,0.975110,0.000000,78.271266,-2.083097,187.909749,2419.0,0.970498,4.406261,0.000000,3953.110610
510010,2871,215606.942311,489.881937,8.745518,62.633599,0.312984,12.723079,14.755786,0.828832,0.000000,0.000000,14.022366,6659.001169,301.143710,9.489587,19.181862,0.797407,8.108025,19.036043,0.167892,0.000000,0.000000,0.330850,5.488448e+07,3.886485e+08,1.874444e+06,7.840161e+07,9.005883e+07,5.138755e+06,19116.849917,135370.437027,652.889050,27308.119240,31368.453349,1789.883300,20947.742464,42260.955051,1623.914332,17333.228492,40336.007001,376.987465,1,5,2904.0,21.0,1,4,0,1.0,0.0,0,1,0,1,0,1,0,1,1,1,263800.0,1384.0,0.56,0.26,0.017,0.096,0.03,0.01,0.03,0.115,0.110,0.000002,9.855951e-07,6.444276e-08,3.639121e-07,1.137225e-07,3.790751e-08,1.137225e-07,4.359363e-07,4.169826e-07,76300,0.289234,1222.0,821.524510,1.262255,-0.704444,12.429848,5.910546,2.883735,4.516992,5.105981,6.147994,5.715422,5.011985,5.797691,0.0,0.0,0.0,2841.0,0.0,0.0,594.0,2796.0,0.0,0.0,55.0,133.0,0.0,0.0,301.0,1226.0,0.0,0.0,4.0,6.0,10,5050,13,5,0.000038,0.019143,0.384615,0.063352,0.027522,0.163736,0.079687,0.220757,0.027317,0.421971,0.412206,-0.296892,-0.118194,0.218725,0.169707,0.253276,0.199308,0.261907,0.186265,0.240514,0.205209,0.156947,0.232613,3.741657,0.015712,-0.175391,0.068200,0.268593,0.121434,-0.236177,0.407831,-0.547900,-0.397414,-0.176144,0.188116,0.103365,0.067929,0.119567,0.143273,0.120305,0.106019,0.077347,0.114464,0.189419,3.605551,-3882.563299,-19960.450515,0.000000,0.026250,0.075629,575.231166,0.142594,0.000000,79.639999,36527.960575,0.172881,0.043646,0.554511,282.234867,114.255264,2730.931321,0.745037,0.000000,801.583059,0.032230,0.000000,1206.978333,2200.347389,0.213940,304.735071,79.639999,0.214378,0.872518,1.091175e+06,0.510623,0.673981,39.450,0.000073,1.010000,269.220022,2745.330143,4398.249905,0.996704,38920.738226,0.000000,0.984350,0.000000,12.296795,-1.408791,486.711648,2144.0,0.981467,2.941612,0.000000,2574.851443
510011,3261,218908.844905,439.273874,9.016227,62.605303,0.100366,13.566362,13.771099,0.835434,0.000000,0.000000,14.250926,5341.347771,267.257197,8.395936,17.444834,0.215926,7.790133,18.094440,0.194226,0.000000,0.000000,0.665478,6.478924e+07,4.477267e+08,7.091111e+05,9.674182e+07,9.717645e+07,5.974200e+06,19867.905305,137297.364374,217.452049,29666.305531,29799.586702,1832.014654,18607.672797,38883.156660,468.388798,17089.798747,39012.937295,437.916480,1,6,3291.0,16.0,1,4,0,1.0,0.0,1,0,0,0,0,0,0,0,0,0,264000.0,1401.0,0.48,0.27,0.018,0.091,0.03,0.01,0.02,0.112,0.110,0.000002,1.022727e-06,6.818182e-08,3.446970e-07,1.136364e-07,3.787879e-08,7.575758e-08,4.242424e-07,4.166667e-07,84100,0.318561,1056.0,805.565217,1.442741,-0.561065,-3.350970,7.085355,2.852356,4.773051,5.025595,6.558962,4.313759,5.554585,5.484573,0.0,0.0,0.0,10.0,0.0,0.0,1542.0,2966.0,0.0,0.0,42.0,17.0,0.0,0.0,775.0,1058.0,0.0,0.0,4.0,7.0,320,5020,13,4,0.001212,0.019015,0.307692,0.045713,0.014881,0.145655,0.053311,0.270954,0.002451,0.398979,0.407824,-0.287521,-0.138730,0.239591,0.164172,0.242634,0.225279,0.224384,0.202971,0.270060,0.202800,0.140143,0.231441,3.872983,-0.005551,-0.176102,0.065530,0.266739,0.118550,-0.243189,0.415591,-0.548270,-0.390773,-0.165750,0.189764,0.102677,0.066295,0.119511,0.141650,0.125427,0.109540,0.077063,0.107890,0.190314,3.605551,-24283.951881,-299640.091202,0.000000,0.045483,0.052171,1003.270172,0.144166,0.000000,75.919998,44239.906145,0.001513,0.044355,0.478688,445.564041,245.265593,16562.235134,0.739650,0.000000,691.368817,0.054759,0.000000,2632.482727,2957.291557,0.147683,376.504951,75.919998,0.148374,0.869979,2.675320e+06,0.401717,0.253297,44.380,0.000031,1.030000,1381.362069,548.106196,5311.577201,0.995943,57590.782899,0.000000,0.977626,0.000000,-140.562131,-2.107981,728.884793,1412.0,0.973405,3.709498,0.000000,5199.373266
510012,2860,217981.805452,478.608197,8.470485,62.286450,0.442823,13.643787,14.273926,0.802074,0.000000,0.000000,14.259779,5757.933597,282.254884,9.054424,18.477454,0.878580,8.633744,18.366216,0.182802,0.000000,0.000000,0.652421,5.328600e+07,3.894694e+08,2.698631e+06,8.476063e+07,8.769451e+07,5.013289e+06,18631.467155,136178.120683,943.577316,29636.583246,30662.417509,1752.898361,19987.724321,41474.251736,1859.532540,18623.947548,39404.426535,419.124607,1,7,2895.0,14.0,1,4,0,1.0,2.0,0,0,1,0,1,0,1,0,0,0,263300.0,1422.0,0.47,0.23,0.018,0.096,0.02,0.01,0.03,0.083,0.070,0.000002,8.735283e-07,6.836308e-08,3.646031e-07,7.595898e-08,3.797949e-08,1.139385e-07,3.152298e-07,2.658564e-07,76100,0.289024,1236.0,813.665590,1.141244,-0.585319,12.929362,5.491992,1.965759,4.635198,4.976575,6.157715,5.814332,4.916671,5.827915,0.0,0.0,0.0,3225.0,0.0,0.0,601.0,2836.0,0.0,0.0,43.0,54.0,0.0,0.0,305.0,1239.0,0.0,0.0,4.0,9.0,40,4980,16,5,0.000152,0.018914,0.312500,0.095475,0.022746,0.165881,0.094492,0.257441,0.039452,0.389931,0.419327,-0.282776,-0.119992,0.222097,0.177258,0.239524,0.190702,0.263596,0.189727,0.254250,0.193427,0.165760,0.233128,4.000000,-0.051226,-0.133820,0.062210,0.240138,0.178648,-0.267566,0.418415,-0.546023,-0.398936,-0.108317,0.169017,0.085487,0.074373,0.112453,0.132587,0.122628,0.122334,0.091360,0.143541,0.199359,4.000000,-4995.763723,13747.313485,0.000000,0.060036,0.071505,517.498075,0.124193,0.000000,79.680132,39021.231253,0.141232,0.095285,0.563287,31.630515,390.504780,4216.954914,0.826923,0.000000,329.865651,0.074155,0.000000,1578.822627,2219.444336,0.202809,275.894442,75.199997,0.203340,0.913636,1.424784e+06,0.507343,0.319930,44.835,0.000021,0.980000,127.211707,4733.349319,7349.046090,0.994653,42187.391212,0.000000,0.966217,0.000000,10.232734,-2.420528,149.879325,1933.0,0.959468,3.605736,0.000000,1975.508842
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512318,3060,208248.303329,439.574555,4.152793,53.516385,1.191883,17.997605,22.405678,0.736660,28.701842,25.883701,15.004221,7548.831043,246.178558,5.610260,21.662747,1.267843,8.394836,20.068151,0.219879,5.081189,0.080459,0.990635,2.659664e+07,3.426617e+08,7.449856e+06,1.149322e+08,1.408978e+08,4.706967e+06,8691.713694,111980.943607,2434.593391,37559.535747,46045.018769,1538.224385,11749.972138,45978.755732,2509.652248,17805.931661,40458.211385,468.885956,26,13,3084.0,7.0,0,0,0,0.0,2.0,1,0,1,0,0,0,1,0,0,0,267200.0,1415.0,0.38,0.28,0.019,0.099,0.02,0.01,0.02,0.081,0.060,0.000001,1.047904e-06,7.110778e-08,3.705090e-07,7.485030e-08,3.742515e-08,7.485030e-08,3.031437e-07,2.245509e-07,73600,0.275449,1060.0,853.589454,1.487194,-0.570159,-3.360159,7.136364,3.007238,4.801440,5.021038,6.548068,4.302829,5.579718,5.512657,0.0,0.0,0.0,1917.0,932.0,0.0,747.0,3064.0,179.0,0.0,94.0,0.0,753.0,0.0,377.0,1063.0,2.0,0.0,3.0,9.0,30,3700,15,2,0.000112,0.013847,0.133333,0.091610,0.031375,0.165468,0.066474,0.331668,0.099350,0.365745,0.421079,-0.234726,-0.120819,0.223927,0.177292,0.239277,0.194475,0.232555,0.210077,0.256852,0.188604,0.167369,0.231861,4.000000,-0.110916,-0.079442,0.095220,0.262453,0.186370,-0.362848,0.497617,-0.490741,-0.472487,-0.105040,0.093965,0.003142,0.011798,0.008196,0.012746,0.030990,0.034294,0.001638,0.029348,0.045935,3.872983,-27462.585004,-432045.165999,0.000040,0.066592,0.047443,969.986710,0.103482,0.000033,81.000000,55072.672708,0.050200,0.019816,0.433660,278.908913,384.915706,5734.342882,0.696405,0.001744,303.349002,0.074133,0.000044,999.633792,2044.444061,0.134398,447.557080,77.099998,0.134400,0.810458,6.455052e+05,0.409804,0.466993,44.730,0.000151,1.009031,460.016851,-3417.294890,-6563.463495,0.998082,47050.739424,15.693736,0.990315,112.672036,152.360703,-1.567197,397.991700,1946.0,0.988522,1.983710,12.467844,2376.107609
512319,3803,206951.068996,336.801224,8.228796,62.478096,0.696861,12.629975,15.130581,0.831568,28.272125,26.229685,14.582193,6563.914573,264.291825,6.208800,22.759382,1.188099,7.192062,22.684423,0.232025,4.975858,0.087734,0.983222,6.564858e+07,4.959340e+08,5.250390e+06,9.879792e+07,1.147826e+08,6.588681e+06,17262.314047,130405.990406,1380.591525,25978.942183,30182.131880,1732.495661,13041.372039,48829.070068,2297.477335,14707.273186,44622.023951,508.801136,26,14,3843.0,7.0,0,0,0,0.0,2.0,1,0,1,0,0,0,1,0,0,0,266800.0,1405.0,0.50,0.30,0.017,0.104,0.02,0.01,0.02,0.079,0.081,0.000002,1.124438e-06,6.371814e-08,3.898051e-07,7.496252e-08,3.748126e-08,7.496252e-08,2.961019e-07,3.035982e-07,76600,0.287106,1106.0,851.745487,1.286227,-0.833243,-0.390571,7.876576,5.187843,4.629604,5.283978,6.331582,4.512397,5.647919,5.563581,0.0,0.0,0.0,1660.0,1314.0,0.0,1734.0,3783.0,1314.0,0.0,33.0,0.0,1314.0,0.0,870.0,1415.0,1.0,0.0,5.0,8.0,10,3710,15,3,0.000037,0.013906,0.200000,0.087417,0.025163,0.131707,0.056601,0.229480,0.088694,0.410350,0.396646,-0.331454,-0.079962,0.209899,0.179913,0.224604,0.197226,0.261390,0.201279,0.266822,0.186857,0.191619,0.226362,4.242640,-0.104070,-0.094874,0.093371,0.268608,0.169085,-0.352519,0.493352,-0.495007,-0.461225,-0.113295,0.112985,0.055647,0.008708,0.029238,0.056761,0.063924,0.046655,0.014892,0.030847,0.070692,3.872983,27491.474689,456447.069026,0.000030,0.017107,0.039690,995.800556,0.080599,0.000040,78.160004,48031.796573,0.376164,0.019135,0.654483,226.706080,581.817842,-28819.451392,0.748620,0.000691,131.372266,0.008791,0.000010,2143.415204,2099.820115,0.112523,153.384561,77.099998,0.113029,0.804628,2.114738e+06,0.617407,0.489351,49.480,0.000530,1.030000,574.647149,-1247.820166,-5539.648227,0.999284,53627.801959,2.588853,0.996035,-193.589012,2.895442,-1.428536,512.295426,1429.0,0.995234,1.808791,17.310259,2913.329408
512320,3281,204139.850108,390.420212,6.857118,56.626062,1.016596,13.776500,20.953071,0.772488,28.442658,26.432436,14.911266,7688.084786,242.283799,6.598437,24.151341,1.608945,7.782328,23.908767,0.236470,4.749328,0.141237,0.655360,4.690546e+07,3.839843e+08,6.466457e+06,9.173897e+07,1.354799e+08,5.219883e+06,14296.086335,117032.712352,1970.879806,27960.673621,41292.265092,1590.942698,13772.352381,51525.321369,2992.428589,15803.015948,46076.419562,519.163936,26,15,3305.0,16.0,0,0,0,0.0,1.0,0,0,0,0,0,0,0,0,0,0,276100.0,1398.0,0.61,0.31,0.025,0.115,0.03,0.01,0.03,0.086,0.066,0.000002,1.122782e-06,9.054690e-08,4.165158e-07,1.086563e-07,3.621876e-08,1.086563e-07,3.114813e-07,2.390438e-07,64200,0.232524,1054.0,836.458333,1.445606,-0.553893,-3.405646,7.067086,2.816972,4.777777,5.021437,6.564477,4.311214,5.549003,5.480371,0.0,0.0,0.0,0.0,438.0,0.0,859.0,3150.0,438.0,0.0,62.0,0.0,438.0,0.0,434.0,1058.0,1.0,0.0,4.0,8.0,20,4570,15,3,0.000072,0.016552,0.200000,0.066897,0.042841,0.163913,0.069685,0.253252,0.028486,0.381380,0.400823,-0.304269,-0.125081,0.221148,0.169650,0.241882,0.201416,0.274802,0.183047,0.281025,0.196487,0.155816,0.234600,3.872983,-0.104070,-0.094874,0.093371,0.268608,0.169085,-0.352519,0.493352,-0.495007,-0.461225,-0.113295,0.112985,0.055647,0.008708,0.029238,0.056761,0.063924,0.046655,0.014892,0.030847,0.070692,3.872983,-2369.155166,-71854.647042,0.000018,0.031325,0.049752,393.850468,0.090105,0.000014,78.160004,45200.696418,0.111757,0.047707,0.540079,269.558489,518.670991,8638.124638,0.722036,0.000730,395.729140,0.027961,0.000018,204.473265,2138.194351,0.140852,338.679773,78.160004,0.141391,0.874124,2.036918e+06,0.512649,0.589759,54.430,0.000037,1.030000,640.219739,-23.743804,-89.384119,0.999007,37608.502136,19.662146,0.994170,211.750649,52.228351,-1.524315,504.083488,1581.0,0.993037,1.756305,21.648034,3036.838431
512321,3620,203045.139149,385.978217,6.075653,54.750024,0.971723,14.838620,22.604694,0.743987,27.847579,27.110961,15.110062,5914.722753,255.687099,6.902912,21.863376,1.252370,8.081957,20.558332,0.208407,4.733242,0.493382,0.854355,4.532896e+07,4.047396e+08,6.971648e+06,1.083322e+08,1.640471e+08,5.490289e+06,12521.812758,111806.518678,1925.869725,29926.025952,45316.891016,1516.654440,14269.994093,45872.532849,2389.602262,16162.428239,40923.238233,444.977376,26,16,3660.0,16.0,0,0,0,0.0,1.0,0,0,0,0,0,0,0,0,0,0,275800.0,1408.0,0.38,0.27,0.021,0.100,0.02,0.01,0.03,0.076,0.060,0.000001,9.789703e-07,7.614213e-08,3.625816e-07,7.251632e-08,3.625816e-08,1.087745e-07,2.755620e-07,2.175489e-07,66200,0.240029,1166.0,808.969178,1.533904,-1.131414,7.823157,8.000347,7.026843,4.192688,5.539564,5.876938,5.048804,5.455830,5.727036,0.0,0.0,0.0,371.0,566.0,0.0,704.0,3654.0,104.0,0.0,38.0,0.0,462.0,0.0,355.0,1169.0,2.0,0.0,4.0,9.0,10,3080,11,3,0.000036,0.011168,0.272727,0.073424,0.016827,0.158106,0.089560,0.237454,0.053767,0.387097,0.410224,-0.303407,-0.097933,0.225419,0.185495,0.235674,0.195798,0.265682,0.213789,0.258780,0.188443,0.174841,0.231982,4.123106,-0.088178,-0.100038,0.094381,0.272016,0.164618,-0.344342,0.486909,-0.496324,-0.461316,-0.122848,0.128299,0.064207,0.009978,0.033499,0.065716,0.072949,0.053034,0.017202,0.036021,0.080451,3.316625,50.716438,-298452.610858,0.000011,0.024137,0.043772,1006.561774,0.066352,0.000006,78.610001,53715.803051,0.158619,0.004218,0.485635,255.978370,520.632590,3797.144365,0.672376,0.001107,454.496951,0.021623,0.000008,1574.753514,1853.472092,0.124044,295.766050,76.400002,0.124205,0.742265,1.314703e+06,0.550276,0.519061,34.890,0.000111,1.000000,246.567182,-3040.781136,-3376.004236,0.998227,57773.099814,48.541417,0.990899,604.658765,-33.462406,-1.500280,486.536418,2012.0,0.989129,2.247210,77.294987,2103.889687


In [51]:
# Top 50 features of the carbon model, most important first.
# Sort on the importance value only. With plain tuple sorting, two equal
# importances fall back to comparing the column labels, which raises
# TypeError when the labels mix types (e.g. int and str column names).
# Features with equal importance keep their column order.
sorted(
    zip(model_c.feature_importances_, train_C.columns),
    key=lambda pair: pair[0],
    reverse=True,
)[:50]

[(0.34396577, 'truncated_NMZ'),
 (0.10533005, 'CO__index_mass_quantile__q_0.6'),
 (0.07750683, 'total_operations_опер_0'),
 (0.0370315, 'T__absolute_sum_of_changes'),
 (0.02811455, 'T__index_mass_quantile__q_0.6'),
 (0.025079371, 'unique_count'),
 (0.023421153, 'min_ratio'),
 (0.018794352, 'N2__fft_aggregated__aggtype_"variance"'),
 (0.016543178, 'gas_mean_T фурмы 2'),
 (0.012752775, 'w2v_sip_4'),
 (0.011344053, 'CO__fft_coefficient__attr_"imag"__coeff_2'),
 (0.011310781,
  'T фурмы 2__change_quantiles__f_agg_"var"__isabs_False__qh_1.0__ql_0.6'),
 (0.010854029, 'N2__quantile__q_0.7'),
 (0.010292539, 'w2v_18'),
 (0.009005749, 'AR__autocorrelation__lag_8'),
 (0.0084295655, 'w2v_sip_19'),
 (0.007422591,
  'T фурмы 2__change_quantiles__f_agg_"var"__isabs_True__qh_1.0__ql_0.4'),
 (0.007261362, 'T__fft_coefficient__attr_"imag"__coeff_1'),
 (0.007218814, 'VES'),
 (0.006763379, 'N2__quantile__q_0.9'),
 (0.006727357, 'max_mass'),
 (0.0063897823, 'CO2__sum_values'),
 (0.0063811927, 'gas_sum_volu

In [47]:
# Write the fold-averaged predictions returned by pipeline() to CSV, without the index column.
res.to_csv(path_or_buf="xgb_no_NMZ.csv", index=False)