In [147]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import StackingRegressor
import pandas as pd
import numpy as np

from catboost import CatBoostRegressor, Pool, cv
import xgboost as xgb

from sklearn.model_selection import train_test_split

In [148]:
def mape(y_true, y_pred):
    return np.mean(np.abs((y_pred-y_true)/y_true))

RANDOM_SEED=42

In [149]:
data = pd.read_csv('data_for_stacking.csv')
cat_features_ids = ['bodyType', 'brand', 'color','descr_labels', 'fuelType', 
                    'model_name', 'vehicleTransmission',
                    'pts', 'privod', 'wheel', 'state', 'descr_labels']

for colum in cat_features_ids:
    data[colum] = data[colum].astype('category').cat.codes

data.drop('Unnamed: 0', axis=1, inplace=True, errors='ignore')

MemoryError: Unable to allocate 16.0 KiB for an array with shape (2048,) and data type int64

In [140]:
data[data['sample']==1].equip_len.unique()

array([  0,   3,  37,  49,  40,  11,   9,   1,  43,  44,  35,  72,  20,
        16,  29,  32,  51,  25,  19,  39,  28,  36,  13,  12,  56,  54,
        18,  61,  65,  41,  57,  53,   7,  63,  38,  58,  75,  67,  83,
        34,  59,  68,  42,   2,  21,   5,  50,  17,  30,  27,  24,  23,
        55,  31,  26,   6,  14,  48,   8,   4,  22,  10,  33,  15,  64,
        45,  47,  71,  46,  60,  76,  52,  74,  62,  84,  69,  70,  89,
        80,  66,  81,  85,  82,  73,  92,  94,  77,  79,  98,  99,  93,
        88,  96,  78,  86,  87,  90,  95, 104,  97, 106,  91, 100, 105,
       103, 102, 115, 107, 110, 101, 111, 109, 108, 120, 113], dtype=int64)

In [141]:
X = data.query('sample == 1').drop(['sample'], axis=1)
y = X['price']

X.drop('price', axis=1, inplace=True, errors='ignore')
X_sub = data.query('sample == 0').drop(['sample'], axis=1)

VAL_SIZE=0.20
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=VAL_SIZE, shuffle=True, random_state=RANDOM_SEED)

In [142]:
y_train = np.log(y_train)
y_test = np.log(y_test)

In [143]:
catboost_params = {
    'iterations': 5000,
    'learning_rate': 0.05775539388456,
    'depth': 12,
    'random_seed': RANDOM_SEED,
    'eval_metric': 'MAPE',
    'custom_metric': ['R2', 'MAE'],
    'l2_leaf_reg': 1,
    "loss_function": "MAPE"
}

train_pool = Pool(
    X_train,
    y_train,
    cat_features=cat_features_ids,
) 
test_pool = Pool(
    X_test,
    y_test,
    cat_features=cat_features_ids,
)

model_catb = CatBoostRegressor(iterations = catboost_params['iterations'],
                          learning_rate = catboost_params['learning_rate'],
                          random_seed = RANDOM_SEED,
                          eval_metric=catboost_params['eval_metric'],
                          custom_metric=catboost_params['custom_metric'],
                          l2_leaf_reg=catboost_params['l2_leaf_reg'],
                          depth=catboost_params['depth'],
                          metric_period=catboost_params['depth'],
                          od_type='Iter',
                          od_wait=20,
                          rsm=0.2,
                          devices='GPU'
                         )
model_catb.fit(train_pool,
         eval_set=test_pool,
         verbose_eval=catboost_params['depth']*4,
         use_best_model=True,
         plot=False
         )

0:	learn: 0.0704868	test: 0.0706362	best: 0.0706362 (0)	total: 528ms	remaining: 44m 1s
48:	learn: 0.0130970	test: 0.0133373	best: 0.0133373 (48)	total: 24.2s	remaining: 40m 46s
96:	learn: 0.0106379	test: 0.0110393	best: 0.0110393 (96)	total: 47.9s	remaining: 40m 23s
144:	learn: 0.0099386	test: 0.0105261	best: 0.0105261 (144)	total: 1m 11s	remaining: 40m 2s
192:	learn: 0.0094102	test: 0.0101787	best: 0.0101787 (192)	total: 1m 35s	remaining: 39m 30s
240:	learn: 0.0089125	test: 0.0098906	best: 0.0098906 (240)	total: 1m 59s	remaining: 39m 14s
288:	learn: 0.0085308	test: 0.0096830	best: 0.0096830 (288)	total: 2m 22s	remaining: 38m 47s
336:	learn: 0.0082050	test: 0.0095304	best: 0.0095304 (336)	total: 2m 46s	remaining: 38m 24s
384:	learn: 0.0079127	test: 0.0093996	best: 0.0093996 (384)	total: 3m 10s	remaining: 37m 58s
432:	learn: 0.0076489	test: 0.0092833	best: 0.0092833 (432)	total: 3m 34s	remaining: 37m 40s
480:	learn: 0.0074255	test: 0.0091792	best: 0.0091792 (480)	total: 3m 58s	remaining

<catboost.core.CatBoostRegressor at 0x23c144912c8>

In [None]:
predict_catb = model_catb.predict(X_test)

predict_catb = np.e ** predict_catb
y_test = np.e ** y_test

# оцениваем точность
MAPE = f'{(mape(y_test, predict_catb))*100:0.4f}'
print(f"Точность модели по метрике MAPE: {MAPE}%")

In [145]:
model_xgb = xgb.XGBRegressor(base_score=0.5, 
                             booster='gbtree',
                             colsample_bylevel=1,
                             colsample_bynode=1, 
                             colsample_bytree=1, 
                             gamma=0, #0.15340366103115533, #0,
                             importance_type='gain', 
                             learning_rate=0.08,
                             max_delta_step=0,
                             max_depth=12, #6, #7, 
                             min_child_weight=1, 
                             missing=None, 
                             n_estimators=5000,
                             n_jobs=20, 
                             nthread=None, 
                             random_state=0,
                             reg_alpha=0, #1.8400184528746324, #0, 
                             reg_lambda=1, #1.0868061353806249,#1, 
                             scale_pos_weight=1, 
                             seed=RANDOM_SEED,
                             silent=None, 
                             subsample=0.75, 
                             verbosity=0, 
                             objective='reg:squarederror',
                             verbose=True#, eval_metric=mape
                            )
                            
model_xgb.fit(X_train, y_train, eval_metric=mape)

OSError: [WinError -529697949] Windows Error 0xe06d7363

In [126]:
predict_xgb = model_xgb.predict(X_test)

predict_xgb = np.e ** predict_xgb
# y_test = np.e ** y_test

# оцениваем точность
MAPE = f'{(mape(y_test, predict_xgb))*100:0.4f}'
print(f"Точность модели по метрике MAPE: {MAPE}%")

Точность модели по метрике MAPE: 9.7719%


# FINAL PREDICT

In [None]:
predict_fin = np.mean(np.array([predict_catb, predict_xgb]), axis=0)

# оцениваем точность
MAPE = f'{(mape(y_test, predict_fin))*100:0.4f}'
print(f"Точность модели по метрике MAPE: {MAPE}%")

In [None]:
predict_fin_1k = np.round(predict_fin // 1000) * 1000
MAPE = f'{(mape(y_test, predict_fin_1k))*100:0.4f}'
print(f"Точность модели по метрике MAPE: {MAPE}%")

In [None]:
predict_fin_10k = np.round(predict_fin // 10000) * 10000
MAPE = f'{(mape(y_test, predict_fin_10k))*100:0.4f}'
print(f"Точность модели по метрике MAPE: {MAPE}%")

array([ 11,  29,  50,   0,  90,  40,  51,   1,  61,  35,  26,  16,  48,
        56,  58,  22,  42,  73,  47,  20,  44,   4,  19,  54,  32,   5,
        23,  57,  46,  12,  14,  33,  43,  66,  79,  76,  59,  53,  78,
        69,  27,  30,   3,  49,  34,   6,  24,  96,  41,  91,  21,  70,
        52,  37,  72,  77,  60,  10,   8,  25,  38,  31,  36,  81,  65,
        74,  63,  13,  55,  17,  67,  62,  45,  88,   2,  39,  64,   9,
        15,  80,  71,  18,  28,  99,  83,  92, 111,   7,  85,  84,  75,
        68,  82,  89,  87,  86,  98,  97, 103, 105,  95,  94,  93, 104,
       100, 102, 101, 106, 109], dtype=int64)

In [136]:
predict_xgb_sub = model_xgb.predict(X_sub)
predict_xgb_sub = np.e ** predict_xgb_sub

predict_catb_sub = model_catb.predict(X_sub)
predict_catb_sub = np.e ** predict_catb_sub

predict_submission = np.mean(np.array([predict_xgb_sub, predict_catb_sub]), axis=0)

predict_submission = np.e ** predict_submission
predict_submission = predict_submission // 10000 * 10000
sample_submission['price'] = predict_submission
sample_submission.to_csv(f'..\kaggle\submissions_kaggle\submission_{new_version}.csv', index=False)
sample_submission.head(5)

ValueError: feature_names mismatch: ['Unnamed: 0', 'hcc', 'power-child-locks-rear-doors', 'folding-front-passenger-seat', 'steering-wheel-gear-shift-paddles', 'drowsy-driver-alert-system', 'airbag-side', 'third-rear-headrest', 'multi-wheel', 'entertainment-system-for-rear-seat-passengers', 'panorama-roof', 'seats-9', 'rear-seats-heat', 'front-seat-support', '19-inch-wheels', 'black-roof', '345', 'armored', '3FB', 'reduce-spare-wheel', '881', 'light-cleaner', 'airbag-6', 'vsm', 'fabric-seats', 'airbrush', 'PW1', 'airbag-rear-side', 'air-suspension', 'audiosystem-tv', '16-inch-wheels', 'airbag-passenger', 'light-interior', 'seat-transformation', '239', 'traffic-sign-recognition', 'wheel-configuration3', 'gbo', 'massage-seats', 'seats-2', 'feedback-alarm', 'PRD', '6R1', '401', '1N1', 'audiopreparation', 'turnbuckle', 'PCG', 'electro-trunk', '246', 'PL2', 'knee-airbag', '51U', 'voice-recognition', 'leather-gear-stick', '448', 'aux', 'lock', '242', '872', 'wheel-configuration2', 'driver-seat-support', 'B63', '23-inch-wheels', '27-inch-wheels', 'U22', '18-inch-wheels', 'adaptive-light', '12v-socket', 'P17', 'rear-camera', 'driver-seat-memory', 'electro-window-front', 'park-assist-f', 'heated-wash-system', '443', 'U25', 'seat-memory', '4A3', '17-inch-wheels', 'servo', 'climate-control-1', 'auto-mirrors', '3L5', 'PCH', 'spare-wheel', 'velvet-seats', '632', 'engine-proof', '68BF', 'laser-lights', 'RAB', 'volume-sensor', 'parktronik', 'ashtray-and-cigarette-lighter', 'night-vision', 'E0P', 'collision-prevention-assist', '086GC', '776', 'S7N', '414', '3S1', 'electro-window-all', '03B', 'bas', 'front-seats-heat', 'front-seats-heat-vent', 'wireless-charger', 'body-kit', 'e-adjustment-wheel', 'H06', 'R66', 'duo-body-color', '4R3', 'auto-cruise', '3Q6', 'electro-mirrors', 'electro-seat', 'led-lights', 'windcleaner-heat', '540', 'folding-tables-rear', '9P5', 'music-super', 'passenger-seat-manual', '9T1', '475', 'sport-suspension', 'isofix', 'roof-rails', 'blind-spot', '431', 'UI4', 'automatic-lighting-control', '6XD', '4UE', '4R2', '871', '927', 'immobiliser-not-standard', 'airbag-2', 'PB0', 'multizone-climate-control', 'wheel-configuration1', '9S6', 'wheel-power', 'door-sill-panel', 'android-auto', 'tyre-pressure', 'electronic-gage-panel', '840', 'climate-control-3', '270', 'decorative-interior-lighting', 'esp', 'adj-pedals', 'airbag-curtain', '7X5', 'apple-carplay', 'alcantara', '989', '086EG', '14U', 'lane-keeping-assist', '551', 'asr', '6XE', '4I2', '772', '351', 'start-stop-function', 'start-button', 'B16', '6E3', '313', 'activ-suspension', '364', 'passenger-seat-electric', 'DZ9', '3NZ', 'dark-interior', 'PA1', 'rain-sensor', '873', 'leather', 'wheel-memory', 'N4M', 'PQ9', '033IN', 'not_specified', 'tinted-glass', '15-inch-wheels', '9VS', '086FA', 'driver-seat-manual', '824', '728', '889', 'PN0', 'sport-pedals', 'P31', '234', 'sport-seats', '274', 'seats-4', 'airbag-driver', 'paint-metallic', '9S3', '3B3', '8T6', 'electro-rear-seat', 'drl', '7Y4', 'hatch', '3PB', 'power-latching-doors', 'navigation', 'driver-seat-updown', 'computer', 'xenon', '883', 'body-mouldings', 'L5C', 'passenger-seat-memory', 'seats-7', 'seats-heat', 'WVA', 'drive-mode-sys', 'U09', '670', '4X4', '513', '500', '810', 'ya-auto', 'wheel-leather', 'PRG', '440', 'programmed-block-heater', '14-inch-wheels', 'audiosystem', 'keyless-entry', 'audiosystem-cd', 'P29', 'seats-8', '220v-socket', '249', '25-inch-wheels', '608', 'roller-blinds-for-rear-side-windows', '531', 'easy-trunk-opening', 'laminated-safety-glass', 'cooling-box', '61U', 'immo', 'R01', '501', '597', 'usb', '360-camera', '20-inch-wheels', 'glonass', 'rear-seat-heat-vent', 'airbag-4', '6XF', 'PRA', '21-inch-wheels', 'advanced-frontlighting-system', '301', '6NQ', 'climate-control', 'windscreen-heat', '066AC', 'bluetooth', '9AB', 'alloy-wheel-disks', '8T2', 'halogen', 'seats-6', '7B2', 'steel-wheels', '7AA', 'passenger-seat-updown', 'KA2', 'eco-leather', '24-inch-wheels', '235', 'mirrors-heat', 'RDIF09', 'light-sensor', '8RM', 'wheel-heat', '17U', 'seats-5', '218', '42D', '033GQ', 'auto-park', 'front-centre-armrest', 'isofix-front', 'remote-engine-start', '882', 'PX2', '4D3', 'condition', 'driver-seat-electric', '367', '807', '041CZ', 'front-camera', 'abs', 'projection-display', '228', 'third-row-seats', 'P44', '12-inch-wheels', 'ptf', '68BD', '22-inch-wheels', 'CJ4', 'electro-window-back', 'climate-control-2', '2C5', 'cruise-control', 'alarm', '16U', '13-inch-wheels', 'combo-interior', 'dha', '9R1', '357', 'park-assist-r', '4UC', 'PNQ', 'RVR', 'high-beam-assist', 'roller-blind-for-rear-window', 'bodyType', 'brand', 'color', 'engineDisplacement', 'enginePower', 'fuelType', 'mileage', 'modelDate', 'model_name', 'numberOfDoors', 'parsing_unixtime', 'productionDate', 'sell_id', 'vehicleTransmission', 'owners', 'owning', 'pts', 'privod', 'wheel', 'state', 'mileage600', 'engpower_doors', 'engpower_endisp', 'engpower_owners', 'prod-model-date', 'car_used', 'brand_price', 'body_price', 'model_name_price', 'brand_mileage', 'model_name_mileage', 'equip_len', 'descr_labels'] ['Unnamed: 0', 'hcc', 'power-child-locks-rear-doors', 'folding-front-passenger-seat', 'steering-wheel-gear-shift-paddles', 'drowsy-driver-alert-system', 'airbag-side', 'third-rear-headrest', 'multi-wheel', 'entertainment-system-for-rear-seat-passengers', 'panorama-roof', 'seats-9', 'rear-seats-heat', 'front-seat-support', '19-inch-wheels', 'black-roof', '345', 'armored', '3FB', 'reduce-spare-wheel', '881', 'light-cleaner', 'airbag-6', 'vsm', 'fabric-seats', 'airbrush', 'PW1', 'airbag-rear-side', 'air-suspension', 'audiosystem-tv', '16-inch-wheels', 'airbag-passenger', 'light-interior', 'seat-transformation', '239', 'traffic-sign-recognition', 'wheel-configuration3', 'gbo', 'massage-seats', 'seats-2', 'feedback-alarm', 'PRD', '6R1', '401', '1N1', 'audiopreparation', 'turnbuckle', 'PCG', 'electro-trunk', '246', 'PL2', 'knee-airbag', '51U', 'voice-recognition', 'leather-gear-stick', '448', 'aux', 'lock', '242', '872', 'wheel-configuration2', 'driver-seat-support', 'B63', '23-inch-wheels', '27-inch-wheels', 'U22', '18-inch-wheels', 'adaptive-light', '12v-socket', 'P17', 'rear-camera', 'driver-seat-memory', 'electro-window-front', 'park-assist-f', 'heated-wash-system', '443', 'U25', 'seat-memory', '4A3', '17-inch-wheels', 'servo', 'climate-control-1', 'auto-mirrors', '3L5', 'PCH', 'spare-wheel', 'velvet-seats', '632', 'engine-proof', '68BF', 'laser-lights', 'RAB', 'volume-sensor', 'parktronik', 'ashtray-and-cigarette-lighter', 'night-vision', 'E0P', 'collision-prevention-assist', '086GC', '776', 'S7N', '414', '3S1', 'electro-window-all', '03B', 'bas', 'front-seats-heat', 'front-seats-heat-vent', 'wireless-charger', 'body-kit', 'e-adjustment-wheel', 'H06', 'R66', 'duo-body-color', '4R3', 'auto-cruise', '3Q6', 'electro-mirrors', 'electro-seat', 'led-lights', 'windcleaner-heat', '540', 'folding-tables-rear', '9P5', 'music-super', 'passenger-seat-manual', '9T1', '475', 'sport-suspension', 'isofix', 'roof-rails', 'blind-spot', '431', 'UI4', 'automatic-lighting-control', '6XD', '4UE', '4R2', '871', '927', 'immobiliser-not-standard', 'airbag-2', 'PB0', 'multizone-climate-control', 'wheel-configuration1', '9S6', 'wheel-power', 'door-sill-panel', 'android-auto', 'tyre-pressure', 'electronic-gage-panel', '840', 'climate-control-3', '270', 'decorative-interior-lighting', 'esp', 'adj-pedals', 'airbag-curtain', '7X5', 'apple-carplay', 'alcantara', '989', '086EG', '14U', 'lane-keeping-assist', '551', 'asr', '6XE', '4I2', '772', '351', 'start-stop-function', 'start-button', 'B16', '6E3', '313', 'activ-suspension', '364', 'passenger-seat-electric', 'DZ9', '3NZ', 'dark-interior', 'PA1', 'rain-sensor', '873', 'leather', 'wheel-memory', 'N4M', 'PQ9', '033IN', 'not_specified', 'tinted-glass', '15-inch-wheels', '9VS', '086FA', 'driver-seat-manual', '824', '728', '889', 'PN0', 'sport-pedals', 'P31', '234', 'sport-seats', '274', 'seats-4', 'airbag-driver', 'paint-metallic', '9S3', '3B3', '8T6', 'electro-rear-seat', 'drl', '7Y4', 'hatch', '3PB', 'power-latching-doors', 'navigation', 'driver-seat-updown', 'computer', 'xenon', '883', 'body-mouldings', 'L5C', 'passenger-seat-memory', 'seats-7', 'seats-heat', 'WVA', 'drive-mode-sys', 'U09', '670', '4X4', '513', '500', '810', 'ya-auto', 'wheel-leather', 'PRG', '440', 'programmed-block-heater', '14-inch-wheels', 'audiosystem', 'keyless-entry', 'audiosystem-cd', 'P29', 'seats-8', '220v-socket', '249', '25-inch-wheels', '608', 'roller-blinds-for-rear-side-windows', '531', 'easy-trunk-opening', 'laminated-safety-glass', 'cooling-box', '61U', 'immo', 'R01', '501', '597', 'usb', '360-camera', '20-inch-wheels', 'glonass', 'rear-seat-heat-vent', 'airbag-4', '6XF', 'PRA', '21-inch-wheels', 'advanced-frontlighting-system', '301', '6NQ', 'climate-control', 'windscreen-heat', '066AC', 'bluetooth', '9AB', 'alloy-wheel-disks', '8T2', 'halogen', 'seats-6', '7B2', 'steel-wheels', '7AA', 'passenger-seat-updown', 'KA2', 'eco-leather', '24-inch-wheels', '235', 'mirrors-heat', 'RDIF09', 'light-sensor', '8RM', 'wheel-heat', '17U', 'seats-5', '218', '42D', '033GQ', 'auto-park', 'front-centre-armrest', 'isofix-front', 'remote-engine-start', '882', 'PX2', '4D3', 'condition', 'driver-seat-electric', '367', '807', '041CZ', 'front-camera', 'abs', 'projection-display', '228', 'third-row-seats', 'P44', '12-inch-wheels', 'ptf', '68BD', '22-inch-wheels', 'CJ4', 'electro-window-back', 'climate-control-2', '2C5', 'cruise-control', 'alarm', '16U', '13-inch-wheels', 'combo-interior', 'dha', '9R1', '357', 'park-assist-r', '4UC', 'PNQ', 'RVR', 'high-beam-assist', 'roller-blind-for-rear-window', 'bodyType', 'brand', 'color', 'engineDisplacement', 'enginePower', 'fuelType', 'mileage', 'modelDate', 'model_name', 'numberOfDoors', 'parsing_unixtime', 'productionDate', 'sell_id', 'vehicleTransmission', 'owners', 'owning', 'pts', 'privod', 'wheel', 'state', 'price', 'mileage600', 'engpower_doors', 'engpower_endisp', 'engpower_owners', 'prod-model-date', 'car_used', 'brand_price', 'body_price', 'model_name_price', 'brand_mileage', 'model_name_mileage', 'equip_len', 'descr_labels']
training data did not have the following fields: price

In [None]:
# estimators = [
#     ('catboost', model_catb),
#     ('xgboost', model_xgb)
# ]

# stacker = StackingRegressor(
#     estimators=estimators,
#     final_estimator=RandomForestRegressor(n_estimators=10,
#                                           random_state=RANDOM_SEED),
#     n_jobs=-1,
#     cv=4
# )

# stacker.fit(X_train, y_train).score(X_test, y_test)

In [None]:
# predict = stacker.predict(X_test)

# predict = np.e ** predict
# y_test = np.e ** y_test

# # оцениваем точность
# MAPE = f'{(mape(y_test, predict))*100:0.4f}'
# print(f"Точность модели по метрике MAPE: {MAPE}%")