# Imports

In [1]:
!pip install catboost



In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from catboost import CatBoostRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.ensemble import StackingRegressor
from sklearn.metrics import mean_absolute_error

# Seed for the shared NumPy generator used by the notebook.
SEED = 42
rng = np.random.RandomState(seed=SEED)

# Data

In [3]:
# Read the CSV at `data_path` and parse the DATE column into datetimes.
data_path = 'https://raw.githubusercontent.com/antbartash/max_temp/master/data/data_features_w_base.csv'
data = pd.read_csv(data_path)
data['DATE'] = data['DATE'].astype('datetime64[ns]')

# Split by calendar year of DATE: up to 2021 -> train, 2022 -> valid, 2023 -> test.
# The year series and the masks are computed once and reused for X and y.
obs_year = data['DATE'].dt.year
predictors = data.drop(columns=['TARGET', 'DATE'])
response = data['TARGET']

is_train = obs_year <= 2021
is_valid = obs_year == 2022
is_test = obs_year == 2023

X_train, y_train = predictors.loc[is_train].copy(), response.loc[is_train].copy()
X_valid, y_valid = predictors.loc[is_valid].copy(), response.loc[is_valid].copy()
X_test, y_test = predictors.loc[is_test].copy(), response.loc[is_test].copy()

# Report the shape of every split as a quick sanity check.
print(f'Train: {X_train.shape}, {y_train.shape}')
print(f'Valid: {X_valid.shape}, {y_valid.shape}')
print(f'Test: {X_test.shape}, {y_test.shape}')

Train: (34938, 66), (34938,)
Valid: (2920, 66), (2920,)
Test: (2920, 66), (2920,)


# Base models

In [4]:
# Base learners that the stacking cells below combine.

# Support-vector regressor with a degree-2 polynomial kernel.
svr_model = SVR(
    kernel='poly',
    degree=2,
    coef0=5,
    gamma='scale',
    C=0.75,
    max_iter=100000,
)

# Linear SVR: only the iteration cap and the random state are set explicitly;
# the random state is the shared `rng` instance.
linearsvr_model = LinearSVR(random_state=rng, max_iter=100000)

# CatBoost regressor: 900 symmetric trees of depth 7; verbose=100 prints a
# progress line every 100 iterations.
catboost_model = CatBoostRegressor(
    grow_policy='SymmetricTree',
    n_estimators=900,
    depth=7,
    l2_leaf_reg=6.5,
    random_strength=0.1225,
    bagging_temperature=100,
    random_state=42,
    verbose=100,
)

# Plain linear regression with default settings.
regression_model = LinearRegression()

In [5]:
# Reduced stack: only LinearSVR and CatBoost feed the linear meta-learner
# (svr_model and regression_model are left out of this variant).
# NOTE(review): no `cv` is passed, so StackingRegressor uses its default
# cross-validation to build the meta-features; the data are split by year, so
# confirm that a non-chronological fold scheme is acceptable here.
reduced_estimators = [
    ('linearsvr', linearsvr_model),
    ('catboost', catboost_model),
]
model = StackingRegressor(
    estimators=reduced_estimators,
    final_estimator=LinearRegression(),
)
model.fit(X_train, y_train)

# Mean absolute error on the training years, then on the validation year.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
print(f'Train MAE: {train_mae}')
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Valid MAE: {valid_mae}')



0:	learn: 10.4657631	total: 81ms	remaining: 1m 12s
100:	learn: 3.9695458	total: 1.42s	remaining: 11.3s
200:	learn: 3.8537351	total: 2.65s	remaining: 9.24s
300:	learn: 3.8177686	total: 3.81s	remaining: 7.59s
400:	learn: 3.7792069	total: 5s	remaining: 6.23s
500:	learn: 3.7418762	total: 6.19s	remaining: 4.93s
600:	learn: 3.7050616	total: 7.46s	remaining: 3.71s
700:	learn: 3.6748319	total: 8.66s	remaining: 2.46s
800:	learn: 3.6461959	total: 9.87s	remaining: 1.22s
899:	learn: 3.6190808	total: 11.1s	remaining: 0us




0:	learn: 10.6672574	total: 19.1ms	remaining: 17.2s
100:	learn: 4.1099173	total: 1.23s	remaining: 9.71s
200:	learn: 3.9858404	total: 2.37s	remaining: 8.25s
300:	learn: 3.9447533	total: 3.47s	remaining: 6.91s
400:	learn: 3.9013298	total: 4.59s	remaining: 5.71s
500:	learn: 3.8610015	total: 5.71s	remaining: 4.55s
600:	learn: 3.8159437	total: 6.92s	remaining: 3.44s
700:	learn: 3.7786199	total: 8.04s	remaining: 2.28s
800:	learn: 3.7411135	total: 9.2s	remaining: 1.14s
899:	learn: 3.7008193	total: 10.4s	remaining: 0us
0:	learn: 10.4460264	total: 18.8ms	remaining: 16.9s
100:	learn: 3.7378768	total: 1.23s	remaining: 9.7s
200:	learn: 3.6163708	total: 2.37s	remaining: 8.24s
300:	learn: 3.5848824	total: 3.46s	remaining: 6.89s
400:	learn: 3.5491966	total: 4.57s	remaining: 5.69s
500:	learn: 3.5093384	total: 5.69s	remaining: 4.53s
600:	learn: 3.4725749	total: 6.87s	remaining: 3.42s
700:	learn: 3.4396203	total: 8s	remaining: 2.27s
800:	learn: 3.4042509	total: 9.17s	remaining: 1.13s
899:	learn: 3.36773

In [6]:
# Full stack: all four base learners feed a linear-regression meta-learner.
# Note that this rebinds `model`, replacing the previously fitted stack.
all_estimators = [
    ('svr', svr_model),
    ('linearsvr', linearsvr_model),
    ('catboost', catboost_model),
    ('regression', regression_model),
]
model = StackingRegressor(
    estimators=all_estimators,
    final_estimator=LinearRegression(),
)
model.fit(X_train, y_train)

# Mean absolute error on the training years, then on the validation year.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
print(f'Train MAE: {train_mae}')
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Valid MAE: {valid_mae}')



0:	learn: 10.4657631	total: 24.5ms	remaining: 22.1s
100:	learn: 3.9695458	total: 1.37s	remaining: 10.9s
200:	learn: 3.8537351	total: 2.62s	remaining: 9.1s
300:	learn: 3.8177686	total: 3.8s	remaining: 7.55s
400:	learn: 3.7792069	total: 5.01s	remaining: 6.23s
500:	learn: 3.7418762	total: 6.22s	remaining: 4.96s
600:	learn: 3.7050616	total: 7.47s	remaining: 3.71s
700:	learn: 3.6748319	total: 8.74s	remaining: 2.48s
800:	learn: 3.6461959	total: 9.96s	remaining: 1.23s
899:	learn: 3.6190808	total: 11.2s	remaining: 0us




0:	learn: 10.6672574	total: 18.5ms	remaining: 16.6s
100:	learn: 4.1099173	total: 1.23s	remaining: 9.73s
200:	learn: 3.9858404	total: 2.39s	remaining: 8.31s
300:	learn: 3.9447533	total: 3.49s	remaining: 6.94s
400:	learn: 3.9013298	total: 4.61s	remaining: 5.73s
500:	learn: 3.8610015	total: 5.75s	remaining: 4.58s
600:	learn: 3.8159437	total: 6.9s	remaining: 3.43s
700:	learn: 3.7786199	total: 8.09s	remaining: 2.29s
800:	learn: 3.7411135	total: 9.23s	remaining: 1.14s
899:	learn: 3.7008193	total: 10.4s	remaining: 0us
0:	learn: 10.4460264	total: 18.8ms	remaining: 16.9s
100:	learn: 3.7378768	total: 1.24s	remaining: 9.82s
200:	learn: 3.6163708	total: 2.4s	remaining: 8.35s
300:	learn: 3.5848824	total: 3.5s	remaining: 6.96s
400:	learn: 3.5491966	total: 4.61s	remaining: 5.74s
500:	learn: 3.5093384	total: 5.74s	remaining: 4.57s
600:	learn: 3.4725749	total: 6.92s	remaining: 3.44s
700:	learn: 3.4396203	total: 8.05s	remaining: 2.29s
800:	learn: 3.4042509	total: 9.21s	remaining: 1.14s
899:	learn: 3.367