# Imports

In [1]:
!pip install catboost



In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from catboost import CatBoostRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.ensemble import StackingRegressor
from sklearn.metrics import mean_absolute_error

# Seeded legacy NumPy generator; handed to estimators that accept a random_state.
rng = np.random.RandomState(seed=42)

# Data

In [5]:
data_path = 'https://raw.githubusercontent.com/antbartash/max_temp/master/data/data_features.csv'
data = pd.read_csv(data_path)
data['DATE'] = data['DATE'].astype('datetime64[ns]')

# Chronological split on the calendar year of DATE:
# up to and including 2021 -> train, 2022 -> validation, 2023 -> test.
date_year = data['DATE'].dt.year


def _features_and_target(row_mask):
    """Return independent (features, target) copies for the rows selected by ``row_mask``.

    The feature frame is every column except ``TARGET`` and ``DATE``;
    the target is the ``TARGET`` column.
    """
    selected = data.loc[row_mask]
    features = selected.drop(columns=['TARGET', 'DATE']).copy()
    target = selected['TARGET'].copy()
    return features, target


X_train, y_train = _features_and_target(date_year <= 2021)
X_valid, y_valid = _features_and_target(date_year == 2022)
X_test, y_test = _features_and_target(date_year == 2023)

# Quick sanity check: the row counts of features and target must agree per split.
print(f'Train: {X_train.shape}, {y_train.shape}')
print(f'Valid: {X_valid.shape}, {y_valid.shape}')
print(f'Test: {X_test.shape}, {y_test.shape}')

Train: (34938, 52), (34938,)
Valid: (2920, 52), (2920,)
Test: (2920, 52), (2920,)


# Base models and stacking ensembles

In [3]:
# The hyperparameters below are fixed literals (presumably the outcome of an
# earlier tuning run -- confirm before changing them).

# Kernel SVR with an inhomogeneous (coef0=1) quadratic polynomial kernel.
svr_model = SVR(
    kernel='poly',
    degree=2,
    coef0=1,
    gamma='scale',
    C=0.75,
    max_iter=100000,
)

# Linear SVR: library defaults apart from the iteration cap, seeded via `rng`.
linearsvr_model = LinearSVR(
    random_state=rng,
    max_iter=100000,
)

# Gradient-boosted trees; verbose=0 silences the per-iteration training log.
catboost_model = CatBoostRegressor(
    n_estimators=571,
    learning_rate=0.035,
    depth=7,
    l2_leaf_reg=18.65,
    random_strength=0.1243,
    bagging_temperature=79.24,
    grow_policy='SymmetricTree',
    random_state=0,
    verbose=0,
)

# Ordinary least squares with default settings.
regression_model = LinearRegression()

In [7]:
# Reduced ensemble: only LinearSVR and CatBoost feed the linear meta-learner;
# the kernel SVR and the plain linear regression are left out of this run.
base_estimators = [
    ('linearsvr', linearsvr_model),
    ('catboost', catboost_model),
]
model = StackingRegressor(
    estimators=base_estimators,
    final_estimator=LinearRegression(),
)
model.fit(X_train, y_train)

# Mean absolute error on the training rows and on the validation rows.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')



Train MAE: 2.812657502870887
Valid MAE: 3.0954233216302423


In [8]:
# Full ensemble: all four base models feed the linear meta-learner.
base_estimators = [
    ('svr', svr_model),
    ('linearsvr', linearsvr_model),
    ('catboost', catboost_model),
    ('regression', regression_model),
]
model = StackingRegressor(
    estimators=base_estimators,
    final_estimator=LinearRegression(),
)
model.fit(X_train, y_train)

# Mean absolute error on the training rows and on the validation rows.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')



Train MAE: 2.847360957441589
Valid MAE: 3.110927279246434
