# Imports

In [1]:
!pip install --upgrade scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Downloading scikit_learn-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.5/13.5 MB[0m [31m74.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
Successfully installed scikit-learn-1.6.0


In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from catboost import CatBoostRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.ensemble import VotingRegressor
from sklearn.metrics import mean_absolute_error

# Notebook-wide NumPy RandomState created with a fixed seed of 42.
rng = np.random.RandomState(42)

In [3]:
# Show which scikit-learn version the kernel actually imported.
import sklearn
sklearn.__version__

'1.6.0'

# Data

In [4]:
# Load the feature table and convert DATE to datetime so rows can be
# partitioned by calendar year.
data_path = 'https://raw.githubusercontent.com/antbartash/max_temp/master/data/data_features_w_base.csv'
data = pd.read_csv(data_path)
data['DATE'] = data['DATE'].astype('datetime64[ns]')

# Compute the year of every row and the feature/target views once,
# then reuse them for all three splits.
row_year = data['DATE'].dt.year
feature_frame = data.drop(columns=['TARGET', 'DATE'])
target_series = data['TARGET']

# Chronological split: up to 2021 -> train, 2022 -> validation, 2023 -> test.
in_train = row_year <= 2021
in_valid = row_year == 2022
in_test = row_year == 2023

X_train, y_train = feature_frame.loc[in_train].copy(), target_series.loc[in_train].copy()
X_valid, y_valid = feature_frame.loc[in_valid].copy(), target_series.loc[in_valid].copy()
X_test, y_test = feature_frame.loc[in_test].copy(), target_series.loc[in_test].copy()

# Report the shape of each split.
print(f'Train: {X_train.shape}, {y_train.shape}')
print(f'Valid: {X_valid.shape}, {y_valid.shape}')
print(f'Test: {X_test.shape}, {y_test.shape}')

Train: (34938, 66), (34938,)
Valid: (2920, 66), (2920,)
Test: (2920, 66), (2920,)


# Base models

In [5]:
# Base estimators that the voting ensembles below are assembled from.
# They are only configured here; fitting happens inside each VotingRegressor.

# Support vector regressor with a degree-2 polynomial kernel.
svr_model = SVR(
    kernel='poly', degree=2, coef0=5,
    C=0.75, gamma='scale',
    max_iter=100000
)

# Linear support vector regressor.
# An integer seed is used (matching the CatBoost model below) rather than the
# shared `rng` RandomState instance: an instance is mutated whenever it is
# consumed, so results could depend on which cells were run before, whereas an
# integer seed yields the same result on every fit.
linearsvr_model = LinearSVR(
    max_iter=100000, random_state=42
)

# Gradient-boosted trees; verbose=100 prints a progress line every 100 iterations.
catboost_model = CatBoostRegressor(
    n_estimators=900, depth=7,
    l2_leaf_reg=6.5, random_strength=0.1225, bagging_temperature=100,
    grow_policy='SymmetricTree', verbose=100, random_state=42
)

# Ordinary least-squares linear regression with default settings.
regression_model = LinearRegression()

# Voting ensembles of 2 base models

In [6]:
# Ensemble members: SVR + LinearSVR (CatBoost and LinearRegression are left out).
estimators = [
    ('svr', svr_model),
    ('linearsvr', linearsvr_model),
]
# n_jobs=-1 asks scikit-learn to use all available processors when fitting the members.
model = VotingRegressor(estimators, n_jobs=-1)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')

Train MAE: 2.8829136858104025
Valid MAE: 3.102251974873919


In [7]:
# Ensemble members: SVR + CatBoost (LinearSVR and LinearRegression are left out).
estimators = [
    ('svr', svr_model),
    ('catboost', catboost_model),
]
model = VotingRegressor(estimators)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')

0:	learn: 10.4657631	total: 73.4ms	remaining: 1m 5s
100:	learn: 3.9695458	total: 1.36s	remaining: 10.7s
200:	learn: 3.8537351	total: 2.56s	remaining: 8.92s
300:	learn: 3.8177686	total: 3.7s	remaining: 7.37s
400:	learn: 3.7792069	total: 4.87s	remaining: 6.06s
500:	learn: 3.7418762	total: 6.04s	remaining: 4.81s
600:	learn: 3.7050616	total: 7.23s	remaining: 3.6s
700:	learn: 3.6748319	total: 8.41s	remaining: 2.39s
800:	learn: 3.6461959	total: 9.64s	remaining: 1.19s
899:	learn: 3.6190808	total: 10.8s	remaining: 0us
Train MAE: 2.793505276142296
Valid MAE: 3.102727957405644


In [8]:
# Ensemble members: SVR + LinearRegression (LinearSVR and CatBoost are left out).
estimators = [
    ('svr', svr_model),
    ('regression', regression_model),
]
model = VotingRegressor(estimators)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')

Train MAE: 2.8877960410252324
Valid MAE: 3.107045344841004


In [9]:
# Ensemble members: LinearSVR + CatBoost (SVR and LinearRegression are left out).
estimators = [
    ('linearsvr', linearsvr_model),
    ('catboost', catboost_model),
]
model = VotingRegressor(estimators)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')



0:	learn: 10.4657631	total: 19.3ms	remaining: 17.4s
100:	learn: 3.9695458	total: 1.32s	remaining: 10.5s
200:	learn: 3.8537351	total: 2.56s	remaining: 8.89s
300:	learn: 3.8177686	total: 3.71s	remaining: 7.38s
400:	learn: 3.7792069	total: 4.9s	remaining: 6.1s
500:	learn: 3.7418762	total: 6.16s	remaining: 4.9s
600:	learn: 3.7050616	total: 7.36s	remaining: 3.66s
700:	learn: 3.6748319	total: 8.55s	remaining: 2.43s
800:	learn: 3.6461959	total: 9.72s	remaining: 1.2s
899:	learn: 3.6190808	total: 10.9s	remaining: 0us
Train MAE: 2.7832661222341817
Valid MAE: 3.087741799506241


In [10]:
# Ensemble members: LinearSVR + LinearRegression (SVR and CatBoost are left out).
estimators = [
    ('linearsvr', linearsvr_model),
    ('regression', regression_model),
]
model = VotingRegressor(estimators)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')

Train MAE: 2.887611357684345
Valid MAE: 3.1003048243890476




In [11]:
# Ensemble members: CatBoost + LinearRegression (SVR and LinearSVR are left out).
estimators = [
    ('catboost', catboost_model),
    ('regression', regression_model),
]
model = VotingRegressor(estimators)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')

0:	learn: 10.4657631	total: 19.1ms	remaining: 17.1s
100:	learn: 3.9695458	total: 1.31s	remaining: 10.3s
200:	learn: 3.8537351	total: 2.51s	remaining: 8.74s
300:	learn: 3.8177686	total: 3.65s	remaining: 7.28s
400:	learn: 3.7792069	total: 4.83s	remaining: 6.01s
500:	learn: 3.7418762	total: 6.02s	remaining: 4.79s
600:	learn: 3.7050616	total: 7.28s	remaining: 3.62s
700:	learn: 3.6748319	total: 8.47s	remaining: 2.4s
800:	learn: 3.6461959	total: 9.65s	remaining: 1.19s
899:	learn: 3.6190808	total: 10.8s	remaining: 0us
Train MAE: 2.804702528787616
Valid MAE: 3.1115293855172776


# Voting ensembles of 3 base models

In [12]:
# Ensemble members: LinearSVR + CatBoost + LinearRegression (SVR is left out).
estimators = [
    ('linearsvr', linearsvr_model),
    ('catboost', catboost_model),
    ('regression', regression_model),
]
model = VotingRegressor(estimators)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')



0:	learn: 10.4657631	total: 19.4ms	remaining: 17.4s
100:	learn: 3.9695458	total: 1.31s	remaining: 10.3s
200:	learn: 3.8537351	total: 2.52s	remaining: 8.75s
300:	learn: 3.8177686	total: 3.72s	remaining: 7.4s
400:	learn: 3.7792069	total: 4.9s	remaining: 6.09s
500:	learn: 3.7418762	total: 6.07s	remaining: 4.84s
600:	learn: 3.7050616	total: 7.27s	remaining: 3.62s
700:	learn: 3.6748319	total: 8.46s	remaining: 2.4s
800:	learn: 3.6461959	total: 9.64s	remaining: 1.19s
899:	learn: 3.6190808	total: 10.8s	remaining: 0us
Train MAE: 2.820861372872819
Valid MAE: 3.095404459887378


In [13]:
# Ensemble members: SVR + CatBoost + LinearRegression (LinearSVR is left out).
estimators = [
    ('svr', svr_model),
    ('catboost', catboost_model),
    ('regression', regression_model),
]
model = VotingRegressor(estimators)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')

0:	learn: 10.4657631	total: 19ms	remaining: 17s
100:	learn: 3.9695458	total: 1.31s	remaining: 10.4s
200:	learn: 3.8537351	total: 2.54s	remaining: 8.82s
300:	learn: 3.8177686	total: 3.7s	remaining: 7.36s
400:	learn: 3.7792069	total: 4.94s	remaining: 6.15s
500:	learn: 3.7418762	total: 6.13s	remaining: 4.88s
600:	learn: 3.7050616	total: 7.38s	remaining: 3.67s
700:	learn: 3.6748319	total: 9s	remaining: 2.56s
800:	learn: 3.6461959	total: 10.3s	remaining: 1.27s
899:	learn: 3.6190808	total: 11.5s	remaining: 0us
Train MAE: 2.8234494593637387
Valid MAE: 3.1007188741200453


In [14]:
# Ensemble members: SVR + LinearSVR + LinearRegression (CatBoost is left out).
estimators = [
    ('svr', svr_model),
    ('linearsvr', linearsvr_model),
    ('regression', regression_model),
]
model = VotingRegressor(estimators)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')



Train MAE: 2.8824097629492234
Valid MAE: 3.099280601313419


In [15]:
# Ensemble members: SVR + LinearSVR + CatBoost (LinearRegression is left out).
estimators = [
    ('svr', svr_model),
    ('linearsvr', linearsvr_model),
    ('catboost', catboost_model),
]
model = VotingRegressor(estimators)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')



0:	learn: 10.4657631	total: 20ms	remaining: 18s
100:	learn: 3.9695458	total: 1.32s	remaining: 10.4s
200:	learn: 3.8537351	total: 2.56s	remaining: 8.91s
300:	learn: 3.8177686	total: 3.79s	remaining: 7.55s
400:	learn: 3.7792069	total: 4.99s	remaining: 6.21s
500:	learn: 3.7418762	total: 6.17s	remaining: 4.91s
600:	learn: 3.7050616	total: 7.39s	remaining: 3.67s
700:	learn: 3.6748319	total: 8.58s	remaining: 2.43s
800:	learn: 3.6461959	total: 9.77s	remaining: 1.21s
899:	learn: 3.6190808	total: 11s	remaining: 0us
Train MAE: 2.815382195154274
Valid MAE: 3.092082956214196


# Voting ensemble of all 4 base models

In [16]:
# Ensemble members: all four base models (SVR, LinearSVR, CatBoost, LinearRegression).
estimators = [
    ('svr', svr_model),
    ('linearsvr', linearsvr_model),
    ('catboost', catboost_model),
    ('regression', regression_model),
]
model = VotingRegressor(estimators)
model.fit(X_train, y_train)

# Mean absolute error on the training and validation splits.
train_mae = mean_absolute_error(y_train, model.predict(X_train))
valid_mae = mean_absolute_error(y_valid, model.predict(X_valid))
print(f'Train MAE: {train_mae}')
print(f'Valid MAE: {valid_mae}')



0:	learn: 10.4657631	total: 19.6ms	remaining: 17.6s
100:	learn: 3.9695458	total: 1.33s	remaining: 10.5s
200:	learn: 3.8537351	total: 2.56s	remaining: 8.89s
300:	learn: 3.8177686	total: 3.71s	remaining: 7.39s
400:	learn: 3.7792069	total: 4.9s	remaining: 6.09s
500:	learn: 3.7418762	total: 6.1s	remaining: 4.86s
600:	learn: 3.7050616	total: 7.37s	remaining: 3.67s
700:	learn: 3.6748319	total: 8.56s	remaining: 2.43s
800:	learn: 3.6461959	total: 9.76s	remaining: 1.21s
899:	learn: 3.6190808	total: 10.9s	remaining: 0us
Train MAE: 2.833328620843351
Valid MAE: 3.0943743240824473
