In [54]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [55]:

# Load the dataset
data = pd.read_csv('/home/alish/code/amelietatin/Predicting_land_cover/data/interpolated_final.csv')
data.drop("Unnamed: 0", axis=1, inplace=True)

In [56]:
data

Unnamed: 0,SITECODE,date,Bare Ground,Built Area,Crops,Flooded Vegetation,Grass,Shrub and Scrub,Snow and Ice,Trees,Water,lat,lon,temperature_quarterly_mean,precipitation_quarterly_mean,water-vapor-pressure_quarterly_mean,cloud-cover_quarterly_mean
0,AT1301000,2015-07-01,0.004731,0.306967,9.740933,0.434059,3.750812,1.101940,0.000000,79.647261,4.151655,48.172354,16.530699,18.86,2.788798e-08,942.75470,0.536892
1,AT1301000,2015-10-01,0.369930,0.000000,9.580551,0.000000,5.368118,0.000000,0.032538,75.791308,5.394485,48.176358,16.224738,18.12,3.174909e-08,948.60770,0.553719
2,AT1301000,2016-01-01,1.024919,0.599189,9.420169,0.724955,6.985424,1.552991,0.000000,71.935356,6.637315,48.313875,16.399310,18.12,3.174909e-08,948.60770,0.553719
3,AT1301000,2016-04-01,0.334118,0.199806,8.961541,0.216983,5.762732,0.793234,0.000000,78.101251,5.523506,46.591177,13.696092,13.15,5.000581e-08,891.69970,0.644521
4,AT1301000,2016-07-01,0.010046,0.170694,7.763708,0.156480,6.612075,0.171722,0.000000,79.428145,4.506132,46.603491,13.503149,13.15,5.000581e-08,891.69970,0.644521
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22376,SE0820616,2023-04-01,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.996812,0.000000,0.000000,65.631796,22.080295,4.58,2.618655e-08,643.23303,0.658696
22377,SE0820616,2023-07-01,0.000000,0.000000,0.073207,0.698822,0.384433,6.433196,0.000000,90.517663,1.557420,67.150809,18.212619,0.02,3.101494e-08,562.88360,0.794345
22378,SE0820616,2023-10-01,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.985115,0.005196,0.000000,66.053867,20.350211,5.57,2.898485e-08,619.70465,0.720756
22379,SE0820616,2024-01-01,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.996812,0.000000,0.000000,66.373559,20.949495,6.00,2.736414e-08,622.68720,0.716387


In [57]:

# Convert date to datetime
data['date'] = pd.to_datetime(data['date'])
# Define features and targets
features = ['temperature_quarterly_mean', 'precipitation_quarterly_mean', 'water-vapor-pressure_quarterly_mean', 'cloud-cover_quarterly_mean']
targets = ['Bare Ground', 'Built Area', 'Crops', 'Flooded Vegetation', 'Grass', 'Shrub and Scrub', 'Snow and Ice', 'Trees', 'Water']

# Ensure the date column is in datetime format
data['date'] = pd.to_datetime(data['date'])

# Sort by SITECODE and date
data = data.sort_values(by=['SITECODE', 'date'])

# Group by SITECODE and date to ensure unique combinations
grouped_data = data.groupby(['SITECODE', 'date'])[features + targets].mean().reset_index()


In [58]:
# Function to create lag features
def create_lag_features(df, features, lags):
    for feature in features:
        for lag in lags:
            df[f'{feature}_lag_{lag}'] = df.groupby('SITECODE')[feature].shift(lag)
    return df

# Define the number of lags
lags = [1, 2, 3]

# Create lag features
grouped_data = create_lag_features(grouped_data, features + targets, lags)

# Drop rows with NaN values created by lagging
grouped_data = grouped_data.dropna().reset_index(drop=True)


In [59]:
# Split the data into training and testing sets based on date
train_size = int(len(grouped_data) * 0.8)
train_data = grouped_data.iloc[:train_size]
test_data = grouped_data.iloc[train_size:]

# Splitting the dataset into features (X) and targets (y)
X_train = train_data.drop(columns=targets + ['Bare Ground_lag_1',
       'Bare Ground_lag_2', 'Bare Ground_lag_3', 'Built Area_lag_1',
       'Built Area_lag_2', 'Built Area_lag_3', 'Crops_lag_1', 'Crops_lag_2',
       'Crops_lag_3', 'Flooded Vegetation_lag_1', 'Flooded Vegetation_lag_2',
       'Flooded Vegetation_lag_3', 'Grass_lag_1', 'Grass_lag_2', 'Grass_lag_3',
       'Shrub and Scrub_lag_1', 'Shrub and Scrub_lag_2',
       'Shrub and Scrub_lag_3', 'Snow and Ice_lag_1', 'Snow and Ice_lag_2',
       'Snow and Ice_lag_3', 'Trees_lag_1', 'Trees_lag_2', 'Trees_lag_3',
       'Water_lag_1', 'Water_lag_2', 'Water_lag_3','SITECODE', 'date'])
y_train = train_data[targets]
X_test = test_data.drop(columns=targets + ['Bare Ground_lag_1',
       'Bare Ground_lag_2', 'Bare Ground_lag_3', 'Built Area_lag_1',
       'Built Area_lag_2', 'Built Area_lag_3', 'Crops_lag_1', 'Crops_lag_2',
       'Crops_lag_3', 'Flooded Vegetation_lag_1', 'Flooded Vegetation_lag_2',
       'Flooded Vegetation_lag_3', 'Grass_lag_1', 'Grass_lag_2', 'Grass_lag_3',
       'Shrub and Scrub_lag_1', 'Shrub and Scrub_lag_2',
       'Shrub and Scrub_lag_3', 'Snow and Ice_lag_1', 'Snow and Ice_lag_2',
       'Snow and Ice_lag_3', 'Trees_lag_1', 'Trees_lag_2', 'Trees_lag_3',
       'Water_lag_1', 'Water_lag_2', 'Water_lag_3','SITECODE', 'date'])
y_test = test_data[targets]


In [60]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((16309, 16), (16309, 9), (4078, 16), (4078, 9))

In [62]:
X_train

Unnamed: 0,temperature_quarterly_mean,precipitation_quarterly_mean,water-vapor-pressure_quarterly_mean,cloud-cover_quarterly_mean,temperature_quarterly_mean_lag_1,temperature_quarterly_mean_lag_2,temperature_quarterly_mean_lag_3,precipitation_quarterly_mean_lag_1,precipitation_quarterly_mean_lag_2,precipitation_quarterly_mean_lag_3,water-vapor-pressure_quarterly_mean_lag_1,water-vapor-pressure_quarterly_mean_lag_2,water-vapor-pressure_quarterly_mean_lag_3,cloud-cover_quarterly_mean_lag_1,cloud-cover_quarterly_mean_lag_2,cloud-cover_quarterly_mean_lag_3
0,13.15,5.000581e-08,891.69970,0.644521,18.12,18.12,18.86,3.174909e-08,3.174909e-08,2.788798e-08,948.60770,948.60770,942.75470,0.553719,0.553719,0.536892
1,13.15,5.000581e-08,891.69970,0.644521,13.15,18.12,18.12,5.000581e-08,3.174909e-08,3.174909e-08,891.69970,948.60770,948.60770,0.644521,0.553719,0.553719
2,13.42,7.548327e-08,904.18555,0.666106,13.15,13.15,18.12,5.000581e-08,5.000581e-08,3.174909e-08,891.69970,891.69970,948.60770,0.644521,0.644521,0.553719
3,13.42,7.548327e-08,904.18555,0.666106,13.42,13.15,13.15,7.548327e-08,5.000581e-08,5.000581e-08,904.18555,891.69970,891.69970,0.666106,0.644521,0.644521
4,18.18,3.878364e-08,972.81665,0.524583,13.42,13.42,13.15,7.548327e-08,7.548327e-08,5.000581e-08,904.18555,904.18555,891.69970,0.666106,0.666106,0.644521
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16304,14.07,3.813824e-08,843.04510,0.683100,14.08,14.08,14.17,3.010337e-08,3.010337e-08,2.775418e-08,841.24445,841.24445,846.70060,0.680449,0.680449,0.669048
16305,15.15,2.466899e-08,830.64090,0.614336,15.64,15.64,15.51,2.208680e-08,2.208680e-08,2.200682e-08,854.85010,854.85010,847.98096,0.572189,0.572189,0.591189
16306,15.15,2.466899e-08,830.64090,0.614336,15.15,15.64,15.64,2.466899e-08,2.208680e-08,2.208680e-08,830.64090,854.85010,854.85010,0.614336,0.572189,0.572189
16307,15.92,2.211454e-08,882.51520,0.557252,15.15,15.15,15.64,2.466899e-08,2.466899e-08,2.208680e-08,830.64090,830.64090,854.85010,0.614336,0.614336,0.572189


In [63]:
import xgboost as xgb
from sklearn.multioutput import MultiOutputRegressor

# Initialize XGBoost regressor
xgb_regressor = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)

# MultiOutputRegressor allows handling multiple target variables
model = MultiOutputRegressor(xgb_regressor)

# Train the model
model.fit(X_train, y_train)


In [64]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Predict the target values for the test set
y_pred = model.predict(X_test)

# Calculate MAE, RMSE, and average values for each target
mae_scores = []
rmse_scores = []

for i, target in enumerate(targets):
    mae = mean_absolute_error(y_test.iloc[:, i], y_pred[:, i])
    rmse = np.sqrt(mean_squared_error(y_test.iloc[:, i], y_pred[:, i]))
    mae_scores.append(mae)
    rmse_scores.append(rmse)
    print(f"{target} - MAE: {mae:.4f}, RMSE: {rmse:.4f}")

# Calculate average MAE and RMSE across all targets
average_mae = np.mean(mae_scores)
average_rmse = np.mean(rmse_scores)

print(f"\nAverage MAE: {average_mae:.4f}")
print(f"Average RMSE: {average_rmse:.4f}")


Bare Ground - MAE: 2.3697, RMSE: 3.8661
Built Area - MAE: 1.2892, RMSE: 1.9421
Crops - MAE: 8.9413, RMSE: 10.4389
Flooded Vegetation - MAE: 2.3559, RMSE: 3.2880
Grass - MAE: 6.1200, RMSE: 10.5916
Shrub and Scrub - MAE: 10.3113, RMSE: 13.8045
Snow and Ice - MAE: 22.9031, RMSE: 37.0055
Trees - MAE: 32.2485, RMSE: 36.3361
Water - MAE: 27.6128, RMSE: 36.0327

Average MAE: 12.6835
Average RMSE: 17.0340


## Importing Future features

In [65]:
data_p = pd.read_csv('/home/alish/code/amelietatin/Predicting_land_cover/data/all_pa_future_values_2100.csv')
data_p

Unnamed: 0,SITECODE,lon,lat,date,temperature_quarterly_mean,precipitation_quarterly_mean,water-vapor-pressure_quarterly_mean,cloud-cover_quarterly_mean
0,IT5320009,13.292268,43.532192,2025-01-01,8.65,1.178506e-08,650.85570,0.500273
1,IT4050014,11.128795,44.319651,2025-01-01,5.11,1.629546e-08,606.74290,0.544266
2,IT4060015,12.256245,44.849636,2025-01-01,7.75,9.604958e-09,666.82480,0.515485
3,IT5330008,13.132352,42.878579,2025-01-01,2.14,2.546247e-08,538.16170,0.572809
4,IT4080001,11.801841,43.847954,2025-01-01,4.39,2.139190e-08,592.76886,0.588862
...,...,...,...,...,...,...,...,...
290923,AT2212000,14.183066,47.553708,2100-10-01,3.28,4.927227e-08,640.39496,0.644599
290924,AT3423000,9.701289,47.444386,2100-10-01,2.81,5.021011e-08,641.44490,0.616283
290925,AT2233000,15.549436,47.233696,2100-10-01,5.45,3.184268e-08,660.13380,0.641130
290926,SE0820616,21.194107,66.652113,2100-10-01,-4.13,1.396879e-08,519.12160,0.712890


In [66]:
data_p.drop(columns=["lon","lat"], axis=1, inplace=True)
data_p

Unnamed: 0,SITECODE,date,temperature_quarterly_mean,precipitation_quarterly_mean,water-vapor-pressure_quarterly_mean,cloud-cover_quarterly_mean
0,IT5320009,2025-01-01,8.65,1.178506e-08,650.85570,0.500273
1,IT4050014,2025-01-01,5.11,1.629546e-08,606.74290,0.544266
2,IT4060015,2025-01-01,7.75,9.604958e-09,666.82480,0.515485
3,IT5330008,2025-01-01,2.14,2.546247e-08,538.16170,0.572809
4,IT4080001,2025-01-01,4.39,2.139190e-08,592.76886,0.588862
...,...,...,...,...,...,...
290923,AT2212000,2100-10-01,3.28,4.927227e-08,640.39496,0.644599
290924,AT3423000,2100-10-01,2.81,5.021011e-08,641.44490,0.616283
290925,AT2233000,2100-10-01,5.45,3.184268e-08,660.13380,0.641130
290926,SE0820616,2100-10-01,-4.13,1.396879e-08,519.12160,0.712890


In [67]:
# Function to create lag features
def create_lag_features(df, features, lags):
    for feature in features:
        for lag in lags:
            df[f'{feature}_lag_{lag}'] = df.groupby('SITECODE')[feature].shift(lag)
    return df



In [68]:
features={"temperature_quarterly_mean","precipitation_quarterly_mean","water-vapor-pressure_quarterly_mean","cloud-cover_quarterly_mean"}
# Define the number of lags
lags = [1, 2, 3]

# Create lag features
temp1 = create_lag_features(data_p, features, lags)

# Drop rows with NaN values created by lagging
temp1_p = temp1.dropna().reset_index(drop=True)


In [69]:
latest_data=temp1_p
latest_data

Unnamed: 0,SITECODE,date,temperature_quarterly_mean,precipitation_quarterly_mean,water-vapor-pressure_quarterly_mean,cloud-cover_quarterly_mean,precipitation_quarterly_mean_lag_1,precipitation_quarterly_mean_lag_2,precipitation_quarterly_mean_lag_3,temperature_quarterly_mean_lag_1,temperature_quarterly_mean_lag_2,temperature_quarterly_mean_lag_3,cloud-cover_quarterly_mean_lag_1,cloud-cover_quarterly_mean_lag_2,cloud-cover_quarterly_mean_lag_3,water-vapor-pressure_quarterly_mean_lag_1,water-vapor-pressure_quarterly_mean_lag_2,water-vapor-pressure_quarterly_mean_lag_3
0,IT7222296,2025-04-01,20.42,8.068373e-09,1035.28970,0.583280,1.768768e-08,3.068992e-08,2.640631e-08,17.33,19.29,6.76,0.487315,0.607995,0.656820,900.15106,959.80786,667.59530
1,GR4310003,2025-04-01,20.42,8.068373e-09,1035.28970,0.583280,1.577908e-08,3.068992e-08,2.870836e-08,19.92,19.29,10.91,0.243490,0.607995,0.737163,893.10620,959.80786,729.47490
2,ES0000037,2025-04-01,20.42,8.068373e-09,1035.28970,0.583280,3.167483e-09,3.068992e-08,1.032832e-08,20.94,19.29,14.76,0.323312,0.607995,0.479654,1052.99500,959.80786,841.39850
3,IT5180016,2025-04-01,20.42,8.068373e-09,1035.28970,0.583280,2.177784e-08,3.068992e-08,1.970965e-08,18.02,19.29,6.14,0.449978,0.607995,0.552045,872.13430,959.80786,610.38410
4,DE5529302,2025-04-01,20.42,8.068373e-09,1035.28970,0.583280,2.952898e-08,3.068992e-08,2.136334e-08,13.59,19.29,2.07,0.629127,0.607995,0.720617,802.21990,959.80786,611.50850
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
288139,AT2212000,2100-10-01,3.28,4.927227e-08,640.39496,0.644599,5.748021e-08,5.469469e-08,5.895132e-08,16.50,12.34,-4.90,0.608164,0.666039,0.728767,935.14470,795.78900,497.38757
288140,AT3423000,2100-10-01,2.81,5.021011e-08,641.44490,0.616283,6.494817e-08,6.398297e-08,4.948346e-08,15.56,10.78,-4.56,0.584787,0.687916,0.682023,914.52410,780.48340,493.97134
288141,AT2233000,2100-10-01,5.45,3.184268e-08,660.13380,0.641130,2.735040e-08,2.936090e-08,2.999227e-08,19.39,15.50,-2.95,0.507645,0.588548,0.702215,963.38690,834.11260,511.98325
288142,SE0820616,2100-10-01,-4.13,1.396879e-08,519.12160,0.712890,3.731042e-08,2.292757e-08,8.557523e-09,10.88,6.94,-6.23,0.790892,0.710243,0.741276,780.79130,651.59784,463.40604


In [70]:
latest_data.shape

(288144, 18)

In [73]:
# Drop SITECODE and date columns for model input
X_new = latest_data.drop(columns=['SITECODE', 'date'])
X_new

Unnamed: 0,temperature_quarterly_mean,precipitation_quarterly_mean,water-vapor-pressure_quarterly_mean,cloud-cover_quarterly_mean,precipitation_quarterly_mean_lag_1,precipitation_quarterly_mean_lag_2,precipitation_quarterly_mean_lag_3,temperature_quarterly_mean_lag_1,temperature_quarterly_mean_lag_2,temperature_quarterly_mean_lag_3,cloud-cover_quarterly_mean_lag_1,cloud-cover_quarterly_mean_lag_2,cloud-cover_quarterly_mean_lag_3,water-vapor-pressure_quarterly_mean_lag_1,water-vapor-pressure_quarterly_mean_lag_2,water-vapor-pressure_quarterly_mean_lag_3
0,20.42,8.068373e-09,1035.28970,0.583280,1.768768e-08,3.068992e-08,2.640631e-08,17.33,19.29,6.76,0.487315,0.607995,0.656820,900.15106,959.80786,667.59530
1,20.42,8.068373e-09,1035.28970,0.583280,1.577908e-08,3.068992e-08,2.870836e-08,19.92,19.29,10.91,0.243490,0.607995,0.737163,893.10620,959.80786,729.47490
2,20.42,8.068373e-09,1035.28970,0.583280,3.167483e-09,3.068992e-08,1.032832e-08,20.94,19.29,14.76,0.323312,0.607995,0.479654,1052.99500,959.80786,841.39850
3,20.42,8.068373e-09,1035.28970,0.583280,2.177784e-08,3.068992e-08,1.970965e-08,18.02,19.29,6.14,0.449978,0.607995,0.552045,872.13430,959.80786,610.38410
4,20.42,8.068373e-09,1035.28970,0.583280,2.952898e-08,3.068992e-08,2.136334e-08,13.59,19.29,2.07,0.629127,0.607995,0.720617,802.21990,959.80786,611.50850
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
288139,3.28,4.927227e-08,640.39496,0.644599,5.748021e-08,5.469469e-08,5.895132e-08,16.50,12.34,-4.90,0.608164,0.666039,0.728767,935.14470,795.78900,497.38757
288140,2.81,5.021011e-08,641.44490,0.616283,6.494817e-08,6.398297e-08,4.948346e-08,15.56,10.78,-4.56,0.584787,0.687916,0.682023,914.52410,780.48340,493.97134
288141,5.45,3.184268e-08,660.13380,0.641130,2.735040e-08,2.936090e-08,2.999227e-08,19.39,15.50,-2.95,0.507645,0.588548,0.702215,963.38690,834.11260,511.98325
288142,-4.13,1.396879e-08,519.12160,0.712890,3.731042e-08,2.292757e-08,8.557523e-09,10.88,6.94,-6.23,0.790892,0.710243,0.741276,780.79130,651.59784,463.40604


In [75]:
correct_order = ['temperature_quarterly_mean', 'precipitation_quarterly_mean', 'water-vapor-pressure_quarterly_mean', 'cloud-cover_quarterly_mean', 
                 'temperature_quarterly_mean_lag_1', 'temperature_quarterly_mean_lag_2', 'temperature_quarterly_mean_lag_3', 
                 'precipitation_quarterly_mean_lag_1', 'precipitation_quarterly_mean_lag_2', 'precipitation_quarterly_mean_lag_3', 
                 'water-vapor-pressure_quarterly_mean_lag_1', 'water-vapor-pressure_quarterly_mean_lag_2', 'water-vapor-pressure_quarterly_mean_lag_3', 
                 'cloud-cover_quarterly_mean_lag_1', 'cloud-cover_quarterly_mean_lag_2', 'cloud-cover_quarterly_mean_lag_3']

X_new = X_new[correct_order]

In [76]:


# Predict the target values
predictions = model.predict(X_new)

# Combine predictions with SITECODE and date for better clarity
predicted_df = pd.DataFrame(predictions, columns=targets)
predicted_df['SITECODE'] = latest_data['SITECODE']
predicted_df['date'] = latest_data['date']

predicted_df


Unnamed: 0,Bare Ground,Built Area,Crops,Flooded Vegetation,Grass,Shrub and Scrub,Snow and Ice,Trees,Water,SITECODE,date
0,3.778708,1.350034,18.708063,3.760984,4.555117,18.216835,8.141091,45.091091,21.688145,IT7222296,2025-04-01
1,3.650899,1.446865,10.139294,3.471645,4.433885,21.073172,15.156964,42.059540,22.975611,GR4310003,2025-04-01
2,4.348904,1.624042,12.886868,5.223231,5.789265,9.718904,10.110093,41.447987,16.492441,ES0000037,2025-04-01
3,4.064131,1.521357,25.325544,4.682368,2.799828,22.527302,7.974885,46.666054,29.473221,IT5180016,2025-04-01
4,3.655873,1.310767,19.328148,4.219501,4.827249,24.614723,6.937073,43.862209,31.196226,DE5529302,2025-04-01
...,...,...,...,...,...,...,...,...,...,...,...
288139,2.603660,1.489033,6.579873,2.924538,4.513779,10.767961,11.033201,35.254951,23.581120,AT2212000,2100-10-01
288140,1.431213,1.213516,7.071641,3.079754,6.306626,6.936909,13.835559,44.271328,22.009089,AT3423000,2100-10-01
288141,1.616824,1.306934,12.428360,3.825971,5.956913,8.754708,7.050357,45.026943,38.538475,AT2233000,2100-10-01
288142,8.858170,1.604594,10.050813,3.736875,6.971332,10.378896,19.415543,34.154007,25.431738,SE0820616,2100-10-01


In [77]:
predicted_df.to_csv('Predictions_XGBoost_V01.csv', index=False)


In [78]:
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor

# Define the parameter grid
param_grid = {
    'learning_rate': [0.01, 0.1, 0.3],
    'max_depth': [3, 5, 7],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'n_estimators': [100, 200, 300]
}

# Initialize XGBoost regressor
xgb = XGBRegressor()

# Perform grid search with cross-validation
grid_search = GridSearchCV(estimator=xgb, param_grid=param_grid, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best hyperparameters:", best_params)

# Train the model with the best hyperparameters
best_xgb = XGBRegressor(**best_params)
best_xgb.fit(X_train, y_train)

# Evaluate the model on the test set
test_predictions = best_xgb.predict(X_test)
mae = mean_absolute_error(y_test, test_predictions)
rmse = mean_squared_error(y_test, test_predictions, squared=False)
print("MAE on test set:", mae)
print("RMSE on test set:", rmse)


Best hyperparameters: {'colsample_bytree': 1.0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'subsample': 1.0}
MAE on test set: 12.508154946805812
RMSE on test set: 16.38921901161445


