#**Машинное обучение ИБ-2024**

#**Домашнее задание 1.**
#Регрессия, KNN, LinearRegression.

В данной домашней работе мы будем строить модели для предсказания цены квартиры в России. Ниже приведено описание некоторых колонок набора данных.

date - дата публикации объявления

price - цена в рублях

level- этаж, на котором находится квартира

levels - количество этажей в доме

rooms - количество комнат в квартире. Если значение -1, то квартира считается апартаментами.

area - площадь квартиры.

kitchen_area - площадь кухни.

geo_lat - Latitude

geo_lon - Longitude

building_type - материал застройки. 0 - Don't know. 1 - Other. 2 - Panel. 3 - Monolithic. 4 - Brick. 5 - Blocky. 6 - Wooden

#Часть 0. Начало работы

Для начала работы с данными импортируем библиотеки, которые понадобятся в данном задании.

In [1]:
import math
import pandas as pd
import numpy as np
import matplotlib as plt
import sklearn
import seaborn as sns

Загрузим библиотеку folium для отображения данных на карте по координатам.

In [2]:
!pip install folium



Распакуем наши данные из архива.

In [3]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset
# archive is read from that mount point in the next cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
!unzip /content/drive/MyDrive/HSE/archive_dataset.zip

Archive:  /content/drive/MyDrive/HSE/archive_dataset.zip
  inflating: input_data.csv          


Загрузим данные из csv файла в датафрейм.

In [5]:
# Read the semicolon-separated dataset, keep a reproducible random sample of
# 30,000 rows and renumber them from zero.
df = (
    pd.read_csv('input_data.csv', sep=';')
    .sample(n=30000, random_state=42)
    .reset_index(drop=True)
)
display(df.head(11))

Unnamed: 0,date,price,level,levels,rooms,area,kitchen_area,geo_lat,geo_lon,building_type,object_type,postal_code,street_id,id_region,house_id
0,2021-08-17,5740535,8,17,1,42.9,9.5,55.591058,37.752858,0,2,142717.0,,50,1100977.0
1,2021-06-12,4750000,12,16,2,49.0,18.0,57.191065,65.581201,0,0,625031.0,526777.0,72,2779946.0
2,2021-07-05,5695000,6,9,3,67.3,8.0,57.123336,65.557107,2,0,625007.0,190545.0,72,2260949.0
3,2021-03-10,2650000,5,9,2,44.0,0.0,56.848916,53.283994,5,0,426072.0,168301.0,18,1198196.0
4,2021-06-07,950000,1,5,1,33.0,6.0,56.856968,40.550985,2,0,155048.0,,37,
5,2021-09-14,6212745,4,5,1,38.0,16.3,59.699049,30.402232,0,2,196602.0,,78,
6,2021-05-13,5753000,9,9,2,59.0,-100.0,50.316358,127.506593,4,0,675520.0,,28,
7,2021-01-06,6500000,4,6,3,98.6,14.2,54.170537,45.18504,4,0,430004.0,201539.0,13,886869.0
8,2021-12-21,2600000,7,9,2,35.7,16.1,59.190223,39.82573,0,2,160034.0,,35,
9,2021-03-15,1900000,4,9,2,48.5,0.0,51.535456,45.973225,0,0,410054.0,,64,


Отобразим на карте координаты наших построек.

In [6]:
import folium
from IPython.display import display

# Plot only the first 1000 listings to keep the map responsive.
# iloc is positional and end-exclusive (the previous .loc[:1000] selected 1001
# rows); rows without coordinates are skipped because folium rejects NaN
# locations.
map_df = df.iloc[:1000].dropna(subset=['geo_lat', 'geo_lon'])

# The initial view is centred on Moscow; markers elsewhere are reachable by
# panning and zooming.
m = folium.Map(location=[55.751244, 37.618423], zoom_start=10)

# Latitude / longitude of every point to draw.
lats = map_df['geo_lat']
longs = map_df['geo_lon']

# Add one marker per listing.
for lat, lon in zip(lats, longs):
    folium.Marker(location=[lat, lon]).add_to(m)

display(m)

# Часть 1. Подготовим данные для обработки моделями машинного обучения.

**0.5 Балла**. География наших наблюдений в наборе данных крайне большая. Однако мы знаем, что стоимость квартир в Москве и Санкт-Петербурге намного выше, чем в среднем по России. Давайте сделаем признаки, которые показывают, находится ли квартира в пределах 20 километров от центра Москвы или в пределах 20 километров от центра Санкт-Петербурга.

Создайте два признака is_Moscow и is_Saint_Peterburg. Для нахождения расстояния по координатам используйте функцию haversine_distance.

In [7]:
def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Coordinates are given in decimal degrees. Because only NumPy ufuncs are
    used, each argument may be a scalar or an array-like; array inputs are
    combined element-wise.

    Args:
        lat1, lon1: Latitude and longitude of the first point(s).
        lat2, lon2: Latitude and longitude of the second point(s).

    Returns:
        Distance(s) along a sphere of radius 6371 km.
    """
    earth_radius = 6371.0
    phi1, lam1, phi2, lam2 = (
        np.radians(angle) for angle in (lat1, lon1, lat2, lon2)
    )
    half_dphi = (phi2 - phi1) / 2.0
    half_dlam = (lam2 - lam1) / 2.0
    # Haversine of the central angle between the two points.
    hav = np.sin(half_dphi)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(half_dlam)**2
    central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))
    return earth_radius * central_angle

# City-centre reference points as (latitude, longitude) in decimal degrees.
moscow_coords = (55.751244, 37.618423)
spb_coords = (59.9342802, 30.3350986)

def cheak_city_center(lat, lon, city_lat, city_lon, radius_km=20):
    """Tell whether a point lies within ``radius_km`` of a city centre.

    Args:
        lat, lon: Coordinates of the point, decimal degrees.
        city_lat, city_lon: Coordinates of the city centre, decimal degrees.
        radius_km: Maximum distance (inclusive) in kilometres. Defaults to 20,
            the value that used to be hard-coded.

    Returns:
        1 if the haversine distance is at most ``radius_km``, otherwise 0.
        A NaN distance (missing coordinate) compares false and gives 0.
    """
    dist = haversine_distance(lat, lon, city_lat, city_lon)
    return 1 if dist <= radius_km else 0

# Flag listings located within 20 km of each city centre. haversine_distance is
# built from NumPy ufuncs, so whole columns are processed in one call instead
# of a Python-level function call per row. Rows with missing coordinates give a
# NaN distance, which compares false and therefore yields 0.
df['is_Moscow'] = (
    haversine_distance(df['geo_lat'], df['geo_lon'], moscow_coords[0], moscow_coords[1]) <= 20
).astype(int)
df['is_Saint_Peterburg'] = (
    haversine_distance(df['geo_lat'], df['geo_lon'], spb_coords[0], spb_coords[1]) <= 20
).astype(int)

display(df[['geo_lat', 'geo_lon', 'is_Moscow', 'is_Saint_Peterburg']].head(30))

Unnamed: 0,geo_lat,geo_lon,is_Moscow,is_Saint_Peterburg
0,55.591058,37.752858,1,0
1,57.191065,65.581201,0,0
2,57.123336,65.557107,0,0
3,56.848916,53.283994,0,0
4,56.856968,40.550985,0,0
5,59.699049,30.402232,0,0
6,50.316358,127.506593,0,0
7,54.170537,45.18504,0,0
8,59.190223,39.82573,0,0
9,51.535456,45.973225,0,0


**0.5 Балла**. В нашем наборе данных есть признаки, которые мы теоретически можем использовать, например postal_code, но мы это будем делать в рамках домашней работы очень-очень долго. Поэтому предлагается удалить ненужные признаки из датафрейма.

Удалим geo_lat,	geo_lon,	object_type,	postal_code,	street_id,	id_region,	house_id.

In [8]:
# Remove the raw coordinates (already condensed into the city flags) and the
# identifier-like columns that are not used as features.
df = df.drop(
    [
        'geo_lat',
        'geo_lon',
        'object_type',
        'postal_code',
        'street_id',
        'id_region',
        'house_id',
    ],
    axis='columns',
)
display(df.head(30))

Unnamed: 0,date,price,level,levels,rooms,area,kitchen_area,building_type,is_Moscow,is_Saint_Peterburg
0,2021-08-17,5740535,8,17,1,42.9,9.5,0,1,0
1,2021-06-12,4750000,12,16,2,49.0,18.0,0,0,0
2,2021-07-05,5695000,6,9,3,67.3,8.0,2,0,0
3,2021-03-10,2650000,5,9,2,44.0,0.0,5,0,0
4,2021-06-07,950000,1,5,1,33.0,6.0,2,0,0
5,2021-09-14,6212745,4,5,1,38.0,16.3,0,0,0
6,2021-05-13,5753000,9,9,2,59.0,-100.0,4,0,0
7,2021-01-06,6500000,4,6,3,98.6,14.2,4,0,0
8,2021-12-21,2600000,7,9,2,35.7,16.1,0,0,0
9,2021-03-15,1900000,4,9,2,48.5,0.0,0,0,0


**0.5 Балл**. Для начала Вам предлагается проанализировать Ваши оставшиеся признаки (колонки) в наборе данных. Какие колонки категориальные? Какие числовые?

Категориальные: building_type,is_Moscow, is_Saint_Peterburg

Числовые: price, level, levels, rooms, area, kitchen_area

Давайте закодируем категориальные признаки с помощью OneHot-Encoding. https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html

In [9]:
from sklearn.preprocessing import OneHotEncoder

# Dense output so the encoded matrix can be wrapped in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)
categorical_columns = ['building_type', 'is_Moscow', 'is_Saint_Peterburg']
encoded_features = encoder.fit_transform(df[categorical_columns])

# Reuse df's index: pd.concat(axis=1) aligns rows by index label, so a fresh
# 0..n-1 index would misalign the rows (and introduce NaNs) whenever df's
# index is not a plain RangeIndex.
encoded_df = pd.DataFrame(
    encoded_features,
    columns=encoder.get_feature_names_out(categorical_columns),
    index=df.index,
)
df = pd.concat([df.drop(columns=categorical_columns), encoded_df], axis=1)

display(df.head(30))

Unnamed: 0,date,price,level,levels,rooms,area,kitchen_area,building_type_0,building_type_1,building_type_2,building_type_3,building_type_4,building_type_5,building_type_6,is_Moscow_0,is_Moscow_1,is_Saint_Peterburg_0,is_Saint_Peterburg_1
0,2021-08-17,5740535,8,17,1,42.9,9.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
1,2021-06-12,4750000,12,16,2,49.0,18.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
2,2021-07-05,5695000,6,9,3,67.3,8.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
3,2021-03-10,2650000,5,9,2,44.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
4,2021-06-07,950000,1,5,1,33.0,6.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
5,2021-09-14,6212745,4,5,1,38.0,16.3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
6,2021-05-13,5753000,9,9,2,59.0,-100.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
7,2021-01-06,6500000,4,6,3,98.6,14.2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
8,2021-12-21,2600000,7,9,2,35.7,16.1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
9,2021-03-15,1900000,4,9,2,48.5,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0


**0.5 Балл**. Поработаем с числовыми признаками:


1.   Добавьте в ваш датасет два признака. Первый - количество дней со дня первого наблюдения (разница между датами объявлений). Второй: возможно, для предсказания цены не так важен сам этаж, как отношение этажа квартиры к количеству этажей в доме - добавьте этот признак. После добавления новых признаков колонку date можно удалить.
2.   Числовые признаки могут иметь разные порядки. Давайте отнормируем числовые признаки с помощью StandardScaler https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html.



In [10]:
# Days elapsed since the earliest listing in the sample.
df['date'] = pd.to_datetime(df['date'])
df['days_after_checkpoint'] = (df['date'] - df['date'].min()).dt.days

# Relative floor position. A building with 0 floors is invalid data; map it to
# NaN so the division yields a missing value instead of +/-inf.
df['floor_ratio'] = df['level'] / df['levels'].replace(0, np.nan)

df = df.drop(columns=['date'])

In [11]:
# Preview the frame after the days_after_checkpoint / floor_ratio columns were
# added and the date column was removed.
display(df.head(30))

Unnamed: 0,price,level,levels,rooms,area,kitchen_area,building_type_0,building_type_1,building_type_2,building_type_3,building_type_4,building_type_5,building_type_6,is_Moscow_0,is_Moscow_1,is_Saint_Peterburg_0,is_Saint_Peterburg_1,days_after_checkpoint,floor_ratio
0,5740535,8,17,1,42.9,9.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,228,0.470588
1,4750000,12,16,2,49.0,18.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,162,0.75
2,5695000,6,9,3,67.3,8.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,185,0.666667
3,2650000,5,9,2,44.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,68,0.555556
4,950000,1,5,1,33.0,6.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,157,0.2
5,6212745,4,5,1,38.0,16.3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,256,0.8
6,5753000,9,9,2,59.0,-100.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,132,1.0
7,6500000,4,6,3,98.6,14.2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,5,0.666667
8,2600000,7,9,2,35.7,16.1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,354,0.777778
9,1900000,4,9,2,48.5,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,73,0.444444


Здесь возникала проблема.

Дело в том, что выскакивала такая ошибка:
ValueError: Input X contains infinity or a value too large for dtype('float64')

Видимо, где-то в датасете встречались значения inf, поскольку происходило деление на ноль в отношении этажа квартиры на количество этажей. Я меняю их на средние по столбцу значения

In [12]:
from sklearn.preprocessing import StandardScaler

num_columns = ['price', 'level', 'levels', 'area', 'kitchen_area', 'days_after_checkpoint', 'floor_ratio', 'rooms']

# Turn +/-inf into missing values, then impute every missing value with the
# mean of its column (computed after the infinities were removed).
df[num_columns] = (
    df[num_columns]
    .replace([np.inf, -np.inf], np.nan)
    .pipe(lambda frame: frame.fillna(frame.mean()))
)

# NOTE(review): the target column `price` is standardized together with the
# features, so every metric computed later is in standardized units, not rubles.
# NOTE(review): the scaler is fitted on the full data set before the train/test
# split, so test-set statistics leak into the training features.
scaler = StandardScaler()
df[num_columns] = scaler.fit_transform(df[num_columns])

display(df.head(30))

Unnamed: 0,price,level,levels,rooms,area,kitchen_area,building_type_0,building_type_1,building_type_2,building_type_3,building_type_4,building_type_5,building_type_6,is_Moscow_0,is_Moscow_1,is_Saint_Peterburg_0,is_Saint_Peterburg_1,days_after_checkpoint,floor_ratio
0,-0.020993,0.300362,0.72845,-0.611154,-0.370664,0.379208,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.404906,-0.355684
1,-0.036284,1.058101,0.589328,0.246486,-0.148374,0.639899,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,-0.249325,0.604034
2,-0.021696,-0.078508,-0.384523,1.104125,0.518497,0.333204,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,-0.021336,0.317802
3,-0.068701,-0.267943,-0.384523,0.246486,-0.330579,0.087848,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,-1.18111,-0.06384
4,-0.094944,-1.025683,-0.941009,-0.611154,-0.73143,0.271865,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,-0.298888,-1.285095
5,-0.013704,-0.457378,-0.941009,-0.611154,-0.549225,0.587761,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.682459,0.775773
6,-0.020801,0.489797,-0.384523,0.246486,0.216037,-2.979104,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,-0.546704,1.462729
7,-0.009269,-0.457378,-0.801887,1.104125,1.659102,0.523355,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,-1.805604,0.317802
8,-0.069473,0.110927,-0.384523,0.246486,-0.633039,0.581627,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.653895,0.699444
9,-0.080279,-0.457378,-0.384523,0.246486,-0.166594,0.087848,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,-1.131547,-0.445482


**2 Балла**. Реализуйте класс KNNRegressor, который должен делать регрессию методом k ближайших соседей.

In [13]:
import numpy as np
from sklearn.metrics import pairwise_distances

class KNNRegressor:
    """k-nearest-neighbours regressor based on a full distance matrix.

    A prediction is the unweighted mean of the targets of the ``n_neighbors``
    training samples closest to the query point.

    Args:
        n_neighbors: Number of neighbours averaged per prediction.
        metric: Metric name forwarded to ``pairwise_distances``.
    """

    def __init__(self, n_neighbors=5, metric='euclidean'):
        self.n_neighbors = n_neighbors
        self.metric = metric
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Memorize the training set.

        Inputs are converted to NumPy arrays so that positional fancy indexing
        in ``predict`` also works for lists and pandas objects.

        Args:
            X: Training features, one row per sample.
            y: Training targets, first dimension matching ``X``.

        Returns:
            self, to allow call chaining.

        Raises:
            ValueError: If ``X`` and ``y`` hold a different number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
            )
        self.X_train = X
        self.y_train = y
        return self

    def predict(self, X):
        """Predict targets for the rows of ``X``.

        Args:
            X: Query features, one row per sample.

        Returns:
            Array with the mean target of the nearest neighbours per query row.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNNRegressor must be fitted before calling predict().")

        # Row i holds the distances from query i to every training sample.
        distances = pairwise_distances(X, self.X_train, metric=self.metric)
        # Column indices of the closest training samples. If fewer than
        # n_neighbors training samples exist, the slice keeps all of them.
        neighbor_idxs = np.argsort(distances, axis=1)[:, :self.n_neighbors]
        neighbor_values = self.y_train[neighbor_idxs]
        return np.mean(neighbor_values, axis=1)


**3 Балла**. Реализуйте класс LinearRegression, поддерживающий обучение градиентными спусками SGD, Momentum, AdaGrad. Используйте градиент для оптимизации функции потерь MSE.

In [14]:
import numpy as np

class LinearRegression:
    """Linear regression trained by gradient descent on the MSE loss.

    Every iteration uses the gradient computed on the whole training set (no
    mini-batch sampling); ``optimization`` only selects the update rule that
    is applied to that gradient:

    * ``'SGD'``      - plain step ``w -= lr * grad``;
    * ``'Momentum'`` - ``v = decay_rate * v - lr * grad`` followed by ``w += v``;
    * ``'AdaGrad'``  - per-parameter step ``lr / (sqrt(sum(grad**2)) + epsilon)``.

    Args:
        learning_rate: Base step size.
        optimization: One of ``'SGD'``, ``'Momentum'``, ``'AdaGrad'``.
        epsilon: Term added to the AdaGrad denominator to avoid division by zero.
        decay_rate: Momentum coefficient (used by ``'Momentum'`` only).
        max_iter: Number of gradient steps.
    """

    _OPTIMIZERS = ('SGD', 'Momentum', 'AdaGrad')

    def __init__(self, learning_rate=0.01, optimization='SGD', epsilon=1e-8, decay_rate=0.9, max_iter=1000):
        self.learning_rate = learning_rate
        self.optimization = optimization
        self.epsilon = epsilon
        self.decay_rate = decay_rate
        self.max_iter = max_iter
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Fit weights and bias by ``max_iter`` gradient steps.

        Args:
            X: Training features of shape (n_samples, n_features).
            y: Training targets; flattened to shape (n_samples,).

        Returns:
            self, to allow call chaining.

        Raises:
            ValueError: If ``optimization`` is not supported, ``X`` is not 2-D,
                the data set is empty, or ``X`` and ``y`` differ in length.
        """
        # Validate up front so that a typo is reported even when max_iter == 0.
        if self.optimization not in self._OPTIMIZERS:
            raise ValueError("Метод оптимизации должен быть 'SGD', 'Momentum' или 'AdaGrad'.")

        X = np.asarray(X, dtype=np.float64)
        # Flatten y: a column vector would broadcast the errors to (n, n).
        y = np.asarray(y, dtype=np.float64).reshape(-1)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty data set")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}"
            )

        # float64 throughout: float32 weights would silently truncate every
        # float64 gradient update.
        self.weights = np.zeros(n_features, dtype=np.float64)
        self.bias = 0.0

        # Optimizer state: momentum velocity / AdaGrad squared-gradient sums.
        v_w = np.zeros(n_features, dtype=np.float64)
        v_b = 0.0
        cache_w = np.zeros(n_features, dtype=np.float64)
        cache_b = 0.0

        for _ in range(self.max_iter):
            y_pred = np.dot(X, self.weights) + self.bias
            errors = y_pred - y

            # Gradient of mean((X @ w + b - y) ** 2) with respect to w and b.
            grad_w = (2 / n_samples) * np.dot(X.T, errors)
            grad_b = (2 / n_samples) * np.sum(errors)

            if self.optimization == 'SGD':
                self.weights -= self.learning_rate * grad_w
                self.bias -= self.learning_rate * grad_b

            elif self.optimization == 'Momentum':
                v_w = self.decay_rate * v_w - self.learning_rate * grad_w
                v_b = self.decay_rate * v_b - self.learning_rate * grad_b
                self.weights += v_w
                self.bias += v_b

            else:  # 'AdaGrad'
                cache_w += grad_w ** 2
                cache_b += grad_b ** 2
                adjusted_lr_w = self.learning_rate / (np.sqrt(cache_w) + self.epsilon)
                adjusted_lr_b = self.learning_rate / (np.sqrt(cache_b) + self.epsilon)
                self.weights -= adjusted_lr_w * grad_w
                self.bias -= adjusted_lr_b * grad_b

        return self

    def predict(self, X):
        """Return ``X @ weights + bias``.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        if self.weights is None:
            raise RuntimeError("LinearRegression must be fitted before calling predict().")
        return np.dot(np.asarray(X, dtype=np.float64), self.weights) + self.bias


# Часть 2. Эксперименты с моделями машинного обучения.

**3 Балла**. Проведите эксперименты с написанными Вами методами машинного обучения. Выделите обучающую и тестовую выборки в отношении 0,8 и 0,2 соответственно. Измерьте ошибки MSE, MAE, RMSE. Используйте KNeighborsRegressor и LinearRegression из библиотеки sklearn и сравните качество Ваших решений и библиотечных.

# **Подготовка**

Импортируем необходимые для экспериментов модули

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor as SklearnKNNRegressor
from sklearn.linear_model import LinearRegression as SklearnLinearRegression

Разделим данные на признаки и целевую переменную. Поскольку мы предсказываем цену, целевая переменная - price.

In [16]:
# Feature matrix: every column except the target. Target vector: price.
X = df.drop(columns='price').to_numpy()
y = df['price'].to_numpy()

Разделим данные на обучающую и тестовую выборку в соотношении:
0,8 - обучающая
0,2 - тестовая

In [17]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# **Инициализация и обучение моделей**

**Собственные реализации KNNRegressor и LinearRegression**

In [18]:
# Custom KNN: mean of the 5 nearest training targets, Euclidean distance.
my_knn = KNNRegressor(metric='euclidean', n_neighbors=5)
my_knn.fit(X_train, y_train)
my_knn_predictions = my_knn.predict(X_test)
print(my_knn_predictions)

[-0.07327061 -0.07172693  0.12933076 ...  0.01720054 -0.00963011
 -0.07542544]


In [19]:
# Custom linear regression with the plain gradient step ('SGD' option).
my_lr_sgd = LinearRegression(optimization='SGD', learning_rate=0.01, max_iter=1000)
my_lr_sgd.fit(X_train, y_train)
my_lr_sgd_predictions = my_lr_sgd.predict(X_test)
print("Предсказания My LinearRegression (SGD):", my_lr_sgd_predictions, sep="\n")

Предсказания My LinearRegression (SGD):
[-0.11948753 -0.08484121  0.12949872 ... -0.04753402 -0.0423809
 -0.03313481]


In [20]:
# Custom linear regression with the momentum update (coefficient 0.9).
my_lr_momentum = LinearRegression(
    optimization='Momentum',
    learning_rate=0.01,
    decay_rate=0.9,
    max_iter=1000,
)
my_lr_momentum.fit(X_train, y_train)
my_lr_momentum_predictions = my_lr_momentum.predict(X_test)
print(my_lr_momentum_predictions)


[-0.12122638 -0.08900966  0.14588093 ... -0.0511209  -0.03346787
 -0.04046715]


In [21]:
# Custom linear regression with the AdaGrad per-parameter step size.
my_lr_adagrad = LinearRegression(
    optimization='AdaGrad',
    learning_rate=0.01,
    epsilon=1e-8,
    max_iter=1000,
)
my_lr_adagrad.fit(X_train, y_train)
my_lr_adagrad_predictions = my_lr_adagrad.predict(X_test)
print(my_lr_adagrad_predictions)


[-0.12120425 -0.0889885   0.14589385 ... -0.05109838 -0.03349627
 -0.04048653]


**Библиотечные реализации**

In [22]:
# Library baseline configured like the custom KNN (5 neighbours, Euclidean).
sklearn_knn = SklearnKNNRegressor(metric='euclidean', n_neighbors=5)
sklearn_knn.fit(X_train, y_train)
sklearn_knn_predictions = sklearn_knn.predict(X_test)
print(sklearn_knn_predictions)

[-0.07327061 -0.07172693  0.12933076 ...  0.01720054 -0.00963011
 -0.07542544]


In [23]:
# Library baseline: sklearn's LinearRegression with default settings.
sklearn_lr = SklearnLinearRegression()
sklearn_lr.fit(X_train, y_train)
sklearn_lr_predictions = sklearn_lr.predict(X_test)
print(sklearn_lr_predictions)

[-0.12120425 -0.08898815  0.14589344 ... -0.05109869 -0.0334965
 -0.04048613]


# **Метрики для оценки**

In [24]:
def evaluate_model(y_true, y_pred, model_name):
    """Print MSE, MAE and RMSE of ``y_pred`` against ``y_true``.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values.
        model_name: Label printed in front of the metrics.

    Returns:
        None; the metrics are only printed, rounded to four decimals.
    """
    mse = mean_squared_error(y_true, y_pred)
    # RMSE is derived from the MSE computed above rather than recomputed.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    print(f"{model_name} - MSE: {mse:.4f}, MAE: {mae:.4f}, RMSE: {rmse:.4f}")


# **Оценка моделей**

In [25]:
# Report the metrics of every model, grouped by model family.
print("Оценка моделей KNNRegressor:")
for _preds, _label in (
    (my_knn_predictions, "My KNNRegressor"),
    (sklearn_knn_predictions, "Sklearn KNeighborsRegressor"),
):
    evaluate_model(y_test, _preds, _label)

print("\nОценка моделей LinearRegression:")
for _preds, _label in (
    (my_lr_sgd_predictions, "My LinearRegression (SGD)"),
    (my_lr_momentum_predictions, "My LinearRegression (Momentum)"),
    (my_lr_adagrad_predictions, "My LinearRegression (AdaGrad)"),
    (sklearn_lr_predictions, "Sklearn LinearRegression"),
):
    evaluate_model(y_test, _preds, _label)

Оценка моделей KNNRegressor:
My KNNRegressor - MSE: 0.9992, MAE: 0.0593, RMSE: 0.9996
Sklearn KNeighborsRegressor - MSE: 0.9992, MAE: 0.0593, RMSE: 0.9996

Оценка моделей LinearRegression:
My LinearRegression (SGD) - MSE: 0.8853, MAE: 0.0768, RMSE: 0.9409
My LinearRegression (Momentum) - MSE: 0.8851, MAE: 0.0776, RMSE: 0.9408
My LinearRegression (AdaGrad) - MSE: 0.8851, MAE: 0.0776, RMSE: 0.9408
Sklearn LinearRegression - MSE: 0.8851, MAE: 0.0776, RMSE: 0.9408
