In [None]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from LinearRegressionGD import LinearRegressorGD
from MLPRegressor import MLPRegressor

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler

In [2]:
diamonds_df = pd.read_csv('diamonds.csv').drop('Unnamed: 0', axis=1)
diamonds_df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [3]:
features = ['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'x', 'y', 'z']
target = ['price']

cut_transform = {'Fair': 0, 'Good': 1, 'Very Good': 2, 'Premium': 3, 'Ideal': 4}
clarity_transform = {'I1': 0, 'SI2': 1, 'SI1': 2, 'VS2': 3, 'VS1': 4, 'VVS2': 5, 'VVS1': 6, 'IF': 7}
color_transorm = {'D': 0, 'E': 1, 'F': 2, 'G': 3, 'H': 4, 'I': 5, 'J': 6}

diamonds_df['cut'] = diamonds_df['cut'].apply(lambda x: cut_transform.get(x))
diamonds_df['color'] = diamonds_df['color'].apply(lambda x: color_transorm.get(x))
diamonds_df['clarity'] = diamonds_df['clarity'].apply(lambda x: clarity_transform.get(x))

X = diamonds_df[features].copy().values
y = diamonds_df[target].values

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=47, test_size=0.3)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

<b>Сравним метрики написанной линейной регрессии с аналогом из sklearn</b>

In [4]:
my_linear_model = LinearRegressorGD(learning_rate=0.25).fit(X_train, y_train)
sklearn_linear_model = LinearRegression().fit(X_train, y_train)

my_y_pred = my_linear_model.predict(X_test)
sklearn_y_pred = sklearn_linear_model.predict(X_test)

my_r2_score = r2_score(my_y_pred, y_test)
sklearn_r2_score = r2_score(sklearn_y_pred, y_test)

print(f"Sklearn R2 score: {sklearn_r2_score}")
print(f"My R2 score: {my_r2_score}")

Sklearn R2 score: 0.8977468673125746
My R2 score: 0.8477095992276711


<b>Теперь обучим MLPRegressor</b>

In [None]:
model = MLPRegressor(hidden_layer_sizes=(40,), learning_rate=1.3, n_iter=2000).fit(X_train, y_train)

Epoch 0, loss: 31098404.58275074
Epoch 100, loss: 14952621.029348036
Epoch 200, loss: 12228113.542238735
Epoch 300, loss: 10262919.287506819
Epoch 400, loss: 8701792.468147153
Epoch 500, loss: 7504569.729486556
Epoch 600, loss: 6589751.999320841
Epoch 700, loss: 5889228.492780946
Epoch 800, loss: 5309066.663640527
Epoch 900, loss: 4838822.428011296
Epoch 1000, loss: 4416352.920133644
Epoch 1100, loss: 4068436.8021082296
Epoch 1200, loss: 3791086.666616011
Epoch 1300, loss: 3538825.4429052556
Epoch 1400, loss: 3364778.228285344
Epoch 1500, loss: 3257736.9323890237
Epoch 1600, loss: 3053746.806735704
Epoch 1700, loss: 2956519.090160222
Epoch 1800, loss: 2947186.5786858248
Epoch 1900, loss: 2853036.0330827725


In [13]:
my_y_pred = model.predict(X_test)

my_r2_score = r2_score(my_y_pred, y_test)
print(f"My R2 score: {my_r2_score}")

My R2 score: 0.7817169740408562
