In [83]:
# Importowanie niezbędnych bibliotek
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from math import sqrt
from sklearn.preprocessing import StandardScaler

In [84]:
# Wczytanie danych
cars = pd.read_csv("auto_mpg.csv", sep=";", na_values=' ')
cars.dropna(how="any", inplace=True)

In [85]:
# Przetwarzanie danych
cars.displacement = cars.displacement.str.replace(',', '.').astype(float)
cars.acceleration = cars.acceleration.str.replace(',', '.').astype(float)
cars.mpg = cars.mpg.str.replace(',', '.').astype(float)
cars.set_index('car_name', inplace=True)

In [86]:
# Ustawienie ziarna losowości
index_values = cars.index.to_numpy()
seed_value = sum([ord(x) for x in index_values[0]])

In [87]:
# Podział danych na zbiór treningowy i testowy
X = cars.drop('mpg', axis=1)
y = cars.mpg
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed_value)

In [88]:
# Klasyfikator CART
cart_params = {'max_depth': range(1, 10),
               'min_samples_split': [15, 20, 30, 40, 50],
               'min_samples_leaf': [5, 10, 15],
               'max_leaf_nodes': [250]}
cart_model = GridSearchCV(DecisionTreeRegressor(random_state=seed_value), cart_params, n_jobs=-1)
cart_model.fit(X_train, y_train)

In [89]:
# Klasyfikator SGD
sgd_params = {
    'alpha': [0.0001, 0.001, 0.01],
    'max_iter': [1000, 3000, 5000],
    'tol': [0.001, 0.01, 0.025],
    'penalty': ['l2', 'l1', 'elasticnet'],
    'l1_ratio': [0.15, 0.5, 0.85],
    'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'],
    'eta0': [0.001, 0.005, 0.01]
}

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

sgd_model = GridSearchCV(SGDRegressor(random_state=seed_value), sgd_params, n_jobs=-1, cv=5)
sgd_model.fit(X_train_scaled, y_train)

In [90]:
# Wyniki dla klasyfikatora CART
cart_best_model = cart_model.best_estimator_
cart_y_train_pred = cart_best_model.predict(X_train)
cart_y_test_pred = cart_best_model.predict(X_test)

In [91]:
# Wyniki dla klasyfikatora SGD
sgd_best_model = sgd_model.best_estimator_
sgd_y_train_pred = sgd_best_model.predict(X_train_scaled)
sgd_y_test_pred = sgd_best_model.predict(X_test_scaled)

In [92]:
# Ewaluacja modeli
def evaluate_model(y_true, y_pred, dataset_name):
    print(f"{dataset_name}:")
    print("RMSE: ", round(sqrt(mean_squared_error(y_true, y_pred)), 4))
    print("MAE: ", round(mean_absolute_error(y_true, y_pred), 4))
    print("MAPE: ", round(100 * mean_absolute_error(y_true, y_pred) / y_true.mean(), 4))
    print("R^2: ", round(r2_score(y_true, y_pred), 4))

print("Ewaluacja modelu CART:")
evaluate_model(y_train, cart_y_train_pred, 'Zbiór uczący - CART')
evaluate_model(y_test, cart_y_test_pred, 'Zbiór testowy - CART')

print("\nEwaluacja modelu SGD:")
evaluate_model(y_train, sgd_y_train_pred, 'Zbiór uczący - SGD')
evaluate_model(y_test, sgd_y_test_pred, 'Zbiór testowy - SGD')

Ewaluacja modelu CART:
Zbiór uczący - CART:
RMSE:  2.2666
MAE:  1.6571
MAPE:  6.9964
R^2:  0.9155
Zbiór testowy - CART:
RMSE:  3.5461
MAE:  2.32
MAPE:  10.2899
R^2:  0.7618

Ewaluacja modelu SGD:
Zbiór uczący - SGD:
RMSE:  3.2653
MAE:  2.5092
MAPE:  10.5943
R^2:  0.8246
Zbiór testowy - SGD:
RMSE:  3.2856
MAE:  2.5418
MAPE:  11.2734
R^2:  0.7955
