<a href="https://colab.research.google.com/github/UznetDev/Data-science-home-work/blob/main/07_Avg_2024_home_work.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import warnings
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.ensemble import StackingRegressor
from sklearn.metrics import mean_squared_error
warnings.filterwarnings('ignore')

In [None]:
# Load car_prices_clean.csv into a DataFrame. The path is relative, so the file
# must be present in the kernel's current working directory.
data = pd.read_csv('car_prices_clean.csv')

In [None]:
# Feature columns fed to the model, in the order they appear in the design matrix.
numeric_columns = [
    # Columns without the "_rank" suffix.
    'model_year',
    'milage',
    'hp',
    'litr',
    'Cylinder',
    # Columns carrying the "_rank" suffix.
    'brand_rank',
    'model_rank',
    'fuel_type_rank',
    'engine_rank',
    'transmission_rank',
    'ext_col_rank',
    'int_col_rank',
    'accident_rank',
    'motor_rank',
    'fuel_rank',
]

In [None]:
# Regression target: the 'price' column.
y = data['price']
# Design matrix: only the columns listed in numeric_columns, in that order.
X = data[numeric_columns]

In [None]:
# Standardize every feature column of X with StandardScaler.
# NOTE(review): the scaler is fit on all rows of X, i.e. before any train/test
# split, so its statistics also reflect rows that end up in a held-out set.
# Fitting on training rows only avoids that leakage.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Split the *unscaled* features first. Splitting X_scaled (which was fit on
# every row) would let test-set means/variances leak into the training features
# and make the reported test metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same transform to
# the held-out rows. X_train / X_test are NumPy arrays, as before.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Recursive feature elimination with 5-fold CV: drop one feature per step and
# keep the subset with the best (least negative) mean squared error.
model = LinearRegression()
rfecv = RFECV(estimator=model, step=1, cv=5, scoring='neg_mean_squared_error')
rfecv.fit(X_train, y_train)

In [None]:
# Keep only the columns RFECV retained, applying the same mask to both splits.
X_train_selected = rfecv.transform(X_train)
X_test_selected = rfecv.transform(X_test)

# Column names matching the retained features (rfecv.support_ is indexed into
# X.columns, so this relies on X's column order).
selected_features = X.columns[rfecv.support_]

In [None]:
# Degree-2 polynomial expansion of the selected features, without the constant
# bias column. The expansion is fit on the training split and reused for test.
poly = PolynomialFeatures(2, include_bias=False)
X_poly_train = poly.fit_transform(X_train_selected)
X_poly_test = poly.transform(X_test_selected)

In [None]:
# (name, estimator) pairs used as the base learners of the stacking model.
# All four are created with default hyperparameters here; the names double as
# the prefixes of the search keys used later (e.g. 'ridge__alpha').
base_models = [
    ('lr', LinearRegression()),
    ('ridge', Ridge()),
    ('lasso', Lasso()),
    ('elasticnet', ElasticNet())
]

In [None]:
# Stack the base learners; a plain LinearRegression is the final estimator
# that combines their predictions.
stacking_regressor = StackingRegressor(estimators=base_models,
                                       final_estimator=LinearRegression())

In [None]:
# Candidate values sampled by the randomized search. Keys follow the
# '<base model name>__<parameter>' convention of the stacking estimator.
param_dist = dict(
    # Five log-spaced values from 10**0 to 10**10.
    ridge__alpha=np.logspace(0, 10, num=5),
    # Five log-spaced values from 10**0 to 10**1.
    lasso__alpha=np.logspace(0, 1, num=5),
    # Five log-spaced values from 10**0 to 10**10.
    elasticnet__alpha=np.logspace(0, 10, num=5),
    # Five evenly spaced values from 0 to 1 inclusive.
    elasticnet__l1_ratio=np.linspace(0, 1, num=5),
)

In [None]:
# Coarse randomized search over the stacked model: 50 sampled parameter
# combinations, each scored with 5-fold CV on negative MSE, using all cores.
# random_state pins which combinations are sampled.
random_search = RandomizedSearchCV(
    stacking_regressor,
    param_dist,
    n_iter=50,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
    random_state=42,
)

# Fit on the polynomial-expanded training features.
random_search.fit(X_poly_train, y_train)

In [None]:
# Refine around the best point found by the randomized search.
best_params = random_search.best_params_


def _scaled_candidates(center, factors, low=None, high=None):
    """Return sorted, de-duplicated candidates `center * factor`.

    Values are optionally clipped to [low, high] before de-duplication, so a
    bounded parameter never receives an out-of-range candidate, and a centre
    that sits on a bound (or at 0) does not yield repeated grid points.
    """
    values = np.asarray(factors, dtype=float) * center
    if low is not None or high is not None:
        values = np.clip(values, low, high)
    return np.unique(values).tolist()


param_grid = {
    # Regularisation strengths: half, same, and 1.5x the best value.
    'ridge__alpha': _scaled_candidates(best_params['ridge__alpha'], (0.5, 1.0, 1.5)),
    'lasso__alpha': _scaled_candidates(best_params['lasso__alpha'], (0.5, 1.0, 1.5)),
    'elasticnet__alpha': _scaled_candidates(best_params['elasticnet__alpha'], (0.5, 1.0, 1.5)),
    # l1_ratio must stay within [0, 1]. Without clipping, a best value of 1.0
    # would produce 1.1 (rejected by ElasticNet), and a best value of 0 would
    # produce three identical candidates.
    'elasticnet__l1_ratio': _scaled_candidates(
        best_params['elasticnet__l1_ratio'], (0.9, 1.0, 1.1), low=0.0, high=1.0
    ),
}

# Exhaustive search over the refined grid, with the same CV and scoring as the
# randomized search. The best estimator from that search is the template.
grid_search = GridSearchCV(estimator=random_search.best_estimator_,
                           param_grid=param_grid,
                           cv=5,
                           scoring='neg_mean_squared_error',
                           n_jobs=-1)

In [None]:
# Run the refinement grid search on the polynomial-expanded training features.
grid_search.fit(X_poly_train, y_train)

In [None]:
# Evaluate the tuned stacking model on the held-out test split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_poly_test)

# Root mean squared error, in the same units as the target.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# score(X, y) takes the feature matrix and the true targets and returns R^2.
# Passing predictions as the first argument (as before) does not compute R^2
# and fails on the 1-D input.
r2 = best_model.score(X_poly_test, y_test)

# Mean absolute percentage error, in percent.
# NOTE(review): undefined if y_test contains zeros — confirm targets are > 0.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100

print(f'MAPE: {mape}')
print(f'R2 Score: {r2}')
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Best RMSE on Test Data: {rmse}')