In [1]:
from sklearn.model_selection import learning_curve, RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_classif, chi2
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression, Lasso
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_excel("machineLearning_excel.xlsx")

# Assuming 'Price' is the target variable
X = df.drop(['Price'], axis=1)
y = df['Price']

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Print the shapes of the datasets
print('x_train:', x_train.shape)
print('x_test:', x_test.shape)
print('y_train:', y_train.shape)
print('y_test:', y_test.shape)


x_train: (738, 5)
x_test: (185, 5)
y_train: (738,)
y_test: (185,)


In [3]:
# Lineer Regresyon Train
lr = LinearRegression()
lr.fit(x_train,y_train)
# 
y_predict = lr.predict(x_test) # üretilen modeli (lr) test verisi ile deneyelim
y_predict  #tahmin ettiği değerleri görelim

ValueError: could not convert string to float: 'Mercedes-Benz of Barrington'

In [None]:
# metrikleri görelim
print('mean absolute error: ',mean_absolute_error(y_test, y_predict))
print(' root mean squared error: ',np.sqrt(mean_squared_error(y_test,y_predict)))
print('Coefficient of determination R^2: ',r2_score(y_test,y_predict))

In [None]:
fig= plt.figure(figsize=(12,6))
result = pd.DataFrame({'Predicted':y_predict,'Actual':y_test})
result = result.reset_index()
result = result.drop(['index'],axis=1)
plt.plot(result[:100])
plt.legend(['Actual','Predicted'])
plt.title('Actual vs Predicted')
plt.show()

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Decision Tree Regression
dt = DecisionTreeRegressor()
dt.fit(x_train, y_train)
y_predict_dt = dt.predict(x_test)

# Print metrics
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_predict_dt))
print('Root Mean Squared Error: ', np.sqrt(mean_squared_error(y_test, y_predict_dt)))
print('Coefficient of determination R^2: ', r2_score(y_test, y_predict_dt))

# Plot Actual vs Predicted values
fig = plt.figure(figsize=(12, 6))
result = pd.DataFrame({'Predicted': y_predict_dt, 'Actual': y_test})
result = result.reset_index()
result = result.drop(['index'], axis=1)
plt.plot(result[:100])
plt.legend(['Actual', 'Predicted'])
plt.title('Actual vs Predicted')
plt.show()

In [None]:
# Lasso regressyon için aynı uygulamayı yapalım 
lasso = Lasso(alpha=0.001)
lasso.fit(x_train,y_train)
y_predict_lasso = lasso.predict(x_test) 
print('mean absolute error: ',mean_absolute_error(y_test, y_predict_lasso))
print(' root mean squared error: ',np.sqrt(mean_squared_error(y_test,y_predict_lasso)))
print('Coefficient of determination R^2: ',r2_score(y_test,y_predict_lasso))

# Lasso İçin Tahmin Grafiği Çizelim
fig= plt.figure(figsize=(12,6))
result = pd.DataFrame({'Predicted':y_predict_lasso,'Actual':y_test})
result = result.reset_index()
result = result.drop(['index'],axis=1)
plt.plot(result[:100])
plt.legend(['Actual','Predicted'])
plt.title('Actual vs Predicted')
plt.show()

In [None]:
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor()
rf.fit(x_train,y_train)
y_predict_rf = rf.predict(x_test)
print('mean absolute error: ',mean_absolute_error(y_test, y_predict_rf))
print('root mean squared error: ',np.sqrt(mean_squared_error(y_test,y_predict_rf)))
print('Coefficient of determination R^2: ',r2_score(y_test,y_predict_rf))

fig= plt.figure(figsize=(12,6))
result = pd.DataFrame({'Predicted':y_predict_rf,'Actual':y_test})
result = result.reset_index()
result = result.drop(['index'],axis=1)
plt.plot(result[:100])
plt.legend(['Actual','Predicted'])
plt.title('Actual vs Predicted')
plt.show()

In [None]:
final_RMSE = pd.DataFrame( data  = [[61399.61, 61399.61, 62998.31, 45344.87],
                                    [0.604,0.604,0.583,0.784]],
                          columns  = ['Linear Regression','Lasso','Decision Tree Regressor','Random Forest Regressor'],
                         index =['RMSE','R2'])


final_RMSE = final_RMSE.T

cm = sns.light_palette('green', as_cmap=True)

final_RMSE = final_RMSE.style.background_gradient(cmap=cm)
final_RMSE

In [None]:
from sklearn.model_selection import GridSearchCV
# tüm parametreler için değil sadece iki temel parametre için arama yapıyoruz!!!
params_grid = {
    'n_estimators':[10,20,50],
    'max_leaf_nodes':list(range(0,5))}


grid_search = GridSearchCV(RandomForestRegressor(min_samples_split=2,bootstrap=False,random_state=42), params_grid, verbose=1, cv=3)

grid_search.fit(x_train, y_train)

In [None]:
grid_search.best_params_    # en uygun parametreleri yazdır 

In [4]:
# bulduğumuz parametrelerle test edelim
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(n_estimators=10,max_leaf_nodes=4)
rf.fit(x_train,y_train)
y_predict_rf = rf.predict(x_test)
print('mean absolute error: ',mean_absolute_error(y_test, y_predict_rf))
print('root mean squared error: ',np.sqrt(mean_squared_error(y_test,y_predict_rf)))
print('Coefficient of determination R^2: ',r2_score(y_test,y_predict_rf))

fig= plt.figure(figsize=(12,6))
result = pd.DataFrame({'Predicted':y_predict_rf,'Actual':y_test})
result = result.reset_index()
result = result.drop(['index'],axis=1)
plt.plot(result[:100])
plt.legend(['Actual','Predicted'])
plt.title('Actual vs Predicted')
plt.show()

ValueError: could not convert string to float: 'Mercedes-Benz of Barrington'