In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,mean_absolute_error, r2_score
import numpy as np

# Load the preprocessed dataset and discard the 'Weight' column in one chain.
data = pd.read_csv('preprocess.csv').drop(columns=['Weight'])

# 'Price' is the regression target; every remaining column is used as a feature.
y = data['Price']
X = data.drop(columns=['Price'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random-forest settings gathered in one mapping so they can be scanned at a glance.
rf_params = {
    'n_estimators': 100,
    'max_depth': 20,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'random_state': 42,
}
rf_model = RandomForestRegressor(**rf_params)
rf_model.fit(X_train, y_train)

# Predict the held-out rows, compute every metric, then report them together.
y_pred = rf_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R2): {r2}")

def mean_squared_percentage_error(y_true, y_pred):
    """Return the mean squared percentage error (MSPE), scaled by 100.

    Computes ``mean(((y_true - y_pred) / y_true) ** 2) * 100``.

    Both inputs are converted to float NumPy arrays before any arithmetic, so
    the comparison is strictly positional: plain lists/tuples are accepted,
    and two pandas objects carrying different indexes are not re-aligned by
    label (which would silently inject NaNs into the result).

    Parameters
    ----------
    y_true : array-like
        Observed target values. Used as the divisor, so a zero entry makes
        the result ``inf`` or ``nan``.
    y_pred : array-like
        Predicted values, element-for-element matching ``y_true``.

    Returns
    -------
    float
        The MSPE multiplied by 100.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    relative_error = (actual - predicted) / actual
    return np.mean(np.square(relative_error)) * 100

# Squared percentage error of the hold-out predictions, reported on its own line.
mspe = mean_squared_percentage_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Percentage Error (MSPE): {mspe}")

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE) as a percentage.

    Computes ``mean(|y_true - y_pred| / |y_true|) * 100``.

    Both inputs are converted to float NumPy arrays before any arithmetic, so
    the comparison is strictly positional: plain lists/tuples are accepted,
    and two pandas objects carrying different indexes are not re-aligned by
    label (which would silently inject NaNs into the result).

    Parameters
    ----------
    y_true : array-like
        Observed target values. Used as the divisor, so a zero entry makes
        the result ``inf`` or ``nan``.
    y_pred : array-like
        Predicted values, element-for-element matching ``y_true``.

    Returns
    -------
    float
        The MAPE expressed in percent.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error)) * 100

# Absolute percentage error of the hold-out predictions, reported with a '%' suffix.
mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Absolute Percentage Error (MAPE): {mape}%")

# Keep the frame as the final expression so the notebook renders it as the cell output.
data


Root Mean Squared Error (RMSE): 341.07435590628813
Mean Absolute Error (MAE): 194.32078569266923
R-squared (R2): 0.7756378092382846
Mean Squared Percentage Error (MSPE): 16.966423564611155
Mean Absolute Percentage Error (MAPE): 21.461919453649266%


Unnamed: 0,CPU Mark,GPU Mark,Monitor,RAM,Storage Amount,Encoded_Brand,Price,Encoded_OS,Resolution
0,26454,14691,15.6,16.0,2048.0,23,1529.00,6,2073600
1,34306,19560,15.6,16.0,4000.0,11,2308.81,6,1049088
2,3642,4253,14.0,8.0,256.0,11,614.99,5,2073600
3,19333,2690,16.0,24.0,1024.0,11,1079.00,5,2304000
4,28003,8274,15.6,32.0,1024.0,11,1907.00,5,2304000
...,...,...,...,...,...,...,...,...,...
3963,15287,2690,15.6,48.0,2048.0,16,1129.99,6,2073600
3964,1556,1476,14.0,4.0,128.0,16,197.00,6,1049088
3965,13554,4253,14.0,16.0,512.0,1,1099.99,0,4096000
3966,26454,7320,16.0,16.0,512.0,21,1289.00,5,2304000


In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values per hyperparameter. The search is exhaustive, so this grid
# means 3 * 3 * 3 * 3 = 81 settings, each fitted once per cross-validation fold.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# n_jobs=-1 spreads the 81 * 5 candidate fits over all available CPU cores.
# The forests are seeded (random_state=42), so parallelism should only change
# wall-clock time, not which parameters are selected.
grid_search = GridSearchCV(
    RandomForestRegressor(random_state=42),
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# # Extract results
# results = grid_search.cv_results_

# # Plot the scores of each iteration (needs `import matplotlib.pyplot as plt`)
# plt.figure(figsize=(10, 6))
# plt.plot(results['mean_test_score'], marker='o')
# plt.title('Grid Search CV Results')
# plt.xlabel('Iteration')
# plt.ylabel('Mean Test Score (Negative MSE)')
# plt.show()

best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")

# Score the refitted best estimator on the same hold-out rows as the baseline.
tuned_y_pred = grid_search.best_estimator_.predict(X_test)
tuned_mse = mean_squared_error(y_test, tuned_y_pred)
# RMSE is reported as well so the tuned model can be compared directly with
# the baseline cell, which prints RMSE rather than MSE.
tuned_rmse = np.sqrt(tuned_mse)
tuned_r2 = r2_score(y_test, tuned_y_pred)

print(f"Tuned Model Mean Squared Error: {tuned_mse}")
print(f"Tuned Model Root Mean Squared Error (RMSE): {tuned_rmse}")
print(f"Tuned Model R-squared (R2): {tuned_r2}")


In [7]:
import pandas as pd

# Rows to score with the already-fitted `rf_model`.
# NOTE(review): this line was originally annotated "first 5 in dataset", but
# the saved output of this cell lists 10 rows -- confirm what
# new_preprocess.csv actually holds. If the rows were taken from
# preprocess.csv, most of them were presumably part of the training split, so
# the comparison below is not an unbiased check of model quality.
new_input_data = pd.read_csv('new_preprocess.csv')

# Drop the 'Unnamed: 0' column (presumably a saved index) without mutating in
# place; errors='ignore' keeps the cell working if the CSV is re-exported
# without that column.
new_input_data = new_input_data.drop(columns=['Unnamed: 0'], errors='ignore')

# Take the feature names and their order from the frame the model was fitted
# on, so this cell cannot drift out of sync with the training cell.
features = list(X_train.columns)

X_new_input = new_input_data[features]
predictions_new_input = rf_model.predict(X_new_input)

new_input_data['Predicted_Price'] = predictions_new_input

# Side-by-side view of predicted and recorded prices.
new_input_data[['Predicted_Price', 'Price']]


Unnamed: 0,Predicted_Price,Price
0,1808.8515,1529.0
1,2264.941068,2308.81
2,637.524983,614.99
3,1119.259492,1079.0
4,1972.65321,1907.0
5,360.4359,279.99
6,2100.355533,1499.0
7,1854.3349,1974.99
8,1035.8095,1399.99
9,1784.5051,1715.6
