In [19]:

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,mean_absolute_error, r2_score
import numpy as np

# Load the dataset used for training and evaluation.
data = pd.read_csv('missing_value_free.csv')

# Data preprocessing
# Keep only the columns used as model inputs plus the target. Dropped here:
#   - 'Unnamed: 0' (presumably an index column written by an earlier to_csv -- confirm)
#   - the descriptive columns 'Brand', 'CPU Name', 'GPU Name', 'OS', 'Storage Type'
#   - 'Weight', which is excluded from the feature set in this experiment
data = data.drop(columns=['Unnamed: 0','Brand', 'CPU Name', 'GPU Name', 'OS','Storage Type','Weight'])

# Splitting into features and target variable
X = data.drop(columns=['Price'])
y = data['Price']

# Splitting the dataset into training and testing sets (80% train, 20% test).
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initializing and training the Random Forest model (default hyper-parameters,
# fixed seed for reproducibility).
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)

# Model evaluation on the held-out test split
y_pred = rf_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse}")

# Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error (MAE): {mae}")

# R-squared (R2)
r2 = r2_score(y_test, y_pred)
print(f"R-squared (R2): {r2}")

# Adjusted R-squared (for demonstration purposes; ensure this is applicable for your dataset)
# n = number of test samples, p = number of predictors
n = X_test.shape[0]
p = X_test.shape[1]
# The correction divides by the residual degrees of freedom (n - p - 1). With a
# very small test split this is zero or negative and the statistic is undefined,
# so report NaN instead of raising or printing a meaningless value.
residual_dof = n - p - 1
if residual_dof > 0:
    adj_r2 = 1 - ((1 - r2) * (n - 1)) / residual_dof
else:
    adj_r2 = float('nan')
print(f"Adjusted R-squared: {adj_r2}")

# Show the preprocessed frame (pandas truncates the display to the first/last rows).
data


Root Mean Squared Error (RMSE): 336.55043542631813
Mean Absolute Error (MAE): 192.78779245811293
R-squared (R2): 0.7979899510241674
Adjusted R-squared: 0.7955221144747244


Unnamed: 0,CPU Mark,GPU Mark,Monitor,Width,Height,RAM,Storage Amount,Price
0,26454.0,7320.0,15.6,1920.0,1080.0,16384.0,2097152.0,1529.00
1,19333.0,1449.0,16.0,1920.0,1200.0,24576.0,1048576.0,1079.00
2,16550.0,8274.0,15.6,1920.0,1200.0,32768.0,1048576.0,1907.00
3,55651.0,26876.0,17.3,2560.0,1400.0,16384.0,1048576.0,1959.99
4,24887.0,22459.0,16.0,2560.0,1600.0,16384.0,1048576.0,1499.99
...,...,...,...,...,...,...,...,...
2897,387.0,1476.0,15.6,1366.0,768.0,8192.0,262144.0,299.00
2898,15287.0,1449.0,15.6,1920.0,1080.0,49152.0,2097152.0,1129.99
2899,1556.0,1476.0,14.0,1366.0,768.0,4096.0,1024000.0,197.00
2900,10329.0,7320.0,16.0,1920.0,1200.0,16384.0,524288.0,1289.00


In [20]:
import pandas as pd

# Load the new input data from 'new.csv'
new_input_data = pd.read_csv('new.csv')

# NOTE(review): the rows printed from new.csv look identical to rows shown in
# the training frame (same specs and Price), so the predictions below may be
# optimistic -- confirm this file is genuinely unseen data.

# Display the loaded data
print("Loaded New Input Data:")
print(new_input_data)

# Drop the leftover index column. Reassigning instead of using inplace=True
# avoids mutating the frame that was just printed.
new_input_data = new_input_data.drop(columns=['Unnamed: 0'])

# Selecting features used in the model for prediction.
# The list is taken from the training matrix (X_train, built in the training
# cell alongside rf_model) rather than re-typed by hand, so the columns and
# their order always match what the model was fitted on.
features = list(X_train.columns)

# Fail early with a readable message if the new file lacks a required column.
missing_features = [col for col in features if col not in new_input_data.columns]
if missing_features:
    raise KeyError(
        f"new.csv is missing feature columns required by the model: {missing_features}"
    )

# Use the trained model (rf_model from the training cell) to predict prices
# for the new input data
X_new_input = new_input_data[features]
predictions_new_input = rf_model.predict(X_new_input)

# Add the predicted prices to the new input data DataFrame
new_input_data['Predicted_Price'] = predictions_new_input

# Display the new input data with predicted prices
new_input_data


Loaded New Input Data:
   Unnamed: 0 Brand               CPU Name  CPU Mark  \
0           0   MSI   Intel Core i7 13620H   26454.0   
1           3  Dell    Intel Core i7 1360P   19333.0   
2           5  Asus     AMD Ryzen 9 7945HX   55651.0   
3           7    HP    Intel Core i5 1335U   16980.0   
4          10   MSI  Intel Core i9 13950HX   44905.0   
5          11  Acer      AMD Ryzen 3 3250U    3855.0   

                       GPU Name  GPU Mark  Monitor   Width  Height      RAM  \
0              GeForce RTX 2050    7320.0     15.6  1920.0  1080.0  16384.0   
1  Intel Iris Pro Graphics 6200    1449.0     16.0  1920.0  1200.0  24576.0   
2              GeForce RTX 4070   26876.0     17.3  2560.0  1400.0  16384.0   
3                 Intel Iris Xe    2690.0     15.6  1920.0  1080.0   8192.0   
4   GeForce RTX 4060 Laptop GPU   17984.0     16.0  2560.0  1600.0  16384.0   
5          Radeon Ryzen 3 3250U     806.0     14.0  1366.0   768.0   8192.0   

   Storage Amount Storage Type

Unnamed: 0,Brand,CPU Name,CPU Mark,GPU Name,GPU Mark,Monitor,Width,Height,RAM,Storage Amount,Storage Type,OS,Weight,Price,Predicted_Price
0,MSI,Intel Core i7 13620H,26454.0,GeForce RTX 2050,7320.0,15.6,1920.0,1080.0,16384.0,2097152.0,SSD,Windows 11,3.03,1529.0,1460.729119
1,Dell,Intel Core i7 1360P,19333.0,Intel Iris Pro Graphics 6200,1449.0,16.0,1920.0,1200.0,24576.0,1048576.0,SSD,Windows 10,2.47,1079.0,1158.7833
2,Asus,AMD Ryzen 9 7945HX,55651.0,GeForce RTX 4070,26876.0,17.3,2560.0,1400.0,16384.0,1048576.0,SSD,Windows 11,2.68,1959.99,2037.9001
3,HP,Intel Core i5 1335U,16980.0,Intel Iris Xe,2690.0,15.6,1920.0,1080.0,8192.0,262144.0,SSD,Windows 11,3.52,660.0,613.25226
4,MSI,Intel Core i9 13950HX,44905.0,GeForce RTX 4060 Laptop GPU,17984.0,16.0,2560.0,1600.0,16384.0,8796093.0,SSD,Windows 11,5.24,2323.0,2432.5029
5,Acer,AMD Ryzen 3 3250U,3855.0,Radeon Ryzen 3 3250U,806.0,14.0,1366.0,768.0,8192.0,131072.0,SSD,Windows 11,4.19,279.99,398.828033
