In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler



In [2]:
# Load the data from the CSV file
data = pd.read_csv('data/preprocess.csv')

# Split the data into features (X) and the target variable (y)
data.drop(columns=['Weight'], inplace=True)
X = data.drop(columns=['Price'])  # Features
y = data['Price']  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize MLPRegressor
mlp = MLPRegressor(random_state=42,early_stopping=True)

# Define hyperparameters for grid search
param_grid = {
    'hidden_layer_sizes': [(64,64), (64,32), (32, 32), (128, 32),(128, 128), (32, 32, 32), (128, 32, 32), (128,64,32),(128, 128, 32), (128, 128, 128)],
    'activation': ['relu','logistic'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.1,0.001, 0.01,0.0001]
}


# GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(mlp, param_grid, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Best parameters found
print("Best parameters:", grid_search.best_params_)

# Predict on the test set with the best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)



Best parameters: {'activation': 'logistic', 'alpha': 0.001, 'hidden_layer_sizes': (64, 64), 'learning_rate_init': 0.1}


In [3]:
import numpy as np
# Calculate accuracy metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mas=mean_absolute_error(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R2) score: {r2}")
print(f"Mean Absolute Error (MAE): {mas}")

def mean_squared_percentage_error(y_true, y_pred):
    return np.mean(np.square((y_true - y_pred) / y_true)) * 100

mspe = mean_squared_percentage_error(y_test, y_pred)
print(f"Mean Squared Percentage Error (MSPE): {mspe}")

def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

mape = mean_absolute_percentage_error(y_test, y_pred)
print(f"Mean Absolute Percentage Error (MAPE): {mape}")
data


Mean Squared Error (MSE): 153005.10672416983
R-squared (R2) score: 0.6115211574000745
Mean Absolute Error (MAE): 225.01934768853377
Mean Squared Percentage Error (MSPE): 37.36938295212233
Mean Absolute Percentage Error (MAPE): 29.611290460170935


Unnamed: 0,CPU Mark,GPU Mark,Monitor,RAM,Storage Amount,Encoded_Brand,Price,Encoded_OS,Resolution
0,28003,8274,15.6,64,3072.0,13,2319.00,10,2304000
1,21525,2690,15.6,16,512.0,5,579.99,13,2073600
2,22681,10181,15.6,16,1024.0,5,2319.00,13,2073600
3,29690,7320,14.5,32,1024.0,5,2183.27,13,5184000
4,24971,4253,15.6,8,1000.0,13,1067.21,10,2073600
...,...,...,...,...,...,...,...,...,...
5699,658,2,14.0,4,32.0,23,199.00,10,1049088
5700,2350,4,13.4,32,1024.0,13,1727.26,5,1049088
5701,230,4,14.6,12,512.0,40,299.99,3,2073600
5702,203,2,10.1,1,250.0,5,369.99,15,614400


In [4]:
# Assuming you have already trained and obtained the best_model using the previous code

# Load the new dataset for prediction
new_data = pd.read_csv('data/new_preprocess.csv')

# Preprocess the new data (assuming it has similar features as the training data)
X_new = new_data.drop(columns=['Price','Unnamed: 0','Weight'])  # Extract features

# Scale the new data using the same scaler from the training data 
X_new_scaled = scaler.transform(X_new)

# Make predictions using the trained model on the new dataset
predictions = best_model.predict(X_new_scaled)

# Add the predictions to the new dataset
new_data['Predicted_Price'] = predictions
# Show actual price and predicted price
new_data[['Predicted_Price', 'Price']]


Unnamed: 0,Predicted_Price,Price
0,1057.612523,1292.39
1,1028.704027,1049.00
2,502.490903,446.92
3,1137.854158,692.99
4,995.866031,1845.80
...,...,...
565,1070.731498,749.99
566,1997.541920,1950.00
567,296.306240,439.99
568,755.784995,629.79


In [5]:
mape = np.mean(np.abs((new_data['Price'] - new_data['Predicted_Price']) / new_data['Price'])) * 100

# Calculate accuracy
accuracy = 100 - mape

print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 77.52%
