# 1. Import required libraries

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#for preprocessing
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder , LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

#for evaluation
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score , accuracy_score , classification_report,silhouette_score , precision_score


#models
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVC


import warnings

# Ignore all warnings
warnings.filterwarnings("ignore")




# 2. Load dataset

In [4]:
# Resolve the location of the laptop-price CSV.
# The default is the original author's local path; set the LAPTOP_PRICE_CSV
# environment variable to point at your own copy so the notebook can run on
# another machine without editing this cell.
import os

file_path = os.environ.get(
    'LAPTOP_PRICE_CSV',
    r'C:\Users\Ahmed Ashraf\Desktop\archive (3)\laptop_price - dataset.csv',
)

# Read the raw table into a DataFrame
laptop_data = pd.read_csv(file_path)

# Preview the first five rows
laptop_data.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,CPU_Company,CPU_Type,CPU_Frequency (GHz),RAM (GB),Memory,GPU_Company,GPU_Type,OpSys,Weight (kg),Price (Euro)
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel,Core i5,2.3,8,128GB SSD,Intel,Iris Plus Graphics 640,macOS,1.37,1339.69
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel,Core i5,1.8,8,128GB Flash Storage,Intel,HD Graphics 6000,macOS,1.34,898.94
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel,Core i5 7200U,2.5,8,256GB SSD,Intel,HD Graphics 620,No OS,1.86,575.0
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel,Core i7,2.7,16,512GB SSD,AMD,Radeon Pro 455,macOS,1.83,2537.45
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel,Core i5,3.1,8,256GB SSD,Intel,Iris Plus Graphics 650,macOS,1.37,1803.6


# 3. Data Cleaning and Preprocessing

In [6]:
# Report the number of missing values found in each column
missing_per_column = laptop_data.isna().sum()
print(missing_per_column)

# Replace each text-valued column with integer codes, in place.
# Every fitted encoder is stored in `label_encoders` under its column name.
categorical_cols = [
    'Company', 'TypeName', 'CPU_Company', 'CPU_Type',
    'GPU_Company', 'GPU_Type', 'OpSys',
]
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    encoded_values = le.fit_transform(laptop_data[col])
    laptop_data[col] = encoded_values
    label_encoders[col] = le

# Standardise the numeric hardware columns, overwriting them in place.
# NOTE(review): the scaler is fitted on the whole table here, before the
# train/test split performed in a later cell, so test rows contribute to the
# fitted statistics - consider fitting on the training rows only.
numeric_cols = ['Inches', 'CPU_Frequency (GHz)', 'RAM (GB)', 'Weight (kg)']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(laptop_data[numeric_cols])
laptop_data[numeric_cols] = scaled_values



Company                0
Product                0
TypeName               0
Inches                 0
ScreenResolution       0
CPU_Company            0
CPU_Type               0
CPU_Frequency (GHz)    0
RAM (GB)               0
Memory                 0
GPU_Company            0
GPU_Type               0
OpSys                  0
Weight (kg)            0
Price (Euro)           0
dtype: int64


# 4. Splitting into features (X) and target (y)

In [7]:
# Target column, plus the columns excluded from the feature matrix
target_col = 'Price (Euro)'
excluded_cols = [target_col, 'Product', 'ScreenResolution', 'Memory']

# Feature matrix: every remaining column; target vector: the price
y = laptop_data[target_col]
X = laptop_data.drop(columns=excluded_cols)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Show the shape of each of the four partitions
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((1020, 11), (255, 11), (1020,), (255,))

# 5. Model Training
We will train a Random Forest Regressor on the training set and evaluate it on the held-out test set.

In [12]:
# Build the random forest with a fixed seed and fit it on the training split
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict prices for the held-out rows
y_pred = model.predict(X_test)

# Error metrics on the test split
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # root of the MSE
r2_percent = r2_score(y_test, y_pred) * 100  # R^2 expressed as a percentage

# Report the results
print(f'Mean Absolute Error: {mae}')
print(f'Root Mean Squared Error: {rmse}')
print("R2-score: ", f"{r2_percent:0.1f}", "%")


Mean Absolute Error: 184.53975466211133
Root Mean Squared Error: 271.10539850650775
R2-score:  85.2 %
