In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler, OrdinalEncoder
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Load the laptop listings directly from the raw CSV hosted on GitHub.
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/jamshid-ds/laptop_price_prediction/main/dataset_model_pipeline/laptops.csv",
)

In [None]:
# Summarise the loaded frame: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   CompanyName       1000 non-null   object 
 1   TypeOfLaptop      1000 non-null   object 
 2   Inches            1000 non-null   float64
 3   ScreenResolution  1000 non-null   object 
 4   Cpu               1000 non-null   object 
 5   Ram               1000 non-null   object 
 6   Memory            1000 non-null   object 
 7   Gpu               1000 non-null   object 
 8   OpSys             1000 non-null   object 
 9   Weight            1000 non-null   float64
 10  Price             1000 non-null   float64
dtypes: float64(3), object(8)
memory usage: 86.1+ KB


In [None]:
# Preview the first five rows to see what the raw values look like.
df.head(n=5)

Unnamed: 0,CompanyName,TypeOfLaptop,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price
0,MSI,Business Laptop,17.04068,IPS Panel Retina Display 2560x1600,Intel Core i7,12GB,512GB SSD,Intel Iris Xe Graphics,Linux,2.064834,35844.099371
1,Chuwi,2 in 1 Convertible,16.542395,Full HD,Intel Core i5,12GB,128GB PCIe SSD,Intel Iris Xe Graphics,No OS,4.060656,37019.059051
2,hp,WorkStation,17.295294,Full HD,Intel Xeon E3-1505M,8GB,1TB HDD,Intel Iris Xe Graphics,Linux,2.901689,33329.360341
3,MSI,2 in 1 Convertible,11.526203,2K,Intel Core i7,16GB,512GB NVMe SSD,Intel Iris Xe Graphics,Windows 10,2.914843,68631.102486
4,Microsoft,Gaming,12.649634,Full HD,Intel Core i5,8GB,512GB SSD,AMD Radeon RX 5600M,Windows 10,4.341995,33842.479566


In [None]:
# Number of listings per manufacturer, most frequent first.
df.loc[:, "CompanyName"].value_counts()

MSI          128
Microsoft    118
Apple        113
lenevo       113
Asus         112
Chuwi        109
Acer         107
Dell         101
hp            99
Name: CompanyName, dtype: int64

In [None]:
# Number of listings per laptop type, most frequent first.
df.loc[:, "TypeOfLaptop"].value_counts()

Business Laptop       176
WorkStation           175
Gaming                168
UltraBook             166
2 in 1 Convertible    162
NoteBook              153
Name: TypeOfLaptop, dtype: int64

In [None]:
# Number of listings per screen-resolution label, most frequent first.
df.loc[:, "ScreenResolution"].value_counts()

2K                                           181
4K                                           179
IPS Panel Full HD / Touchscreen 1920x1080    176
Full HD                                      162
HD 1920x1080                                 160
IPS Panel Retina Display 2560x1600           142
Name: ScreenResolution, dtype: int64

In [None]:
# Number of listings per CPU model, most frequent first.
df.loc[:, "Cpu"].value_counts()

Intel Xeon E3-1505M               114
Intel Atom x5-Z8550               111
Intel Core i5                     105
Intel Pentium Quad Core N4200     101
Intel Celeron Dual Core 3855U      98
Intel Core i7                      96
Intel Core i9                      95
AMD Ryzen 5                        94
AMD Ryzen 7                        94
AMD A9-Series 9420                 92
Name: Cpu, dtype: int64

In [None]:
# Number of listings per RAM size label (stored as strings such as "8GB").
df.loc[:, "Ram"].value_counts()

8GB     270
12GB    247
4GB     243
16GB    240
Name: Ram, dtype: int64

In [None]:
# Number of listings per storage configuration, most frequent first.
df.loc[:, "Memory"].value_counts()

1TB HDD                105
2TB SATA SSD            63
1TB SSHD                61
4TB HDD                 60
512GB eMMC              59
128GB PCIe SSD          58
1TB NVMe SSD            55
256GB PCIe SSD          55
512GB NVMe SSD          54
512GB SSD               51
256GB SSD               50
2TB HDD                 50
1TB Fusion Drive        50
2TB NVMe SSD            49
256GB Flash Storage     46
6TB HDD                 45
256GB eMMC              45
128GB SSD               44
Name: Memory, dtype: int64

In [None]:
# Number of listings per GPU model, most frequent first.
df.loc[:, "Gpu"].value_counts()

NVIDIA GeForce GTX 1650    348
AMD Radeon RX 5600M        339
Intel Iris Xe Graphics     313
Name: Gpu, dtype: int64

In [None]:
# Number of listings per operating system, most frequent first.
df.loc[:, "OpSys"].value_counts()

No OS         224
macOS         219
Windows 10    194
Linux         187
Windows 11    176
Name: OpSys, dtype: int64

In [None]:
# Count missing values per column (`isnull` is an alias of `isna`).
df.isnull().sum()

CompanyName         0
TypeOfLaptop        0
Inches              0
ScreenResolution    0
Cpu                 0
Ram                 0
Memory              0
Gpu                 0
OpSys               0
Weight              0
Price               0
dtype: int64

In [None]:
# Column names as a plain Python list.
df.columns.tolist()

['CompanyName',
 'TypeOfLaptop',
 'Inches',
 'ScreenResolution',
 'Cpu',
 'Ram',
 'Memory',
 'Gpu',
 'OpSys',
 'Weight',
 'Price']

In [None]:
# Separate the regression target ("Price") from the predictor columns.
y = df["Price"]
x = df.drop(columns="Price")

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_x, test_x, train_y, test_y = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=3,
)

In [None]:
# Uncomment to inspect the held-out target values:
# test_y

In [None]:
#Pipeline

nums = ["Inches", "Weight"]
cats = ["CompanyName",	"TypeOfLaptop",	"Inches",	"ScreenResolution",	"Cpu",	"Ram",	"Memory",	"Gpu",	"OpSys"]

num_pipeline = Pipeline([
    ("sc", StandardScaler())
])

cats_pipeline = Pipeline([
    ('ohe', OrdinalEncoder())
])

full_pipeline = ColumnTransformer([
    ('nums', num_pipeline, nums),
    ('cats', cats_pipeline, cats),
])


In [None]:
train_x = full_pipeline.fit_transform(train_x)

In [None]:
train_x

array([[-0.19769109, -0.10080759,  8.        , ...,  9.        ,
         1.        ,  3.        ],
       [-1.21941384,  1.59013913,  5.        , ...,  5.        ,
         1.        ,  4.        ],
       [ 1.20666961, -0.96448658,  5.        , ...,  9.        ,
         1.        ,  2.        ],
       ...,
       [ 0.3799274 , -1.51442874,  8.        , ..., 13.        ,
         1.        ,  1.        ],
       [-0.34643915, -1.42112893,  5.        , ..., 12.        ,
         0.        ,  1.        ],
       [-0.77185255, -1.60558315,  7.        , ..., 17.        ,
         0.        ,  0.        ]])

In [None]:
#Model

dt_model = DecisionTreeRegressor()
dt_model.fit(train_x,train_y)

In [None]:
test_x = full_pipeline.fit_transform(test_x)

In [None]:
# Decision-tree price predictions for the held-out rows.
test_predict = dt_model.predict(X=test_x)

In [None]:
# (rows, features) of the transformed test matrix.
np.shape(test_x)

(200, 11)

In [None]:
# Actual vs. predicted prices for the held-out rows, keyed by the original row
# index of `test_y`.
# The second positional argument of `pd.DataFrame` is `index`, so passing the
# predictions there reindexes `test_y` by the predicted values and produces an
# all-NaN "Price" column; build the frame from named columns instead.
pd.DataFrame({"Price": test_y, "Predicted": test_predict})

Unnamed: 0,Price
56565.943161,
41785.607517,
61628.226153,
56565.943161,
30737.906553,
...,...
30718.822304,
31757.948098,
51687.767367,
30998.221490,


In [None]:
# Mean absolute percentage error of the decision tree on the test split
# (returned as a fraction, not multiplied by 100).
mean_absolute_percentage_error(y_true=test_y, y_pred=test_predict)

0.3026104889996789

In [None]:
# Random forest baseline. The seed fixes the bootstrap samples and feature
# sub-sampling so the fitted model and its reported error are reproducible
# across re-runs; the value matches the seed used for the train/test split.
RF_model = RandomForestRegressor(random_state=3)
RF_model.fit(train_x, train_y)

In [None]:
# Random-forest price predictions for the held-out rows.
test_predict_rf = RF_model.predict(X=test_x)

In [None]:
# Mean absolute percentage error of the random forest on the test split
# (returned as a fraction, not multiplied by 100).
mean_absolute_percentage_error(y_true=test_y, y_pred=test_predict_rf)

0.2136622577674332

In [None]:
# Persist the fitted random forest to disk (`joblib` is imported in the top
# import cell together with the other dependencies).
# NOTE(review): only the estimator is saved. New data must first be passed
# through the fitted `full_pipeline`, which is not persisted here — confirm
# that the preprocessing is stored elsewhere before deploying this artifact.
joblib.dump(RF_model, 'rf_model.joblib')


['rf_model.joblib']