# LAPTOP PRICE PREDICTOR

In [82]:
!pip install numpy
!pip install pandas



In [83]:
import numpy as np
import pandas as pd

In [84]:
data = pd.read_csv('laptop_price.csv', encoding='latin-1')

In [85]:
# Data Analysis
data.head(2)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,Intel HD Graphics 6000,macOS,1.34kg,898.94


In [86]:
data.shape

(1303, 12)

In [87]:
data.isnull().sum()

Unnamed: 0,0
laptop_ID,0
Company,0
Product,0
TypeName,0
Inches,0
ScreenResolution,0
Cpu,0
Ram,0
Gpu,0
OpSys,0


In [88]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   Product           1303 non-null   object 
 3   TypeName          1303 non-null   object 
 4   Inches            1303 non-null   float64
 5   ScreenResolution  1303 non-null   object 
 6   Cpu               1303 non-null   object 
 7   Ram               1303 non-null   object 
 8   Gpu               1303 non-null   object 
 9   OpSys             1303 non-null   object 
 10  Weight            1303 non-null   object 
 11  Price_euros       1303 non-null   float64
dtypes: float64(2), int64(1), object(9)
memory usage: 122.3+ KB


# Data Preprocessing

In [89]:
# erase the "GB" in ram and "kg" in weight columns
data['Ram'] = data['Ram'].str.replace('GB','').astype('int32')
data['Weight'] = data['Weight'].str.replace('kg','').astype('float32')

In [91]:
data["Price_euros"]

Unnamed: 0,Price_euros
0,1339.69
1,898.94
2,575.00
3,2537.45
4,1803.60
...,...
1298,638.00
1299,1499.00
1300,229.00
1301,764.00


In [92]:
# Company Column
data['Company'].value_counts()

Unnamed: 0_level_0,count
Company,Unnamed: 1_level_1
Dell,297
Lenovo,297
HP,274
Asus,158
Acer,103
MSI,54
Toshiba,48
Apple,21
Samsung,9
Mediacom,7


In [93]:
# if company count is low, add other
def add_company(inpt):
    if inpt == 'Samsung' or inpt == 'Razer' or inpt == 'Mediacom' or inpt == 'Microsoft'or inpt == 'Xiaomi'or inpt == 'Vero'or inpt == 'Chuwi'or inpt == 'Google'or inpt == 'Fujitsu'or inpt == 'LG'or inpt == 'Huawei':
        return 'Other'
    else:
        return inpt

In [95]:
data['Company'] = data['Company'].apply(add_company)

In [96]:
data['Company'].value_counts()

Unnamed: 0_level_0,count
Company,Unnamed: 1_level_1
Dell,297
Lenovo,297
HP,274
Asus,158
Acer,103
MSI,54
Other,51
Toshiba,48
Apple,21


In [97]:
# Product column
len(data['Product'].value_counts())

618

In [98]:
# Type column
data['TypeName'].value_counts()

Unnamed: 0_level_0,count
TypeName,Unnamed: 1_level_1
Notebook,727
Gaming,205
Ultrabook,196
2 in 1 Convertible,121
Workstation,29
Netbook,25


In [99]:
# ScreenResolution Column
data['ScreenResolution'].value_counts()

Unnamed: 0_level_0,count
ScreenResolution,Unnamed: 1_level_1
Full HD 1920x1080,507
1366x768,281
IPS Panel Full HD 1920x1080,230
IPS Panel Full HD / Touchscreen 1920x1080,53
Full HD / Touchscreen 1920x1080,47
1600x900,23
Touchscreen 1366x768,16
Quad HD+ / Touchscreen 3200x1800,15
IPS Panel 4K Ultra HD 3840x2160,12
IPS Panel 4K Ultra HD / Touchscreen 3840x2160,11


In [101]:
# create new 2 columns for touchscreen and IPS ...
data['Touchscreen'] = data['ScreenResolution'].apply(lambda x:1 if 'Touchscreen' in x else 0)  # if "Touchscreen" word found, add 1 else 0
data['Ips'] = data['ScreenResolution'].apply(lambda x:1 if 'IPS' in x else 0)  # if "IPS" word found, add 1 else 0

In [103]:
# Cpu column
data['Cpu'].value_counts()

Unnamed: 0_level_0,count
Cpu,Unnamed: 1_level_1
Intel Core i5 7200U 2.5GHz,190
Intel Core i7 7700HQ 2.8GHz,146
Intel Core i7 7500U 2.7GHz,134
Intel Core i7 8550U 1.8GHz,73
Intel Core i5 8250U 1.6GHz,72
...,...
Intel Core i5 7200U 2.70GHz,1
Intel Core M M7-6Y75 1.2GHz,1
Intel Core M 6Y54 1.1GHz,1
AMD E-Series 9000 2.2GHz,1


In [104]:
# add only first 3 elements in CPU column into new Cpu name column
data['cpu_name'] = data['Cpu'].apply(lambda x:" ".join(x.split()[0:3]))

In [106]:
data['cpu_name'].value_counts()

Unnamed: 0_level_0,count
cpu_name,Unnamed: 1_level_1
Intel Core i7,527
Intel Core i5,423
Intel Core i3,136
Intel Celeron Dual,80
Intel Pentium Quad,27
Intel Core M,19
AMD A9-Series 9420,12
AMD A6-Series 9220,8
Intel Celeron Quad,8
AMD A12-Series 9720P,7


In [107]:
# in cpu 'name colum', low number cpu names changed to 'other'

def set_processor(name):
    if name == 'Intel Core i7' or name == 'Intel Core i5' or name == 'Intel Core i3':
        return name
    else:
        if name.split()[0] == 'AMD':
            return 'AMD'
        else:
            return 'Other'

In [108]:
data['cpu_name'] = data['cpu_name'].apply(set_processor)

In [109]:
data['cpu_name'].value_counts()

Unnamed: 0_level_0,count
cpu_name,Unnamed: 1_level_1
Intel Core i7,527
Intel Core i5,423
Other,155
Intel Core i3,136
AMD,62


In [111]:
# Ram column
data['Ram'].value_counts()

Unnamed: 0_level_0,count
Ram,Unnamed: 1_level_1
8,619
4,375
16,200
6,41
12,25
2,22
32,17
24,3
64,1


In [112]:
# Gpu column
data['Gpu'].value_counts()

Unnamed: 0_level_0,count
Gpu,Unnamed: 1_level_1
Intel HD Graphics 620,281
Intel HD Graphics 520,185
Intel UHD Graphics 620,68
Nvidia GeForce GTX 1050,66
Nvidia GeForce GTX 1060,48
...,...
Nvidia Quadro M500M,1
AMD Radeon R7 M360,1
Nvidia Quadro M3000M,1
Nvidia GeForce 960M,1


In [114]:
# reduce the GPU name lengths.. and add to  new column "gpu_name"
data['gpu_name'] = data['Gpu'].apply(lambda x:" ".join(x.split()[0:1]))

In [115]:
data['gpu_name'].value_counts()

Unnamed: 0_level_0,count
gpu_name,Unnamed: 1_level_1
Intel,722
Nvidia,400
AMD,180
ARM,1


In [119]:
data.shape

(1302, 16)

In [120]:
# remove ARAM (because only 1 value)
data = data[data['gpu_name'] != 'ARM']

In [121]:
data.shape

(1302, 16)

In [123]:
# Opsys column
data['OpSys'].value_counts()

Unnamed: 0_level_0,count
OpSys,Unnamed: 1_level_1
Windows 10,1072
No OS,66
Linux,62
Windows 7,45
Chrome OS,26
macOS,13
Mac OS X,8
Windows 10 S,8
Android,2


In [125]:
# operating system column
def set_os(inpt):
    if inpt == 'Windows 10' or inpt == 'Windows 7' or inpt == 'Windows 10 S':
        return 'Windows'
    elif inpt == 'macOS' or inpt == 'Mac OS X':
        return 'Mac'
    elif inpt == 'Linux':
        return inpt
    else:
        return 'Other'

In [126]:
data['OpSys'] = data['OpSys'].apply(set_os)

In [127]:
data['OpSys'].value_counts()

Unnamed: 0_level_0,count
OpSys,Unnamed: 1_level_1
Windows,1125
Other,94
Linux,62
Mac,21


In [128]:
# Drop unnescessary columns
data = data.drop(columns=['laptop_ID', 'Inches', 'Product', 'ScreenResolution', 'Cpu', 'Gpu'])

In [129]:
data.head()

Unnamed: 0,Company,TypeName,Ram,OpSys,Weight,Price_euros,Touchscreen,Ips,cpu_name,gpu_name
0,Apple,Ultrabook,8,Mac,1.37,1339.69,0,1,Intel Core i5,Intel
1,Apple,Ultrabook,8,Mac,1.34,898.94,0,0,Intel Core i5,Intel
2,HP,Notebook,8,Other,1.86,575.0,0,0,Intel Core i5,Intel
3,Apple,Ultrabook,16,Mac,1.83,2537.45,0,1,Intel Core i7,AMD
4,Apple,Ultrabook,8,Mac,1.37,1803.6,0,1,Intel Core i5,Intel


In [130]:
# one hot encoding
# convert all data into numerical..
data = pd.get_dummies(data)

In [131]:
data.shape

(1302, 32)

In [132]:
data

Unnamed: 0,Ram,Weight,Price_euros,Touchscreen,Ips,Company_Acer,Company_Apple,Company_Asus,Company_Dell,Company_HP,...,OpSys_Other,OpSys_Windows,cpu_name_AMD,cpu_name_Intel Core i3,cpu_name_Intel Core i5,cpu_name_Intel Core i7,cpu_name_Other,gpu_name_AMD,gpu_name_Intel,gpu_name_Nvidia
0,8,1.37,1339.69,0,1,False,True,False,False,False,...,False,False,False,False,True,False,False,False,True,False
1,8,1.34,898.94,0,0,False,True,False,False,False,...,False,False,False,False,True,False,False,False,True,False
2,8,1.86,575.00,0,0,False,False,False,False,True,...,True,False,False,False,True,False,False,False,True,False
3,16,1.83,2537.45,0,1,False,True,False,False,False,...,False,False,False,False,False,True,False,True,False,False
4,8,1.37,1803.60,0,1,False,True,False,False,False,...,False,False,False,False,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,4,1.80,638.00,1,1,False,False,False,False,False,...,False,True,False,False,False,True,False,False,True,False
1299,16,1.30,1499.00,1,1,False,False,False,False,False,...,False,True,False,False,False,True,False,False,True,False
1300,2,1.50,229.00,0,0,False,False,False,False,False,...,False,True,False,False,False,False,True,False,True,False
1301,6,2.19,764.00,0,0,False,False,False,False,True,...,False,True,False,False,False,True,False,True,False,False


In [134]:
# computes the correlation
# show the correlation between target column and other columns.
data.corr()['Price_euros']

Unnamed: 0,Price_euros
Ram,0.742905
Weight,0.209867
Price_euros,1.0
Touchscreen,0.192917
Ips,0.25332
Company_Acer,-0.208541
Company_Apple,0.080636
Company_Asus,-0.010568
Company_Dell,0.048245
Company_HP,-0.041566


# Divide data into Training and Testing

In [145]:
# Model building and selection
X = data.drop('Price_euros', axis=1)
y = data['Price_euros']

In [136]:
!pip install sklearn

Collecting sklearn
  Using cached sklearn-0.0.post12.tar.gz (2.6 kB)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.


In [137]:
!pip install scikit-learn



In [146]:
# Divide dataset into training and testing set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [147]:
X_train.shape, X_test.shape

((1041, 31), (261, 31))

# Model Building

In [148]:
# model training
def model_acc(model):
    model.fit(X_train, y_train)
    acc = model.score(X_test, y_test)
    print(str(model)+ ' --> ' +str(acc))

In [149]:
# Train with different algorithms

from sklearn.linear_model import LinearRegression
lr = LinearRegression()
model_acc(lr)

from sklearn.linear_model import Lasso
lasso = Lasso()
model_acc(lasso)

from sklearn.tree import DecisionTreeRegressor
dt = DecisionTreeRegressor()
model_acc(dt)

from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor()
model_acc(rf)

LinearRegression() --> 0.7058360657408362
Lasso() --> 0.7061763460995345
DecisionTreeRegressor() --> 0.7110905744064842
RandomForestRegressor() --> 0.824588536035431


# Hyperparameter Tuning

In [150]:
# Hyperparameter testing

from sklearn.model_selection import GridSearchCV

parameters = {'n_estimators':[10, 50, 100],
              'criterion':['squared_error','absolute_error','poisson']}

grid_obj = GridSearchCV(estimator=rf, param_grid=parameters)

grid_fit = grid_obj.fit(X_train, y_train)

best_model = grid_fit.best_estimator_

best_model.score(X_test, y_test)

0.8255119449739822

In [151]:
X_test.columns

Index(['Ram', 'Weight', 'Touchscreen', 'Ips', 'Company_Acer', 'Company_Apple',
       'Company_Asus', 'Company_Dell', 'Company_HP', 'Company_Lenovo',
       'Company_MSI', 'Company_Other', 'Company_Toshiba',
       'TypeName_2 in 1 Convertible', 'TypeName_Gaming', 'TypeName_Netbook',
       'TypeName_Notebook', 'TypeName_Ultrabook', 'TypeName_Workstation',
       'OpSys_Linux', 'OpSys_Mac', 'OpSys_Other', 'OpSys_Windows',
       'cpu_name_AMD', 'cpu_name_Intel Core i3', 'cpu_name_Intel Core i5',
       'cpu_name_Intel Core i7', 'cpu_name_Other', 'gpu_name_AMD',
       'gpu_name_Intel', 'gpu_name_Nvidia'],
      dtype='object')

In [152]:
# Save model
import pickle
with open('predictor.pickle', 'wb') as file:
    pickle.dump(best_model, file)

# Check the Model

In [157]:
pred_value = best_model.predict([[16, 1.3, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0]])
pred_value



array([2094.01791667])

In [162]:
# price in sri lankan rupee
print("Price in LKR :", (pred_value * 342).item())

Price in LKR : 716154.1275000001
