In [1]:
# // p2112646
# Justin Wong Juin Hng
# DAAA/1B/04
from math import sqrt

import pandas as pd
import numpy as np
import sklearn as sk
from sklearn.impute import SimpleImputer
import plotly.express as px
from IPython.core.display import display
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler

pd.set_option('display.max_rows', 30)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Part B

This dataset contains computer price for different PC models, ranging from Apple, HP,
Dell, and others.

We want to build a machine learning model to predict the computer
price based on their configuration.
<br>

In [2]:
df = pd.read_csv('pc_data (regression).csv')
display(df.head())

Unnamed: 0,Product ID,Brand,Type,Screen Size,Screen Specs,CPU,RAM,Hard Disk,GPU,Operating System,Weight,Price ($)
0,0,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,3568.93416
1,1,Apple,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,2394.77616
2,2,HP,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,1531.8
3,3,Apple,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,6759.7668
4,4,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,4804.7904


## (A) Data Cleaning

### (A1) Drop Useless Columns



In [3]:
df= df.drop(['Product ID'],axis=1)
display(df.head())

Unnamed: 0,Brand,Type,Screen Size,Screen Specs,CPU,RAM,Hard Disk,GPU,Operating System,Weight,Price ($)
0,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,3568.93416
1,Apple,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,2394.77616
2,HP,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,1531.8
3,Apple,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,6759.7668
4,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,4804.7904


### (A2) Split Data & Feature Engineering

#### (A2.1) New Features
1. Screen
   1. DPI
   2. Aspect ratio
   3. Resolution
   4. Touchscreen
   5. TN or IPS
   6. Screen size

2. Laptop Brand

3. Laptop Type

4. GPU
   1. GPU Brand (Nvidia)
    - Default vs M vs MX vs GT vs GTX vs GTX MX vs Quadro gpu (find out purpose and document)
    - gpu generation
    - gpu quality

   2. GPU Brand (Intel)
    - Intel Graphics vs Intel HD vs Intel UHD vs Iris Plus vs Intel Iris Pro
    - gpu generation
    - gpu quality

   3. GPU Brand (AMD)
    - Radeon R# vs Radeon RX vs FirePro vs Radeon ##-Series vs Radeon Pro vs Radeon FX
    - gpu generation
    - gpu quality

5. Storage
    1. Base Storage Type (SSD or HDD or Flash Storage)
    2. Upgraded Storage (Yes or No)
        - SSD or HDD
        - Therefore, max storage

6. RAM

7. Operating System



In [4]:
# count number of laptop models per brand
print(df['Brand'].value_counts())


Lenovo       3532
Dell         3492
HP           3228
Asus         1844
Acer         1204
MSI           640
Toshiba       576
Apple         200
Samsung       108
Razer          84
Mediacom       84
Microsoft      68
Xiaomi         48
Vero           48
Google         36
Fujitsu        36
LG             36
Chuwi          32
Huawei         24
Name: Brand, dtype: int64


In [5]:
from sklearn.preprocessing import OneHotEncoder
import re
import math

In [6]:
# Feature Engineering Screen data
df['IPS Check'] = df['Screen Specs'].str.contains('IPS',case=False).astype('str')
df['IPS Check'] = df['IPS Check'].str.replace('False','TN Screen')
df['IPS Check'] = df['IPS Check'].str.replace('True','IPS Screen')
df['Touchscreen Check'] = df['Screen Specs'].str.contains('Touchscreen',case=False).astype('str')
df['Touchscreen Check'] = df['Touchscreen Check'].str.replace('False','No Touchscreen')
df['Touchscreen Check'] = df['Touchscreen Check'].str.replace('True','Touchscreen Available')
df['Screen Specs'] = df['Screen Specs'].str.replace('4K','')
df['Screen Width']=df['Screen Specs'].str.extract(r'(\d+)').astype(int)
df['Screen Height']=df['Screen Specs'].str.extract(r'(x\d+)')
df['Screen Height'] = df['Screen Height'].str.replace('x','').astype(int)
df['Aspect Ratio'] = df['Screen Width']/df['Screen Height']
df['Screen DPI'] = np.sqrt((df['Screen Width'])**2+(df['Screen Height'])**2)/df['Screen Size']

def res_check(row):
    if row['Screen Width'] * row['Screen Height'] < 1200000 :
        val = 'HD'
    elif 1200000 < row['Screen Width'] * row['Screen Height'] < 2200000:
        val = 'Full HD'
    elif 2200000 < row['Screen Width'] * row['Screen Height'] < 6000000:
        val = 'Quad HD'
    elif row['Screen Width'] * row['Screen Height'] > 7900000:
        val = 'Ultra HD'
    else:
        val = None
    return val

df['Resolution Type'] = df.apply(res_check,axis=1)


df = df[['Brand', 'Type', 'Screen Size', 'Screen Specs', 'Touchscreen Check','IPS Check', 'Screen Width', 'Screen Height', 'Aspect Ratio', 'Screen DPI','Resolution Type','CPU', 'RAM', 'Hard Disk', 'GPU', 'Operating System', 'Weight', 'Price ($)']]



In [7]:
# Feature Engineering CPU

#cpu brand
def cpu_brand(row):
    if 'AMD' in row['CPU']:
        val = 'AMD'
    elif 'Intel' in row['CPU']:
        val = 'Intel'

    else:
        val = None
    return val


df['CPU Brand']= df.apply(cpu_brand,axis=1)

## AMD
df['AMD A Series'] = df['CPU'].str.extract(r'(AMD [a-zA-Z]\d{1,2})').astype('str')
df['AMD A Series'].replace('nan', np.nan,inplace=True)

df['AMD E Series'] = df['CPU'].str.extract(r'(AMD E-Series \d)').astype('str')
df['AMD E Series'] = df['AMD E Series'].str.replace('-Series ','',regex=True,case=False)
df['AMD E Series'].replace('nan', np.nan,inplace=True)

df['AMD FX Series'] = df['CPU'].str.extract(r'([FX]{2} \d{1})').astype('str')
df['AMD FX Series'] = df['AMD FX Series'].str.replace('FX ','AMD FX ',regex=True,case=False)
df['AMD FX Series'].replace('nan', np.nan,inplace=True)


df['AMD Ryzen Series'] = df['CPU'].str.extract(r'(Ryzen \d{4})').astype('str')
df['AMD Ryzen Series'] = df['AMD Ryzen Series'].str.replace('Ryzen 1600','Ryzen 5',regex=True,case=False)
df['AMD Ryzen Series'] = df['AMD Ryzen Series'].str.replace('Ryzen 1700','Ryzen 7',regex=True,case=False)
df['AMD Ryzen Series'].replace('nan', np.nan,inplace=True)




##Intel

df['Intel Atom Series'] = df['CPU'].str.extract(r'(Atom [a-zA-Z]{1}\d{1}-[a-zA-Z]{1}\d{4}|Atom [a-zA-Z]{1}\d{4})').astype('str')
df['Intel Atom Series'] = df['Intel Atom Series'].str.replace('X5-','',regex=True,case=False)
df['Intel Atom Series'].replace('nan', np.nan,inplace=True)

df['Intel Celeron Series'] = df['CPU'].str.extract(r'(Celeron [a-zA-Z]{1,5} Core [a-zA-Z]{1}\d{4}|Celeron [a-zA-Z]{1,5} Core \d{4}[a-zA-Z]{1})').astype('str')
df['Intel Celeron Series'] = df['Intel Celeron Series'].str.replace('[a-zA-Z]{1,5} Core','',regex=True,case=False)
df['Intel Celeron Series'] = df['Intel Celeron Series'].str.replace('Celeron ','Celeron',regex=True,case=False)
df['Intel Celeron Series'] = df['Intel Celeron Series'].str.replace('Celeron \d{4}U','Celeron U',regex=True,case=False)
df['Intel Celeron Series'] = df['Intel Celeron Series'].str.replace('Celeron N\d{4}','Celeron N',regex=True,case=False)
df['Intel Celeron Series'].replace('nan', np.nan,inplace=True)

df['Intel Core M Series'] = df['CPU'].str.extract(r'(Core M [a-zA-Z]\d{1}-\d{1}|Core M [a-zA-Z]\d{1}|Core M|Core M \d{1}[a-zA-Z]{1}\d{1}\d{1})').astype('str')
df['Intel Core M Series'] = df['Intel Core M Series'].str.replace('m','M',regex=True,case=True)
df['Intel Core M Series'].replace('nan', np.nan,inplace=True)

df['Intel Core Series'] = df['CPU'].str.extract(r'(Intel Core [a-zA-Z]\d{1})').astype('str')
df['Intel Core Series'] = df['Intel Core Series'].str.replace('Intel Core ','',regex=True,case=True)
df['Intel Core Series'].replace('nan', np.nan,inplace=True)

df['Intel Core Series Generation'] = df['CPU'].str.extract(r'(Intel Core [a-zA-Z]\d{1} [5-9]{1})').astype('str')
df['Intel Core Series Generation'] = df['Intel Core Series Generation'].str.replace('Intel Core [a-zA-Z][2-9]{1} ','',regex=True,case=True)
df['Intel Core Series Generation'] = df['Intel Core Series Generation'].apply(lambda x:  "{}{}".format(x,'th gen') if x!='nan' else x)
df['Intel Core Series Generation'].replace('nan', np.nan,inplace=True)

df['Intel Core Series Performance Type'] = df['CPU'].str.extract(r'([a-zA-Z]\d \d{4}[a-zA-Z]{1,2}|[a-zA-Z]\d \dY)').astype('str')
df['Intel Core Series Performance Type'] = df['Intel Core Series Performance Type'].str.replace('[a-zA-Z]\d \d{4}','',regex=True,case=True)
df['Intel Core Series Performance Type'] = df['Intel Core Series Performance Type'].str.replace('[a-zA-Z]\d \d{1}','',regex=True,case=True)
df['Intel Core Series Performance Type'].replace('nan', np.nan,inplace=True)

df['Intel Pentium Core Type'] = df['CPU'].str.extract(r'(Intel Pentium [a-zA-Z]{1,5} Core \d{4}[a-zA-Z]{1}|Intel Pentium [a-zA-Z]{1,5} Core [a-zA-Z]{1})').astype('str')
df['Intel Pentium Core Type'] = df['Intel Pentium Core Type'].str.replace('Intel Pentium [a-zA-Z]{1,5} Core \d{4}','',regex=True,case=True)
df['Intel Pentium Core Type'] = df['Intel Pentium Core Type'].str.replace('Intel Pentium [a-zA-Z]{1,5} Core ','',regex=True,case=True)
df['Intel Pentium Core Type'] = df['Intel Pentium Core Type'].apply(lambda x:  "{}{}".format('Pentium ',x,) if x!='nan' else x)
df['Intel Pentium Core Type'].replace('nan', np.nan,inplace=True)


df['Intel Pentium Core Generation'] = df['CPU'].str.extract(r'(Pentium [a-zA-Z]{4} Core \d{1}|Pentium [a-zA-Z]{4} Core [a-zA-Z]{1}\d{1})').astype('str')
df['Intel Pentium Core Generation'] = df['Intel Pentium Core Generation'].str.replace('Pentium [a-zA-Z]{4} Core N','',regex=True,case=True)
df['Intel Pentium Core Generation'] = df['Intel Pentium Core Generation'].str.replace('Pentium [a-zA-Z]{4} Core ','',regex=True,case=True)
df['Intel Pentium Core Generation'] = df['Intel Pentium Core Generation'].apply(lambda x:  "{}{}".format(x,"th gen") if x!='nan' and x!='3' else x)
df['Intel Pentium Core Generation'] = df['Intel Pentium Core Generation'].apply(lambda x:  "{}{}".format(x,"rd gen") if x!='nan' and x=='3' else x)
df['Intel Pentium Core Generation'].replace('nan', np.nan,inplace=True)


df['Intel Xeon Version'] = df['CPU'].str.extract(r'(Intel Xeon E3-\d{4}[a-zA-Z]{1} [a-zA-Z]{1}\d{1})').astype('str')
df['Intel Xeon Version'] = df['Intel Xeon Version'].str.replace('Intel Xeon E3-\d{4}[a-zA-Z]{1} [a-zA-Z]{1}','',regex=True,case=True)
df['Intel Xeon Version'] = df['Intel Xeon Version'].apply(lambda x:  "{}{}{}".format('Xeon ',x,"th gen") if x!='nan' else x)
df['Intel Xeon Version'].replace('nan', np.nan,inplace=True)

df['CPU Base Clock'] = df['CPU'].str.extract(r'(\d*\.?\d+)GHz')



# core count check
def core_count(row):
    if row['AMD A Series'] == 'AMD A10':
        val = '4 Cores'
    elif row['AMD A Series'] == 'AMD A4':
        val = '4 Cores'
    elif row['AMD A Series'] == 'AMD A6':
        val = '2 Cores'
    elif row['AMD A Series'] == 'AMD A8':
        val = '4 Cores'
    elif row['AMD A Series'] == 'AMD A9':
        val = '2 Cores'
    elif row['AMD A Series'] == 'AMD A12':
        val = '4 Cores'
    elif row['AMD E Series'] == 'AMD E6':
        val = '2 Cores'
    elif row['AMD E Series'] == 'AMD E7':
        val = '2 Cores'
    elif row['AMD E Series'] == 'AMD E9':
        val = '4 Cores'
    elif row['AMD FX Series'] == 'AMD FX 8':
        val = '8 Cores'
    elif row['AMD FX Series'] == 'AMD FX 9':
        val = '8 Cores'
    elif row['AMD Ryzen Series'] == 'Ryzen 5':
        val = '4 Cores'
    elif row['AMD Ryzen Series'] == 'Ryzen 7':
        val = '6 Cores'
    elif row['Intel Atom Series'] == 'Atom Z8300':
        val = '2 Cores'
    elif row['Intel Atom Series'] == 'Atom Z8350':
        val = '2 Cores'
    elif row['Intel Atom Series'] == 'Atom Z8550':
        val = '2 Cores'
    elif row['Intel Celeron Series'] == 'Celeron N':
        val = '2 Cores'
    elif row['Intel Celeron Series'] == 'Celeron U':
        val = '2 Cores'
    elif row['Intel Core M Series'] == 'Core M M3-7':
        val = '2 Cores'
    elif row['Intel Core M Series'] == 'Core M':
        val = '2 Cores'
    elif row['Intel Core M Series'] == 'Core M M3':
        val = '2 Cores'
    elif row['Intel Core M Series'] == 'Core M M3-6':
        val = '2 Cores'
    elif row['Intel Core M Series'] == 'Core M M7-6':
        val = '2 Cores'
    elif row['Intel Core Series'] == 'i3':
        val = '2 Cores'
    elif row['Intel Core Series'] == 'i5':
        val = '4 Cores'
    elif row['Intel Core Series'] == 'i7':
        val = '6 Cores'
    elif row['Intel Pentium Core Type'] == 'Pentium N':
        val = '4 Cores'
    elif row['Intel Pentium Core Type'] == 'Pentium U':
        val = '4 Cores'
    elif row['Intel Pentium Core Type'] == 'Pentium Y':
        val = '4 Cores'
    elif row['Intel Xeon Version'] == 'Xeon 5th gen':
        val = '4 Cores'
    elif row['Intel Xeon Version'] == 'Xeon 6th gen':
        val = '4 Cores'
    else:
        val = None
    return val

df['Core Count'] = df.apply(core_count,axis=1).astype('str')

df = df[df['Core Count']!='None']


df = df[['Brand', 'Type', 'Screen Size', 'Touchscreen Check', 'IPS Check', 'Screen Width', 'Screen Height', 'Aspect Ratio', 'Screen DPI', 'Resolution Type', 'CPU Base Clock', 'Core Count','CPU', 'CPU Brand','AMD A Series', 'AMD E Series', 'AMD FX Series', 'AMD Ryzen Series', 'Intel Atom Series', 'Intel Celeron Series', 'Intel Core M Series', 'Intel Core Series', 'Intel Core Series Generation', 'Intel Core Series Performance Type', 'Intel Pentium Core Type', 'Intel Pentium Core Generation', 'Intel Xeon Version', 'RAM', 'Hard Disk', 'GPU', 'Operating System', 'Weight', 'Price ($)']]

df['Screen Size'] =df['Screen Size'].astype('float')
df['Touchscreen Check'] =df['Touchscreen Check'].astype('str')
df['IPS Check'] =df['IPS Check'].astype('str')
df['Screen Width'] =df['Screen Width'].astype('int')
df['Screen Height'] =df['Screen Height'].astype('int')
df['Aspect Ratio'] =df['Aspect Ratio'].astype('float')
df['Screen DPI'] =df['Screen DPI'].astype('float')
df['CPU Base Clock'] =df['CPU Base Clock'].astype('float')



df['AMD Series'] = df['AMD A Series'].fillna(df['AMD E Series'])
df['AMD Series'] = df['AMD Series'].fillna(df['AMD FX Series'])
df['AMD Series'] = df['AMD Series'].fillna(df['AMD Ryzen Series'])
df = df.drop(['AMD Ryzen Series','AMD FX Series','AMD E Series','AMD A Series'],axis=1)

df['Intel Series'] = df['Intel Atom Series'].combine_first(df['Intel Celeron Series'])
df['Intel Series'] = df['Intel Series'].combine_first(df['Intel Core M Series'])
df['Intel Series'] = df['Intel Series'].combine_first(df['Intel Core Series'])
df['Intel Series'] = df['Intel Series'].combine_first(df['Intel Pentium Core Type'])
df['Intel Series'] = df['Intel Series'].combine_first(df['Intel Xeon Version'])

df['Intel Generation'] = df['Intel Core Series Generation'].combine_first(df['Intel Pentium Core Generation']).astype('str')
df['Intel Generation'].replace('nan', np.nan,inplace=True)

df['Intel Core Series Performance Type'] = df['Intel Core Series Performance Type'].astype('str')
df['Intel Core Series Performance Type'].replace('nan', np.nan,inplace=True)


df = df.drop(['Intel Celeron Series','Intel Atom Series','Intel Core M Series','Intel Core Series','Intel Pentium Core Type'],axis=1)
df['CPU Series'] = df['AMD Series'].fillna(df['Intel Series'])
df = df.drop(['AMD Series','Intel Series'],axis=1)


display(df)

Unnamed: 0,Brand,Type,Screen Size,Touchscreen Check,IPS Check,Screen Width,Screen Height,Aspect Ratio,Screen DPI,Resolution Type,CPU Base Clock,Core Count,CPU,CPU Brand,Intel Core Series Generation,Intel Core Series Performance Type,Intel Pentium Core Generation,Intel Xeon Version,RAM,Hard Disk,GPU,Operating System,Weight,Price ($),Intel Generation,CPU Series
0,Apple,Ultrabook,13.3,No Touchscreen,IPS Screen,2560,1600,1.600000,226.983005,Quad HD,2.3,4 Cores,Intel Core i5 2.3GHz,Intel,,,,,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,3568.93416,,i5
1,Apple,Ultrabook,13.3,No Touchscreen,TN Screen,1440,900,1.600000,127.677940,Full HD,1.8,4 Cores,Intel Core i5 1.8GHz,Intel,,,,,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,2394.77616,,i5
2,HP,Notebook,15.6,No Touchscreen,TN Screen,1920,1080,1.777778,141.211998,Full HD,2.5,4 Cores,Intel Core i5 7200U 2.5GHz,Intel,7th gen,U,,,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,1531.80000,7th gen,i5
3,Apple,Ultrabook,15.4,No Touchscreen,IPS Screen,2880,1800,1.600000,220.534624,Quad HD,2.7,6 Cores,Intel Core i7 2.7GHz,Intel,,,,,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,6759.76680,,i7
4,Apple,Ultrabook,13.3,No Touchscreen,IPS Screen,2560,1600,1.600000,226.983005,Quad HD,3.1,4 Cores,Intel Core i5 3.1GHz,Intel,,,,,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,4804.79040,,i5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15315,Lenovo,2 in 1 Convertible,14.0,Touchscreen Available,IPS Screen,1920,1080,1.777778,157.350512,Full HD,2.5,6 Cores,Intel Core i7 6500U 2.5GHz,Intel,6th gen,U,,,4GB,128GB SSD,Intel HD Graphics 520,Windows 10,1.8kg,1706.40200,6th gen,i7
15316,Lenovo,2 in 1 Convertible,13.3,Touchscreen Available,IPS Screen,3200,1800,1.777778,276.053530,Quad HD,2.5,6 Cores,Intel Core i7 6500U 2.5GHz,Intel,6th gen,U,,,16GB,512GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,4000.10600,6th gen,i7
15317,Lenovo,Notebook,14.0,No Touchscreen,TN Screen,1366,768,1.778646,111.935204,HD,1.6,2 Cores,Intel Celeron Dual Core N3050 1.6GHz,Intel,,,,,2GB,64GB Flash Storage,Intel HD Graphics,Windows 10,1.5kg,616.82600,,Celeron N
15318,HP,Notebook,15.6,No Touchscreen,TN Screen,1366,768,1.778646,100.454670,HD,2.5,6 Cores,Intel Core i7 6500U 2.5GHz,Intel,6th gen,U,,,6GB,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19kg,2042.06600,6th gen,i7


In [8]:
 # feature engineering RAM

df['RAM'] = df['RAM'].str.replace('GB','',regex=True,case=True)
df['RAM'] =df['RAM'].astype('int')


In [9]:
display(df)

Unnamed: 0,Brand,Type,Screen Size,Touchscreen Check,IPS Check,Screen Width,Screen Height,Aspect Ratio,Screen DPI,Resolution Type,CPU Base Clock,Core Count,CPU,CPU Brand,Intel Core Series Generation,Intel Core Series Performance Type,Intel Pentium Core Generation,Intel Xeon Version,RAM,Hard Disk,GPU,Operating System,Weight,Price ($),Intel Generation,CPU Series
0,Apple,Ultrabook,13.3,No Touchscreen,IPS Screen,2560,1600,1.600000,226.983005,Quad HD,2.3,4 Cores,Intel Core i5 2.3GHz,Intel,,,,,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,3568.93416,,i5
1,Apple,Ultrabook,13.3,No Touchscreen,TN Screen,1440,900,1.600000,127.677940,Full HD,1.8,4 Cores,Intel Core i5 1.8GHz,Intel,,,,,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,2394.77616,,i5
2,HP,Notebook,15.6,No Touchscreen,TN Screen,1920,1080,1.777778,141.211998,Full HD,2.5,4 Cores,Intel Core i5 7200U 2.5GHz,Intel,7th gen,U,,,8,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,1531.80000,7th gen,i5
3,Apple,Ultrabook,15.4,No Touchscreen,IPS Screen,2880,1800,1.600000,220.534624,Quad HD,2.7,6 Cores,Intel Core i7 2.7GHz,Intel,,,,,16,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,6759.76680,,i7
4,Apple,Ultrabook,13.3,No Touchscreen,IPS Screen,2560,1600,1.600000,226.983005,Quad HD,3.1,4 Cores,Intel Core i5 3.1GHz,Intel,,,,,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,4804.79040,,i5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15315,Lenovo,2 in 1 Convertible,14.0,Touchscreen Available,IPS Screen,1920,1080,1.777778,157.350512,Full HD,2.5,6 Cores,Intel Core i7 6500U 2.5GHz,Intel,6th gen,U,,,4,128GB SSD,Intel HD Graphics 520,Windows 10,1.8kg,1706.40200,6th gen,i7
15316,Lenovo,2 in 1 Convertible,13.3,Touchscreen Available,IPS Screen,3200,1800,1.777778,276.053530,Quad HD,2.5,6 Cores,Intel Core i7 6500U 2.5GHz,Intel,6th gen,U,,,16,512GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,4000.10600,6th gen,i7
15317,Lenovo,Notebook,14.0,No Touchscreen,TN Screen,1366,768,1.778646,111.935204,HD,1.6,2 Cores,Intel Celeron Dual Core N3050 1.6GHz,Intel,,,,,2,64GB Flash Storage,Intel HD Graphics,Windows 10,1.5kg,616.82600,,Celeron N
15318,HP,Notebook,15.6,No Touchscreen,TN Screen,1366,768,1.778646,100.454670,HD,2.5,6 Cores,Intel Core i7 6500U 2.5GHz,Intel,6th gen,U,,,6,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19kg,2042.06600,6th gen,i7


In [10]:
# feature engineering hard disk
print(np.unique(df['Hard Disk']))

# splitting into base and additional
df[['Base Storage','Additional Storage']] =df['Hard Disk'].str.split('+',expand=True).astype('str')
df['Additional Storage'] =df['Additional Storage'].str.replace('  ','',regex=True)
df['Additional Storage'].replace('nan', np.nan,inplace=True)



## base storage space number (128, 256, 1)
df['Base Storage Space Value'] = df['Base Storage'].str.extract(r'(\d*\.\d{1,4}|\d{1,3})').astype('str')
df['Base Storage Space Value'] = df['Base Storage Space Value'].str.replace('1.0','1',regex=False).astype('int')


## base storage size type (GB,TB)
df['Base Storage Size Type'] = df['Base Storage'].str.extract(r'(\d*\.\d{1,4}[A-Z]{2}|\d{1,3}[A-Z]{2})').astype('str')
df['Base Storage Size Type'] = df['Base Storage Size Type'].str.replace('\d*\.\d{1,4}|\d{1,3}','',regex=True)


## base storage type
df['Base Storage Type'] = df['Base Storage'].astype('str')
df['Base Storage Type'] = df['Base Storage Type'].str.replace('\d*\.\d{1,4}[A-Z]{2} |\d{1,3}[A-Z]{2} |\d{1,3}[A-Z]{2} |\d{1,3}[A-Z]{2} ','',regex=True)
df['Base Storage Type'] = df['Base Storage Type'].str.replace('Flash Storage ','Flash Storage',regex=True)
df['Base Storage Type'] = df['Base Storage Type'].str.replace('SSD ','SSD',regex=True)
df['Base Storage Type'] = df['Base Storage Type'].str.replace('HDD ','HDD',regex=True)


## additional (if applicable) storage space number (128, 256, 1)
df['Additional Storage Space Value'] = df['Additional Storage'].str.extract(r'(\d*\.\d{1,4}|\d{1,3})').astype('str')
df['Additional Storage Space Value'] = df['Additional Storage Space Value'].str.replace('1.0','1',regex=True).astype('float')
df['Additional Storage Space Value'].replace('nan', np.nan,inplace=True)

## additional (if applicable) storage size type (GB,TB)
df['Additional Storage Size Type'] = df['Additional Storage'].str.extract(r'(\d*\.\d{1,4}[A-Z]{2}|\d{1,3}[A-Z]{2})').astype('str')
df['Additional Storage Size Type'] = df['Additional Storage Size Type'].str.replace('\d*\.\d{1,4}|\d{1,3}','',regex=True)
df['Additional Storage Size Type'].replace('nan', np.nan,inplace=True)

## additional (if applicable) storage  type
df['Additional Storage Type'] = df['Additional Storage'].astype('str')
df['Additional Storage Type'] = df['Additional Storage Type'].str.replace('\d*\.\d{1,4}[A-Z]{2} |\d{1,3}[A-Z]{2} |\d{1,3}[A-Z]{2} |\d{1,3}[A-Z]{2} ','',regex=True)
df['Additional Storage Type'].replace('None', np.nan,inplace=True)



df = df[['Brand', 'Type', 'Screen Size', 'Touchscreen Check', 'IPS Check', 'Screen Width', 'Screen Height', 'Aspect Ratio', 'Screen DPI', 'Resolution Type', 'CPU Base Clock', 'Core Count','CPU', 'CPU Brand','CPU Series', 'Intel Generation', 'Intel Core Series Performance Type', 'RAM', 'Base Storage Space Value', 'Hard Disk','Base Storage Size Type', 'Base Storage Type', 'Additional Storage Space Value', 'Additional Storage Size Type', 'Additional Storage Type', 'GPU', 'Operating System', 'Weight', 'Price ($)']]

display(df)

['1.0TB HDD' '1.0TB Hybrid' '128GB Flash Storage' '128GB HDD' '128GB SSD'
 '128GB SSD +  1TB HDD' '128GB SSD +  2TB HDD' '16GB Flash Storage'
 '16GB SSD' '180GB SSD' '1TB HDD' '1TB HDD +  1TB HDD' '1TB SSD'
 '1TB SSD +  1TB HDD' '240GB SSD' '256GB Flash Storage' '256GB SSD'
 '256GB SSD +  1.0TB Hybrid' '256GB SSD +  1TB HDD'
 '256GB SSD +  256GB SSD' '256GB SSD +  2TB HDD' '256GB SSD +  500GB HDD'
 '2TB HDD' '32GB Flash Storage' '32GB HDD' '32GB SSD' '500GB HDD'
 '508GB Hybrid' '512GB Flash Storage' '512GB SSD'
 '512GB SSD +  1.0TB Hybrid' '512GB SSD +  1TB HDD'
 '512GB SSD +  256GB SSD' '512GB SSD +  2TB HDD' '512GB SSD +  512GB SSD'
 '64GB Flash Storage' '64GB Flash Storage +  1TB HDD' '64GB SSD' '8GB SSD']


Unnamed: 0,Brand,Type,Screen Size,Touchscreen Check,IPS Check,Screen Width,Screen Height,Aspect Ratio,Screen DPI,Resolution Type,CPU Base Clock,Core Count,CPU,CPU Brand,CPU Series,Intel Generation,Intel Core Series Performance Type,RAM,Base Storage Space Value,Hard Disk,Base Storage Size Type,Base Storage Type,Additional Storage Space Value,Additional Storage Size Type,Additional Storage Type,GPU,Operating System,Weight,Price ($)
0,Apple,Ultrabook,13.3,No Touchscreen,IPS Screen,2560,1600,1.600000,226.983005,Quad HD,2.3,4 Cores,Intel Core i5 2.3GHz,Intel,i5,,,8,128,128GB SSD,GB,SSD,,,,Intel Iris Plus Graphics 640,macOS,1.37kg,3568.93416
1,Apple,Ultrabook,13.3,No Touchscreen,TN Screen,1440,900,1.600000,127.677940,Full HD,1.8,4 Cores,Intel Core i5 1.8GHz,Intel,i5,,,8,128,128GB Flash Storage,GB,Flash Storage,,,,Intel HD Graphics 6000,macOS,1.34kg,2394.77616
2,HP,Notebook,15.6,No Touchscreen,TN Screen,1920,1080,1.777778,141.211998,Full HD,2.5,4 Cores,Intel Core i5 7200U 2.5GHz,Intel,i5,7th gen,U,8,256,256GB SSD,GB,SSD,,,,Intel HD Graphics 620,No OS,1.86kg,1531.80000
3,Apple,Ultrabook,15.4,No Touchscreen,IPS Screen,2880,1800,1.600000,220.534624,Quad HD,2.7,6 Cores,Intel Core i7 2.7GHz,Intel,i7,,,16,512,512GB SSD,GB,SSD,,,,AMD Radeon Pro 455,macOS,1.83kg,6759.76680
4,Apple,Ultrabook,13.3,No Touchscreen,IPS Screen,2560,1600,1.600000,226.983005,Quad HD,3.1,4 Cores,Intel Core i5 3.1GHz,Intel,i5,,,8,256,256GB SSD,GB,SSD,,,,Intel Iris Plus Graphics 650,macOS,1.37kg,4804.79040
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15315,Lenovo,2 in 1 Convertible,14.0,Touchscreen Available,IPS Screen,1920,1080,1.777778,157.350512,Full HD,2.5,6 Cores,Intel Core i7 6500U 2.5GHz,Intel,i7,6th gen,U,4,128,128GB SSD,GB,SSD,,,,Intel HD Graphics 520,Windows 10,1.8kg,1706.40200
15316,Lenovo,2 in 1 Convertible,13.3,Touchscreen Available,IPS Screen,3200,1800,1.777778,276.053530,Quad HD,2.5,6 Cores,Intel Core i7 6500U 2.5GHz,Intel,i7,6th gen,U,16,512,512GB SSD,GB,SSD,,,,Intel HD Graphics 520,Windows 10,1.3kg,4000.10600
15317,Lenovo,Notebook,14.0,No Touchscreen,TN Screen,1366,768,1.778646,111.935204,HD,1.6,2 Cores,Intel Celeron Dual Core N3050 1.6GHz,Intel,Celeron N,,,2,64,64GB Flash Storage,GB,Flash Storage,,,,Intel HD Graphics,Windows 10,1.5kg,616.82600
15318,HP,Notebook,15.6,No Touchscreen,TN Screen,1366,768,1.778646,100.454670,HD,2.5,6 Cores,Intel Core i7 6500U 2.5GHz,Intel,i7,6th gen,U,6,1,1TB HDD,TB,HDD,,,,AMD Radeon R5 M330,Windows 10,2.19kg,2042.06600


In [11]:
# feature engineer weight column

def weight_check(row):
    if row['Weight'] <1:
        val = 'Ultralight'
    elif 1< row['Weight']<2:
        val = 'Business'
    elif 2 <  row['Weight'] < 3:
        val = 'Heavy Duty / Gaming'
    elif row['Weight'] > 3:
        val = 'Workstation'
    else:
        val = np.NaN
    return val

df['Weight'] = df['Weight'].str.replace('kg','',regex=True,case=False).astype(float)
df['Weight Category'] = df.apply(weight_check, axis=1)

In [12]:
# feature engineer gpu

def gpu_brand(row):
    if 'AMD' in row['GPU']:
        val = 'AMD'
    elif 'Intel' in row['GPU']:
        val = 'Intel'
    elif 'Nvidia' in row['GPU']:
        val = 'Nvidia'
    else:
        val = None
    return val

def default_replace(row):
    if row['GPU'] == 'AMD Radeon R2':
        val = 'AMD Radeon R2 Default'
    elif row['GPU'] == 'AMD Radeon R3':
        val = 'AMD Radeon R3 Default'
    elif row['GPU'] == 'AMD Radeon R4':
        val = 'AMD Radeon R4 Default'
    elif row['GPU'] == 'AMD Radeon R5':
        val = 'AMD Radeon R5 Default'
    elif row['GPU'] == 'AMD Radeon R7':
        val = 'AMD Radeon R7 Default'
    elif row['GPU'] == 'Nvidia GeForce GTX 960<U+039C>':
        val = 'Nvidia GeForce GTX 960M'
    elif row['GPU'] == 'Intel HD Graphics':
        val = 'Intel HD Graphics Default'
    else:
        val = row['GPU']
    return val

df['GPU Brand']= df.apply(gpu_brand,axis=1)
df = df[['Brand', 'Type', 'Screen Size', 'Touchscreen Check', 'IPS Check', 'Screen Width', 'Screen Height', 'Aspect Ratio', 'Screen DPI', 'Resolution Type', 'CPU Base Clock', 'Core Count', 'CPU', 'CPU Brand', 'CPU Series', 'Intel Generation', 'Intel Core Series Performance Type', 'RAM', 'Base Storage Space Value', 'Hard Disk','Base Storage Size Type', 'Base Storage Type', 'Additional Storage Space Value', 'Additional Storage Size Type', 'Additional Storage Type', 'GPU', 'GPU Brand', 'Operating System', 'Weight', 'Weight Category','Price ($)']]

df['GPU'] = df['GPU'].str.replace('AMD R4 Graphics','AMD R4 Graphics Beema')
df['GPU'] = df['GPU'].str.replace('R17M-M1-70','AMD Radeon R7')
df['GPU'] = df['GPU'].str.replace('AMD Radeon R7 Graphics','AMD Radeon R7')
df['GPU'] = df['GPU'].str.replace('AMD Radeon R2 Graphics','AMD Radeon R2')
df['GPU'] = df['GPU'].str.replace('AMD Radeon R4 Graphics','AMD Radeon R4')
df['GPU'] = df['GPU'].str.replace('Nvidia GeForce 920MX ','Nvidia GeForce 920MX')
df['GPU'] = df['GPU'].str.replace('Nvidia GeForce 930MX ','Nvidia GeForce 930MX')
df['GPU'] = df['GPU'].str.replace('Intel Graphics 620','Intel HD Graphics 620')
df['GPU'] = df['GPU'].str.replace('Intel HD Graphics 620 ','Intel HD Graphics 620')
df['GPU'] = df['GPU'].str.replace('Nvidia GeForce GTX 1050 Ti','Nvidia GeForce GTX 1050Ti')
df['GPU'] = df['GPU'].str.replace('Nvidia GeForce GTX 980 ','Nvidia GeForce GTX 980')
df['GPU'] = df['GPU'].str.replace('AMD AMD Radeon R7','AMD Radeon R7')
df['GPU'] = df['GPU'].str.replace('AMD FirePro W4190M ','AMD FirePro W4190M')
df['GPU'] = df['GPU'].str.replace('AMD R4 Graphics Beema','AMD Radeon R4 Beema')
df['GPU'] = df['GPU'].str.replace('Nvidia GTX 980 SLI','Nvidia GeForce GTX 980')
df['GPU'] = df['GPU'].str.replace('Nvidia GeForce GTX1050 Ti','Nvidia GeForce GTX 1050Ti')
df['GPU'] = df['GPU'].str.replace('Nvidia GeForce GTX1060','Nvidia GeForce GTX 1060')
df['GPU'] = df['GPU'].str.replace('Nvidia GeForce GTX1080','Nvidia GeForce GTX 1080')
df['GPU'] = df['GPU'].str.replace('Intel HD Graphics 5300','Intel HD Graphics 530')
df['GPU'] = df['GPU'].str.replace('Intel HD Graphics 6000','Intel HD Graphics 600')
df['GPU'] = df['GPU'].str.replace('Intel Iris Pro Graphics','Intel Iris Pro Graphics 6200')

df['GPU']= df.apply(default_replace,axis=1)
df['GPU'] = df['GPU'].str.replace('Nvidia GeForce GTX 960<U+039C>','Nvidia GeForce GTX 960M')




In [13]:
# continuation of gpu feature engineering

df['AMD FirePro Series'] = df['GPU'].str.extract(r'(AMD FirePro)').astype('str')
df['AMD FirePro Series'] = df['AMD FirePro Series'].str.replace('AMD ','')
df['AMD FirePro Series'].replace('nan', np.nan,inplace=True)

df['AMD FirePro Model'] = df['GPU'].str.extract(r'(AMD FirePro [A-Z]{1}\d{4}[A-Z]{1})').astype('str')
df['AMD FirePro Model'] = df['AMD FirePro Model'].str.replace('AMD FirePro ','')
df['AMD FirePro Model'].replace('nan', np.nan,inplace=True)


df['AMD Radeon Pro Series'] = df['GPU'].str.extract(r'(AMD Radeon Pro)').astype('str')
df['AMD Radeon Pro Series'] = df['AMD Radeon Pro Series'].str.replace('AMD ','')
df['AMD Radeon Pro Series'].replace('nan', np.nan,inplace=True)

df['AMD Radeon Pro Model'] = df['GPU'].str.extract(r'(AMD Radeon Pro \d{3})').astype('str')
df['AMD Radeon Pro Model'] = df['AMD Radeon Pro Model'].str.replace('AMD Radeon Pro ','')
df['AMD Radeon Pro Model'].replace('nan', np.nan,inplace=True)


df['AMD Radeon R Series'] = df['GPU'].str.extract(r'(AMD Radeon R)').astype('str')
df['AMD Radeon R Series'] = df['AMD Radeon R Series'].str.replace('AMD ','')
df['AMD Radeon R Series'].replace('nan', np.nan,inplace=True)

df['AMD Radeon R Model'] = df['GPU'].str.extract(r'(AMD Radeon R\d{1}|AMD Radeon R[A-Z]{1})').astype('str')
df['AMD Radeon R Model'] = df['AMD Radeon R Model'].str.replace('AMD Radeon ','')
df['AMD Radeon R Model'].replace('nan', np.nan,inplace=True)

df['AMD Radeon Series'] = df['GPU'].str.extract(r'(AMD Radeon)').astype('str')
df['AMD Radeon Series'] = df['AMD Radeon Series'].str.replace('AMD ','')
df['AMD Radeon Series'].replace('nan', np.nan,inplace=True)

df['AMD Radeon Model'] = df['GPU'].str.extract(r'(AMD Radeon \d{3})').astype('str')
df['AMD Radeon Model'] = df['AMD Radeon Model'].str.replace('AMD Radeon ','')
df['AMD Radeon Model'].replace('nan', np.nan,inplace=True)


# dont join with the rest
df['AMD Radeon R Model Number'] = df['GPU'].str.extract(r'(AMD Radeon R\d{1} \d{3}|AMD Radeon R\d{1} [a-zA-Z]{1}\d{3}[a-zA-Z]|AMD Radeon R\d{1} [a-zA-Z]{1}\d{3}|AMD Radeon R\d{1} [a-zA-Z]{1,10}|AMD Radeon R[A-Z]{1} \d{3})').astype('str')
df['AMD Radeon R Model Number'] = df['AMD Radeon R Model Number'].str.replace('AMD Radeon R\d{1} |AMD Radeon R\d{1} |AMD Radeon R\d{1} |AMD Radeon R[A-Z]{1} ','')
df['AMD Radeon R Model Number'].replace('nan', np.nan,inplace=True)

# dont join with the rest


df['Intel HD Graphics Series'] = df['GPU'].str.extract(r'(Intel HD Graphics)').astype('str')
df['Intel HD Graphics Series'] = df['Intel HD Graphics Series'].str.replace('Intel ','')
df['Intel HD Graphics Series'].replace('nan', np.nan,inplace=True)

df['Intel HD Graphics Model'] = df['GPU'].str.extract(r'(Intel HD Graphics \d{3}|Intel HD Graphics Default)').astype('str')
df['Intel HD Graphics Model'] = df['Intel HD Graphics Model'].str.replace('Intel HD Graphics ','')
df['Intel HD Graphics Model'].replace('nan', np.nan,inplace=True)


df['Intel Iris Graphics Series'] = df['GPU'].str.extract(r'(Intel Iris Graphics)').astype('str')
df['Intel Iris Graphics Series'] = df['Intel Iris Graphics Series'].str.replace('Intel ','')
df['Intel Iris Graphics Series'].replace('nan', np.nan,inplace=True)

df['Intel Iris Graphics Model'] = df['GPU'].str.extract(r'(Intel Iris Graphics \d{3})').astype('str')
df['Intel Iris Graphics Model'] = df['Intel Iris Graphics Model'].str.replace('Intel Iris Graphics ','')
df['Intel Iris Graphics Model'].replace('nan', np.nan,inplace=True)


df['Intel Iris Plus Graphics Series'] = df['GPU'].str.extract(r'(Intel Iris Plus Graphics)').astype('str')
df['Intel Iris Plus Graphics Series'] = df['Intel Iris Plus Graphics Series'].str.replace('Intel ','')
df['Intel Iris Plus Graphics Series'].replace('nan', np.nan,inplace=True)

df['Intel Iris Plus Graphics Model'] = df['GPU'].str.extract(r'(Intel Iris Plus Graphics \d{3})').astype('str')
df['Intel Iris Plus Graphics Model'] = df['Intel Iris Plus Graphics Model'].str.replace('Intel Iris Plus Graphics ','')
df['Intel Iris Plus Graphics Model'].replace('nan', np.nan,inplace=True)


df['Intel Iris Pro Graphics Series'] = df['GPU'].str.extract(r'(Intel Iris Pro Graphics)').astype('str')
df['Intel Iris Pro Graphics Series'] = df['Intel Iris Pro Graphics Series'].str.replace('Intel ','')
df['Intel Iris Pro Graphics Series'].replace('nan', np.nan,inplace=True)

df['Intel Iris Pro Graphics Model'] = df['GPU'].str.extract(r'(Intel Iris Pro Graphics \d{4})').astype('str')
df['Intel Iris Pro Graphics Model'] = df['Intel Iris Pro Graphics Model'].str.replace('Intel Iris Pro Graphics ','')
df['Intel Iris Pro Graphics Model'].replace('nan', np.nan,inplace=True)


df['Intel UHD Graphics Series'] = df['GPU'].str.extract(r'(Intel UHD Graphics)').astype('str')
df['Intel UHD Graphics Series'] = df['Intel UHD Graphics Series'].str.replace('Intel ','')
df['Intel UHD Graphics Series'].replace('nan', np.nan,inplace=True)

df['Intel UHD Graphics Model'] = df['GPU'].str.extract(r'(Intel UHD Graphics \d{3})').astype('str')
df['Intel UHD Graphics Model'] = df['Intel UHD Graphics Model'].str.replace('Intel UHD Graphics ','')
df['Intel UHD Graphics Model'].replace('nan', np.nan,inplace=True)


df['Nvidia GTX Series'] = df['GPU'].str.extract(r'(Nvidia GeForce GTX)').astype('str')
df['Nvidia GTX Series'] = df['Nvidia GTX Series'].str.replace('Nvidia ','')
df['Nvidia GTX Series'].replace('nan', np.nan,inplace=True)

df['Nvidia GTX Model'] = df['GPU'].str.extract(r'(Nvidia GeForce GTX \d{3}MX|Nvidia GeForce GTX \d{3,4}M*|Nvidia GeForce GTX \d{4})').astype('str')
df['Nvidia GTX Model'] = df['Nvidia GTX Model'].str.replace('Nvidia GeForce GTX ','')
df['Nvidia GTX Model'].replace('nan', np.nan,inplace=True)


df['Nvidia GT Series'] = df['GPU'].str.extract(r'(Nvidia GeForce GT)').astype('str')
df['Nvidia GT Series'] = df['Nvidia GT Series'].str.replace('Nvidia ','')
df['Nvidia GT Series'].replace('nan', np.nan,inplace=True)

df['Nvidia GT Model'] = df['GPU'].str.extract(r'(Nvidia GeForce GT \d{3}MX)').astype('str')
df['Nvidia GT Model'] = df['Nvidia GT Model'].str.replace('Nvidia GeForce GT ','')
df['Nvidia GT Model'].replace('nan', np.nan,inplace=True)


df['Nvidia Series'] = df['GPU'].str.extract(r'(Nvidia)').astype('str')
df['Nvidia Series'] = df['Nvidia Series'].str.replace('Nvidia ','')
df['Nvidia Series'].replace('nan', np.nan,inplace=True)

df['Nvidia Model'] = df['GPU'].str.extract(r'(Nvidia GeForce \d{3}MX|Nvidia GeForce \d{3}M|Nvidia GeForce \d{3}|Nvidia GeForce MX\d{3})').astype('str')
df['Nvidia Model'] = df['Nvidia Model'].str.replace('Nvidia GeForce ','')
df['Nvidia Model'].replace('nan', np.nan,inplace=True)


df['Nvidia Quadro Series'] = df['GPU'].str.extract(r'(Nvidia Quadro)').astype('str')
df['Nvidia Quadro Series'] = df['Nvidia Quadro Series'].str.replace('Nvidia ','')
df['Nvidia Quadro Series'].replace('nan', np.nan,inplace=True)

df['Nvidia Quadro Model'] = df['GPU'].str.extract(r'(Nvidia Quadro M*\d{2,4}M*)').astype('str')
df['Nvidia Quadro Model'] = df['Nvidia Quadro Model'].str.replace('Nvidia Quadro ','')
df['Nvidia Quadro Model'].replace('nan', np.nan,inplace=True)

df['GPU Series'] = df['AMD FirePro Series'].fillna(df['AMD Radeon Pro Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['AMD Radeon R Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['AMD Radeon Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['Intel HD Graphics Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['Intel Iris Graphics Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['Intel Iris Plus Graphics Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['Intel Iris Pro Graphics Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['Intel UHD Graphics Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['Nvidia GTX Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['Nvidia GT Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['Nvidia Series'])
df['GPU Series'] = df['GPU Series'].fillna(df['Nvidia Quadro Series'])

df['GPU Model'] = df['AMD FirePro Model'].fillna(df['AMD Radeon Pro Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['AMD Radeon R Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['AMD Radeon Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['Intel HD Graphics Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['Intel Iris Graphics Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['Intel Iris Plus Graphics Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['Intel Iris Pro Graphics Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['Intel UHD Graphics Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['Nvidia GTX Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['Nvidia GT Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['Nvidia Model'])
df['GPU Model'] = df['GPU Model'].fillna(df['Nvidia Quadro Model'])


df = df.drop(['AMD FirePro Series','AMD Radeon Series','AMD Radeon R Series','AMD Radeon Pro Series','Intel HD Graphics Series','Intel Iris Graphics Series','Intel Iris Plus Graphics Series','Intel Iris Pro Graphics Series','Intel UHD Graphics Series','Nvidia Series','Nvidia GT Series','Nvidia GTX Series','Nvidia Quadro Series','AMD FirePro Model','AMD Radeon Model','AMD Radeon Pro Model','AMD Radeon R Model','AMD Radeon R Model','Intel Iris Graphics Model','Intel Iris Plus Graphics Model','Intel Iris Pro Graphics Model','Intel Iris Pro Graphics Model','Nvidia Model','Nvidia GT Model','Nvidia GTX Model','Nvidia Quadro Model','Intel HD Graphics Model','Intel UHD Graphics Model'],axis=1)

df = df[['Brand', 'Type', 'Screen Size', 'Touchscreen Check', 'IPS Check', 'Screen Width', 'Screen Height', 'Aspect Ratio', 'Screen DPI', 'Resolution Type', 'CPU Base Clock', 'Core Count', 'CPU Brand', 'CPU Series', 'Intel Generation', 'Intel Core Series Performance Type', 'RAM', 'Base Storage Space Value', 'Base Storage Size Type','Base Storage Type', 'Additional Storage Space Value', 'Additional Storage Size Type', 'Additional Storage Type', 'GPU Brand', 'GPU Series', 'GPU Model', 'AMD Radeon R Model Number','Operating System', 'Weight','Weight Category', 'Price ($)', ]]



In [14]:
df = pd.get_dummies(df, dummy_na=False,drop_first=True)
display(df)
print(df.dtypes)

Unnamed: 0,Screen Size,Screen Width,Screen Height,Aspect Ratio,Screen DPI,CPU Base Clock,RAM,Base Storage Space Value,Additional Storage Space Value,Weight,Price ($),Brand_Apple,Brand_Asus,Brand_Chuwi,Brand_Dell,Brand_Fujitsu,Brand_Google,Brand_HP,Brand_Huawei,Brand_LG,Brand_Lenovo,Brand_MSI,Brand_Mediacom,Brand_Microsoft,Brand_Razer,Brand_Samsung,Brand_Toshiba,Brand_Vero,Brand_Xiaomi,Type_Gaming,Type_Netbook,Type_Notebook,Type_Ultrabook,Type_Workstation,Touchscreen Check_Touchscreen Available,IPS Check_TN Screen,Resolution Type_HD,Resolution Type_Quad HD,Resolution Type_Ultra HD,Core Count_4 Cores,Core Count_6 Cores,Core Count_8 Cores,CPU Brand_Intel,CPU Series_AMD A12,CPU Series_AMD A4,CPU Series_AMD A6,CPU Series_AMD A8,CPU Series_AMD A9,CPU Series_AMD E6,CPU Series_AMD E7,CPU Series_AMD E9,CPU Series_AMD FX 8,CPU Series_AMD FX 9,CPU Series_Atom Z8300,CPU Series_Atom Z8350,CPU Series_Atom Z8550,CPU Series_Celeron N,CPU Series_Celeron U,CPU Series_Core M,CPU Series_Core M M3,CPU Series_Core M M3-6,CPU Series_Core M M3-7,CPU Series_Core M M7-6,CPU Series_Pentium N,CPU Series_Pentium U,CPU Series_Pentium Y,CPU Series_Ryzen 5,CPU Series_Ryzen 7,CPU Series_Xeon 5th gen,CPU Series_Xeon 6th gen,CPU Series_i3,CPU Series_i5,CPU Series_i7,Intel Generation_4th gen,Intel Generation_6th gen,Intel Generation_7th gen,Intel Generation_8th gen,Intel Core Series Performance Type_HQ,Intel Core Series Performance Type_U,Intel Core Series Performance Type_Y,Base Storage Size Type_TB,Base Storage Type_HDD,Base Storage Type_Hybrid,Base Storage Type_SSD,Additional Storage Size Type_TB,Additional Storage Type_Hybrid,Additional Storage Type_SSD,GPU Brand_Intel,GPU Brand_Nvidia,GPU Series_GeForce GT,GPU Series_GeForce GTX,GPU Series_HD Graphics,GPU Series_Iris Graphics,GPU Series_Iris Plus Graphics,GPU Series_Iris Pro Graphics,GPU Series_Nvidia,GPU Series_Radeon,GPU Series_Radeon Pro,GPU Series_Radeon R,GPU Series_UHD Graphics,GPU Model_1050M,GPU Model_1060,GPU Model_1070,GPU Model_1070M,GPU Model_1080,GPU Model_150MX,GPU Model_3000M,GPU Model_400,GPU Model_405,GPU Model_455,GPU Model_500,GPU Model_505,GPU Model_510,GPU Model_515,GPU Model_520,GPU Model_530,GPU Model_540,GPU Model_550,GPU Model_555,GPU Model_560,GPU Model_600,GPU Model_615,GPU Model_620,GPU Model_6200,GPU Model_630,GPU Model_640,GPU Model_650,GPU Model_920,GPU Model_920M,GPU Model_920MX,GPU Model_930M,GPU Model_930MX,GPU Model_940M,GPU Model_940MX,GPU Model_950M,GPU Model_960,GPU Model_960M,GPU Model_965M,GPU Model_970M,GPU Model_980,GPU Model_980M,GPU Model_Default,GPU Model_M1000M,GPU Model_M1200,GPU Model_M2000M,GPU Model_M2200,GPU Model_M2200M,GPU Model_M3000M,GPU Model_M500M,GPU Model_M520M,GPU Model_M620,GPU Model_M620M,GPU Model_MX130,GPU Model_MX150,GPU Model_R2,GPU Model_R3,GPU Model_R4,GPU Model_R5,GPU Model_R7,GPU Model_R9,GPU Model_RX,GPU Model_W4190M,GPU Model_W5130M,GPU Model_W6150M,AMD Radeon R Model Number_520,AMD Radeon R Model Number_540,AMD Radeon R Model Number_550,AMD Radeon R Model Number_560,AMD Radeon R Model Number_580,AMD Radeon R Model Number_Beema,AMD Radeon R Model Number_Default,AMD Radeon R Model Number_M315,AMD Radeon R Model Number_M330,AMD Radeon R Model Number_M360,AMD Radeon R Model Number_M365X,AMD Radeon R Model Number_M385,AMD Radeon R Model Number_M420,AMD Radeon R Model Number_M420X,AMD Radeon R Model Number_M430,AMD Radeon R Model Number_M440,AMD Radeon R Model Number_M445,AMD Radeon R Model Number_M460,AMD Radeon R Model Number_M465,Operating System_Chrome OS,Operating System_Linux,Operating System_Mac OS X,Operating System_No OS,Operating System_Windows 10,Operating System_Windows 10 S,Operating System_Windows 7,Operating System_macOS,Weight Category_Heavy Duty / Gaming,Weight Category_Ultralight,Weight Category_Workstation
0,13.3,2560,1600,1.600000,226.983005,2.3,8,128,,1.37,3568.93416,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1,13.3,1440,900,1.600000,127.677940,1.8,8,128,,1.34,2394.77616,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2,15.6,1920,1080,1.777778,141.211998,2.5,8,256,,1.86,1531.80000,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
3,15.4,2880,1800,1.600000,220.534624,2.7,16,512,,1.83,6759.76680,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
4,13.3,2560,1600,1.600000,226.983005,3.1,8,256,,1.37,4804.79040,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15315,14.0,1920,1080,1.777778,157.350512,2.5,4,128,,1.80,1706.40200,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
15316,13.3,3200,1800,1.777778,276.053530,2.5,16,512,,1.30,4000.10600,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
15317,14.0,1366,768,1.778646,111.935204,1.6,2,64,,1.50,616.82600,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
15318,15.6,1366,768,1.778646,100.454670,2.5,6,1,,2.19,2042.06600,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0


Screen Size                            float64
Screen Width                             int32
Screen Height                            int32
Aspect Ratio                           float64
Screen DPI                             float64
                                        ...   
Operating System_Windows 7               uint8
Operating System_macOS                   uint8
Weight Category_Heavy Duty / Gaming      uint8
Weight Category_Ultralight               uint8
Weight Category_Workstation              uint8
Length: 194, dtype: object


In [15]:
# split data
df = df.fillna(0)

from sklearn.model_selection import train_test_split

train, test = train_test_split(df, test_size=0.2)

train = train.reset_index()
train = train.drop(['index'],axis=1)

test = test.reset_index()
test = test.drop(['index'],axis=1)


display(train)
display(test)

Unnamed: 0,Screen Size,Screen Width,Screen Height,Aspect Ratio,Screen DPI,CPU Base Clock,RAM,Base Storage Space Value,Additional Storage Space Value,Weight,Price ($),Brand_Apple,Brand_Asus,Brand_Chuwi,Brand_Dell,Brand_Fujitsu,Brand_Google,Brand_HP,Brand_Huawei,Brand_LG,Brand_Lenovo,Brand_MSI,Brand_Mediacom,Brand_Microsoft,Brand_Razer,Brand_Samsung,Brand_Toshiba,Brand_Vero,Brand_Xiaomi,Type_Gaming,Type_Netbook,Type_Notebook,Type_Ultrabook,Type_Workstation,Touchscreen Check_Touchscreen Available,IPS Check_TN Screen,Resolution Type_HD,Resolution Type_Quad HD,Resolution Type_Ultra HD,Core Count_4 Cores,Core Count_6 Cores,Core Count_8 Cores,CPU Brand_Intel,CPU Series_AMD A12,CPU Series_AMD A4,CPU Series_AMD A6,CPU Series_AMD A8,CPU Series_AMD A9,CPU Series_AMD E6,CPU Series_AMD E7,CPU Series_AMD E9,CPU Series_AMD FX 8,CPU Series_AMD FX 9,CPU Series_Atom Z8300,CPU Series_Atom Z8350,CPU Series_Atom Z8550,CPU Series_Celeron N,CPU Series_Celeron U,CPU Series_Core M,CPU Series_Core M M3,CPU Series_Core M M3-6,CPU Series_Core M M3-7,CPU Series_Core M M7-6,CPU Series_Pentium N,CPU Series_Pentium U,CPU Series_Pentium Y,CPU Series_Ryzen 5,CPU Series_Ryzen 7,CPU Series_Xeon 5th gen,CPU Series_Xeon 6th gen,CPU Series_i3,CPU Series_i5,CPU Series_i7,Intel Generation_4th gen,Intel Generation_6th gen,Intel Generation_7th gen,Intel Generation_8th gen,Intel Core Series Performance Type_HQ,Intel Core Series Performance Type_U,Intel Core Series Performance Type_Y,Base Storage Size Type_TB,Base Storage Type_HDD,Base Storage Type_Hybrid,Base Storage Type_SSD,Additional Storage Size Type_TB,Additional Storage Type_Hybrid,Additional Storage Type_SSD,GPU Brand_Intel,GPU Brand_Nvidia,GPU Series_GeForce GT,GPU Series_GeForce GTX,GPU Series_HD Graphics,GPU Series_Iris Graphics,GPU Series_Iris Plus Graphics,GPU Series_Iris Pro Graphics,GPU Series_Nvidia,GPU Series_Radeon,GPU Series_Radeon Pro,GPU Series_Radeon R,GPU Series_UHD Graphics,GPU Model_1050M,GPU Model_1060,GPU Model_1070,GPU Model_1070M,GPU Model_1080,GPU Model_150MX,GPU Model_3000M,GPU Model_400,GPU Model_405,GPU Model_455,GPU Model_500,GPU Model_505,GPU Model_510,GPU Model_515,GPU Model_520,GPU Model_530,GPU Model_540,GPU Model_550,GPU Model_555,GPU Model_560,GPU Model_600,GPU Model_615,GPU Model_620,GPU Model_6200,GPU Model_630,GPU Model_640,GPU Model_650,GPU Model_920,GPU Model_920M,GPU Model_920MX,GPU Model_930M,GPU Model_930MX,GPU Model_940M,GPU Model_940MX,GPU Model_950M,GPU Model_960,GPU Model_960M,GPU Model_965M,GPU Model_970M,GPU Model_980,GPU Model_980M,GPU Model_Default,GPU Model_M1000M,GPU Model_M1200,GPU Model_M2000M,GPU Model_M2200,GPU Model_M2200M,GPU Model_M3000M,GPU Model_M500M,GPU Model_M520M,GPU Model_M620,GPU Model_M620M,GPU Model_MX130,GPU Model_MX150,GPU Model_R2,GPU Model_R3,GPU Model_R4,GPU Model_R5,GPU Model_R7,GPU Model_R9,GPU Model_RX,GPU Model_W4190M,GPU Model_W5130M,GPU Model_W6150M,AMD Radeon R Model Number_520,AMD Radeon R Model Number_540,AMD Radeon R Model Number_550,AMD Radeon R Model Number_560,AMD Radeon R Model Number_580,AMD Radeon R Model Number_Beema,AMD Radeon R Model Number_Default,AMD Radeon R Model Number_M315,AMD Radeon R Model Number_M330,AMD Radeon R Model Number_M360,AMD Radeon R Model Number_M365X,AMD Radeon R Model Number_M385,AMD Radeon R Model Number_M420,AMD Radeon R Model Number_M420X,AMD Radeon R Model Number_M430,AMD Radeon R Model Number_M440,AMD Radeon R Model Number_M445,AMD Radeon R Model Number_M460,AMD Radeon R Model Number_M465,Operating System_Chrome OS,Operating System_Linux,Operating System_Mac OS X,Operating System_No OS,Operating System_Windows 10,Operating System_Windows 10 S,Operating System_Windows 7,Operating System_macOS,Weight Category_Heavy Duty / Gaming,Weight Category_Ultralight,Weight Category_Workstation
0,15.6,1920,1080,1.777778,141.211998,2.5,8,256,0.0,2.30,2212.32000,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
1,15.6,1366,768,1.778646,100.454670,2.7,8,1,0.0,2.30,2152.72736,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0
2,17.3,1920,1080,1.777778,127.335675,2.8,16,256,1.0,2.70,3978.41920,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
3,17.3,1920,1080,1.777778,127.335675,2.8,8,128,1.0,2.70,3599.12072,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
4,13.3,3200,1800,1.777778,276.053530,2.7,16,256,0.0,1.42,3736.93600,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12209,15.6,1920,1080,1.777778,141.211998,2.5,8,1,0.0,1.99,2426.83568,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
12210,15.6,1366,768,1.778646,100.454670,2.3,8,1,0.0,2.29,1229.14600,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
12211,15.6,1920,1080,1.777778,141.211998,2.7,8,256,0.0,2.04,2714.98936,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
12212,17.3,1920,1080,1.777778,127.335675,2.8,8,128,1.0,3.00,3113.15200,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0


Unnamed: 0,Screen Size,Screen Width,Screen Height,Aspect Ratio,Screen DPI,CPU Base Clock,RAM,Base Storage Space Value,Additional Storage Space Value,Weight,Price ($),Brand_Apple,Brand_Asus,Brand_Chuwi,Brand_Dell,Brand_Fujitsu,Brand_Google,Brand_HP,Brand_Huawei,Brand_LG,Brand_Lenovo,Brand_MSI,Brand_Mediacom,Brand_Microsoft,Brand_Razer,Brand_Samsung,Brand_Toshiba,Brand_Vero,Brand_Xiaomi,Type_Gaming,Type_Netbook,Type_Notebook,Type_Ultrabook,Type_Workstation,Touchscreen Check_Touchscreen Available,IPS Check_TN Screen,Resolution Type_HD,Resolution Type_Quad HD,Resolution Type_Ultra HD,Core Count_4 Cores,Core Count_6 Cores,Core Count_8 Cores,CPU Brand_Intel,CPU Series_AMD A12,CPU Series_AMD A4,CPU Series_AMD A6,CPU Series_AMD A8,CPU Series_AMD A9,CPU Series_AMD E6,CPU Series_AMD E7,CPU Series_AMD E9,CPU Series_AMD FX 8,CPU Series_AMD FX 9,CPU Series_Atom Z8300,CPU Series_Atom Z8350,CPU Series_Atom Z8550,CPU Series_Celeron N,CPU Series_Celeron U,CPU Series_Core M,CPU Series_Core M M3,CPU Series_Core M M3-6,CPU Series_Core M M3-7,CPU Series_Core M M7-6,CPU Series_Pentium N,CPU Series_Pentium U,CPU Series_Pentium Y,CPU Series_Ryzen 5,CPU Series_Ryzen 7,CPU Series_Xeon 5th gen,CPU Series_Xeon 6th gen,CPU Series_i3,CPU Series_i5,CPU Series_i7,Intel Generation_4th gen,Intel Generation_6th gen,Intel Generation_7th gen,Intel Generation_8th gen,Intel Core Series Performance Type_HQ,Intel Core Series Performance Type_U,Intel Core Series Performance Type_Y,Base Storage Size Type_TB,Base Storage Type_HDD,Base Storage Type_Hybrid,Base Storage Type_SSD,Additional Storage Size Type_TB,Additional Storage Type_Hybrid,Additional Storage Type_SSD,GPU Brand_Intel,GPU Brand_Nvidia,GPU Series_GeForce GT,GPU Series_GeForce GTX,GPU Series_HD Graphics,GPU Series_Iris Graphics,GPU Series_Iris Plus Graphics,GPU Series_Iris Pro Graphics,GPU Series_Nvidia,GPU Series_Radeon,GPU Series_Radeon Pro,GPU Series_Radeon R,GPU Series_UHD Graphics,GPU Model_1050M,GPU Model_1060,GPU Model_1070,GPU Model_1070M,GPU Model_1080,GPU Model_150MX,GPU Model_3000M,GPU Model_400,GPU Model_405,GPU Model_455,GPU Model_500,GPU Model_505,GPU Model_510,GPU Model_515,GPU Model_520,GPU Model_530,GPU Model_540,GPU Model_550,GPU Model_555,GPU Model_560,GPU Model_600,GPU Model_615,GPU Model_620,GPU Model_6200,GPU Model_630,GPU Model_640,GPU Model_650,GPU Model_920,GPU Model_920M,GPU Model_920MX,GPU Model_930M,GPU Model_930MX,GPU Model_940M,GPU Model_940MX,GPU Model_950M,GPU Model_960,GPU Model_960M,GPU Model_965M,GPU Model_970M,GPU Model_980,GPU Model_980M,GPU Model_Default,GPU Model_M1000M,GPU Model_M1200,GPU Model_M2000M,GPU Model_M2200,GPU Model_M2200M,GPU Model_M3000M,GPU Model_M500M,GPU Model_M520M,GPU Model_M620,GPU Model_M620M,GPU Model_MX130,GPU Model_MX150,GPU Model_R2,GPU Model_R3,GPU Model_R4,GPU Model_R5,GPU Model_R7,GPU Model_R9,GPU Model_RX,GPU Model_W4190M,GPU Model_W5130M,GPU Model_W6150M,AMD Radeon R Model Number_520,AMD Radeon R Model Number_540,AMD Radeon R Model Number_550,AMD Radeon R Model Number_560,AMD Radeon R Model Number_580,AMD Radeon R Model Number_Beema,AMD Radeon R Model Number_Default,AMD Radeon R Model Number_M315,AMD Radeon R Model Number_M330,AMD Radeon R Model Number_M360,AMD Radeon R Model Number_M365X,AMD Radeon R Model Number_M385,AMD Radeon R Model Number_M420,AMD Radeon R Model Number_M420X,AMD Radeon R Model Number_M430,AMD Radeon R Model Number_M440,AMD Radeon R Model Number_M445,AMD Radeon R Model Number_M460,AMD Radeon R Model Number_M465,Operating System_Chrome OS,Operating System_Linux,Operating System_Mac OS X,Operating System_No OS,Operating System_Windows 10,Operating System_Windows 10 S,Operating System_Windows 7,Operating System_macOS,Weight Category_Heavy Duty / Gaming,Weight Category_Ultralight,Weight Category_Workstation
0,15.6,1920,1080,1.777778,141.211998,2.60,8,500,0.0,2.23,3648.61600,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
1,13.3,1920,1080,1.777778,165.632118,2.50,8,256,0.0,1.60,2103.52264,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,15.6,1920,1080,1.777778,141.211998,2.50,8,500,0.0,2.00,2291.44000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,15.6,1920,1080,1.777778,141.211998,1.60,8,256,0.0,2.20,2129.47024,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
4,17.3,1920,1080,1.777778,127.335675,2.70,32,512,1.0,4.60,8636.53000,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3049,15.6,1920,1080,1.777778,141.211998,2.80,16,256,1.0,2.50,4802.53600,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
3050,15.6,1920,1080,1.777778,141.211998,1.44,4,64,0.0,1.89,664.25336,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3051,14.0,2560,1440,1.777778,209.800683,2.50,8,256,0.0,1.54,5855.97816,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3052,11.6,1366,768,1.778646,135.094211,1.10,4,64,0.0,1.20,906.96000,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


In [16]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from imblearn.pipeline import Pipeline
from sklearn.linear_model import Lasso

# data imputation
display(train.isnull().sum())
display(train.dtypes)
print(train.columns.to_list())

num_columns = ['Screen Size', 'Screen Width', 'Screen Height', 'Aspect Ratio', 'Screen DPI', 'CPU Base Clock', 'RAM', 'Base Storage Space Value', 'Additional Storage Space Value', 'Weight', 'Price ($)']
cat_columns = ['Brand_Apple', 'Brand_Asus', 'Brand_Chuwi', 'Brand_Dell', 'Brand_Fujitsu', 'Brand_Google', 'Brand_HP', 'Brand_Huawei', 'Brand_LG', 'Brand_Lenovo', 'Brand_MSI', 'Brand_Mediacom', 'Brand_Microsoft', 'Brand_Razer', 'Brand_Samsung', 'Brand_Toshiba', 'Brand_Vero', 'Brand_Xiaomi', 'Type_Gaming', 'Type_Netbook', 'Type_Notebook', 'Type_Ultrabook', 'Type_Workstation', 'Touchscreen Check_Touchscreen Available', 'IPS Check_TN Screen', 'Resolution Type_HD', 'Resolution Type_Quad HD', 'Resolution Type_Ultra HD', 'Core Count_4 Cores', 'Core Count_6 Cores', 'Core Count_8 Cores', 'CPU Brand_Intel', 'CPU Series_AMD A12', 'CPU Series_AMD A4', 'CPU Series_AMD A6', 'CPU Series_AMD A8', 'CPU Series_AMD A9', 'CPU Series_AMD E6', 'CPU Series_AMD E7', 'CPU Series_AMD E9', 'CPU Series_AMD FX 8', 'CPU Series_AMD FX 9', 'CPU Series_Atom Z8300', 'CPU Series_Atom Z8350', 'CPU Series_Atom Z8550', 'CPU Series_Celeron N', 'CPU Series_Celeron U', 'CPU Series_Core M', 'CPU Series_Core M M3', 'CPU Series_Core M M3-6', 'CPU Series_Core M M3-7', 'CPU Series_Core M M7-6', 'CPU Series_Pentium N', 'CPU Series_Pentium U', 'CPU Series_Pentium Y', 'CPU Series_Ryzen 5', 'CPU Series_Ryzen 7', 'CPU Series_Xeon 5th gen', 'CPU Series_Xeon 6th gen', 'CPU Series_i3', 'CPU Series_i5', 'CPU Series_i7', 'Intel Generation_4th gen', 'Intel Generation_6th gen', 'Intel Generation_7th gen', 'Intel Generation_8th gen', 'Intel Core Series Performance Type_HQ', 'Intel Core Series Performance Type_U', 'Intel Core Series Performance Type_Y', 'Base Storage Size Type_TB', 'Base Storage Type_HDD', 'Base Storage Type_Hybrid', 'Base Storage Type_SSD', 'Additional Storage Size Type_TB', 'Additional Storage Type_Hybrid', 'Additional Storage Type_SSD', 'GPU Brand_Intel', 'GPU Brand_Nvidia', 'GPU Series_GeForce GT', 'GPU Series_GeForce GTX', 'GPU Series_HD Graphics', 'GPU Series_Iris Graphics', 'GPU Series_Iris Plus Graphics', 'GPU Series_Iris Pro Graphics', 'GPU Series_Nvidia', 'GPU Series_Radeon', 'GPU Series_Radeon Pro', 'GPU Series_Radeon R', 'GPU Series_UHD Graphics', 'GPU Model_1050M', 'GPU Model_1060', 'GPU Model_1070', 'GPU Model_1070M', 'GPU Model_1080', 'GPU Model_150MX', 'GPU Model_3000M', 'GPU Model_400', 'GPU Model_405', 'GPU Model_455', 'GPU Model_500', 'GPU Model_505', 'GPU Model_510', 'GPU Model_515', 'GPU Model_520', 'GPU Model_530', 'GPU Model_540', 'GPU Model_550', 'GPU Model_555', 'GPU Model_560', 'GPU Model_600', 'GPU Model_615', 'GPU Model_620', 'GPU Model_6200', 'GPU Model_630', 'GPU Model_640', 'GPU Model_650', 'GPU Model_920', 'GPU Model_920M', 'GPU Model_920MX', 'GPU Model_930M', 'GPU Model_930MX', 'GPU Model_940M', 'GPU Model_940MX', 'GPU Model_950M', 'GPU Model_960', 'GPU Model_960M', 'GPU Model_965M', 'GPU Model_970M', 'GPU Model_980', 'GPU Model_980M', 'GPU Model_Default', 'GPU Model_M1000M', 'GPU Model_M1200', 'GPU Model_M2000M', 'GPU Model_M2200', 'GPU Model_M2200M', 'GPU Model_M3000M', 'GPU Model_M500M', 'GPU Model_M520M', 'GPU Model_M620', 'GPU Model_M620M', 'GPU Model_MX130', 'GPU Model_MX150', 'GPU Model_R2', 'GPU Model_R3', 'GPU Model_R4', 'GPU Model_R5', 'GPU Model_R7', 'GPU Model_R9', 'GPU Model_RX', 'GPU Model_W4190M', 'GPU Model_W5130M', 'GPU Model_W6150M', 'AMD Radeon R Model Number_520', 'AMD Radeon R Model Number_540', 'AMD Radeon R Model Number_550', 'AMD Radeon R Model Number_560', 'AMD Radeon R Model Number_580', 'AMD Radeon R Model Number_Beema', 'AMD Radeon R Model Number_Default', 'AMD Radeon R Model Number_M315', 'AMD Radeon R Model Number_M330', 'AMD Radeon R Model Number_M360', 'AMD Radeon R Model Number_M365X', 'AMD Radeon R Model Number_M385', 'AMD Radeon R Model Number_M420', 'AMD Radeon R Model Number_M420X', 'AMD Radeon R Model Number_M430', 'AMD Radeon R Model Number_M440', 'AMD Radeon R Model Number_M445', 'AMD Radeon R Model Number_M460', 'AMD Radeon R Model Number_M465', 'Operating System_Chrome OS', 'Operating System_Linux', 'Operating System_Mac OS X', 'Operating System_No OS', 'Operating System_Windows 10', 'Operating System_Windows 10 S', 'Operating System_Windows 7', 'Operating System_macOS', 'Weight Category_Heavy Duty / Gaming', 'Weight Category_Ultralight', 'Weight Category_Workstation']

scaler = RobustScaler()

# data scaling (train)
numerical_df_train = train[['Screen Size', 'Screen Width', 'Screen Height', 'Aspect Ratio', 'Screen DPI', 'CPU Base Clock', 'RAM', 'Base Storage Space Value', 'Additional Storage Space Value', 'Weight', 'Price ($)']]
numerical_df_train = pd.DataFrame(scaler.fit_transform(numerical_df_train),columns=num_columns)
categorical_df_train = train[cat_columns]
cleaned_df_train = pd.merge(numerical_df_train, categorical_df_train, left_index=True, right_index=True)
cleaned_df_train = cleaned_df_train.reset_index()
cleaned_df_train = cleaned_df_train.drop(['index'],axis=1)
display(cleaned_df_train)


# data scaling (test)
numerical_df_test = test[['Screen Size', 'Screen Width', 'Screen Height', 'Aspect Ratio', 'Screen DPI', 'CPU Base Clock', 'RAM', 'Base Storage Space Value', 'Additional Storage Space Value', 'Weight', 'Price ($)']]
numerical_df_test = pd.DataFrame(scaler.fit_transform(numerical_df_test),columns=num_columns)
categorical_df_test = test[cat_columns]
cleaned_df_test = pd.merge(numerical_df_test, categorical_df_test, left_index=True, right_index=True)
cleaned_df_test = cleaned_df_test.reset_index()
cleaned_df_test = cleaned_df_test.drop(['index'],axis=1)
display(cleaned_df_test)

Screen Size                            0
Screen Width                           0
Screen Height                          0
Aspect Ratio                           0
Screen DPI                             0
                                      ..
Operating System_Windows 7             0
Operating System_macOS                 0
Weight Category_Heavy Duty / Gaming    0
Weight Category_Ultralight             0
Weight Category_Workstation            0
Length: 194, dtype: int64

Screen Size                            float64
Screen Width                             int32
Screen Height                            int32
Aspect Ratio                           float64
Screen DPI                             float64
                                        ...   
Operating System_Windows 7               uint8
Operating System_macOS                   uint8
Weight Category_Heavy Duty / Gaming      uint8
Weight Category_Ultralight               uint8
Weight Category_Workstation              uint8
Length: 194, dtype: object

['Screen Size', 'Screen Width', 'Screen Height', 'Aspect Ratio', 'Screen DPI', 'CPU Base Clock', 'RAM', 'Base Storage Space Value', 'Additional Storage Space Value', 'Weight', 'Price ($)', 'Brand_Apple', 'Brand_Asus', 'Brand_Chuwi', 'Brand_Dell', 'Brand_Fujitsu', 'Brand_Google', 'Brand_HP', 'Brand_Huawei', 'Brand_LG', 'Brand_Lenovo', 'Brand_MSI', 'Brand_Mediacom', 'Brand_Microsoft', 'Brand_Razer', 'Brand_Samsung', 'Brand_Toshiba', 'Brand_Vero', 'Brand_Xiaomi', 'Type_Gaming', 'Type_Netbook', 'Type_Notebook', 'Type_Ultrabook', 'Type_Workstation', 'Touchscreen Check_Touchscreen Available', 'IPS Check_TN Screen', 'Resolution Type_HD', 'Resolution Type_Quad HD', 'Resolution Type_Ultra HD', 'Core Count_4 Cores', 'Core Count_6 Cores', 'Core Count_8 Cores', 'CPU Brand_Intel', 'CPU Series_AMD A12', 'CPU Series_AMD A4', 'CPU Series_AMD A6', 'CPU Series_AMD A8', 'CPU Series_AMD A9', 'CPU Series_AMD E6', 'CPU Series_AMD E7', 'CPU Series_AMD E9', 'CPU Series_AMD FX 8', 'CPU Series_AMD FX 9', 'CPU S

Unnamed: 0,Screen Size,Screen Width,Screen Height,Aspect Ratio,Screen DPI,CPU Base Clock,RAM,Base Storage Space Value,Additional Storage Space Value,Weight,Price ($),Brand_Apple,Brand_Asus,Brand_Chuwi,Brand_Dell,Brand_Fujitsu,Brand_Google,Brand_HP,Brand_Huawei,Brand_LG,Brand_Lenovo,Brand_MSI,Brand_Mediacom,Brand_Microsoft,Brand_Razer,Brand_Samsung,Brand_Toshiba,Brand_Vero,Brand_Xiaomi,Type_Gaming,Type_Netbook,Type_Notebook,Type_Ultrabook,Type_Workstation,Touchscreen Check_Touchscreen Available,IPS Check_TN Screen,Resolution Type_HD,Resolution Type_Quad HD,Resolution Type_Ultra HD,Core Count_4 Cores,Core Count_6 Cores,Core Count_8 Cores,CPU Brand_Intel,CPU Series_AMD A12,CPU Series_AMD A4,CPU Series_AMD A6,CPU Series_AMD A8,CPU Series_AMD A9,CPU Series_AMD E6,CPU Series_AMD E7,CPU Series_AMD E9,CPU Series_AMD FX 8,CPU Series_AMD FX 9,CPU Series_Atom Z8300,CPU Series_Atom Z8350,CPU Series_Atom Z8550,CPU Series_Celeron N,CPU Series_Celeron U,CPU Series_Core M,CPU Series_Core M M3,CPU Series_Core M M3-6,CPU Series_Core M M3-7,CPU Series_Core M M7-6,CPU Series_Pentium N,CPU Series_Pentium U,CPU Series_Pentium Y,CPU Series_Ryzen 5,CPU Series_Ryzen 7,CPU Series_Xeon 5th gen,CPU Series_Xeon 6th gen,CPU Series_i3,CPU Series_i5,CPU Series_i7,Intel Generation_4th gen,Intel Generation_6th gen,Intel Generation_7th gen,Intel Generation_8th gen,Intel Core Series Performance Type_HQ,Intel Core Series Performance Type_U,Intel Core Series Performance Type_Y,Base Storage Size Type_TB,Base Storage Type_HDD,Base Storage Type_Hybrid,Base Storage Type_SSD,Additional Storage Size Type_TB,Additional Storage Type_Hybrid,Additional Storage Type_SSD,GPU Brand_Intel,GPU Brand_Nvidia,GPU Series_GeForce GT,GPU Series_GeForce GTX,GPU Series_HD Graphics,GPU Series_Iris Graphics,GPU Series_Iris Plus Graphics,GPU Series_Iris Pro Graphics,GPU Series_Nvidia,GPU Series_Radeon,GPU Series_Radeon Pro,GPU Series_Radeon R,GPU Series_UHD Graphics,GPU Model_1050M,GPU Model_1060,GPU Model_1070,GPU Model_1070M,GPU Model_1080,GPU Model_150MX,GPU Model_3000M,GPU Model_400,GPU Model_405,GPU Model_455,GPU Model_500,GPU Model_505,GPU Model_510,GPU Model_515,GPU Model_520,GPU Model_530,GPU Model_540,GPU Model_550,GPU Model_555,GPU Model_560,GPU Model_600,GPU Model_615,GPU Model_620,GPU Model_6200,GPU Model_630,GPU Model_640,GPU Model_650,GPU Model_920,GPU Model_920M,GPU Model_920MX,GPU Model_930M,GPU Model_930MX,GPU Model_940M,GPU Model_940MX,GPU Model_950M,GPU Model_960,GPU Model_960M,GPU Model_965M,GPU Model_970M,GPU Model_980,GPU Model_980M,GPU Model_Default,GPU Model_M1000M,GPU Model_M1200,GPU Model_M2000M,GPU Model_M2200,GPU Model_M2200M,GPU Model_M3000M,GPU Model_M500M,GPU Model_M520M,GPU Model_M620,GPU Model_M620M,GPU Model_MX130,GPU Model_MX150,GPU Model_R2,GPU Model_R3,GPU Model_R4,GPU Model_R5,GPU Model_R7,GPU Model_R9,GPU Model_RX,GPU Model_W4190M,GPU Model_W5130M,GPU Model_W6150M,AMD Radeon R Model Number_520,AMD Radeon R Model Number_540,AMD Radeon R Model Number_550,AMD Radeon R Model Number_560,AMD Radeon R Model Number_580,AMD Radeon R Model Number_Beema,AMD Radeon R Model Number_Default,AMD Radeon R Model Number_M315,AMD Radeon R Model Number_M330,AMD Radeon R Model Number_M360,AMD Radeon R Model Number_M365X,AMD Radeon R Model Number_M385,AMD Radeon R Model Number_M420,AMD Radeon R Model Number_M420X,AMD Radeon R Model Number_M430,AMD Radeon R Model Number_M440,AMD Radeon R Model Number_M445,AMD Radeon R Model Number_M460,AMD Radeon R Model Number_M465,Operating System_Chrome OS,Operating System_Linux,Operating System_Mac OS X,Operating System_No OS,Operating System_Windows 10,Operating System_Windows 10 S,Operating System_Windows 7,Operating System_macOS,Weight Category_Heavy Duty / Gaming,Weight Category_Ultralight,Weight Category_Workstation
0,0.0000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.320988,-0.172884,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
1,0.0000,-1.73125,-1.733333,0.000868,-1.357906,0.285714,0.0,-1.328125,0.0,0.320988,-0.198202,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0
2,1.0625,0.00000,0.000000,0.000000,-0.462315,0.428571,2.0,0.000000,1.0,0.814815,0.577433,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
3,1.0625,0.00000,0.000000,0.000000,-0.462315,0.428571,0.0,-0.666667,1.0,0.814815,0.416290,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
4,-1.4375,4.00000,4.000000,0.000000,4.492496,0.285714,2.0,0.000000,0.0,-0.765432,0.474840,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12209,0.0000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0,-1.328125,0.0,-0.061728,-0.081749,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
12210,0.0000,-1.73125,-1.733333,0.000868,-1.357906,-0.285714,0.0,-1.328125,0.0,0.308642,-0.590580,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
12211,0.0000,0.00000,0.000000,0.000000,0.000000,0.285714,0.0,0.000000,0.0,0.000000,0.040672,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
12212,1.0625,0.00000,0.000000,0.000000,-0.462315,0.428571,0.0,-0.666667,1.0,1.185185,0.209829,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0


Unnamed: 0,Screen Size,Screen Width,Screen Height,Aspect Ratio,Screen DPI,CPU Base Clock,RAM,Base Storage Space Value,Additional Storage Space Value,Weight,Price ($),Brand_Apple,Brand_Asus,Brand_Chuwi,Brand_Dell,Brand_Fujitsu,Brand_Google,Brand_HP,Brand_Huawei,Brand_LG,Brand_Lenovo,Brand_MSI,Brand_Mediacom,Brand_Microsoft,Brand_Razer,Brand_Samsung,Brand_Toshiba,Brand_Vero,Brand_Xiaomi,Type_Gaming,Type_Netbook,Type_Notebook,Type_Ultrabook,Type_Workstation,Touchscreen Check_Touchscreen Available,IPS Check_TN Screen,Resolution Type_HD,Resolution Type_Quad HD,Resolution Type_Ultra HD,Core Count_4 Cores,Core Count_6 Cores,Core Count_8 Cores,CPU Brand_Intel,CPU Series_AMD A12,CPU Series_AMD A4,CPU Series_AMD A6,CPU Series_AMD A8,CPU Series_AMD A9,CPU Series_AMD E6,CPU Series_AMD E7,CPU Series_AMD E9,CPU Series_AMD FX 8,CPU Series_AMD FX 9,CPU Series_Atom Z8300,CPU Series_Atom Z8350,CPU Series_Atom Z8550,CPU Series_Celeron N,CPU Series_Celeron U,CPU Series_Core M,CPU Series_Core M M3,CPU Series_Core M M3-6,CPU Series_Core M M3-7,CPU Series_Core M M7-6,CPU Series_Pentium N,CPU Series_Pentium U,CPU Series_Pentium Y,CPU Series_Ryzen 5,CPU Series_Ryzen 7,CPU Series_Xeon 5th gen,CPU Series_Xeon 6th gen,CPU Series_i3,CPU Series_i5,CPU Series_i7,Intel Generation_4th gen,Intel Generation_6th gen,Intel Generation_7th gen,Intel Generation_8th gen,Intel Core Series Performance Type_HQ,Intel Core Series Performance Type_U,Intel Core Series Performance Type_Y,Base Storage Size Type_TB,Base Storage Type_HDD,Base Storage Type_Hybrid,Base Storage Type_SSD,Additional Storage Size Type_TB,Additional Storage Type_Hybrid,Additional Storage Type_SSD,GPU Brand_Intel,GPU Brand_Nvidia,GPU Series_GeForce GT,GPU Series_GeForce GTX,GPU Series_HD Graphics,GPU Series_Iris Graphics,GPU Series_Iris Plus Graphics,GPU Series_Iris Pro Graphics,GPU Series_Nvidia,GPU Series_Radeon,GPU Series_Radeon Pro,GPU Series_Radeon R,GPU Series_UHD Graphics,GPU Model_1050M,GPU Model_1060,GPU Model_1070,GPU Model_1070M,GPU Model_1080,GPU Model_150MX,GPU Model_3000M,GPU Model_400,GPU Model_405,GPU Model_455,GPU Model_500,GPU Model_505,GPU Model_510,GPU Model_515,GPU Model_520,GPU Model_530,GPU Model_540,GPU Model_550,GPU Model_555,GPU Model_560,GPU Model_600,GPU Model_615,GPU Model_620,GPU Model_6200,GPU Model_630,GPU Model_640,GPU Model_650,GPU Model_920,GPU Model_920M,GPU Model_920MX,GPU Model_930M,GPU Model_930MX,GPU Model_940M,GPU Model_940MX,GPU Model_950M,GPU Model_960,GPU Model_960M,GPU Model_965M,GPU Model_970M,GPU Model_980,GPU Model_980M,GPU Model_Default,GPU Model_M1000M,GPU Model_M1200,GPU Model_M2000M,GPU Model_M2200,GPU Model_M2200M,GPU Model_M3000M,GPU Model_M500M,GPU Model_M520M,GPU Model_M620,GPU Model_M620M,GPU Model_MX130,GPU Model_MX150,GPU Model_R2,GPU Model_R3,GPU Model_R4,GPU Model_R5,GPU Model_R7,GPU Model_R9,GPU Model_RX,GPU Model_W4190M,GPU Model_W5130M,GPU Model_W6150M,AMD Radeon R Model Number_520,AMD Radeon R Model Number_540,AMD Radeon R Model Number_550,AMD Radeon R Model Number_560,AMD Radeon R Model Number_580,AMD Radeon R Model Number_Beema,AMD Radeon R Model Number_Default,AMD Radeon R Model Number_M315,AMD Radeon R Model Number_M330,AMD Radeon R Model Number_M360,AMD Radeon R Model Number_M365X,AMD Radeon R Model Number_M385,AMD Radeon R Model Number_M420,AMD Radeon R Model Number_M420X,AMD Radeon R Model Number_M430,AMD Radeon R Model Number_M440,AMD Radeon R Model Number_M445,AMD Radeon R Model Number_M460,AMD Radeon R Model Number_M465,Operating System_Chrome OS,Operating System_Linux,Operating System_Mac OS X,Operating System_No OS,Operating System_Windows 10,Operating System_Windows 10 S,Operating System_Windows 7,Operating System_macOS,Weight Category_Heavy Duty / Gaming,Weight Category_Ultralight,Weight Category_Workstation
0,0.0000,0.00000,0.000000,0.000000,0.000000,0.142857,0.0,1.089286,0.0,0.2375,0.432867,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
1,-1.4375,0.00000,0.000000,0.000000,0.813602,0.000000,0.0,0.000000,0.0,-0.5500,-0.213967,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,0.0000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0,1.089286,0.0,-0.0500,-0.135298,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,0.0000,0.00000,0.000000,0.000000,0.000000,-1.285714,0.0,0.000000,0.0,0.2000,-0.203105,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
4,1.0625,0.00000,0.000000,0.000000,-0.462315,0.285714,6.0,1.142857,1.0,3.2000,2.520997,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3049,0.0000,0.00000,0.000000,0.000000,0.000000,0.428571,2.0,0.000000,1.0,0.5750,0.915942,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
3050,0.0000,0.00000,0.000000,0.000000,0.000000,-1.514286,-1.0,-0.857143,0.0,-0.1875,-0.816500,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3051,-1.0000,2.00000,2.000000,0.000000,2.285159,0.000000,0.0,0.000000,0.0,-0.6250,1.356952,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3052,-2.5000,-1.73125,-1.733333,0.000868,-0.203825,-2.000000,-1.0,-0.857143,0.0,-1.0500,-0.714894,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


In [17]:
# get rid of cpu and hard disk from cols
# check for nan values using np unique and then look for nan

In [18]:
#splitting the data into training and test sets (80:20)
num_columns = ['Screen Size', 'Screen Width', 'Screen Height', 'Aspect Ratio', 'Screen DPI', 'CPU Base Clock', 'RAM', 'Base Storage Space Value', 'Additional Storage Space Value', 'Weight']
all_cols = num_columns + cat_columns

features = []

for col in all_cols:
    features.append(cleaned_df_train[col])


X_train = np.transpose(np.array(features))
y_train = np.transpose(np.array(cleaned_df_train['Price ($)']))

X_test = np.transpose(np.array(features))
y_test = np.transpose(np.array(cleaned_df_test['Price ($)']))

# gridsearch best params for lasso regression for feature importance
pipeline = Pipeline([
                     ('model',Lasso())
])

search = GridSearchCV(pipeline,
                      {'model__alpha':np.arange(0.1,10,0.1)},
                      cv = 5, scoring="neg_mean_squared_error",verbose=3
                      )

search.fit(X_train,y_train)

print(search.best_params_)
coefficients = search.best_estimator_.named_steps['model'].coef_
importance = np.abs(coefficients)

Fitting 5 folds for each of 99 candidates, totalling 495 fits
[CV 1/5] END .................model__alpha=0.1;, score=-0.231 total time=   0.1s
[CV 2/5] END .................model__alpha=0.1;, score=-0.229 total time=   0.1s
[CV 3/5] END .................model__alpha=0.1;, score=-0.240 total time=   0.0s
[CV 4/5] END .................model__alpha=0.1;, score=-0.254 total time=   0.3s
[CV 5/5] END .................model__alpha=0.1;, score=-0.247 total time=   0.1s
[CV 1/5] END .................model__alpha=0.2;, score=-0.260 total time=   0.2s
[CV 2/5] END .................model__alpha=0.2;, score=-0.258 total time=   0.0s
[CV 3/5] END .................model__alpha=0.2;, score=-0.269 total time=   0.1s
[CV 4/5] END .................model__alpha=0.2;, score=-0.278 total time=   0.1s
[CV 5/5] END .................model__alpha=0.2;, score=-0.272 total time=   0.1s
[CV 1/5] END .model__alpha=0.30000000000000004;, score=-0.297 total time=   0.2s
[CV 2/5] END .model__alpha=0.30000000000000004;

In [19]:
print(np.array(all_cols)[importance > 0])
print(np.array(all_cols)[importance == 0])

['Screen Height' 'CPU Base Clock' 'RAM' 'Base Storage Space Value']
['Screen Size' 'Screen Width' 'Aspect Ratio' 'Screen DPI'
 'Additional Storage Space Value' 'Weight' 'Brand_Apple' 'Brand_Asus'
 'Brand_Chuwi' 'Brand_Dell' 'Brand_Fujitsu' 'Brand_Google' 'Brand_HP'
 'Brand_Huawei' 'Brand_LG' 'Brand_Lenovo' 'Brand_MSI' 'Brand_Mediacom'
 'Brand_Microsoft' 'Brand_Razer' 'Brand_Samsung' 'Brand_Toshiba'
 'Brand_Vero' 'Brand_Xiaomi' 'Type_Gaming' 'Type_Netbook' 'Type_Notebook'
 'Type_Ultrabook' 'Type_Workstation'
 'Touchscreen Check_Touchscreen Available' 'IPS Check_TN Screen'
 'Resolution Type_HD' 'Resolution Type_Quad HD' 'Resolution Type_Ultra HD'
 'Core Count_4 Cores' 'Core Count_6 Cores' 'Core Count_8 Cores'
 'CPU Brand_Intel' 'CPU Series_AMD A12' 'CPU Series_AMD A4'
 'CPU Series_AMD A6' 'CPU Series_AMD A8' 'CPU Series_AMD A9'
 'CPU Series_AMD E6' 'CPU Series_AMD E7' 'CPU Series_AMD E9'
 'CPU Series_AMD FX 8' 'CPU Series_AMD FX 9' 'CPU Series_Atom Z8300'
 'CPU Series_Atom Z8350' 'CPU Se


### (A3) Imputing Data


### (A4) Choice of Impute Method:


### (A5) Dataframe Stitching


### (A6) Oversampling Data


### (A6) All Features

## (B) Exploratory Data Analysis

### (B1) Qualitative Variables


### (B2) Quantitative Variables


## (C) ML Model Exploration

## (D) ML Model Selection & Analysis

## (E) Questions & Answers