# Laptop Price

In [2]:
import re
import numpy as np
import pandas as pd
import seaborn as sns

In [10]:
# Load the laptop listings; the CSV is decoded as Latin-1.
df = pd.read_csv(
    'data/laptop_price.csv',
    encoding='latin-1',
)

In [11]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


In [4]:
# Summarise column names, non-null counts, and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   Product           1303 non-null   object 
 3   TypeName          1303 non-null   object 
 4   Inches            1303 non-null   float64
 5   ScreenResolution  1303 non-null   object 
 6   Cpu               1303 non-null   object 
 7   Ram               1303 non-null   object 
 8   Memory            1303 non-null   object 
 9   Gpu               1303 non-null   object 
 10  OpSys             1303 non-null   object 
 11  Weight            1303 non-null   object 
 12  Price_euros       1303 non-null   float64
dtypes: float64(2), int64(1), object(10)
memory usage: 132.5+ KB


In [None]:
# Inspect the spread of laptop_ID (min, quartiles, max). The method has to be
# called: a bare `.quantile` only displays the bound-method repr.
df['laptop_ID'].quantile([0, 0.25, 0.5, 0.75, 1])

In [5]:
# Store the identifier column as a 16-bit integer.
df['laptop_ID'] = df['laptop_ID'].astype(np.int16)

In [6]:
# Pull the trailing "<width>x<height>" pair out of the free-form resolution text
# (e.g. "IPS Panel Retina Display 2560x1600" -> "2560", "1600").
# A raw-string pattern avoids the invalid '\s' escape, and anchoring at the end
# of the string tolerates trailing whitespace and an upper-case "X".
screenResolution = df['ScreenResolution'].str.extract(r'(\d+)[xX](\d+)\s*$')
df['X_res'] = screenResolution[0].astype('int16')
df['Y_res'] = screenResolution[1].astype('int16')

In [7]:
# Remove the "GB" text from the RAM column and store the remainder as int16.
df['Ram'] = (
    df['Ram']
    .str.strip()
    .str.replace('GB', '')
    .astype('int16')
)

In [8]:
# Extract the numeric part of the weight (e.g. "1.37kg" -> 1.37) as float32.
# expand=False returns a Series, so the column is assigned from a Series
# rather than from a one-column DataFrame.
# NOTE(review): the unit class also accepts "mg"/"Mg" and stores the number
# unchanged — confirm that is intended.
df['Weight'] = (
    df['Weight']
    .str.strip()
    .str.extract(r'(\d+(?:\.\d+)?)[KkMm]g', expand=False)
    .astype('float32')
)

In [9]:
# The first whitespace-separated token of the GPU description is kept as the brand
# (e.g. "Intel Iris Plus Graphics 640" -> "Intel").
# The pattern is a raw string: '\s' in a plain literal is an invalid escape sequence.
df['GpuBrand'] = (
    df['Gpu']
    .str.strip()
    .str.split(r'\s')
    .str[0]
    .astype('str')
)

In [10]:
# Extract the clock speed (the number directly before "GHz") from the CPU text.
# The decimal part accepts any number of digits: a pattern that allows only one
# decimal digit falls through to the bare-integer branch on "2.50GHz" and
# yields 50.0 instead of 2.5.
df['CpuClockSpeed'] = (
    df['Cpu']
    .str.strip()
    .str.extract(r'(\d+(?:\.\d+)?|\.\d+)GHz', expand=False)
    .astype('float32')
)

In [11]:
# Drop the raw text columns that the derived columns
# (X_res/Y_res, GpuBrand, CpuClockSpeed) were extracted from.
df = df.drop(columns=['ScreenResolution', 'Gpu', 'Cpu'])

---

In [12]:
# Display the frame; the rendering is truncated to the first and last rows.
df

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,Ram,Memory,OpSys,Weight,Price_euros,X_res,Y_res,GpuBrand,CpuClockSpeed
0,1,Apple,MacBook Pro,Ultrabook,13.3,8,128GB SSD,macOS,1.37,1339.69,2560,1600,Intel,2.3
1,2,Apple,Macbook Air,Ultrabook,13.3,8,128GB Flash Storage,macOS,1.34,898.94,1440,900,Intel,1.8
2,3,HP,250 G6,Notebook,15.6,8,256GB SSD,No OS,1.86,575.00,1920,1080,Intel,2.5
3,4,Apple,MacBook Pro,Ultrabook,15.4,16,512GB SSD,macOS,1.83,2537.45,2880,1800,AMD,2.7
4,5,Apple,MacBook Pro,Ultrabook,13.3,8,256GB SSD,macOS,1.37,1803.60,2560,1600,Intel,3.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,4,128GB SSD,Windows 10,1.80,638.00,1920,1080,Intel,2.5
1299,1317,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,16,512GB SSD,Windows 10,1.30,1499.00,3200,1800,Intel,2.5
1300,1318,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,2,64GB Flash Storage,Windows 10,1.50,229.00,1366,768,Intel,1.6
1301,1319,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,6,1TB HDD,Windows 10,2.19,764.00,1366,768,AMD,2.5


In [9]:
def inGB(s: str) -> float:
    """Return the first storage size found in ``s``, expressed in gigabytes.

    The text is searched case-insensitively for a number directly followed
    by ``GB``; if none is found, for a number directly followed by ``TB``,
    which is converted with 1 TB = 1024 GB.

    Args:
        s: Free-form text such as ``'256GB SSD'`` or ``'1.5TB'``.

    Returns:
        The size in GB as a float, or ``0.0`` when no GB/TB size is present.
    """
    # The decimal point is escaped so only real numbers are captured. An
    # unescaped '.' matches any character, which lets '-64' (from 'model-64GB')
    # or 'x5' (from 'x5GB') reach float().
    number = r'(\d+(?:\.\d+)?|\.\d+)'
    # GB is tried before TB, so text containing both reports the GB figure.
    for unit, gb_per_unit in (('GB', 1), ('TB', 1024)):
        match = re.search(number + unit, s, re.IGNORECASE)
        if match:
            return float(match.group(1)) * gb_per_unit
    return 0.0

# Quick check of inGB on a GB value, a TB value, and text with no size.
for sample in ('500GB', '1TB', 'test'):
    print(inGB(sample))

500.0
1024.0
0.0


In [179]:
# Split "<first drive> + <second drive>" descriptions into their two parts.
# The split is done once; reindex guarantees column 1 exists even when no row
# lists a second drive, in which case every second part defaults to '0GB'.
memory_parts = df['Memory'].str.split(r'\+', expand=True).reindex(columns=[0, 1])
l1 = memory_parts[0]
l2 = memory_parts[1].fillna('0GB')

# The size pattern allows a decimal part: with digits only, "1.0TB" is captured
# as "0TB" and the drive is counted as 0 GB. Matching ignores case, in line
# with inGB. Rows with no size become the string 'nan', which inGB maps to 0.0.
size_pattern = r'(\d+(?:\.\d+)?[GT]B)'
l1_memory_size = l1.str.extract(size_pattern, flags=re.IGNORECASE)[0].astype(str).apply(inGB)
l2_memory_size = l2.str.extract(size_pattern, flags=re.IGNORECASE)[0].astype(str).apply(inGB)
df['TotalMemory'] = l1_memory_size + l2_memory_size

In [182]:
# One boolean column per (storage layer, storage type): True when the layer's
# description contains the type name, compared in upper case.
for layer_no, layer_text in ((1, l1), (2, l2)):
    for storage_type in ('SSD', 'HDD', 'FLASH', 'HYBRID'):
        df[f"Layer{layer_no}_{storage_type}"] = layer_text.apply(
            lambda text, needle=storage_type: needle in text.upper()
        ).astype(np.bool_)

In [184]:
# Capacity per storage type: each layer's size counts only when that layer's
# flag for the type is True (the boolean acts as a 0/1 multiplier).
for storage_type in ('SSD', 'HDD', 'FLASH', 'HYBRID'):
    first_layer_gb = l1_memory_size * df[f"Layer1_{storage_type}"]
    second_layer_gb = l2_memory_size * df[f"Layer2_{storage_type}"]
    df[storage_type] = first_layer_gb + second_layer_gb

In [185]:
# Display the frame, which now includes the storage columns; the rendering is
# truncated in both rows and columns.
df

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,Ram,Memory,OpSys,Weight,Price_euros,...,Layer1_FLASH,Layer1_HYBRID,Layer2_SSD,Layer2_HDD,Layer2_FLASH,Layer2_HYBRID,SSD,HDD,FLASH,HYBRID
0,1,Apple,MacBook Pro,Ultrabook,13.3,8,128GB SSD,macOS,1.37,1339.69,...,False,False,False,False,False,False,128.0,0.0,0.0,0.0
1,2,Apple,Macbook Air,Ultrabook,13.3,8,128GB Flash Storage,macOS,1.34,898.94,...,True,False,False,False,False,False,0.0,0.0,128.0,0.0
2,3,HP,250 G6,Notebook,15.6,8,256GB SSD,No OS,1.86,575.00,...,False,False,False,False,False,False,256.0,0.0,0.0,0.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,16,512GB SSD,macOS,1.83,2537.45,...,False,False,False,False,False,False,512.0,0.0,0.0,0.0
4,5,Apple,MacBook Pro,Ultrabook,13.3,8,256GB SSD,macOS,1.37,1803.60,...,False,False,False,False,False,False,256.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,4,128GB SSD,Windows 10,1.80,638.00,...,False,False,False,False,False,False,128.0,0.0,0.0,0.0
1299,1317,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,16,512GB SSD,Windows 10,1.30,1499.00,...,False,False,False,False,False,False,512.0,0.0,0.0,0.0
1300,1318,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,2,64GB Flash Storage,Windows 10,1.50,229.00,...,True,False,False,False,False,False,0.0,0.0,64.0,0.0
1301,1319,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,6,1TB HDD,Windows 10,2.19,764.00,...,False,False,False,False,False,False,0.0,1024.0,0.0,0.0
