# Data Cleaning and Feature Engineering

In [1]:
import numpy as np
import pandas as pd

import re

In [2]:
df = pd.read_csv("data/laptop_raw_data.csv")

df.head()

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...
1,ASUS TUF Gaming F15 - AI Powered Gaming Core i...,"₹51,990",4.3,"1,768 Ratings & 177 Reviews",Intel Core i5 Processor (11th Gen)8 GB DDR4 RA...
2,HP 2023 Athlon Dual Core 3050U - (8 GB/512 GB ...,"₹26,990",4.1,"2,776 Ratings & 227 Reviews",AMD Athlon Dual Core Processor8 GB DDR4 RAMWin...
3,APPLE 2022 MacBook AIR M2 - (8 GB/256 GB SSD/M...,"₹87,990",4.7,"3,207 Ratings & 252 Reviews",Apple M2 Processor8 GB Unified Memory RAMMac O...
4,Infinix INBook Y1 Plus Intel Core i3 10th Gen ...,"₹23,990",4.2,"3,042 Ratings & 387 Reviews",Intel Core i3 Processor (10th Gen)8 GB LPDDR4X...


## Deriving New Columns from Existing Columns

In [6]:
# Brand

regex = r'^\w+'

df['Brand'] = df['Product Title'].apply(lambda x : re.findall(regex, x))

df.head()

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature,Brand
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,[Primebook]
1,ASUS TUF Gaming F15 - AI Powered Gaming Core i...,"₹51,990",4.3,"1,768 Ratings & 177 Reviews",Intel Core i5 Processor (11th Gen)8 GB DDR4 RA...,[ASUS]
2,HP 2023 Athlon Dual Core 3050U - (8 GB/512 GB ...,"₹26,990",4.1,"2,776 Ratings & 227 Reviews",AMD Athlon Dual Core Processor8 GB DDR4 RAMWin...,[HP]
3,APPLE 2022 MacBook AIR M2 - (8 GB/256 GB SSD/M...,"₹87,990",4.7,"3,207 Ratings & 252 Reviews",Apple M2 Processor8 GB Unified Memory RAMMac O...,[APPLE]
4,Infinix INBook Y1 Plus Intel Core i3 10th Gen ...,"₹23,990",4.2,"3,042 Ratings & 387 Reviews",Intel Core i3 Processor (10th Gen)8 GB LPDDR4X...,[Infinix]


In [7]:
# Processor Information

regex = r'(?:MediaTek|AMD|Intel|Apple)[\s\w]+Processor'

df['Processor'] = df['Product Feature'].apply(lambda x : re.findall(regex, x))

df.head()

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature,Brand,Processor
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,[Primebook],[MediaTek MT8183 Processor]
1,ASUS TUF Gaming F15 - AI Powered Gaming Core i...,"₹51,990",4.3,"1,768 Ratings & 177 Reviews",Intel Core i5 Processor (11th Gen)8 GB DDR4 RA...,[ASUS],[Intel Core i5 Processor]
2,HP 2023 Athlon Dual Core 3050U - (8 GB/512 GB ...,"₹26,990",4.1,"2,776 Ratings & 227 Reviews",AMD Athlon Dual Core Processor8 GB DDR4 RAMWin...,[HP],[AMD Athlon Dual Core Processor]
3,APPLE 2022 MacBook AIR M2 - (8 GB/256 GB SSD/M...,"₹87,990",4.7,"3,207 Ratings & 252 Reviews",Apple M2 Processor8 GB Unified Memory RAMMac O...,[APPLE],[Apple M2 Processor]
4,Infinix INBook Y1 Plus Intel Core i3 10th Gen ...,"₹23,990",4.2,"3,042 Ratings & 387 Reviews",Intel Core i3 Processor (10th Gen)8 GB LPDDR4X...,[Infinix],[Intel Core i3 Processor]


In [8]:
# RAM Information

regex = r'(?:\([\w\s]+\))?\d+\sGB[\s\w]+RAM'

df['RAM'] = df['Product Feature'].apply(lambda x : re.findall(regex, x))

df.head()

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature,Brand,Processor,RAM
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,[Primebook],[MediaTek MT8183 Processor],[4 GB LPDDR4 RAM]
1,ASUS TUF Gaming F15 - AI Powered Gaming Core i...,"₹51,990",4.3,"1,768 Ratings & 177 Reviews",Intel Core i5 Processor (11th Gen)8 GB DDR4 RA...,[ASUS],[Intel Core i5 Processor],[(11th Gen)8 GB DDR4 RAM]
2,HP 2023 Athlon Dual Core 3050U - (8 GB/512 GB ...,"₹26,990",4.1,"2,776 Ratings & 227 Reviews",AMD Athlon Dual Core Processor8 GB DDR4 RAMWin...,[HP],[AMD Athlon Dual Core Processor],[8 GB DDR4 RAM]
3,APPLE 2022 MacBook AIR M2 - (8 GB/256 GB SSD/M...,"₹87,990",4.7,"3,207 Ratings & 252 Reviews",Apple M2 Processor8 GB Unified Memory RAMMac O...,[APPLE],[Apple M2 Processor],[8 GB Unified Memory RAM]
4,Infinix INBook Y1 Plus Intel Core i3 10th Gen ...,"₹23,990",4.2,"3,042 Ratings & 387 Reviews",Intel Core i3 Processor (10th Gen)8 GB LPDDR4X...,[Infinix],[Intel Core i3 Processor],[(10th Gen)8 GB LPDDR4X RAM]


In [9]:
# OS 

regex = r'(?:\d+\sbit|Android|Mac|Window|DOS)[\s\w]+Operating System'

df['OS'] = df['Product Feature'].apply(lambda x : re.findall(regex, x))

df.head()

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature,Brand,Processor,RAM,OS
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,[Primebook],[MediaTek MT8183 Processor],[4 GB LPDDR4 RAM],[Android Operating System]
1,ASUS TUF Gaming F15 - AI Powered Gaming Core i...,"₹51,990",4.3,"1,768 Ratings & 177 Reviews",Intel Core i5 Processor (11th Gen)8 GB DDR4 RA...,[ASUS],[Intel Core i5 Processor],[(11th Gen)8 GB DDR4 RAM],[Windows 11 Operating System]
2,HP 2023 Athlon Dual Core 3050U - (8 GB/512 GB ...,"₹26,990",4.1,"2,776 Ratings & 227 Reviews",AMD Athlon Dual Core Processor8 GB DDR4 RAMWin...,[HP],[AMD Athlon Dual Core Processor],[8 GB DDR4 RAM],[Windows 11 Operating System]
3,APPLE 2022 MacBook AIR M2 - (8 GB/256 GB SSD/M...,"₹87,990",4.7,"3,207 Ratings & 252 Reviews",Apple M2 Processor8 GB Unified Memory RAMMac O...,[APPLE],[Apple M2 Processor],[8 GB Unified Memory RAM],[Mac OS Operating System]
4,Infinix INBook Y1 Plus Intel Core i3 10th Gen ...,"₹23,990",4.2,"3,042 Ratings & 387 Reviews",Intel Core i3 Processor (10th Gen)8 GB LPDDR4X...,[Infinix],[Intel Core i3 Processor],[(10th Gen)8 GB LPDDR4X RAM],[64 bit Windows 11 Operating System]


In [10]:
# Storage

regex = r'[\d]+\s(?:GB|TB)\s(?:HDD|SSD)'

df['Storage'] = df['Product Feature'].apply(lambda x : re.findall(regex, x))

df.head()

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature,Brand,Processor,RAM,OS,Storage
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,[Primebook],[MediaTek MT8183 Processor],[4 GB LPDDR4 RAM],[Android Operating System],[]
1,ASUS TUF Gaming F15 - AI Powered Gaming Core i...,"₹51,990",4.3,"1,768 Ratings & 177 Reviews",Intel Core i5 Processor (11th Gen)8 GB DDR4 RA...,[ASUS],[Intel Core i5 Processor],[(11th Gen)8 GB DDR4 RAM],[Windows 11 Operating System],[512 GB SSD]
2,HP 2023 Athlon Dual Core 3050U - (8 GB/512 GB ...,"₹26,990",4.1,"2,776 Ratings & 227 Reviews",AMD Athlon Dual Core Processor8 GB DDR4 RAMWin...,[HP],[AMD Athlon Dual Core Processor],[8 GB DDR4 RAM],[Windows 11 Operating System],[512 GB SSD]
3,APPLE 2022 MacBook AIR M2 - (8 GB/256 GB SSD/M...,"₹87,990",4.7,"3,207 Ratings & 252 Reviews",Apple M2 Processor8 GB Unified Memory RAMMac O...,[APPLE],[Apple M2 Processor],[8 GB Unified Memory RAM],[Mac OS Operating System],[256 GB SSD]
4,Infinix INBook Y1 Plus Intel Core i3 10th Gen ...,"₹23,990",4.2,"3,042 Ratings & 387 Reviews",Intel Core i3 Processor (10th Gen)8 GB LPDDR4X...,[Infinix],[Intel Core i3 Processor],[(10th Gen)8 GB LPDDR4X RAM],[64 bit Windows 11 Operating System],[512 GB SSD]


In [11]:
# Display

regex = r'\d+\.?\d+\scm.*Display'

df['Display'] = df['Product Feature'].apply(lambda x : re.findall(regex, x))

df.head()

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature,Brand,Processor,RAM,OS,Storage,Display
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,[Primebook],[MediaTek MT8183 Processor],[4 GB LPDDR4 RAM],[Android Operating System],[],[29.46 cm (11.6 Inch) Display]
1,ASUS TUF Gaming F15 - AI Powered Gaming Core i...,"₹51,990",4.3,"1,768 Ratings & 177 Reviews",Intel Core i5 Processor (11th Gen)8 GB DDR4 RA...,[ASUS],[Intel Core i5 Processor],[(11th Gen)8 GB DDR4 RAM],[Windows 11 Operating System],[512 GB SSD],[39.62 cm (15.6 Inch) Display]
2,HP 2023 Athlon Dual Core 3050U - (8 GB/512 GB ...,"₹26,990",4.1,"2,776 Ratings & 227 Reviews",AMD Athlon Dual Core Processor8 GB DDR4 RAMWin...,[HP],[AMD Athlon Dual Core Processor],[8 GB DDR4 RAM],[Windows 11 Operating System],[512 GB SSD],[39.62 cm (15.6 Inch) Display]
3,APPLE 2022 MacBook AIR M2 - (8 GB/256 GB SSD/M...,"₹87,990",4.7,"3,207 Ratings & 252 Reviews",Apple M2 Processor8 GB Unified Memory RAMMac O...,[APPLE],[Apple M2 Processor],[8 GB Unified Memory RAM],[Mac OS Operating System],[256 GB SSD],[34.54 cm (13.6 Inch) Display]
4,Infinix INBook Y1 Plus Intel Core i3 10th Gen ...,"₹23,990",4.2,"3,042 Ratings & 387 Reviews",Intel Core i3 Processor (10th Gen)8 GB LPDDR4X...,[Infinix],[Intel Core i3 Processor],[(10th Gen)8 GB LPDDR4X RAM],[64 bit Windows 11 Operating System],[512 GB SSD],[39.62 cm (15.6 inch) Display]


In [12]:
# Warranty

regex = r'\d+\sYear.*Warranty'

df['Warranty'] = df['Product Feature'].apply(lambda x : re.findall(regex, x))

df.head()

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature,Brand,Processor,RAM,OS,Storage,Display,Warranty
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,[Primebook],[MediaTek MT8183 Processor],[4 GB LPDDR4 RAM],[Android Operating System],[],[29.46 cm (11.6 Inch) Display],[1 Year Pick and Drop Warranty]
1,ASUS TUF Gaming F15 - AI Powered Gaming Core i...,"₹51,990",4.3,"1,768 Ratings & 177 Reviews",Intel Core i5 Processor (11th Gen)8 GB DDR4 RA...,[ASUS],[Intel Core i5 Processor],[(11th Gen)8 GB DDR4 RAM],[Windows 11 Operating System],[512 GB SSD],[39.62 cm (15.6 Inch) Display],[1 Year Carry-in Warranty]
2,HP 2023 Athlon Dual Core 3050U - (8 GB/512 GB ...,"₹26,990",4.1,"2,776 Ratings & 227 Reviews",AMD Athlon Dual Core Processor8 GB DDR4 RAMWin...,[HP],[AMD Athlon Dual Core Processor],[8 GB DDR4 RAM],[Windows 11 Operating System],[512 GB SSD],[39.62 cm (15.6 Inch) Display],[1 Year Onsite Warranty]
3,APPLE 2022 MacBook AIR M2 - (8 GB/256 GB SSD/M...,"₹87,990",4.7,"3,207 Ratings & 252 Reviews",Apple M2 Processor8 GB Unified Memory RAMMac O...,[APPLE],[Apple M2 Processor],[8 GB Unified Memory RAM],[Mac OS Operating System],[256 GB SSD],[34.54 cm (13.6 Inch) Display],[1 Year Limited Warranty]
4,Infinix INBook Y1 Plus Intel Core i3 10th Gen ...,"₹23,990",4.2,"3,042 Ratings & 387 Reviews",Intel Core i3 Processor (10th Gen)8 GB LPDDR4X...,[Infinix],[Intel Core i3 Processor],[(10th Gen)8 GB LPDDR4X RAM],[64 bit Windows 11 Operating System],[512 GB SSD],[39.62 cm (15.6 inch) Display],[1 Year Onsite Warranty]


## Fixing the Issues

In [15]:
df.shape

(984, 12)

In [13]:
df.isnull().sum()

Product Title        0
Product Price        1
Product Rating     119
Product Review     119
Product Feature      0
Brand                0
Processor            0
RAM                  0
OS                   0
Storage              0
Display              0
Warranty             0
dtype: int64

In [None]:
df.info()

In [None]:
# Missing Values

df.isnull().sum()

In [None]:
print(df['Processor'][0])

print(type(df['Processor'][0]))

In [None]:
print(''.join(df['Processor'][0]))
print(type(''.join(df['Processor'][0])))

In [None]:
df.head()

In [16]:
df.Processor = df.Processor.apply(lambda x : ''.join(x))
df.RAM = df.RAM.apply(lambda x : ''.join(x))
df.OS = df.OS.apply(lambda x : ''.join(x))
df.Storage = df.Storage.apply(lambda x : ''.join(x))
df.Brand = df.Brand.apply(lambda x : ''.join(x))
df.Display = df.Display.apply(lambda x : ''.join(x))
df.Warranty = df.Warranty.apply(lambda x : ''.join(x))

df.isnull().sum()

Product Title        0
Product Price        1
Product Rating     119
Product Review     119
Product Feature      0
Brand                0
Processor            0
RAM                  0
OS                   0
Storage              0
Display              0
Warranty             0
dtype: int64

In [19]:
df = df.replace('', np.nan)

df.isnull().sum()

Product Title        0
Product Price        1
Product Rating     119
Product Review     119
Product Feature      0
Brand                0
Processor            1
RAM                  0
OS                  47
Storage            218
Display              0
Warranty           104
dtype: int64

In [25]:
df[df.isnull().any(axis=1)]

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature,Brand,Processor,RAM,OS,Storage,Display,Warranty
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8183 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
6,Lenovo Lenovo V15 Celeron Dual Core 4th Gen - ...,"₹19,990",4.0,"1,152 Ratings & 90 Reviews",Intel Celeron Dual Core Processor (4th Gen)8 G...,Lenovo,Intel Celeron Dual Core Processor,(4th Gen)8 GB DDR4 RAM,64 bit Windows 11 Operating System,256 GB SSD,39.62 cm (15.6 inch) Display,
7,Primebook S Wifi MT8183 - (4 GB/128 GB EMMC St...,"₹10,990",4.2,324 Ratings & 97 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8183 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
10,Primebook 4G Android Based MediaTek MT8788 - (...,"₹12,490",4.1,"1,176 Ratings & 350 Reviews",MediaTek MT8788 Processor4 GB LPDDR4 RAMPrime ...,Primebook,MediaTek MT8788 Processor,4 GB LPDDR4 RAM,,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
11,Primebook 4G Android Based MediaTek MT8788 - (...,"₹11,990",4.2,"2,180 Ratings & 690 Reviews",MediaTek MT8788 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8788 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
...,...,...,...,...,...,...,...,...,...,...,...,...
969,MICROSOFT Surface Laptop 5 Core i5 12th Gen 12...,"₹1,07,999",,,Intel Core i5 Processor (12th Gen)8 GB LPDDR5 ...,MICROSOFT,Intel Core i5 Processor,(12th Gen)8 GB LPDDR5 RAM,Windows 11 Operating System,256 GB SSD,34.29 cm (13.5 Inch) Touchscreen Display,1 Year Carry-in Warranty
971,Primebook 4G Android Based MediaTek MT8788 - (...,"₹11,990",4.2,"2,180 Ratings & 690 Reviews",MediaTek MT8788 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8788 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
973,HP Victus Core i7 13th Gen 13700H - (16 GB/512...,"₹1,19,500",,,Intel Core i7 Processor (13th Gen)16 GB DDR4 R...,HP,Intel Core i7 Processor,(13th Gen)16 GB DDR4 RAM,Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,1 Year Onsite Warranty
977,Primebook 4G enabled Android based MediaTek MT...,"₹15,606",4.1,"1,176 Ratings & 350 Reviews",MediaTek MT8788 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8788 Processor,4 GB LPDDR4 RAM,Android Operating System,,27.94 cm (11 Inch) Display,1 Year Onsite Warranty


In [35]:
df[df['Storage'].isnull()]

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature,Brand,Processor,RAM,OS,Storage,Display,Warranty
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8183 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
7,Primebook S Wifi MT8183 - (4 GB/128 GB EMMC St...,"₹10,990",4.2,324 Ratings & 97 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8183 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
10,Primebook 4G Android Based MediaTek MT8788 - (...,"₹12,490",4.1,"1,176 Ratings & 350 Reviews",MediaTek MT8788 Processor4 GB LPDDR4 RAMPrime ...,Primebook,MediaTek MT8788 Processor,4 GB LPDDR4 RAM,,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
11,Primebook 4G Android Based MediaTek MT8788 - (...,"₹11,990",4.2,"2,180 Ratings & 690 Reviews",MediaTek MT8788 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8788 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
17,Primebook 4G enabled Android based MediaTek MT...,"₹15,606",4.1,"1,176 Ratings & 350 Reviews",MediaTek MT8788 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8788 Processor,4 GB LPDDR4 RAM,Android Operating System,,27.94 cm (11 Inch) Display,1 Year Onsite Warranty
...,...,...,...,...,...,...,...,...,...,...,...,...
960,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,"₹8,990",4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8183 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
966,Primebook S Wifi MT8183 - (4 GB/128 GB EMMC St...,"₹10,990",4.2,324 Ratings & 97 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8183 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
967,Primebook 4G Android Based MediaTek MT8788 - (...,"₹12,490",4.1,"1,176 Ratings & 350 Reviews",MediaTek MT8788 Processor4 GB LPDDR4 RAMPrime ...,Primebook,MediaTek MT8788 Processor,4 GB LPDDR4 RAM,,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
971,Primebook 4G Android Based MediaTek MT8788 - (...,"₹11,990",4.2,"2,180 Ratings & 690 Reviews",MediaTek MT8788 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8788 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty


In [36]:
df.loc[977, "Product Feature"]

'MediaTek MT8788 Processor4 GB LPDDR4 RAMAndroid Operating System27.94 cm (11 Inch) Display1 Year Onsite Warranty'

In [46]:
df['Product Price'] = df['Product Price'].str.replace(',', '').str.replace('₹', '')

In [47]:
df.head()

Unnamed: 0,Product Title,Product Price,Product Rating,Product Review,Product Feature,Brand,Processor,RAM,OS,Storage,Display,Warranty
0,Primebook Wifi MT8183 - (4 GB/64 GB EMMC Stora...,8990,4.2,773 Ratings & 207 Reviews,MediaTek MT8183 Processor4 GB LPDDR4 RAMAndroi...,Primebook,MediaTek MT8183 Processor,4 GB LPDDR4 RAM,Android Operating System,,29.46 cm (11.6 Inch) Display,1 Year Pick and Drop Warranty
1,ASUS TUF Gaming F15 - AI Powered Gaming Core i...,51990,4.3,"1,768 Ratings & 177 Reviews",Intel Core i5 Processor (11th Gen)8 GB DDR4 RA...,ASUS,Intel Core i5 Processor,(11th Gen)8 GB DDR4 RAM,Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,1 Year Carry-in Warranty
2,HP 2023 Athlon Dual Core 3050U - (8 GB/512 GB ...,26990,4.1,"2,776 Ratings & 227 Reviews",AMD Athlon Dual Core Processor8 GB DDR4 RAMWin...,HP,AMD Athlon Dual Core Processor,8 GB DDR4 RAM,Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,1 Year Onsite Warranty
3,APPLE 2022 MacBook AIR M2 - (8 GB/256 GB SSD/M...,87990,4.7,"3,207 Ratings & 252 Reviews",Apple M2 Processor8 GB Unified Memory RAMMac O...,APPLE,Apple M2 Processor,8 GB Unified Memory RAM,Mac OS Operating System,256 GB SSD,34.54 cm (13.6 Inch) Display,1 Year Limited Warranty
4,Infinix INBook Y1 Plus Intel Core i3 10th Gen ...,23990,4.2,"3,042 Ratings & 387 Reviews",Intel Core i3 Processor (10th Gen)8 GB LPDDR4X...,Infinix,Intel Core i3 Processor,(10th Gen)8 GB LPDDR4X RAM,64 bit Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 inch) Display,1 Year Onsite Warranty


In [53]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 984 entries, 0 to 983
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Product Title    984 non-null    object 
 1   Product Price    983 non-null    object 
 2   Product Rating   865 non-null    float64
 3   Product Review   865 non-null    object 
 4   Product Feature  984 non-null    object 
 5   Brand            984 non-null    object 
 6   Processor        983 non-null    object 
 7   RAM              984 non-null    object 
 8   OS               937 non-null    object 
 9   Storage          766 non-null    object 
 10  Display          984 non-null    object 
 11  Warranty         880 non-null    object 
dtypes: float64(1), object(11)
memory usage: 92.4+ KB


In [55]:
df['Product Price'] = df['Product Price'].astype('float')

In [74]:
df.to_csv("data/laptop_clean_data.csv", index=False)