In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
df = pd.read_csv('./smartphones.csv')

In [4]:
df.shape

(1020, 11)

In [5]:
df.head()

Unnamed: 0,model,price,rating,sim,processor,ram,battery,display,camera,card,os
0,OnePlus 11 5G,"₹54,999",89.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13
1,OnePlus Nord CE 2 Lite 5G,"₹19,989",81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
2,Samsung Galaxy A14 5G,"₹16,499",75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Exynos 1330, Octa Core, 2.4 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13
3,Motorola Moto G62 5G,"₹14,999",81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
4,Realme 10 Pro Plus,"₹24,999",82.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Dimensity 1080, Octa Core, 2.6 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front...,Memory Card Not Supported,Android v13


#### Manual assessment observations (copy in excel and go through the data):

All the issues fall under 4 categories:
- Completeness
- Validity
- Consistency
- Accuracy

Quality issues:
- Sim column has lots of information in a single column
- Same with processor
- Price has currency symbol which is not needed, it also has , in the price which can be removed
- Battery column - battery capacity and charging type/speed are in a single column
- Rating column has missing values
- Model column has brand name as well as model name in one single column. Some brands are written in uppercase, some in lowercase
- Processor column has incorrect values, processor name is missing
- Ram column, ram value is missing and only storage space is mentioned
- OS column has FM information
- Missing values: Rating-141, OS-17, Card-7, Camera-1 

Tidiness issues:
- Sim column can be split into multiple columns, whether it has 5G, IR blaster, NFC
- Processor column can be split, processor type, cores, frequency
- Battery column can be split, battery capacity, charging speed
- Display column can be split, size and resolution, frequency
- Camera 
- Memory card - split - supported or not, if yes, then capacity

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1020 entries, 0 to 1019
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   model      1020 non-null   object 
 1   price      1020 non-null   object 
 2   rating     879 non-null    float64
 3   sim        1020 non-null   object 
 4   processor  1020 non-null   object 
 5   ram        1020 non-null   object 
 6   battery    1020 non-null   object 
 7   display    1020 non-null   object 
 8   camera     1019 non-null   object 
 9   card       1013 non-null   object 
 10  os         1003 non-null   object 
dtypes: float64(1), object(10)
memory usage: 87.8+ KB


In [12]:
df.isnull().sum()

model          0
price          0
rating       141
sim            0
processor      0
ram            0
battery        0
display        0
camera         1
card           7
os            17
dtype: int64

In [4]:
#Clean Price column and convert it to int
df['price'] = df['price'].str.replace('₹','').str.replace(',', '').astype(int)

In [20]:
#Find if there are any duplicates, if yes, we can drop those records
df.duplicated().sum()

0

In [22]:
df['sim'].value_counts()

sim
Dual Sim, 3G, 4G, VoLTE, Wi-Fi                               324
Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC                      268
Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi                           155
Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, IR Blaster                54
Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC, IR Blaster           52
Dual Sim, 3G, 4G, VoLTE, Wi-Fi, NFC                           46
Dual Sim, 3G, 4G, VoLTE, Wi-Fi, IR Blaster                    46
Dual Sim                                                      13
Dual Sim, 3G, 4G, Wi-Fi                                        9
Dual Sim, 3G, 4G, 5G, VoLTE, Vo5G, Wi-Fi, NFC                  7
Single Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC                      7
Dual Sim, 3G, 4G, VoLTE, Wi-Fi, NFC, IR Blaster                5
Dual Sim, 3G, 4G, VoLTE                                        5
Single Sim, 3G, 4G, VoLTE, Wi-Fi, NFC                          4
Single Sim                                                     4
Single Sim, 3G, 4G, V

In [5]:
# Clean and treat SIM column

#find records where there is no dual or single sim mentioned
df['sim'].count() - df[df['sim'].str.contains('Dual') | df['sim'].str.contains('Single')]['sim'].count()

#As there is just a single record, we can either drop it or replace it with single or dual sim

df['is5g'] = np.where(df['sim'].str.contains('5G'),1,0)
df['sim_count'] = np.where(df['sim'].str.contains('Dual'), 2,1)
df['has_ir_blaster'] = np.where(df['sim'].str.contains('IR Blaster'),1,0)
df['has_nfc'] = np.where(df['sim'].str.contains('NFC'),1,0)
df.drop('sim', axis=1, inplace=True)

In [64]:
df.head()

Unnamed: 0,model,price,rating,processor,ram,battery,display,camera,card,os,is5g,sim_count,has_ir_blaster,has_nfc
0,OnePlus 11 5G,54999,89.0,"Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13,1,2,0,1
1,OnePlus Nord CE 2 Lite 5G,19989,81.0,"Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12,1,2,0,0
2,Samsung Galaxy A14 5G,16499,75.0,"Exynos 1330, Octa Core, 2.4 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13,1,2,0,0
3,Motorola Moto G62 5G,14999,81.0,"Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12,1,2,0,0
4,Realme 10 Pro Plus,24999,82.0,"Dimensity 1080, Octa Core, 2.6 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front...,Memory Card Not Supported,Android v13,1,2,0,0


In [6]:
# Clean and treat invalid values
# On observing the data in excel, I could see the data is incorrect for phones less than INR 3500. 
# As we are studying smartphones, usually smartphones are costlier than INR 3500, we can ignore these records

df = df[df['price'] > 3500]

In [77]:
df.shape

(990, 14)

In [78]:
df.head()

Unnamed: 0,model,price,rating,processor,ram,battery,display,camera,card,os,is5g,sim_count,has_ir_blaster,has_nfc
0,OnePlus 11 5G,54999,89.0,"Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13,1,2,0,1
1,OnePlus Nord CE 2 Lite 5G,19989,81.0,"Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12,1,2,0,0
2,Samsung Galaxy A14 5G,16499,75.0,"Exynos 1330, Octa Core, 2.4 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13,1,2,0,0
3,Motorola Moto G62 5G,14999,81.0,"Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12,1,2,0,0
4,Realme 10 Pro Plus,24999,82.0,"Dimensity 1080, Octa Core, 2.6 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front...,Memory Card Not Supported,Android v13,1,2,0,0


In [7]:
# Treat processor column

# On looking at the data, few records are feature phones and not smart phones and the data of different feature is in different columns
str_find = ['RAM','mAh','inches', 'MP', 'Wifi']
join = '|'.join(str_find)
df[df['processor'].str.contains(join)]

#these rows can be dropped
df.drop(df[df['processor'].str.contains(join)].index, axis=0, inplace=True)

In [8]:
df.iloc[:,5:]

Unnamed: 0,battery,display,camera,card,os,is5g,sim_count,has_ir_blaster,has_nfc
0,5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13,1,2,0,1
1,5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12,1,2,0,0
2,5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13,1,2,0,0
3,5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12,1,2,0,0
4,5000 mAh Battery with 67W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front...,Memory Card Not Supported,Android v13,1,2,0,0
...,...,...,...,...,...,...,...,...,...
1015,5000 mAh Battery with 68.2W Fast Charging,"6.67 inches, 1080 x 2460 px, 120 Hz Display wi...",64 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,Android v12,No FM Radio,1,2,0,0
1016,5000 mAh Battery with 22.5W Fast Charging,"6.5 inches, 720 x 1600 px Display with Water D...",48 MP + 2 MP + Depth Sensor Triple Rear & 8 MP...,"Memory Card Supported, upto 1 TB",Android v11,1,2,0,0
1017,5080 mAh Battery with 67W Fast Charging,"6.6 inches, 1080 x 2460 px, 144 Hz Display wit...",64 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,Memory Card Not Supported,Android v12,1,2,1,1
1018,5000 mAh Battery with Fast Charging,"6.8 inches, 1080 x 2400 px Display with Punch ...",108 MP + 8 MP + 2 MP Triple Rear & 32 MP Front...,"Memory Card Supported, upto 1 TB",Android v12,1,2,0,1


In [8]:
# Treat battery column

# On looking at the data, for few apple devices battery column and columns after that need to be shifted by 1 column
temp_df = df[df['battery'].str.contains('inches')]
x = temp_df.iloc[:,5:].shift(1, axis=1)

df.loc[temp_df.index, temp_df.columns[5:]] = x


 'No FM Radio' 'No FM Radio' 'No FM Radio' 'No FM Radio' 'No FM Radio'
 'No FM Radio']' has dtype incompatible with int32, please explicitly cast to a compatible dtype first.
  df.loc[temp_df.index, temp_df.columns[5:]] = x


In [119]:
df.loc[111]

model                                               Apple iPhone 12
price                                                         51999
rating                                                         74.0
processor                  Bionic A14, Hexa Core, 3.1 GHz Processor
ram                                         4 GB RAM, 64 GB inbuilt
battery                                                        None
display           6.1 inches, 1170 x 2532 px Display with Large ...
camera                 12 MP + 12 MP Dual Rear & 12 MP Front Camera
card                                      Memory Card Not Supported
os                                                          iOS v14
is5g                                                    No FM Radio
sim_count                                                         1
has_ir_blaster                                                    2
has_nfc                                                           0
Name: 111, dtype: object

In [11]:
# Treat camera column

df.head()

Unnamed: 0,model,price,rating,processor,ram,battery,display,camera,card,os,is5g,sim_count,has_ir_blaster,has_nfc
0,OnePlus 11 5G,54999,89.0,"Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13,1,2,0,1
1,OnePlus Nord CE 2 Lite 5G,19989,81.0,"Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12,1,2,0,0
2,Samsung Galaxy A14 5G,16499,75.0,"Exynos 1330, Octa Core, 2.4 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13,1,2,0,0
3,Motorola Moto G62 5G,14999,81.0,"Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12,1,2,0,0
4,Realme 10 Pro Plus,24999,82.0,"Dimensity 1080, Octa Core, 2.6 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front...,Memory Card Not Supported,Android v13,1,2,0,0


In [9]:
temp_camera = df[df['camera'].str.contains('Fold')]['card']

In [10]:
temp_camera.values

array(['64\u2009MP + 10\u2009MP + 8\u2009MP Triple Rear & 32\u2009MP Front Camera',
       '50\u2009MP Quad Rear & 16\u2009MP Front Camera',
       '50\u2009MP + 48\u2009MP + 32\u2009MP Triple Rear & 32\u2009MP + 32\u2009MP Dual Front Camera',
       '50\u2009MP + 13\u2009MP + 8\u2009MP Triple Rear & 20\u2009MP Front Camera',
       '12\u2009MP + 12\u2009MP Dual Rear & 10\u2009MP Front Camera',
       '50\u2009MP + 12\u2009MP + 10\u2009MP Triple Rear & 10\u2009MP + 4\u2009MP Dual Front Camera',
       '64\u2009MP + 16\u2009MP + 8\u2009MP Triple Rear & 32\u2009MP Front Camera',
       '50\u2009MP + 8\u2009MP Dual Rear & 32\u2009MP Front Camera',
       '50\u2009MP + 8\u2009MP Dual Rear & 32\u2009MP Front Camera',
       '50\u2009MP Quad Rear & 16\u2009MP + 16\u2009MP Dual Front Camera',
       '12\u2009MP + 12\u2009MP Dual Rear & 10\u2009MP Front Camera',
       '12\u2009MP + 12\u2009MP + 12\u2009MP Triple Rear & 10\u2009MP + 4\u2009MP Dual Front Camera',
       '50\u2009MP + 16\u2009MP

In [11]:
df.loc[temp_camera.index, 'camera'] = temp_camera.values

In [12]:
df[df['camera'].str.contains('Fold')]['card']

Series([], Name: card, dtype: object)

In [13]:
temp_card = df[df['card'].str.contains('MP')]['card']

In [52]:
temp_card

69      64 MP + 10 MP + 8 MP Triple Rear & 32 MP Front...
98                   50 MP Quad Rear & 16 MP Front Camera
155                                      5 MP Rear Camera
159     50 MP + 48 MP + 32 MP Triple Rear & 32 MP + 32...
236     50 MP + 13 MP + 8 MP Triple Rear & 20 MP Front...
271                                      2 MP Rear Camera
306          12 MP + 12 MP Dual Rear & 10 MP Front Camera
321     50 MP + 12 MP + 10 MP Triple Rear & 10 MP + 4 ...
322     64 MP + 16 MP + 8 MP Triple Rear & 32 MP Front...
365           50 MP + 8 MP Dual Rear & 32 MP Front Camera
392           50 MP + 8 MP Dual Rear & 32 MP Front Camera
482     50 MP Quad Rear & 16 MP + 16 MP Dual Front Camera
504          12 MP + 12 MP Dual Rear & 10 MP Front Camera
569     12 MP + 12 MP + 12 MP Triple Rear & 10 MP + 4 ...
570     64 MP + 13 MP + 12 MP Triple Rear & 32 MP Fron...
613     64 MP + 20 MP + 2 MP Triple Rear & Main Front ...
682     64 MP + 13 MP + 0.3 MP Triple Rear & 10 MP Fro...
703     50 MP 

In [14]:
df.loc[temp_card.index, 'card'] = 'Memory Card Not Supported'

In [63]:
df['card'].value_counts()

card
Memory Card Supported, upto 1 TB       171
Memory Card Not Supported              151
Android v12                            107
Memory Card Supported, upto 512 GB     105
Memory Card (Hybrid), upto 1 TB         91
Memory Card Supported                   89
Memory Card Supported, upto 256 GB      87
Android v13                             46
Android v11                             41
Memory Card (Hybrid)                    30
Memory Card (Hybrid), upto 256 GB       13
Android v10                             11
Memory Card (Hybrid), upto 512 GB       11
Memory Card Supported, upto 128 GB       5
Memory Card Supported, upto 2 TB         5
Memory Card Supported, upto 32 GB        4
Memory Card (Hybrid), upto 128 GB        3
Memory Card (Hybrid), upto 64 GB         3
Android v9.0 (Pie)                       2
Android v12.1                            1
Memory Card Supported, upto 1000 GB      1
iOS v10                                  1
Android v10.0                            1
iOS v1

In [15]:
temp_card = df[~df['card'].str.contains('Memory Card')]

In [65]:
temp_card

Unnamed: 0,model,price,rating,processor,ram,battery,display,camera,card,os,is5g,sim_count,has_ir_blaster,has_nfc
8,Nothing Phone 1,26749,85.0,"Snapdragon 778G Plus, Octa Core, 2.5 GHz Proce...","8 GB RAM, 128 GB inbuilt",4500 mAh Battery with 33W Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 50 MP Dual Rear & 16 MP Front Camera,Android v12,No FM Radio,1,2,0,1
9,OnePlus Nord 2T 5G,28999,84.0,"Dimensity 1300, Octa Core, 3 GHz Processor","8 GB RAM, 128 GB inbuilt",4500 mAh Battery with 80W Fast Charging,"6.43 inches, 1080 x 2400 px, 90 Hz Display wit...",50 MP + 8 MP + 2 MP Triple Rear & 32 MP Front ...,Android v12,No FM Radio,1,2,0,1
12,Xiaomi Redmi Note 12 Pro 5G,24762,79.0,"Dimensity 1080, Octa Core, 2.6 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.67 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,Android v12,No FM Radio,1,2,1,0
17,OPPO Reno 9 Pro Plus,45999,86.0,"Snapdragon 8+ Gen1, Octa Core, 3.2 GHz Processor","16 GB RAM, 256 GB inbuilt",4700 mAh Battery with 80W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",50 MP + 8 MP + 2 MP Triple Rear & 32 MP Front ...,Android v13,No FM Radio,1,2,0,1
18,OnePlus 10R 5G,32999,86.0,"Dimensity 8100 Max, Octa Core, 2.85 GHz Processor","8 GB RAM, 128 GB inbuilt",5000 mAh Battery with 80W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,Android v12,Bluetooth,1,2,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,Xiaomi Civi 3,32990,86.0,"Dimensity 8200, Octa Core, 3.1 GHz Processor","8 GB RAM, 256 GB inbuilt",5000 mAh Battery with 80W Fast Charging,"6.7 inches, 1080 x 2400 px, 120 Hz Display wit...",64 MP + 20 MP + 2 MP Triple Rear & 32 MP + 32 ...,Android v13,No FM Radio,1,2,1,1
1011,Oppo Find X6,69990,89.0,"Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","8 GB RAM, 256 GB inbuilt",4700 mAh Battery with 120W Fast Charging,"6.73 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 48 MP + 32 MP Triple Rear & 32 MP Fron...,Android v12,No FM Radio,1,2,0,1
1012,itel A23s,4787,,"Spreadtrum SC9832E, Quad Core, 1.4 GHz Processor","2 GB RAM, 32 GB inbuilt",3020 mAh Battery,"5 inches, 854 x 480 px Display",2 MP Rear Camera,Android v11,No FM Radio,0,2,0,0
1013,Google Pixel 8 Pro,70990,80.0,"Google Tensor 3, Octa Core Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.73 inches, 1440 x 3120 px, 120 Hz Display wi...",50 MP + 50 MP + 50 MP Triple Rear & 12 MP Fron...,Android v13,No FM Radio,1,2,0,1


In [16]:
df.loc[temp_card.index, 'card'] = 'Memory Card Not Supported'

In [68]:
df['card'].value_counts()

card
Memory Card Not Supported              365
Memory Card Supported, upto 1 TB       171
Memory Card Supported, upto 512 GB     105
Memory Card (Hybrid), upto 1 TB         91
Memory Card Supported                   89
Memory Card Supported, upto 256 GB      87
Memory Card (Hybrid)                    30
Memory Card (Hybrid), upto 256 GB       13
Memory Card (Hybrid), upto 512 GB       11
Memory Card Supported, upto 2 TB         5
Memory Card Supported, upto 128 GB       5
Memory Card Supported, upto 32 GB        4
Memory Card (Hybrid), upto 128 GB        3
Memory Card (Hybrid), upto 64 GB         3
Memory Card Supported, upto 1000 GB      1
Memory Card (Hybrid), upto 2 TB          1
Name: count, dtype: int64

In [69]:
df['os'].value_counts()

os
Android v12                           287
Android v11                           233
No FM Radio                           180
Android v10                            58
Android v13                            45
Bluetooth                              34
Android v9.0 (Pie)                     26
Android v10.0                          22
iOS v16                                15
iOS v15                                12
Android v8.1 (Oreo)                    10
iOS v14                                 6
Memory Card Not Supported               6
Android v11.0                           4
Android v8.0 (Oreo)                     4
iOS v13                                 3
iOS v15.0                               3
EMUI v12                                2
Android v6.0 (Marshmallow)              2
Memory Card (Hybrid), upto 256 GB       2
Harmony v2.0                            2
iOS v14.0                               2
Memory Card (Hybrid), upto 2 TB         2
Android v5.1.1 (Lollipop)      

In [17]:
temp_df = df[df['os'].str.contains('Memory', na=False)]

In [18]:
df.loc[temp_df.index, 'os'] = np.nan

In [19]:
temp_df = df[df['os'] == 'Bluetooth']

In [20]:
df.loc[temp_df.index, 'os'] = np.nan

In [21]:
df['os'].value_counts()

os
Android v12                        287
Android v11                        233
No FM Radio                        180
Android v10                         58
Android v13                         45
Android v9.0 (Pie)                  26
Android v10.0                       22
iOS v16                             15
iOS v15                             12
Android v8.1 (Oreo)                 10
iOS v14                              6
Android v11.0                        4
Android v8.0 (Oreo)                  4
iOS v15.0                            3
iOS v13                              3
iOS v14.0                            2
EMUI v12                             2
Android v6.0 (Marshmallow)           2
Android v5.1.1 (Lollipop)            2
Harmony v2.0                         2
Android v7.1 (Nougat)                1
Android                              1
Browser                              1
HarmonyOS v2.0                       1
Hongmeng OS v4.0                     1
iOS v17               

In [22]:
df[df['os'] == 'Memory']

Unnamed: 0,model,price,rating,processor,ram,battery,display,camera,card,os,is5g,sim_count,has_ir_blaster,has_nfc


In [24]:
#show all rows
pd.set_option("display.max_rows", None)


In [25]:
df['camera'].value_counts()

camera
50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front Camera                   40
64 MP + 8 MP + 2 MP Triple Rear & 16 MP Front Camera                   38
50 MP + 2 MP + 2 MP Triple Rear & 16 MP Front Camera                   34
13 MP + 2 MP Dual Rear & 5 MP Front Camera                             22
12 MP + 12 MP Dual Rear & 12 MP Front Camera                           20
13 MP Rear & 5 MP Front Camera                                         20
50 MP + 2 MP + 2 MP Triple Rear & 8 MP Front Camera                    20
50 MP + 2 MP Dual Rear & 8 MP Front Camera                             19
8 MP Rear & 5 MP Front Camera                                          19
108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front Camera                  18
13 MP + 2 MP Dual Rear & 8 MP Front Camera                             18
64 MP Quad Rear & 32 MP Front Camera                                   18
64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front Camera                   16
50 MP + 2 MP Dual Rear & 5 MP F