# Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Load the data

In [2]:
# Dataset: https://www.kaggle.com/datasets/manishkc06/mobile-price-prediction?select=mobile_price_data.csv
# The CSV location is read from the MOBILE_PRICE_CSV environment variable when
# it is set, so the notebook can run on another machine without editing this
# cell. The original absolute path is kept as the fallback.
import os

DATA_PATH = os.environ.get(
    "MOBILE_PRICE_CSV",
    r"C:\Users\athir\data_sets\ML\Assg\mobile_price_data.csv",
)
data = pd.read_csv(DATA_PATH)
data.head()

Unnamed: 0,mobile_name,mobile_price,mobile_color,dual_sim,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,bluetooth,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,"Realme Narzo 10A (So White, 32 GB)","₹8,999",So White,Dual Sim,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2 GHz,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",Yes,5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
1,"Realme Narzo 10A (So Blue, 32 GB)","₹8,999",So Blue,Dual Sim,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2 GHz,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",Yes,5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
2,"Realme Narzo 10A (So Blue, 64 GB)","₹9,999",So Blue,Dual Sim,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2 GHz,64 GB,4 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",Yes,5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
3,"Realme Narzo 10A (So White, 64 GB)","₹9,999",So White,Dual Sim,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2 GHz,64 GB,4 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",Yes,5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
4,"Realme Narzo 10 (That Green, 128 GB)","₹11,999",That Green,Dual Sim,16.51 cm (6.5 inch),1600 x 720 Pixels,Android 10,Octa Core,2 GHz,128 GB,4 GB,48MP + 8MP + 2MP + 2MP,16MP Front Camera,"4G VOLTE, 4G, 3G, 2G",Yes,5000 mAh,75.4 mm,164.4 mm,9 mm,199 g


In [3]:
data.shape

(459, 20)

In [4]:
data.columns

Index(['mobile_name', 'mobile_price', 'mobile_color', 'dual_sim', 'disp_size',
       'resolution', 'os', 'num_cores', 'mp_speed', 'int_memory', 'ram',
       'p_cam', 'f_cam', 'network', 'bluetooth', 'battery_power', 'mob_width',
       'mob_height', 'mob_depth', 'mob_weight'],
      dtype='object')

# data cleaning, data wrangling, data preprocessing

## Missing value treatment

In [5]:
data.isnull().sum()

mobile_name      0
mobile_price     0
mobile_color     0
dual_sim         0
disp_size        0
resolution       0
os               0
num_cores        0
mp_speed         0
int_memory       0
ram              0
p_cam            0
f_cam            0
network          0
bluetooth        0
battery_power    0
mob_width        0
mob_height       0
mob_depth        0
mob_weight       0
dtype: int64

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 459 entries, 0 to 458
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   mobile_name    459 non-null    object
 1   mobile_price   459 non-null    object
 2   mobile_color   459 non-null    object
 3   dual_sim       459 non-null    object
 4   disp_size      459 non-null    object
 5   resolution     459 non-null    object
 6   os             459 non-null    object
 7   num_cores      459 non-null    object
 8   mp_speed       459 non-null    object
 9   int_memory     459 non-null    object
 10  ram            459 non-null    object
 11  p_cam          459 non-null    object
 12  f_cam          459 non-null    object
 13  network        459 non-null    object
 14  bluetooth      459 non-null    object
 15  battery_power  459 non-null    object
 16  mob_width      459 non-null    object
 17  mob_height     459 non-null    object
 18  mob_depth      459 non-null   

In [7]:
data['bluetooth'].unique()

array(['Yes'], dtype=object)

In [8]:
# Every row reports bluetooth as 'Yes', so the column carries no information.
data = data.drop(columns=['bluetooth'])

In [9]:
data['mp_speed'].unique()

array(['2 GHz', '2.3 GHz', '2.96 GHz', '1.95 GHz', '2.8 GHz', '2.05 GHz',
       '2.2 GHz', '2.1 GHz', '2.84 GHz', '2.5 GHz', '1.8 GHz', '1.4 GHz',
       '1.5 GHz', '1.3 GHz', '1.6 GHz', '2.39 GHz', '2.6 GHz',
       '2.649 GHz'], dtype=object)

In [10]:
# Remove the ' GHz' unit so only the numeric clock speed text remains.
speed_without_unit = data['mp_speed'].str.replace(' GHz', '')
data['mp_speed'] = speed_without_unit
data.head()

Unnamed: 0,mobile_name,mobile_price,mobile_color,dual_sim,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,"Realme Narzo 10A (So White, 32 GB)","₹8,999",So White,Dual Sim,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
1,"Realme Narzo 10A (So Blue, 32 GB)","₹8,999",So Blue,Dual Sim,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
2,"Realme Narzo 10A (So Blue, 64 GB)","₹9,999",So Blue,Dual Sim,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,64 GB,4 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
3,"Realme Narzo 10A (So White, 64 GB)","₹9,999",So White,Dual Sim,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,64 GB,4 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
4,"Realme Narzo 10 (That Green, 128 GB)","₹11,999",That Green,Dual Sim,16.51 cm (6.5 inch),1600 x 720 Pixels,Android 10,Octa Core,2,128 GB,4 GB,48MP + 8MP + 2MP + 2MP,16MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75.4 mm,164.4 mm,9 mm,199 g


In [11]:
# Remove the rupee sign and the thousands separators from the price text.
price_noise = ['₹', ',']
data['mobile_price'] = data['mobile_price'].replace(
    to_replace=price_noise, value='', regex=True
)
data.head(2)

Unnamed: 0,mobile_name,mobile_price,mobile_color,dual_sim,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,"Realme Narzo 10A (So White, 32 GB)",8999,So White,Dual Sim,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
1,"Realme Narzo 10A (So Blue, 32 GB)",8999,So Blue,Dual Sim,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g


In [12]:
data['dual_sim'].value_counts()

Dual Sim      458
Single Sim      1
Name: dual_sim, dtype: int64

In [13]:
# 458 of the 459 rows are 'Dual Sim', so the column is almost constant; remove it.
data = data.drop(columns=['dual_sim'])
data.head(3)

Unnamed: 0,mobile_name,mobile_price,mobile_color,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,"Realme Narzo 10A (So White, 32 GB)",8999,So White,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
1,"Realme Narzo 10A (So Blue, 32 GB)",8999,So Blue,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
2,"Realme Narzo 10A (So Blue, 64 GB)",9999,So Blue,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,64 GB,4 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g


In [14]:
def split_word(text):
    """Return the first whitespace-delimited token of ``text``.

    Leading whitespace is ignored. A string with no tokens (empty or
    whitespace only) raises ``IndexError``.
    """
    # Only the first token is needed, so stop splitting after one cut.
    tokens = text.split(None, 1)
    return tokens[0]

In [15]:
# Reduce each product title to its first word (the brand name).
data['mobile_name'] = data['mobile_name'].apply(split_word)
data.head()

Unnamed: 0,mobile_name,mobile_price,mobile_color,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,Realme,8999,So White,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
1,Realme,8999,So Blue,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
2,Realme,9999,So Blue,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,64 GB,4 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
3,Realme,9999,So White,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,64 GB,4 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
4,Realme,11999,That Green,16.51 cm (6.5 inch),1600 x 720 Pixels,Android 10,Octa Core,2,128 GB,4 GB,48MP + 8MP + 2MP + 2MP,16MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75.4 mm,164.4 mm,9 mm,199 g


In [16]:
data['mobile_name'].unique()

array(['Realme', 'Redmi', 'Mi', 'MI3', 'Vivo', 'Samsung', 'Infinix',
       'POCO', 'OPPO'], dtype=object)

In [17]:
# The colour column is not used as a feature from here on.
data = data.drop(columns=['mobile_color'])
data.head()

Unnamed: 0,mobile_name,mobile_price,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,Realme,8999,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
1,Realme,8999,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32 GB,3 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
2,Realme,9999,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,64 GB,4 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
3,Realme,9999,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,64 GB,4 GB,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75 mm,164.4 mm,8.95 mm,195 g
4,Realme,11999,16.51 cm (6.5 inch),1600 x 720 Pixels,Android 10,Octa Core,2,128 GB,4 GB,48MP + 8MP + 2MP + 2MP,16MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000 mAh,75.4 mm,164.4 mm,9 mm,199 g


In [18]:
data['mp_speed'].unique()

array(['2', '2.3', '2.96', '1.95', '2.8', '2.05', '2.2', '2.1', '2.84',
       '2.5', '1.8', '1.4', '1.5', '1.3', '1.6', '2.39', '2.6', '2.649'],
      dtype=object)

In [19]:
data['resolution'].unique()

array(['1600 x 720 Pixels', '2340 x 1080 pixels', '2400 x 1080 Pixels',
       '1080 x 2340 Pixels', '1080 x 2400 Pixels', '720 x 1600 Pixels',
       '1560 x 720 pixels', '2400 × 1080 Pixels', '1520 x 720 pixels',
       '1520 x 720 Pixels', '2400 x 1080 pixel', '1560 x 720 Pixels',
       '2160 x 1080 Pixels', '1440 x 720 pixels', '2280 x 1080 pixels',
       '2160 x 1080 pixels', '2340 x 1080 Pixel', '1920 x 1080 Pixels',
       '1520 x 720$$Pixels', '1520 x 720$$pixel', '2280 x 1080$$Pixels',
       '1920 x 1080 pixels', '2400 x 1080 Pixel', '1280 x 720 pixels',
       '2400 x 1080$$pixel', '1280 x 720 Pixels', '720 x 1544 pixels',
       '720 x 1544 Pixels', '2340 x 1080 Pixels', '2316 x 1080 pixels',
       '2280 x 1080 Pixels', '1440 x 720 Pixels', '1440 x 720 Pixel',
       '1520 x 720$$ pixels', '1520 x 720$pixels', '2340 x 1080$$pixel',
       '2340 x 1080$$Pixel', '960 x 540 Pixels', '540 x 960 Pixels',
       '1080 x 2160 pixels', '1500 x 720 pixels', '720 x 1440 Pixels',
 

In [20]:
# Map each measurement column to the unit suffix that has to be removed
# before the values can be converted to numbers.
unit_suffix_by_column = {
    'int_memory': ' GB',
    'mob_width': ' mm',
    'mob_height': ' mm',
    'mob_depth': ' mm',
    'ram': ' GB',
    'battery_power': ' mAh',
    'mob_weight': ' g',
}
for column_name, unit_suffix in unit_suffix_by_column.items():
    data[column_name] = data[column_name].str.replace(unit_suffix, '')
data.head()

Unnamed: 0,mobile_name,mobile_price,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,Realme,8999,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32,3,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
1,Realme,8999,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,32,3,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
2,Realme,9999,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,64,4,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
3,Realme,9999,16.56 cm (6.52 inch),1600 x 720 Pixels,Android 10,Octa Core,2,64,4,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
4,Realme,11999,16.51 cm (6.5 inch),1600 x 720 Pixels,Android 10,Octa Core,2,128,4,48MP + 8MP + 2MP + 2MP,16MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.4,164.4,9.0,199


In [21]:
data['f_cam'].unique()

array(['5MP Front Camera', '16MP Front Camera',
       '32MP + 8MP Dual Front Camera', '16MP + 8MP Dual Front Camera',
       '13MP Front Camera', '32MP Front Camera', '8MP Front Camera',
       '20MP Front Camera', '20MP + 2MP Dual Front Camera',
       '2MP Front Camera', '8MP Dual Front Camera', '25MP Front Camera',
       '24MP Front Camera', '13MP + 2MP Dual Front Camera',
       '44MP + 2MP Dual Front Camera'], dtype=object)

In [22]:
data['network'].unique()

array(['4G VOLTE, 4G, 3G, 2G', '3G, 4G VOLTE, 2G', '3G, 4G VOLTE, 4G, 2G',
       '3G, 4G, 2G', '4G, 2G, 3G', '5G, 4G VOLTE, 4G, 3G, 2G',
       '4G VOLTE, 3G, 2G', '4G VOLTE, 3G', '4G VOLTE, 4G, 2G, 3G',
       '4G VOLTE, 5G', '3G, 4G VOLTE, 4G', '3G', '4G VOLTE',
       '3G, 4G VOLTE', '2G', '4G, 3G, 2G', '3G, 4G', '4G VOLTE, 4G'],
      dtype=object)

In [23]:
data['os'].unique()

array(['Android 10', 'Android Pie 9.0', 'Android Pie 9',
       'Android Oreo 8.1', 'Android Pie 10', 'Android Nougat 7.1.1',
       'Android Oreo 8.0', 'Android Nougat 7.1.2', 'Android KitKat 4.4.2',
       'Android Marshmallow 6.0.1', 'Android Nougat 7.1',
       'Android Marshmallow 6', 'Android Nougat 7',
       'Android Lollipop 5.4.1', 'Android Oreo 8.1.0', 'Android Oreo 8',
       'Android Lollipop 5.1', 'Android Lollipop 5.1.1'], dtype=object)

In [24]:
# Strip the leading "Android" and the optional codename word ("Pie", "Oreo", ...)
# so that only the version number text remains.
# regex=True is passed explicitly: since pandas 2.0 Series.str.replace defaults
# to regex=False, which would treat the pattern as literal text, remove nothing
# and make the later float conversion of this column fail.
data['os'] = data['os'].str.replace(r'^Android\s[a-zA-Z]*\s?', '', regex=True)
data.head()

Unnamed: 0,mobile_name,mobile_price,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,Realme,8999,16.56 cm (6.52 inch),1600 x 720 Pixels,10,Octa Core,2,32,3,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
1,Realme,8999,16.56 cm (6.52 inch),1600 x 720 Pixels,10,Octa Core,2,32,3,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
2,Realme,9999,16.56 cm (6.52 inch),1600 x 720 Pixels,10,Octa Core,2,64,4,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
3,Realme,9999,16.56 cm (6.52 inch),1600 x 720 Pixels,10,Octa Core,2,64,4,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
4,Realme,11999,16.51 cm (6.5 inch),1600 x 720 Pixels,10,Octa Core,2,128,4,48MP + 8MP + 2MP + 2MP,16MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.4,164.4,9.0,199


In [25]:
data['os'].unique()

array(['10', '9.0', '9', '8.1', '7.1.1', '8.0', '7.1.2', '4.4.2', '6.0.1',
       '7.1', '6', '7', '5.4.1', '8.1.0', '8', '5.1', '5.1.1'],
      dtype=object)

In [26]:
def split_dot(text):
    """Return the part of ``text`` that precedes its first '.'.

    When ``text`` contains no dot, the whole string is returned unchanged.
    """
    head, _, _ = text.partition('.')
    return head

In [27]:
# Keep only the major Android version (the text before the first dot).
data['os'] = data['os'].apply(split_dot)
data.head(3)

Unnamed: 0,mobile_name,mobile_price,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,Realme,8999,16.56 cm (6.52 inch),1600 x 720 Pixels,10,Octa Core,2,32,3,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75,164.4,8.95,195
1,Realme,8999,16.56 cm (6.52 inch),1600 x 720 Pixels,10,Octa Core,2,32,3,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75,164.4,8.95,195
2,Realme,9999,16.56 cm (6.52 inch),1600 x 720 Pixels,10,Octa Core,2,64,4,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75,164.4,8.95,195


In [28]:
data['os'].unique()

array(['10', '9', '8', '7', '4', '6', '5'], dtype=object)

In [29]:
# Keep only the centimetre figure of the display size by removing the
# " cm (<x> inch)" tail.
# The pattern is a raw string: in a normal string literal "\s" and "\(" are
# invalid escape sequences, which Python 3.12+ reports as a SyntaxWarning.
data['disp_size'] = data['disp_size'].replace(r'\scm\s\(.+\)', '', regex=True)
data.head()

Unnamed: 0,mobile_name,mobile_price,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,Realme,8999,16.56,1600 x 720 Pixels,10,Octa Core,2,32,3,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
1,Realme,8999,16.56,1600 x 720 Pixels,10,Octa Core,2,32,3,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
2,Realme,9999,16.56,1600 x 720 Pixels,10,Octa Core,2,64,4,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
3,Realme,9999,16.56,1600 x 720 Pixels,10,Octa Core,2,64,4,12MP + 2MP + 2MP,5MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.0,164.4,8.95,195
4,Realme,11999,16.51,1600 x 720 Pixels,10,Octa Core,2,128,4,48MP + 8MP + 2MP + 2MP,16MP Front Camera,"4G VOLTE, 4G, 3G, 2G",5000,75.4,164.4,9.0,199


In [30]:
# Turn each comma-separated network string into a sorted list of labels
# (spaces removed first), so that the same set of networks always has the
# same representation regardless of the order it was written in.
data['network'] = data['network'].apply(
    lambda raw_networks: sorted(raw_networks.replace(' ', '').split(','))
)
data['network'].value_counts()

[2G, 3G, 4G, 4GVOLTE]        238
[2G, 3G, 4GVOLTE]             99
[2G, 3G, 4G]                  68
[3G, 4GVOLTE]                 18
[3G, 4G, 4GVOLTE]             13
[4GVOLTE]                      8
[4GVOLTE, 5G]                  4
[2G, 3G, 4G, 4GVOLTE, 5G]      3
[4G, 4GVOLTE]                  3
[2G]                           2
[3G, 4G]                       2
[3G]                           1
Name: network, dtype: int64

In [31]:
from sklearn.preprocessing import MultiLabelBinarizer
mlb=MultiLabelBinarizer()
mlb

In [32]:
# One 0/1 indicator column per network label, aligned to the rows of `data`.
network_flags = mlb.fit_transform(data['network'])
dg = pd.DataFrame(network_flags, columns=mlb.classes_, index=data.index)
dg

Unnamed: 0,2G,3G,4G,4GVOLTE,5G
0,1,1,1,1,0
1,1,1,1,1,0
2,1,1,1,1,0
3,1,1,1,1,0
4,1,1,1,1,0
...,...,...,...,...,...
454,1,1,0,1,0
455,1,1,1,1,0
456,1,1,1,0,0
457,1,1,1,1,0


In [33]:
# Keep only the first token of the rear-camera description (e.g. '12MP').
data['p_cam'] = data['p_cam'].apply(split_word)
data['p_cam'].value_counts()

13MP     134
48MP     125
12MP      77
64MP      54
16MP      43
20MP       9
8MP        6
108MP      4
48         4
13         2
5MP        1
Name: p_cam, dtype: int64

In [34]:
# Keep only the first token of the front-camera description (e.g. '16MP').
data['f_cam'] = data['f_cam'].apply(split_word)
data['f_cam'].value_counts()

16MP    118
20MP     77
8MP      71
32MP     66
5MP      54
13MP     48
25MP     13
24MP      7
44MP      3
2MP       2
Name: f_cam, dtype: int64

In [35]:
# Remove the 'MP' unit from both camera columns, leaving the number text.
for camera_column in ('p_cam', 'f_cam'):
    data[camera_column] = data[camera_column].str.replace('MP', '')
data.head()

Unnamed: 0,mobile_name,mobile_price,disp_size,resolution,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,Realme,8999,16.56,1600 x 720 Pixels,10,Octa Core,2,32,3,12,5,"[2G, 3G, 4G, 4GVOLTE]",5000,75.0,164.4,8.95,195
1,Realme,8999,16.56,1600 x 720 Pixels,10,Octa Core,2,32,3,12,5,"[2G, 3G, 4G, 4GVOLTE]",5000,75.0,164.4,8.95,195
2,Realme,9999,16.56,1600 x 720 Pixels,10,Octa Core,2,64,4,12,5,"[2G, 3G, 4G, 4GVOLTE]",5000,75.0,164.4,8.95,195
3,Realme,9999,16.56,1600 x 720 Pixels,10,Octa Core,2,64,4,12,5,"[2G, 3G, 4G, 4GVOLTE]",5000,75.0,164.4,8.95,195
4,Realme,11999,16.51,1600 x 720 Pixels,10,Octa Core,2,128,4,48,16,"[2G, 3G, 4G, 4GVOLTE]",5000,75.4,164.4,9.0,199


In [36]:
# The resolution column is not used as a feature from here on.
data = data.drop(columns=['resolution'])
data.head()

Unnamed: 0,mobile_name,mobile_price,disp_size,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,Realme,8999,16.56,10,Octa Core,2,32,3,12,5,"[2G, 3G, 4G, 4GVOLTE]",5000,75.0,164.4,8.95,195
1,Realme,8999,16.56,10,Octa Core,2,32,3,12,5,"[2G, 3G, 4G, 4GVOLTE]",5000,75.0,164.4,8.95,195
2,Realme,9999,16.56,10,Octa Core,2,64,4,12,5,"[2G, 3G, 4G, 4GVOLTE]",5000,75.0,164.4,8.95,195
3,Realme,9999,16.56,10,Octa Core,2,64,4,12,5,"[2G, 3G, 4G, 4GVOLTE]",5000,75.0,164.4,8.95,195
4,Realme,11999,16.51,10,Octa Core,2,128,4,48,16,"[2G, 3G, 4G, 4GVOLTE]",5000,75.4,164.4,9.0,199


In [37]:
data.columns

Index(['mobile_name', 'mobile_price', 'disp_size', 'os', 'num_cores',
       'mp_speed', 'int_memory', 'ram', 'p_cam', 'f_cam', 'network',
       'battery_power', 'mob_width', 'mob_height', 'mob_depth', 'mob_weight'],
      dtype='object')

In [38]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 459 entries, 0 to 458
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   mobile_name    459 non-null    object
 1   mobile_price   459 non-null    object
 2   disp_size      459 non-null    object
 3   os             459 non-null    object
 4   num_cores      459 non-null    object
 5   mp_speed       459 non-null    object
 6   int_memory     459 non-null    object
 7   ram            459 non-null    object
 8   p_cam          459 non-null    object
 9   f_cam          459 non-null    object
 10  network        459 non-null    object
 11  battery_power  459 non-null    object
 12  mob_width      459 non-null    object
 13  mob_height     459 non-null    object
 14  mob_depth      459 non-null    object
 15  mob_weight     459 non-null    object
dtypes: object(16)
memory usage: 57.5+ KB


In [39]:
# Columns that now hold purely numeric text; convert them to float in one step.
numeric_columns = [
    'mobile_price', 'os', 'disp_size', 'mp_speed', 'int_memory', 'ram',
    'p_cam', 'f_cam', 'battery_power', 'mob_width', 'mob_height',
    'mob_depth', 'mob_weight',
]
data[numeric_columns] = data[numeric_columns].astype(float)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 459 entries, 0 to 458
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   mobile_name    459 non-null    object 
 1   mobile_price   459 non-null    float64
 2   disp_size      459 non-null    float64
 3   os             459 non-null    float64
 4   num_cores      459 non-null    object 
 5   mp_speed       459 non-null    float64
 6   int_memory     459 non-null    float64
 7   ram            459 non-null    float64
 8   p_cam          459 non-null    float64
 9   f_cam          459 non-null    float64
 10  network        459 non-null    object 
 11  battery_power  459 non-null    float64
 12  mob_width      459 non-null    float64
 13  mob_height     459 non-null    float64
 14  mob_depth      459 non-null    float64
 15  mob_weight     459 non-null    float64
dtypes: float64(13), object(3)
memory usage: 57.5+ KB


In [40]:
from sklearn.preprocessing import RobustScaler
rs = RobustScaler()
rs

In [41]:
# Numeric predictors to rescale; the target 'mobile_price' is left unscaled.
# NOTE(review): the scaler is fitted on every row here, before the train/test
# split performed later in the notebook, so test-set statistics influence the
# scaling. Consider fitting on the training rows only.
scaled_columns = [
    'os', 'disp_size', 'mp_speed', 'int_memory', 'ram', 'p_cam', 'f_cam',
    'battery_power', 'mob_width', 'mob_height', 'mob_depth', 'mob_weight',
]
data[scaled_columns] = rs.fit_transform(data[scaled_columns])
data.head()

Unnamed: 0,mobile_name,mobile_price,disp_size,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,network,battery_power,mob_width,mob_height,mob_depth,mob_weight
0,Realme,8999.0,0.691358,1.0,Octa Core,0.0,-0.333333,-0.5,-0.114286,-0.916667,"[2G, 3G, 4G, 4GVOLTE]",1.94,-0.4375,0.934426,0.584416,0.521739
1,Realme,8999.0,0.691358,1.0,Octa Core,0.0,-0.333333,-0.5,-0.114286,-0.916667,"[2G, 3G, 4G, 4GVOLTE]",1.94,-0.4375,0.934426,0.584416,0.521739
2,Realme,9999.0,0.691358,1.0,Octa Core,0.0,0.0,0.0,-0.114286,-0.916667,"[2G, 3G, 4G, 4GVOLTE]",1.94,-0.4375,0.934426,0.584416,0.521739
3,Realme,9999.0,0.691358,1.0,Octa Core,0.0,0.0,0.0,-0.114286,-0.916667,"[2G, 3G, 4G, 4GVOLTE]",1.94,-0.4375,0.934426,0.584416,0.521739
4,Realme,11999.0,0.62963,1.0,Octa Core,0.0,0.666667,0.0,0.914286,0.0,"[2G, 3G, 4G, 4GVOLTE]",1.94,0.0625,0.934426,0.649351,0.695652


## feature encoding

In [42]:
# Append the network indicator columns and remove the list-valued column they
# were derived from.
data = pd.concat([data, dg], axis=1).drop(columns=['network'])
data.head()

Unnamed: 0,mobile_name,mobile_price,disp_size,os,num_cores,mp_speed,int_memory,ram,p_cam,f_cam,battery_power,mob_width,mob_height,mob_depth,mob_weight,2G,3G,4G,4GVOLTE,5G
0,Realme,8999.0,0.691358,1.0,Octa Core,0.0,-0.333333,-0.5,-0.114286,-0.916667,1.94,-0.4375,0.934426,0.584416,0.521739,1,1,1,1,0
1,Realme,8999.0,0.691358,1.0,Octa Core,0.0,-0.333333,-0.5,-0.114286,-0.916667,1.94,-0.4375,0.934426,0.584416,0.521739,1,1,1,1,0
2,Realme,9999.0,0.691358,1.0,Octa Core,0.0,0.0,0.0,-0.114286,-0.916667,1.94,-0.4375,0.934426,0.584416,0.521739,1,1,1,1,0
3,Realme,9999.0,0.691358,1.0,Octa Core,0.0,0.0,0.0,-0.114286,-0.916667,1.94,-0.4375,0.934426,0.584416,0.521739,1,1,1,1,0
4,Realme,11999.0,0.62963,1.0,Octa Core,0.0,0.666667,0.0,0.914286,0.0,1.94,0.0625,0.934426,0.649351,0.695652,1,1,1,1,0


In [43]:
# One-hot encode the two remaining text columns.
categorical_columns = ['mobile_name', 'num_cores']
data_ohe = pd.get_dummies(data[categorical_columns])
data_ohe.head()

Unnamed: 0,mobile_name_Infinix,mobile_name_MI3,mobile_name_Mi,mobile_name_OPPO,mobile_name_POCO,mobile_name_Realme,mobile_name_Redmi,mobile_name_Samsung,mobile_name_Vivo,num_cores_Octa Core,num_cores_Quad Core,num_cores_Single Core
0,0,0,0,0,0,1,0,0,0,1,0,0
1,0,0,0,0,0,1,0,0,0,1,0,0
2,0,0,0,0,0,1,0,0,0,1,0,0
3,0,0,0,0,0,1,0,0,0,1,0,0
4,0,0,0,0,0,1,0,0,0,1,0,0


In [44]:
# Append the dummy columns and remove the text columns they replace.
data = pd.concat([data, data_ohe], axis=1).drop(
    columns=['mobile_name', 'num_cores']
)
data.head()

Unnamed: 0,mobile_price,disp_size,os,mp_speed,int_memory,ram,p_cam,f_cam,battery_power,mob_width,...,mobile_name_Mi,mobile_name_OPPO,mobile_name_POCO,mobile_name_Realme,mobile_name_Redmi,mobile_name_Samsung,mobile_name_Vivo,num_cores_Octa Core,num_cores_Quad Core,num_cores_Single Core
0,8999.0,0.691358,1.0,0.0,-0.333333,-0.5,-0.114286,-0.916667,1.94,-0.4375,...,0,0,0,1,0,0,0,1,0,0
1,8999.0,0.691358,1.0,0.0,-0.333333,-0.5,-0.114286,-0.916667,1.94,-0.4375,...,0,0,0,1,0,0,0,1,0,0
2,9999.0,0.691358,1.0,0.0,0.0,0.0,-0.114286,-0.916667,1.94,-0.4375,...,0,0,0,1,0,0,0,1,0,0
3,9999.0,0.691358,1.0,0.0,0.0,0.0,-0.114286,-0.916667,1.94,-0.4375,...,0,0,0,1,0,0,0,1,0,0
4,11999.0,0.62963,1.0,0.0,0.666667,0.0,0.914286,0.0,1.94,0.0625,...,0,0,0,1,0,0,0,1,0,0


# separate x and y

In [45]:
# y is the price to predict; x is every other column.
target_column = 'mobile_price'
y = data[target_column]
x = data.drop(columns=[target_column])

# split the data

In [46]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# Apply linear regression on the train dataset

In [47]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr

In [48]:
lr.fit(x_train,y_train)

# Perform predictions

In [49]:
y_pred = lr.predict(x_test)
y_pred

array([13345.78941526, 17338.57385442, 14645.61322826, 11224.65160847,
       12908.98001092, 12175.91674979,  7441.19456638, 26684.96237479,
       23887.25161513, 20337.58814241, 19225.56429544, 18847.84581934,
       20954.38871288, 10967.64265201, 16312.79694975, 11400.3786286 ,
        8591.94239702, 20175.38007157, 20565.34770042,  9238.74225715,
        5496.00386876, 35331.50786058, 23887.25161513, 42892.76397953,
       17258.20933594, 11451.64497874, 15657.81851379, 15301.00218617,
       10753.57876847, 42892.76397953, 20337.58814241, 17214.47078697,
       14459.98159391,  9921.01082715, 10014.28037834, 13639.09795498,
       22200.62517035, 12794.30542596, 16175.16677928, 11207.94079841,
       11198.82325963, 27612.48393089, 35331.50786058, 11918.90779333,
       16948.33634957, 16139.79227834,  9720.90799465, 15248.3196979 ,
       15962.31379793, 27216.73571647, 14524.57307645,  9264.10428071,
       19849.26027086, 33146.86957285, 12531.98927883, 15807.09511835,
      

# Evaluation

In [50]:
from sklearn.metrics import r2_score
r2_score(y_test,y_pred)

0.8329461065311625

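The model achieves an R² score of 0.8329 on the test set, i.e. it explains about 83.29% of the variance in mobile price. (R² is a goodness-of-fit measure for regression, not a classification accuracy.)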