In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [39]:
# Load the raw dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="dataset.csv")

In [41]:
# Keep only the columns used in this analysis. The explicit .copy() makes `df`
# an independent frame rather than a column subset of the frame read from CSV,
# so the column assignments made in later cells are unambiguous and cannot
# raise SettingWithCopyWarning.
df = df[['model', 'brand_name', 'price', '5G_or_not', 'processor_brand', 'battery_capacity',
         'ram_capacity', 'internal_memory', 'refresh_rate', 'os',
         'primary_camera_rear', 'fast_charging']].copy()

In [43]:
# Print each column's dtype and non-null count to see which columns have missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 980 entries, 0 to 979
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   model                980 non-null    object 
 1   brand_name           980 non-null    object 
 2   price                980 non-null    int64  
 3   5G_or_not            980 non-null    int64  
 4   processor_brand      960 non-null    object 
 5   battery_capacity     969 non-null    float64
 6   ram_capacity         980 non-null    int64  
 7   internal_memory      980 non-null    int64  
 8   refresh_rate         980 non-null    int64  
 9   os                   966 non-null    object 
 10  primary_camera_rear  980 non-null    float64
 11  fast_charging        769 non-null    float64
dtypes: float64(3), int64(5), object(4)
memory usage: 92.0+ KB


In [45]:
# Count the missing entries in every column.
df.isna().sum()

model                    0
brand_name               0
price                    0
5G_or_not                0
processor_brand         20
battery_capacity        11
ram_capacity             0
internal_memory          0
refresh_rate             0
os                      14
primary_camera_rear      0
fast_charging          211
dtype: int64

In [47]:
# Handling Null Values
#
# Numeric gaps are filled with the column mean; missing categorical labels
# become the placeholder 'other'. All four columns are filled in one call.
df = df.fillna({
    'battery_capacity': df['battery_capacity'].mean(),
    'processor_brand': 'other',
    'os': 'other',
    'fast_charging': df['fast_charging'].mean(),
})

# Some rows may carry the literal text 'nan' instead of a real null in `os`;
# fold those into the same placeholder.
df['os'] = df['os'].replace({'nan': 'other'})

In [49]:
# Re-print dtypes and non-null counts to check that no missing values remain.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 980 entries, 0 to 979
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   model                980 non-null    object 
 1   brand_name           980 non-null    object 
 2   price                980 non-null    int64  
 3   5G_or_not            980 non-null    int64  
 4   processor_brand      980 non-null    object 
 5   battery_capacity     980 non-null    float64
 6   ram_capacity         980 non-null    int64  
 7   internal_memory      980 non-null    int64  
 8   refresh_rate         980 non-null    int64  
 9   os                   980 non-null    object 
 10  primary_camera_rear  980 non-null    float64
 11  fast_charging        980 non-null    float64
dtypes: float64(3), int64(5), object(4)
memory usage: 92.0+ KB


In [51]:
# Display the frame to inspect the values after the null handling step.
df

Unnamed: 0,model,brand_name,price,5G_or_not,processor_brand,battery_capacity,ram_capacity,internal_memory,refresh_rate,os,primary_camera_rear,fast_charging
0,Apple iPhone 11,apple,38999,0,bionic,3110.000000,4,64,60,ios,12.0,46.126138
1,Apple iPhone 11 (128GB),apple,46999,0,bionic,3110.000000,4,128,60,ios,12.0,46.126138
2,Apple iPhone 11 Pro Max,apple,109900,0,bionic,3500.000000,4,64,60,ios,12.0,18.000000
3,Apple iPhone 12,apple,51999,1,bionic,4817.748194,4,64,60,ios,12.0,46.126138
4,Apple iPhone 12 (128GB),apple,55999,1,bionic,4817.748194,4,128,60,ios,12.0,46.126138
...,...,...,...,...,...,...,...,...,...,...,...,...
975,Xiaomi Redmi Note 9 Pro,xiaomi,13999,0,snapdragon,5020.000000,4,64,60,android,48.0,18.000000
976,Xiaomi Redmi Note 9 Pro (4GB RAM + 128GB),xiaomi,14439,0,snapdragon,5020.000000,4,128,60,android,48.0,18.000000
977,Xiaomi Redmi Note 9 Pro Max,xiaomi,16490,0,snapdragon,5020.000000,6,64,60,android,64.0,33.000000
978,ZTE Axon 30S,zte,19999,1,snapdragon,4200.000000,6,128,120,android,50.0,55.000000


In [61]:
# Capitalizing the Names

# Upper-case the first character (and lower-case the rest) of each label column.
for label_col in ('brand_name', 'processor_brand', 'os'):
    df[label_col] = df[label_col].str.capitalize()

# Turn the 0/1 5G flag into readable 'No'/'Yes' labels.
df['5G_or_not'] = df['5G_or_not'].replace({0: 'No', 1: 'Yes'})

In [63]:
# Display the frame to confirm the capitalized labels and the Yes/No 5G values.
df

Unnamed: 0,model,brand_name,price,5G_or_not,processor_brand,battery_capacity,ram_capacity,internal_memory,refresh_rate,os,primary_camera_rear,fast_charging
0,Apple iPhone 11,Apple,38999,No,Bionic,3110.000000,4,64,60,Ios,12.0,46.126138
1,Apple iPhone 11 (128GB),Apple,46999,No,Bionic,3110.000000,4,128,60,Ios,12.0,46.126138
2,Apple iPhone 11 Pro Max,Apple,109900,No,Bionic,3500.000000,4,64,60,Ios,12.0,18.000000
3,Apple iPhone 12,Apple,51999,Yes,Bionic,4817.748194,4,64,60,Ios,12.0,46.126138
4,Apple iPhone 12 (128GB),Apple,55999,Yes,Bionic,4817.748194,4,128,60,Ios,12.0,46.126138
...,...,...,...,...,...,...,...,...,...,...,...,...
975,Xiaomi Redmi Note 9 Pro,Xiaomi,13999,No,Snapdragon,5020.000000,4,64,60,Android,48.0,18.000000
976,Xiaomi Redmi Note 9 Pro (4GB RAM + 128GB),Xiaomi,14439,No,Snapdragon,5020.000000,4,128,60,Android,48.0,18.000000
977,Xiaomi Redmi Note 9 Pro Max,Xiaomi,16490,No,Snapdragon,5020.000000,6,64,60,Android,64.0,33.000000
978,ZTE Axon 30S,Zte,19999,Yes,Snapdragon,4200.000000,6,128,120,Android,50.0,55.000000
