In [25]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [26]:
df = pd.read_csv('Mobile phone price.csv')
df.head()

Unnamed: 0,Brand,Model,Storage,RAM,Screen Size (inches),Camera (MP),Battery Capacity (mAh),Price ($)
0,Apple,iPhone 13 Pro,128 GB,6 GB,6.1,12 + 12 + 12,3095,999
1,Samsung,Galaxy S21 Ultra,256 GB,12 GB,6.8,108 + 10 + 10 + 12,5000,1199
2,OnePlus,9 Pro,128 GB,8 GB,6.7,48 + 50 + 8 + 2,4500,899
3,Xiaomi,Redmi Note 10 Pro,128 GB,6 GB,6.67,64 + 8 + 5 + 2,5020,279
4,Google,Pixel 6,128 GB,8 GB,6.4,50 + 12.2,4614,799


In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 407 entries, 0 to 406
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Brand                   407 non-null    object
 1   Model                   407 non-null    object
 2   Storage                 407 non-null    object
 3   RAM                     407 non-null    object
 4   Screen Size (inches)    407 non-null    object
 5   Camera (MP)             407 non-null    object
 6   Battery Capacity (mAh)  407 non-null    int64 
 7   Price ($)               407 non-null    object
dtypes: int64(1), object(7)
memory usage: 25.6+ KB


In [28]:
df.isnull().sum()

Brand                     0
Model                     0
Storage                   0
RAM                       0
Screen Size (inches)      0
Camera (MP)               0
Battery Capacity (mAh)    0
Price ($)                 0
dtype: int64

In [29]:
df.shape

(407, 8)

In [30]:
df.describe()

Unnamed: 0,Battery Capacity (mAh)
count,407.0
mean,4676.476658
std,797.193713
min,1821.0
25%,4300.0
50%,5000.0
75%,5000.0
max,7000.0


In [31]:
df.columns.tolist()

['Brand',
 'Model',
 'Storage ',
 'RAM ',
 'Screen Size (inches)',
 'Camera (MP)',
 'Battery Capacity (mAh)',
 'Price ($)']

In [32]:
drop_col=['Model','Camera (MP)','Screen Size (inches)']
df.drop(drop_col,inplace=True,axis=1)
object_columns = df.select_dtypes(include=['object']).columns
df[object_columns]

Unnamed: 0,Brand,Storage,RAM,Price ($)
0,Apple,128 GB,6 GB,999
1,Samsung,256 GB,12 GB,1199
2,OnePlus,128 GB,8 GB,899
3,Xiaomi,128 GB,6 GB,279
4,Google,128 GB,8 GB,799
...,...,...,...,...
402,Samsung,128,8,1049
403,Xiaomi,128,6,349
404,Apple,128,6,1099
405,Oppo,128,8,429


In [34]:
df['Price ($)'].unique()


array(['999', '1199', '899', '279', '799', '249', '699', '329', '449',
       '199', '299', '379', '179', '729', '599', '139', '189', '399',
       '259', '159', '229', '499', '129', '529', '369', '1099', '169',
       '99', '459', '239', '1299', '429', '659', '269', '359', '$799 ',
       '$399 ', '$699 ', '$329 ', '$999 ', '$549 ', '$1,299 ', '$899 ',
       '$449 ', '$319 ', '$269 ', '$349 ', '$279 ', '$249 ', '$299 ',
       '$969 ', '$1,199 ', '$149 ', '$139 ', '$99 ', '$199 ', '$169 ',
       '$499 ', '$179 ', '$219 ', '$229 ', '$239 ', '$109 ', '$189 ',
       '$389 ', '$309 ', '$369 ', '$129 ', '$849 ', '$469 ', '$209 ',
       '$119 ', '$339 ', '$429 ', '$159 ', '$379 ', '$289 ', '130', '749',
       '149', '969', '649', '349', '419', '1399', '1999', '119', '319',
       '1049'], dtype=object)

In [35]:
df['Price ($)'] = df['Price ($)'].str.replace('$', '').str.replace(',', '').str.strip()
df['Price ($)'] = pd.to_numeric(df['Price ($)'])
df['Price ($)'].unique()

array([ 999, 1199,  899,  279,  799,  249,  699,  329,  449,  199,  299,
        379,  179,  729,  599,  139,  189,  399,  259,  159,  229,  499,
        129,  529,  369, 1099,  169,   99,  459,  239, 1299,  429,  659,
        269,  359,  549,  319,  349,  969,  149,  219,  109,  389,  309,
        849,  469,  209,  119,  339,  289,  130,  749,  649,  419, 1399,
       1999, 1049], dtype=int64)

In [36]:
for column in object_columns:
    unique_values = df[column].unique()
    print(f"Unique values in '{column}':")
    print(unique_values)
    print()

Unique values in 'Brand':
['Apple' 'Samsung' 'OnePlus' 'Xiaomi' 'Google' 'Oppo' 'Vivo' 'Realme'
 'Motorola' 'Nokia' 'Sony' 'LG' 'Asus' 'Blackberry' 'CAT' 'Huawei']

Unique values in 'Storage ':
['128 GB' '256 GB' '64 GB' '32 GB' '128GB' '256GB' '64GB' '32GB' '256'
 '64' '128' '512' '32']

Unique values in 'RAM ':
['6 GB' '12 GB' '8 GB' '4 GB' '3 GB' '2 GB' '4GB' '8GB' '6GB' '12GB' '3GB'
 '2GB' '5GB' '12' '3' '6' '8' '4' '16' '2']

Unique values in 'Price ($)':
[ 999 1199  899  279  799  249  699  329  449  199  299  379  179  729
  599  139  189  399  259  159  229  499  129  529  369 1099  169   99
  459  239 1299  429  659  269  359  549  319  349  969  149  219  109
  389  309  849  469  209  119  339  289  130  749  649  419 1399 1999
 1049]



In [41]:

df['Storage '] = df['Storage '].str.replace('GB', '').str.strip()
df['Storage '] = pd.to_numeric(df['Storage '])

df['RAM '] = df['RAM '].str.replace('GB', '').str.strip()
df['RAM '] = pd.to_numeric(df['RAM '])


print(df['Storage '].unique())
print(df['RAM '].unique())

AttributeError: Can only use .str accessor with string values!