In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings 
warnings.filterwarnings('ignore')

In [2]:
# Read the mobile price dataset from the working directory, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer='mobile_price_range_data.csv')
df.head(n=5)

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [3]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(2000, 21)

## Data preprocessing 

### 1. Handling null values

In [4]:
# Per-column count of missing entries.
df.isna().sum()

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

### 2. Handling duplicate records

In [5]:
# Number of rows that repeat an earlier row across all columns.
df.duplicated().sum()

0

### 3. Check datatypes

In [6]:
# Storage dtype of every column, to confirm all features are numeric.
df.dtypes

battery_power      int64
blue               int64
clock_speed      float64
dual_sim           int64
fc                 int64
four_g             int64
int_memory         int64
m_dep            float64
mobile_wt          int64
n_cores            int64
pc                 int64
px_height          int64
px_width           int64
ram                int64
sc_h               int64
sc_w               int64
talk_time          int64
three_g            int64
touch_screen       int64
wifi               int64
price_range        int64
dtype: object

### 4. Check the target variable

In [7]:
# Class balance of the target: how many rows carry each price_range value.
df['price_range'].value_counts()

1    500
2    500
3    500
0    500
Name: price_range, dtype: int64

### 5. Select x (independent features) and y (dependent feature)

In [8]:
# Features are every column except the target; the target is the price_range column.
x = df.drop(columns='price_range')
y = df['price_range']
# Report the container types first, then the shapes, one value per line.
print(type(x), type(y), x.shape, y.shape, sep='\n')

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
(2000, 20)
(2000,)


### 6. Split the data into train and test data

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [10]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)
# Shapes of the four resulting pieces, one per line.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep='\n')

(1500, 20)
(500, 20)
(1500,)
(500,)


In [11]:
def eval_model(ytest,ypred):
    """Print accuracy, the confusion matrix and a per-class report for predictions.

    Parameters
    ----------
    ytest : array-like
        True class labels.
    ypred : array-like
        Predicted class labels, in the same order as ``ytest``.

    Returns
    -------
    None
        All results are written to stdout.
    """
    # Imported locally so the helper works as soon as it is defined, instead of
    # depending on a later cell having imported these names from sklearn.metrics.
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    print('Accuracy Score: ',accuracy_score(ytest,ypred))
    print(confusion_matrix(ytest,ypred))
    print(classification_report(ytest,ypred))

## Applying Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

In [13]:
# Baseline classifier: logistic regression with scikit-learn's default settings.
# NOTE(review): the features are passed in unscaled and warnings are silenced
# globally at the top of the notebook, so a solver ConvergenceWarning would not
# be visible here — TODO confirm the fit converges (consider scaling / max_iter).
m1 = LogisticRegression().fit(x_train, y_train)
m1

LogisticRegression()

In [14]:
# Accuracy of the logistic-regression model on the training split vs. the held-out test split.
print('Train Score',m1.score(x_train,y_train))
print('Test Score',m1.score(x_test,y_test))

Train Score 0.6453333333333333
Test Score 0.616


In [15]:
# Predict a price_range class for every test row and print the raw label array.
ypred_m1 = m1.predict(x_test)
print(ypred_m1)

[0 2 0 3 1 2 3 0 3 3 0 1 2 3 3 2 2 2 1 0 0 1 0 2 1 1 3 3 3 0 1 0 3 0 2 3 2
 1 3 0 1 2 3 0 3 3 3 1 3 1 3 2 0 0 2 0 1 2 0 0 1 3 3 2 2 0 3 3 1 1 2 1 0 1
 2 0 0 3 2 1 3 2 1 0 1 3 3 3 3 0 3 3 3 0 3 2 2 3 2 1 0 1 0 0 1 3 3 0 0 1 0
 0 3 3 2 1 3 3 0 2 1 3 2 2 3 3 0 3 0 2 3 0 2 2 0 2 1 1 0 2 3 1 3 3 0 0 1 2
 1 2 3 1 1 0 2 3 0 1 0 1 3 3 1 2 1 0 0 2 1 3 3 1 0 0 3 1 1 2 0 1 0 0 0 1 3
 2 0 2 0 0 0 0 1 3 3 1 0 1 1 1 1 2 1 2 3 3 1 3 0 1 1 1 1 1 3 1 1 3 1 1 3 2
 3 0 0 3 0 2 0 0 1 0 2 3 2 1 0 2 3 1 3 3 2 3 0 3 2 2 2 3 3 1 1 3 2 1 2 3 3
 3 3 0 2 2 2 2 3 0 3 3 2 2 2 0 1 3 0 2 3 1 3 1 1 2 0 3 0 0 3 0 1 2 3 2 2 0
 1 0 0 3 3 0 1 1 2 0 3 3 3 3 1 3 2 0 3 2 3 2 0 0 1 3 1 3 1 1 2 0 3 3 2 0 2
 2 2 1 3 1 0 3 1 2 1 1 1 1 2 2 3 3 1 1 1 2 2 0 3 0 0 2 0 0 2 2 2 3 0 1 2 3
 3 3 2 3 1 2 0 2 1 3 3 0 1 3 1 3 2 3 1 0 3 2 0 0 3 3 1 2 3 2 0 3 0 2 2 2 0
 1 1 1 0 0 1 0 3 3 2 1 2 1 3 1 0 3 1 0 0 3 0 3 0 1 1 2 3 0 2 0 2 1 3 3 1 3
 1 2 1 0 3 2 0 2 2 2 2 2 1 1 2 3 1 0 3 1 1 1 3 3 3 2 0 2 2 0 1 2 3 1 2 0 0
 0 2 3 0 1 2 2 2 3 1 2 2 

In [16]:
# Accuracy, confusion matrix and per-class report for the logistic-regression predictions.
eval_model(y_test,ypred_m1)

Accuracy Score:  0.616
[[95 36  1  0]
 [24 59 26  9]
 [ 1 23 59 37]
 [ 0  3 32 95]]
              precision    recall  f1-score   support

           0       0.79      0.72      0.75       132
           1       0.49      0.50      0.49       118
           2       0.50      0.49      0.50       120
           3       0.67      0.73      0.70       130

    accuracy                           0.62       500
   macro avg       0.61      0.61      0.61       500
weighted avg       0.62      0.62      0.62       500



## Applying KNN

In [17]:
from sklearn.neighbors import KNeighborsClassifier

In [18]:
# k-nearest-neighbours classifier using 11 neighbours.
# NOTE(review): neighbours are found on the raw, unscaled feature values —
# confirm that letting large-valued columns dominate the distance is intended.
knn = KNeighborsClassifier(n_neighbors=11).fit(x_train, y_train)
knn

KNeighborsClassifier(n_neighbors=11)

In [19]:
# Accuracy of the KNN model on the training split vs. the held-out test split.
print('Train score',knn.score(x_train,y_train))  # training accuracy
print('Test score',knn.score(x_test,y_test))  # test accuracy

Train score 0.952
Test score 0.938


In [20]:
# Predict a price_range class for every test row with KNN and print the raw label array.
ypred_k1 = knn.predict(x_test)
print(ypred_k1)

[0 2 1 3 1 1 2 0 3 1 0 1 2 3 2 2 3 3 1 0 0 1 1 2 0 1 3 2 2 0 0 0 3 0 1 1 2
 0 3 0 2 2 2 0 3 2 2 1 3 1 3 1 0 0 0 1 1 3 0 0 1 3 3 1 0 0 3 3 1 2 2 2 0 1
 2 0 0 3 2 1 3 2 1 0 1 3 1 3 3 0 3 3 2 1 3 2 2 3 1 1 0 0 1 0 0 3 2 0 1 1 0
 0 3 1 3 2 3 2 0 2 1 3 2 1 3 3 0 2 0 2 3 0 2 2 0 3 1 0 0 2 2 1 2 2 0 0 0 1
 1 2 3 1 1 0 2 2 0 1 0 2 2 3 3 2 1 0 1 2 2 3 3 0 1 0 3 1 1 2 1 0 0 0 0 0 3
 2 0 3 0 0 0 0 1 3 3 1 0 1 1 1 1 1 2 3 3 3 1 2 0 0 0 2 1 1 3 1 0 2 1 1 3 2
 3 0 0 2 1 3 0 1 2 0 2 3 2 0 1 3 3 0 1 3 2 3 0 3 1 2 3 3 2 1 1 3 3 1 3 3 3
 3 3 0 2 2 2 1 3 0 1 3 2 2 2 1 0 1 0 3 3 1 3 1 0 3 1 2 0 0 3 0 1 2 3 3 3 1
 1 0 1 3 3 0 1 2 2 0 3 3 2 3 2 3 2 0 2 1 1 1 0 0 0 3 2 3 1 0 1 0 1 2 3 0 3
 3 2 1 2 0 0 2 1 3 2 0 1 1 1 0 1 3 2 0 0 3 3 0 3 0 0 2 0 1 2 2 2 3 0 3 2 2
 3 3 3 2 1 1 0 3 1 3 3 0 2 3 2 3 3 3 0 0 2 3 0 0 2 3 2 1 1 2 1 2 1 3 1 2 0
 0 1 0 1 0 1 0 2 2 3 2 1 1 3 1 0 3 1 0 0 3 0 1 0 0 1 3 3 0 2 0 1 1 3 3 1 2
 0 2 0 0 3 3 0 2 2 1 3 1 2 0 1 3 1 0 3 1 0 0 3 2 3 2 0 2 1 0 1 2 3 2 1 1 0
 1 2 2 1 1 1 3 1 2 0 2 2 

In [21]:
# Accuracy, confusion matrix and per-class report for the KNN predictions.
eval_model(y_test,ypred_k1)

Accuracy Score:  0.938
[[127   5   0   0]
 [  4 113   1   0]
 [  0  11 106   3]
 [  0   0   7 123]]
              precision    recall  f1-score   support

           0       0.97      0.96      0.97       132
           1       0.88      0.96      0.91       118
           2       0.93      0.88      0.91       120
           3       0.98      0.95      0.96       130

    accuracy                           0.94       500
   macro avg       0.94      0.94      0.94       500
weighted avg       0.94      0.94      0.94       500



## Applying SVM with linear kernel

In [22]:
from sklearn.svm import SVC

In [23]:
# Support-vector classifier with a linear kernel and C=10, fitted on the training split.
svm1 = SVC(kernel='linear', C=10).fit(x_train, y_train)
svm1

SVC(C=10, kernel='linear')

In [24]:
# Accuracy of the linear-kernel SVM on the training split vs. the held-out test split.
print('Train score',svm1.score(x_train,y_train))
print('Test score',svm1.score(x_test,y_test)) 

Train score 0.9833333333333333
Test score 0.964


In [25]:
# Predict a price_range class for every test row with the linear SVM and print the raw label array.
ypred_s1 = svm1.predict(x_test)
print(ypred_s1)

[0 2 1 3 1 1 2 0 3 1 0 1 2 3 3 2 3 3 1 0 0 2 1 2 0 1 3 3 2 0 0 0 3 0 1 1 2
 0 3 0 2 3 2 0 2 3 2 1 3 1 3 1 0 0 1 1 1 3 0 0 1 3 3 1 0 0 3 3 1 2 2 2 0 1
 2 0 1 3 2 2 3 2 1 0 1 3 1 3 3 0 3 3 2 1 3 2 2 3 1 1 0 0 1 0 1 3 2 0 1 1 0
 0 3 1 3 2 3 2 0 2 1 3 2 1 3 3 0 2 0 2 3 0 2 2 0 3 1 0 0 2 2 1 2 2 0 0 0 1
 1 2 3 1 1 0 2 2 0 1 0 2 2 3 3 3 1 0 1 2 2 3 3 0 1 0 3 1 1 2 1 0 0 0 0 0 3
 2 0 3 0 0 0 0 1 3 3 1 0 1 1 1 1 2 2 3 3 3 1 2 0 0 0 2 1 1 3 1 1 2 1 1 3 2
 3 0 0 2 1 3 0 1 2 0 2 3 2 0 1 3 3 0 1 3 3 3 0 3 1 2 3 3 2 1 0 3 3 1 3 3 3
 3 3 0 1 2 2 1 3 0 2 3 2 2 2 1 0 1 0 2 3 1 3 1 1 3 1 2 0 0 3 0 1 2 3 3 3 1
 1 0 1 3 3 0 1 2 2 0 3 3 2 3 2 3 2 0 2 1 1 1 0 0 0 3 3 3 1 0 1 0 1 2 3 0 3
 3 2 1 3 0 0 2 1 3 2 0 1 1 1 1 1 3 2 0 0 3 3 0 3 0 0 2 0 1 2 2 2 3 0 3 2 3
 3 3 3 2 1 1 0 3 1 3 3 0 2 3 2 3 3 3 0 0 2 3 0 0 2 3 2 1 1 2 1 3 0 3 1 2 0
 0 1 0 1 0 1 0 2 2 3 2 1 1 2 1 1 3 1 0 0 3 0 1 0 0 2 3 3 0 2 0 1 1 3 3 1 2
 0 2 0 0 3 3 0 2 2 2 3 1 2 0 1 3 1 0 3 1 0 0 3 2 3 2 0 2 1 0 1 2 3 2 1 1 0
 1 2 2 1 0 1 3 1 2 0 2 2 

In [26]:
# Accuracy, confusion matrix and per-class report for the linear-kernel SVM predictions.
eval_model(y_test,ypred_s1)

Accuracy Score:  0.964
[[127   5   0   0]
 [  1 117   0   0]
 [  0   5 110   5]
 [  0   0   2 128]]
              precision    recall  f1-score   support

           0       0.99      0.96      0.98       132
           1       0.92      0.99      0.96       118
           2       0.98      0.92      0.95       120
           3       0.96      0.98      0.97       130

    accuracy                           0.96       500
   macro avg       0.96      0.96      0.96       500
weighted avg       0.97      0.96      0.96       500



## Applying SVM with rbf kernel

In [27]:
# Support-vector classifier with an RBF kernel and C=10, fitted on the training split.
svm2 = SVC(kernel='rbf', C=10).fit(x_train, y_train)
svm2

SVC(C=10)

In [28]:
# Accuracy of the RBF-kernel SVM on the training split vs. the held-out test split.
print('Train score',svm2.score(x_train,y_train))
print('Test score',svm2.score(x_test,y_test)) 

Train score 0.966
Test score 0.956


In [29]:
# Predict a price_range class for every test row with the RBF SVM and print the raw label array.
ypred_s2 = svm2.predict(x_test)
print(ypred_s2)

[0 2 1 3 1 1 2 0 3 1 0 1 2 3 3 2 3 3 1 0 0 1 1 2 0 1 3 2 2 0 0 0 3 0 1 1 2
 0 3 0 2 3 2 0 2 2 2 1 3 1 3 1 0 0 1 1 1 3 0 0 1 3 3 1 0 0 3 3 1 2 2 2 0 1
 2 0 1 3 2 2 3 2 1 0 1 3 1 3 3 0 3 3 2 1 3 2 2 3 1 1 0 0 1 0 0 3 2 0 1 1 0
 0 3 1 3 2 3 2 0 2 1 3 2 1 3 3 0 3 0 2 3 0 2 2 0 3 1 0 0 2 2 1 2 2 0 0 0 1
 1 2 3 1 1 0 2 2 0 1 0 2 2 3 3 3 1 0 1 2 2 3 3 0 1 0 3 1 1 2 1 0 0 0 0 0 3
 2 0 3 0 0 0 0 1 3 3 1 0 1 1 1 1 2 2 3 3 3 1 2 0 0 0 2 1 1 3 1 0 2 1 1 3 2
 3 0 0 2 1 3 0 1 2 0 2 3 2 0 1 3 3 0 1 3 3 3 0 3 1 2 3 3 2 1 1 3 3 1 3 3 3
 3 3 0 1 2 2 1 3 0 2 3 2 3 2 1 0 1 0 3 3 1 3 1 0 3 1 2 0 0 3 0 1 2 3 3 3 1
 1 0 1 3 3 0 1 2 2 0 3 3 2 3 2 3 2 0 2 1 1 1 0 0 0 2 3 3 1 0 1 0 1 2 3 0 3
 3 2 1 3 0 0 2 1 3 2 0 1 1 1 1 1 3 2 0 0 3 3 0 3 0 0 2 0 1 2 2 2 3 0 3 2 2
 3 3 3 2 1 1 0 3 1 3 3 0 2 3 2 3 3 3 0 0 2 3 0 0 2 3 2 1 1 2 1 3 1 3 1 2 0
 0 1 0 1 0 1 0 2 2 3 2 1 1 2 1 1 3 1 0 0 3 0 1 0 0 1 3 3 0 2 0 1 1 3 3 1 2
 0 2 0 0 3 3 0 2 2 2 3 1 2 0 1 3 1 0 3 1 0 0 3 2 3 2 0 2 1 0 1 2 3 2 1 1 0
 1 2 2 1 1 1 3 1 2 0 2 2 

In [30]:
# Accuracy, confusion matrix and per-class report for the RBF-kernel SVM predictions.
eval_model(y_test,ypred_s2)

Accuracy Score:  0.956
[[127   5   0   0]
 [  0 118   0   0]
 [  0   7 107   6]
 [  0   0   4 126]]
              precision    recall  f1-score   support

           0       1.00      0.96      0.98       132
           1       0.91      1.00      0.95       118
           2       0.96      0.89      0.93       120
           3       0.95      0.97      0.96       130

    accuracy                           0.96       500
   macro avg       0.96      0.96      0.96       500
weighted avg       0.96      0.96      0.96       500



## Applying Decision Trees

In [31]:
from sklearn.tree import DecisionTreeClassifier

In [32]:
# Depth-limited decision tree: Gini impurity, at most 5 levels, and a node
# needs at least 12 samples before it may be split.
# random_state is fixed so that re-running the notebook rebuilds the same tree,
# in line with the seeded train/test split and random forest in this notebook.
dt1 = DecisionTreeClassifier(criterion='gini', max_depth=5, min_samples_split=12,
                             random_state=42)
dt1.fit(x_train,y_train)

DecisionTreeClassifier(max_depth=5, min_samples_split=12)

In [33]:
# Accuracy of the decision tree on the training split vs. the held-out test split.
print('Train Score',dt1.score(x_train,y_train))
print('Test Score',dt1.score(x_test,y_test))

Train Score 0.8773333333333333
Test Score 0.794


In [34]:
# Predict a price_range class for every test row with the decision tree and print the raw label array.
ypred_dt1 = dt1.predict(x_test)
print(ypred_dt1)

[0 2 1 3 1 2 2 0 2 1 0 1 2 3 2 2 3 3 1 0 0 1 1 1 0 1 2 2 2 0 1 0 3 0 1 1 3
 0 3 0 2 2 2 0 3 2 1 1 3 1 3 1 0 0 1 1 1 2 0 0 1 3 3 2 0 0 3 3 1 2 2 2 0 1
 2 0 0 3 2 2 3 2 1 0 1 3 2 3 3 0 3 3 2 1 3 2 2 3 2 1 0 0 1 1 0 3 2 0 1 1 0
 0 2 2 2 2 2 3 0 2 1 3 2 1 3 3 0 3 1 2 3 0 2 2 0 3 1 1 0 2 3 1 2 3 0 0 0 1
 2 2 3 1 1 0 2 2 0 1 0 2 2 3 3 3 1 0 1 2 2 3 2 1 1 0 3 1 2 2 1 0 0 0 0 0 3
 2 0 3 0 0 0 0 1 3 3 1 0 1 2 1 1 1 2 2 3 3 0 2 0 0 0 2 1 1 3 1 1 3 2 1 3 1
 2 0 0 2 1 2 0 1 2 0 1 3 2 0 1 3 3 0 2 3 3 3 0 2 1 2 3 3 2 1 1 3 3 1 3 3 3
 3 3 0 2 2 2 2 2 0 2 3 2 2 2 1 0 2 0 2 3 1 3 1 0 3 1 2 0 0 3 0 1 2 3 3 3 0
 1 0 1 3 3 0 1 1 2 0 3 3 2 3 1 3 2 0 2 1 2 1 0 0 1 3 2 3 1 0 1 1 1 3 3 1 2
 2 2 1 2 0 0 3 1 3 2 0 1 1 2 1 2 3 1 0 0 2 3 0 3 0 0 1 0 0 2 2 2 3 1 3 2 2
 3 3 3 2 1 2 0 3 2 3 3 0 2 3 2 3 3 3 1 0 2 3 0 0 2 3 1 1 1 2 1 2 1 3 1 2 0
 0 1 0 1 0 2 1 1 2 3 2 1 0 3 1 0 3 1 0 1 3 0 1 0 0 1 3 3 0 2 0 1 1 2 3 0 2
 1 2 0 0 3 3 0 2 2 1 3 1 2 0 1 3 1 0 2 1 0 0 3 2 3 2 0 3 1 0 1 2 3 2 1 1 0
 1 2 2 1 1 1 3 2 2 1 3 3 

In [35]:
# Accuracy, confusion matrix and per-class report for the decision-tree predictions.
eval_model(y_test,ypred_dt1)

Accuracy Score:  0.794
[[112  20   0   0]
 [  7  93  18   0]
 [  0  17  87  16]
 [  0   0  25 105]]
              precision    recall  f1-score   support

           0       0.94      0.85      0.89       132
           1       0.72      0.79      0.75       118
           2       0.67      0.72      0.70       120
           3       0.87      0.81      0.84       130

    accuracy                           0.79       500
   macro avg       0.80      0.79      0.79       500
weighted avg       0.80      0.79      0.80       500



## Applying Random Forest

In [36]:
from sklearn.ensemble import RandomForestClassifier

In [37]:
# Random forest of 80 entropy-criterion trees, each capped at depth 7; seeded for repeatability.
rf1 = RandomForestClassifier(
    n_estimators=80,
    criterion='entropy',
    max_depth=7,
    random_state=22,
).fit(x_train, y_train)
rf1

RandomForestClassifier(criterion='entropy', max_depth=7, n_estimators=80,
                       random_state=22)

In [38]:
# Accuracy of the random forest on the training split vs. the held-out test split.
print('Train Score',rf1.score(x_train,y_train))
print('Test Score',rf1.score(x_test,y_test))

Train Score 0.9753333333333334
Test Score 0.864


In [39]:
# Predict a price_range class for every test row with the random forest and print the raw label array.
ypred_rf1 = rf1.predict(x_test)
print(ypred_rf1)

[0 2 1 3 1 2 2 0 3 1 0 1 2 3 2 2 3 3 1 0 0 1 1 1 0 1 3 2 2 0 0 0 3 0 1 1 2
 0 3 0 2 3 3 0 3 2 2 1 3 1 3 1 0 0 1 1 1 3 0 0 1 3 3 2 0 0 3 3 1 2 2 3 0 1
 2 0 0 3 2 2 3 2 1 0 1 3 2 3 3 0 3 3 2 1 3 2 2 3 2 1 0 0 1 0 0 3 2 0 1 1 0
 0 3 1 3 2 3 2 0 2 1 3 2 1 3 3 0 3 0 2 3 0 2 2 0 3 1 0 0 2 3 0 3 2 0 0 0 1
 2 2 3 1 1 0 2 2 0 1 0 2 2 3 3 2 1 0 0 2 2 3 3 1 1 0 3 1 1 2 1 0 0 0 0 0 3
 2 0 3 0 1 0 0 1 3 3 1 0 1 2 1 1 2 2 3 3 3 1 2 0 0 0 2 1 1 3 1 0 2 2 1 3 1
 3 0 0 2 1 3 0 0 1 0 1 3 2 0 1 2 3 0 2 3 2 3 0 3 1 2 3 3 2 1 1 3 3 1 3 3 3
 3 3 0 1 2 3 2 2 0 2 3 2 2 2 1 0 2 0 2 3 1 3 1 0 3 1 2 0 0 3 0 1 2 3 2 3 1
 1 0 1 3 3 0 1 1 2 0 3 3 2 3 1 3 2 0 2 1 2 1 0 0 1 3 3 3 1 0 1 0 2 2 2 0 3
 3 2 1 3 0 0 3 1 3 2 0 1 1 2 1 1 3 1 0 0 3 3 0 3 0 0 2 0 0 2 2 2 3 0 3 2 3
 3 3 3 2 1 2 0 3 1 3 3 0 2 3 2 3 3 3 0 0 2 3 0 0 2 3 1 1 1 2 1 3 1 3 1 2 0
 0 1 0 1 0 2 1 1 3 3 2 1 1 3 1 0 3 1 0 0 3 0 1 1 1 1 3 3 0 2 1 1 1 3 3 0 2
 0 2 0 0 3 3 0 2 2 1 3 1 1 0 1 3 1 0 3 1 0 0 3 2 3 2 0 3 1 0 1 2 3 2 1 1 0
 1 2 2 1 1 1 3 1 3 0 3 2 

In [40]:
# Accuracy, confusion matrix and per-class report for the random-forest predictions.
eval_model(y_test,ypred_rf1)

Accuracy Score:  0.864
[[120  12   0   0]
 [  8  98  12   0]
 [  0  15  91  14]
 [  0   0   7 123]]
              precision    recall  f1-score   support

           0       0.94      0.91      0.92       132
           1       0.78      0.83      0.81       118
           2       0.83      0.76      0.79       120
           3       0.90      0.95      0.92       130

    accuracy                           0.86       500
   macro avg       0.86      0.86      0.86       500
weighted avg       0.86      0.86      0.86       500



### The linear-kernel SVM has the best test accuracy (0.964), ahead of the RBF-kernel SVM (0.956) and KNN (0.938)