In [29]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier

from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, f1_score

In [30]:
data = pd.read_csv('Mobile-Price.csv')
data

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,794,1,0.5,1,0,1,2,0.8,106,6,...,1222,1890,668,13,4,19,1,1,0,0
1996,1965,1,2.6,1,0,0,39,0.2,187,4,...,915,1965,2032,11,10,16,1,1,1,2
1997,1911,0,0.9,1,1,1,36,0.7,108,8,...,868,1632,3057,9,1,5,1,1,0,3
1998,1512,0,0.9,0,4,1,46,0.1,145,5,...,336,670,869,18,10,19,1,1,1,0


In [31]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        2000 non-null   int64  
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_sc

In [32]:
y = data["price_range"]
x = data.drop('price_range', axis=1)

In [33]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

In [34]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((1500, 20), (500, 20), (1500,), (500,))

In [35]:
y.value_counts()

price_range
1    500
2    500
3    500
0    500
Name: count, dtype: int64

In [36]:
scaler = MinMaxScaler()
x_train_sc = scaler.fit_transform(x_train)
x_test_sc = scaler.transform(x_test)

In [44]:
model = LogisticRegression()
model.fit(x_train_sc, y_train)
y_pred_train = model.predict(x_train_sc)
y_pred_test = model.predict(x_test_sc)
y_proba_train = model.predict_proba(x_train_sc)
y_proba_test = model.predict_proba(x_test_sc)
print(classification_report(y_train, y_pred_train)), print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.95      0.98      0.96       368
           1       0.91      0.90      0.91       382
           2       0.93      0.89      0.91       380
           3       0.96      0.99      0.97       370

    accuracy                           0.94      1500
   macro avg       0.94      0.94      0.94      1500
weighted avg       0.94      0.94      0.94      1500

              precision    recall  f1-score   support

           0       0.95      0.95      0.95       132
           1       0.85      0.90      0.88       118
           2       0.92      0.82      0.87       120
           3       0.93      0.98      0.95       130

    accuracy                           0.92       500
   macro avg       0.91      0.91      0.91       500
weighted avg       0.92      0.92      0.92       500



(None, None)

In [39]:
f1_score(y_train, y_pred_train, average='macro'), f1_score(y_test, y_pred_test, average='macro')

(0.9380247859361529, 0.9134716957683465)

In [46]:
roc_auc_score(y_train, y_proba_train, multi_class='ovr')

0.9934145465521438

In [59]:
model = KNeighborsClassifier(n_neighbors=15)
model.fit(x_train_sc, y_train)
y_pred_train = model.predict(x_train_sc)
y_pred_test = model.predict(x_test_sc)
y_proba_train = model.predict_proba(x_train_sc)
y_proba_test = model.predict_proba(x_test_sc)
print(classification_report(y_train, y_pred_train)), print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.64      0.72      0.67       368
           1       0.49      0.52      0.50       382
           2       0.49      0.52      0.50       380
           3       0.75      0.56      0.64       370

    accuracy                           0.58      1500
   macro avg       0.59      0.58      0.58      1500
weighted avg       0.59      0.58      0.58      1500

              precision    recall  f1-score   support

           0       0.57      0.60      0.58       132
           1       0.29      0.35      0.32       118
           2       0.34      0.38      0.36       120
           3       0.69      0.46      0.55       130

    accuracy                           0.45       500
   macro avg       0.47      0.45      0.45       500
weighted avg       0.48      0.45      0.46       500



(None, None)

In [48]:
f1_score(y_train, y_pred_train, average='macro'), f1_score(y_test, y_pred_test, average='macro')

(0.6143817094232497, 0.4120090532989469)

In [49]:
roc_auc_score(y_train, y_proba_train, multi_class='ovr')

0.8577997982533802

In [50]:
model = GaussianNB()
model.fit(x_train_sc, y_train)
y_pred_train = model.predict(x_train_sc)
y_pred_test = model.predict(x_test_sc)
y_proba_train = model.predict_proba(x_train_sc)
y_proba_test = model.predict_proba(x_test_sc)
print(classification_report(y_train, y_pred_train)), print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.92      0.90      0.91       368
           1       0.74      0.74      0.74       382
           2       0.72      0.74      0.73       380
           3       0.91      0.89      0.90       370

    accuracy                           0.82      1500
   macro avg       0.82      0.82      0.82      1500
weighted avg       0.82      0.82      0.82      1500

              precision    recall  f1-score   support

           0       0.91      0.89      0.90       132
           1       0.73      0.72      0.72       118
           2       0.67      0.71      0.69       120
           3       0.86      0.84      0.85       130

    accuracy                           0.79       500
   macro avg       0.79      0.79      0.79       500
weighted avg       0.79      0.79      0.79       500



(None, None)

In [65]:
model = DecisionTreeClassifier(max_depth=2)
model.fit(x_train_sc, y_train)
y_pred_train = model.predict(x_train_sc)
y_pred_test = model.predict(x_test_sc)
y_proba_train = model.predict_proba(x_train_sc)
y_proba_test = model.predict_proba(x_test_sc)
print(classification_report(y_train, y_pred_train)), print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.91      0.77      0.84       368
           1       0.65      0.79      0.71       382
           2       0.71      0.62      0.66       380
           3       0.84      0.88      0.85       370

    accuracy                           0.76      1500
   macro avg       0.78      0.76      0.77      1500
weighted avg       0.77      0.76      0.76      1500

              precision    recall  f1-score   support

           0       0.89      0.77      0.83       132
           1       0.63      0.77      0.69       118
           2       0.66      0.61      0.63       120
           3       0.82      0.83      0.82       130

    accuracy                           0.75       500
   macro avg       0.75      0.75      0.75       500
weighted avg       0.76      0.75      0.75       500



(None, None)

In [66]:
model = RandomForestClassifier(max_depth=2)
model.fit(x_train_sc, y_train)
y_pred_train = model.predict(x_train_sc)
y_pred_test = model.predict(x_test_sc)
y_proba_train = model.predict_proba(x_train_sc)
y_proba_test = model.predict_proba(x_test_sc)
print(classification_report(y_train, y_pred_train)), print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.85      0.99      0.91       368
           1       0.78      0.71      0.74       382
           2       0.79      0.59      0.67       380
           3       0.81      0.96      0.88       370

    accuracy                           0.81      1500
   macro avg       0.81      0.81      0.80      1500
weighted avg       0.81      0.81      0.80      1500

              precision    recall  f1-score   support

           0       0.84      0.98      0.91       132
           1       0.76      0.67      0.71       118
           2       0.73      0.55      0.63       120
           3       0.79      0.92      0.85       130

    accuracy                           0.79       500
   macro avg       0.78      0.78      0.77       500
weighted avg       0.78      0.79      0.78       500



(None, None)