### Importing libraries

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

### Loading the data sets

In [10]:
train_data = pd.read_csv("../dataset/cleaned_train_data.csv")
train_data.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,touch_screen,wifi,price_range,screen_area,log_ram,log_battery_power,log_int_memory,battery_power_bin,ram_bin,int_memory_bin
0,-0.902597,0,0.830779,0,-0.764629,0.0,-1.382405,0.339276,1.350676,-1.101463,...,0,1,1,63.0,7.843849,6.736967,2.079442,0,2,0
1,-0.495139,1,-1.253064,1,-0.995615,1.0,1.156334,0.686381,-0.120727,-0.664034,...,1,0,2,51.0,7.875499,6.929517,3.988984,1,2,3
2,-1.537686,1,-1.253064,1,-0.533642,1.0,0.494054,1.380591,0.133939,0.210825,...,1,0,2,22.0,7.864804,6.335054,3.73767,0,2,2
3,-1.419319,1,1.198517,0,-0.995615,0.0,-1.216835,1.033486,-0.262208,0.648255,...,0,0,2,128.0,7.926603,6.423247,2.397895,0,2,0
4,1.325906,1,-0.395011,0,2.007209,1.0,0.659624,0.339276,0.020754,-1.101463,...,1,0,1,16.0,7.252762,7.50769,3.806662,3,1,2


In [11]:
train_data.shape

(2000, 28)

In [12]:
from sklearn.model_selection import train_test_split

X = train_data.drop(["price_range"], axis=1)
y = train_data["price_range"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

print("Train set:", X_train.shape, y_train.shape)
print("Test set:", X_test.shape, y_test.shape)

Train set: (1600, 27) (1600,)
Test set: (400, 27) (400,)


# Using Logistic Regression

In [13]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression().fit(X_train, y_train)

print(
    "Accuracy of Logistic regression classifier on train set:",
    LR.score(X_train, y_train),
)
print(
    "Accuracy of Logistic regression classifier on test set:", LR.score(X_test, y_test)
)

Accuracy of Logistic regression classifier on train set: 0.88375
Accuracy of Logistic regression classifier on test set: 0.86


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Using Random Forest Classifier

In [14]:
from sklearn.ensemble import RandomForestClassifier

# Train a RandomForestClassifier
model = RandomForestClassifier(random_state=4)
model.fit(X_train, y_train)

# Predictions and Evaluation
y_pred = model.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 99   7   0   0]
 [  4  89   8   0]
 [  0   8  74   5]
 [  0   0   6 100]]
              precision    recall  f1-score   support

           0       0.96      0.93      0.95       106
           1       0.86      0.88      0.87       101
           2       0.84      0.85      0.85        87
           3       0.95      0.94      0.95       106

    accuracy                           0.91       400
   macro avg       0.90      0.90      0.90       400
weighted avg       0.91      0.91      0.91       400



# Using Support Vector Machines

In [15]:
from sklearn.svm import SVC

clf = SVC(kernel="linear").fit(X_train, y_train)


print("Accuracy on training set: {:.2f}".format(clf.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(clf.score(X_test, y_test)))

y_pred_linear = clf.predict(X_test)
print(confusion_matrix(y_test, y_pred_linear))
print(classification_report(y_test, y_pred_linear))

Accuracy on training set: 0.97
Accuracy on test set: 0.96
[[102   4   0   0]
 [  2  99   0   0]
 [  0   3  82   2]
 [  0   0   5 101]]
              precision    recall  f1-score   support

           0       0.98      0.96      0.97       106
           1       0.93      0.98      0.96       101
           2       0.94      0.94      0.94        87
           3       0.98      0.95      0.97       106

    accuracy                           0.96       400
   macro avg       0.96      0.96      0.96       400
weighted avg       0.96      0.96      0.96       400



In [17]:
import joblib

joblib.dump(clf, "svc_model.pkl")

['svc_model.pkl']