# Price Range ML Prediction model

- In this notebook, I will build a machine learning model to predict the price range of a mobile phone based on its features.
- Evalute the model and fine-tune it to get the best performance.

In [23]:
# import the neccecary libraries
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle   
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC



In [7]:
# import the data

train_data = pd.read_csv('Source Data/train_cleaned.csv')   
test_data = pd.read_csv('Source Data/test - test.csv')


### Implement a function to evaluate the model using ML metrics


In [9]:
# build models
# the problem is classification problem with balanced target variable
# so the evaluation metrics are accuracy, precision, recall, f1_score the average is macro 

def evaluate_model_models(models, X_train, X_test, y_train, y_test):
    compare_list = []
    for name, clf in models :
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        compare_list.append((pd.Series({"Precision" : precision_score(y_test, y_pred,average='macro'),
                           "Recall" : recall_score(y_test, y_pred,average='macro'),
                           "F1" : f1_score(y_test, y_pred,average='macro'),
                           "Accuracy" : accuracy_score(y_test, y_pred),
                           "Train_Score" : clf.score((X_train),y_train),
                           "Test_Score" : clf.score((X_test), y_test)}, name = name)))
    compare_list = pd.DataFrame(compare_list).T
    return compare_list

In [11]:
# the following are the models that will be used in the evaluation
clf_DT=DecisionTreeClassifier()
clf_RF=RandomForestClassifier()
clf_SVC=SVC()
models = [('DT',clf_DT), ('RF',clf_RF),('SVC',clf_SVC)]

In [16]:
# define the target and features
y = train_data["price_range"]

# according the correlation graph and the other metrics the following features are the most important features
X = train_data.drop(['sc_w','price_range','four_g','m_dep','clock_speed','touch_screen'], axis=1)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# run the models
results = evaluate_model_models(models, X_train, X_test, y_train, y_test)
results

Unnamed: 0,DT,RF,SVC
Precision,0.81112,0.873719,0.942955
Recall,0.808006,0.874421,0.943409
F1,0.808891,0.873905,0.942898
Accuracy,0.81,0.8775,0.945
Train_Score,1.0,1.0,0.942982
Test_Score,0.81,0.8775,0.945


#### As shown from the previous table the SVC is the best model with the highest accuracy score of 0.94, we can fine tune the model to get the best performance.

In [17]:
# fine tune the SVC model 

def SVC_Tuning(X,y):
    models_compare_list= []
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0)
    
    for i in np.arange(1,150,.5):
        for j in range(2,9):
            clf=SVC(C=i,random_state=0,kernel='poly',degree=j)
            clf.fit(X_train,y_train)
            y_pred=clf.predict(X_test)
            models_compare_list.append((pd.Series({"Precision" : precision_score(y_test, y_pred,average='macro'),
                           "Recall" : recall_score(y_test, y_pred,average='macro'),
                           "F1" : f1_score(y_test, y_pred,average='macro'),
                           "Accuracy" : accuracy_score(y_test, y_pred),
                           "Train_Score" : clf.score((X_train),y_train),
                           "Test_Score" : clf.score((X_test), y_test)}, name = (i,j))))
    models_compare_list = pd.DataFrame(models_compare_list).T
    return models_compare_list

In [18]:
# Call the function
svc_fine_tuned_list = SVC_Tuning( X, y)

# find the best model
svc_fine_tuned_list.idxmax(axis=1) 

Precision       (42.5, 2)
Recall          (42.5, 2)
F1              (42.5, 2)
Accuracy        (42.5, 2)
Train_Score    (128.5, 8)
Test_Score      (42.5, 2)
dtype: object

In [22]:
# the best values are c= 128.5 and degree = 8
# use degree 6 to avoid overfitting
# run the model with the fine tuned parameters
clf_SVC=SVC(C=128.5,random_state=0,kernel='poly',degree=6) 
models = [('SVC',clf_SVC)]
result_2 = evaluate_model_models(models, X_train, X_test, y_train, y_test)
result_2

Unnamed: 0,SVC
Precision,0.951425
Recall,0.950762
F1,0.951019
Accuracy,0.9525
Train_Score,0.971178
Test_Score,0.9525


In [25]:
# save the model

with open('ML Models/clf_SVC.pkl', 'wb') as file:
    pickle.dump(clf_SVC, file)

In [26]:
# save the test data in json format

test_data.to_json('Source Data/test_devices.json', orient='records', lines=False)

print("CSV file has been converted to JSON successfully!")

CSV file has been converted to JSON successfully!
