In [10]:
import kagglehub
import os
import pandas as pd

import seaborn as sns
import numpy as np

from sklearn.model_selection import GridSearchCV, train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
# Download latest version
path = kagglehub.dataset_download("iabhishekofficial/mobile-price-classification")

data = pd.read_csv(os.path.join(path, os.listdir(path)[-1]))


In [13]:
X, y = data.drop('price_range', axis=1), data['price_range']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [18]:
clf_lr = LogisticRegression(max_iter=1000)
params_lr = {'solver': ['liblinear', 'lbfgs'], 'C': [10**(i) for i in range(-4, 3)]}

gridcv = GridSearchCV(clf_lr, param_grid=params_lr, scoring='accuracy', cv=3)    

gridcv.fit(X_train, y_train)

pd.DataFrame(gridcv.cv_results_)[['params', 'mean_test_score', 'std_test_score']].sort_values(by="mean_test_score", ascending=False)

Unnamed: 0,params,mean_test_score,std_test_score
13,"{'C': 100, 'solver': 'lbfgs'}",0.962667,0.009286
11,"{'C': 10, 'solver': 'lbfgs'}",0.948667,0.010499
9,"{'C': 1, 'solver': 'lbfgs'}",0.870667,0.019345
12,"{'C': 100, 'solver': 'liblinear'}",0.836,0.028331
10,"{'C': 10, 'solver': 'liblinear'}",0.814667,0.023795
8,"{'C': 1, 'solver': 'liblinear'}",0.749333,0.028111
7,"{'C': 0.1, 'solver': 'lbfgs'}",0.726667,0.022881
6,"{'C': 0.1, 'solver': 'liblinear'}",0.643333,0.013888
5,"{'C': 0.01, 'solver': 'lbfgs'}",0.622667,0.009978
4,"{'C': 0.01, 'solver': 'liblinear'}",0.537333,0.008994


In [22]:


clf_lr = LogisticRegression(random_state=22, max_iter=2000)
clf_rf = RandomForestClassifier(random_state=22)
clf_svc = SVC(random_state=22)


param_grid_lr = {'solver': ['liblinear', 'lbfgs'], 'C': np.logspace(-4, 2, 9)}

param_grid_rf = [{'n_estimators': [10, 50, 100, 250, 500, 1000], 
                  'min_samples_leaf': [1, 3, 5], 
                  'max_features': ['sqrt', 'log2']}]

param_grid_svc = [{'kernel': ['rbf'], 'C': np.logspace(-4, 4, 9), 'gamma': np.logspace(-4, 0, 4)},
                  {'kernel': ['linear'], 'C': np.logspace(-4, 4, 9)}]


gridcvs = {}

for pgrid, clf, name in zip((param_grid_lr, param_grid_rf, param_grid_svc),
                            (clf_lr, clf_rf, clf_svc),
                            ('LogisticRegression', 'RF', 'SVM')):
    gcv = GridSearchCV(clf, pgrid, cv=3, refit=True)
    gridcvs[name] = gcv



outer_cv = StratifiedKFold(n_splits=3, shuffle=True)
outer_scores = {}

for name, gs in gridcvs.items():
    nested_score = cross_val_score(gs, X_train, y_train, cv=outer_cv)
    outer_scores[name] = nested_score
    print(f'{name}: outer accuracy {100*nested_score.mean():.2f} +/- {100*nested_score.std():.2f}')

LogisticRegression: outer accuracy 96.07 +/- 0.77
RF: outer accuracy 88.00 +/- 1.07
SVM: outer accuracy 95.80 +/- 0.16


In [23]:
from sklearn.metrics import accuracy_score

final_clf = gridcvs['LogisticRegression']
final_clf.fit(X_train, y_train)

print(f'Best Parameters: {final_clf.best_params_}')

train_acc = accuracy_score(y_true=y_train, y_pred=final_clf.predict(X_train))
test_acc = accuracy_score(y_true=y_test, y_pred=final_clf.predict(X_test))

print(f'Training Accuracy: {100*train_acc:.2f}')
print(f'Test Accuracy: {100*test_acc:.2f}')

Best Parameters: {'C': 100.0, 'solver': 'lbfgs'}
Training Accuracy: 98.40
Test Accuracy: 95.40
