In [90]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics, preprocessing
from sklearn.model_selection import GridSearchCV, train_test_split

In [3]:
# Read train.csv (relative path under ../input) into the DataFrame used by
# every later cell.
TRAIN_CSV_PATH = "../input/mobile-price-classification/train.csv"
train_df = pd.read_csv(TRAIN_CSV_PATH)

In [5]:
# Print a summary of train_df: column names, non-null counts and dtypes.
train_df.info()

In [28]:
# Pairwise correlation matrix of the columns of train_df, shown as the cell
# output. "pearson" is the pandas default, spelled out here for the reader.
train_df.corr(method="pearson")

# Data Visualization

In [46]:
# Histogram of the battery_power column over all rows of train_df.
sns.histplot(data=train_df, x="battery_power")

In [48]:
# Box plot of price_range (y) grouped by the blue column (x).
# x and y are given by keyword together with data=: seaborn >= 0.12 accepts
# only `data` positionally, so passing two Series positionally raises a
# TypeError there (and a FutureWarning on 0.11).
sns.boxplot(x="blue", y="price_range", data=train_df)

In [52]:
# Histogram of the clock_speed column over all rows of train_df.
sns.histplot(data=train_df, x="clock_speed")

In [54]:
# Box plot of clock_speed (y) for each price_range value (x).
sns.boxplot(
    data=train_df,
    x="price_range",
    y="clock_speed",
)

In [57]:
# Pie chart of how many rows fall into each dual_sim category.
# value_counts() orders categories by frequency, not by value, so a fixed label
# list can end up attached to the wrong wedge. Each label is therefore looked
# up from the category value that the wedge actually counts.
# Assumes dual_sim is encoded 1 = dual SIM, 0 = not dual SIM -- TODO confirm
# against the dataset description.
DUAL_SIM_LABELS = {1: "Dual Sim", 0: "Non Dual Sim"}
dual_sim_counts = train_df["dual_sim"].value_counts()
dual_sim_labels = [DUAL_SIM_LABELS.get(value, str(value)) for value in dual_sim_counts.index]
plt.pie(dual_sim_counts.values, labels=dual_sim_labels)

In [75]:
# Box plot of price_range (y) grouped by the dual_sim column (x).
# Keyword x/y with data= is required: seaborn >= 0.12 accepts only `data`
# positionally, so two positional Series raise a TypeError there.
sns.boxplot(x="dual_sim", y="price_range", data=train_df)

In [61]:
# Box plot of price_range (y) for each distinct value of the fc column (x).
# Keyword x/y with data= is required: seaborn >= 0.12 accepts only `data`
# positionally, so two positional Series raise a TypeError there.
sns.boxplot(x="fc", y="price_range", data=train_df)

In [67]:
# Pie chart (with percentage annotations) of the four_g categories.
# value_counts() orders categories by frequency, not by value, so each wedge
# label is looked up from the value it counts rather than taken from a fixed
# list whose order might not match.
# Assumes four_g is encoded 1 = supported, 0 = not supported -- TODO confirm
# against the dataset description.
FOUR_G_LABELS = {1: "4G-supported", 0: "Not supported"}
four_g_counts = train_df["four_g"].value_counts()
four_g_labels = [FOUR_G_LABELS.get(value, str(value)) for value in four_g_counts.index]
plt.pie(four_g_counts.values, labels=four_g_labels, autopct="%1.1f%%")

In [68]:
# Pie chart (with percentage annotations) of the three_g categories.
# value_counts() orders categories by frequency, not by value, so each wedge
# label is looked up from the value it counts rather than taken from a fixed
# list whose order might not match.
# Assumes three_g is encoded 1 = supported, 0 = not supported -- TODO confirm
# against the dataset description.
THREE_G_LABELS = {1: "3G-supported", 0: "Not supported"}
three_g_counts = train_df["three_g"].value_counts()
three_g_labels = [THREE_G_LABELS.get(value, str(value)) for value in three_g_counts.index]
plt.pie(three_g_counts.values, labels=three_g_labels, autopct="%1.1f%%")

In [71]:
# Histogram of the ram column over all rows of train_df.
sns.histplot(data=train_df, x="ram")

In [73]:
# Box plot of talk_time (y) for each price_range value (x).
# Keyword x/y with data= is required: seaborn >= 0.12 accepts only `data`
# positionally, so two positional Series raise a TypeError there.
sns.boxplot(x="price_range", y="talk_time", data=train_df)

In [76]:
# Histogram of clock_speed.
# NOTE(review): this repeats the clock_speed histogram drawn in an earlier
# cell of this notebook -- consider removing one of the two.
clock_speed = train_df["clock_speed"]
sns.histplot(x=clock_speed)

In [78]:
# Box plot of clock_speed (y) for each price_range value (x).
# Keyword x/y with data= is required: seaborn >= 0.12 accepts only `data`
# positionally, so two positional Series raise a TypeError there.
# NOTE(review): an earlier cell of this notebook already draws this same plot.
sns.boxplot(x="price_range", y="clock_speed", data=train_df)

In [83]:
# Box plot of price_range (y) grouped by the wifi column (x).
# Keyword x/y with data= is required: seaborn >= 0.12 accepts only `data`
# positionally, so two positional Series raise a TypeError there.
sns.boxplot(x="wifi", y="price_range", data=train_df)

In [86]:
# Overlay the fc and pc distributions on one 10x6 axes; both histograms are
# semi-transparent so the overlap stays visible.
fig, ax = plt.subplots(figsize=(10,6))
camera_series = (
    ('fc', 'blue', 'Front camera'),
    ('pc', 'green', 'Primary camera'),
)
for column, colour, legend_label in camera_series:
    train_df[column].hist(ax=ax, alpha=0.5, color=colour, label=legend_label)
ax.legend()
ax.set_xlabel('MegaPixels')

In [87]:
sns.boxplot(train_df["price_range"],train_df["mobile_wt"])

# Features (X) and Target (y)

In [89]:
# Target vector: the price_range column. Feature matrix: every other column.
TARGET_COLUMN = "price_range"
y = train_df[TARGET_COLUMN]
X = train_df.drop(columns=[TARGET_COLUMN])

# Standardization

In [91]:
# Standardize every feature column and rebind X to the scaled values (by
# default the transformer returns a NumPy array). The fitted transformer stays
# available as `scaler`.
# NOTE(review): the scaler is fit on all rows before the train/test split, so
# statistics of the held-out rows leak into the training features -- consider
# fitting on the training rows only.
scaler = preprocessing.StandardScaler()
X = scaler.fit_transform(X)

## Split dataset into training and test sets

In [92]:
# Hold out 30% of the rows as a test set; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Linear regression

In [101]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit that treats the price_range label as a number.
lm = LinearRegression().fit(X_train,y_train)

# Training-set fit quality (MSE and R^2).
print("MSE for linear regression :",metrics.mean_squared_error(y_train,lm.predict(X_train)))
print("Linear regression score:",lm.score(X_train,y_train))

# Held-out fit quality: training metrics alone cannot show overfitting, and the
# other model cells in this notebook report both train and test figures.
print("Test MSE for linear regression :",metrics.mean_squared_error(y_test,lm.predict(X_test)))
print("Linear regression test score:",lm.score(X_test,y_test))

# Logistic Regression

In [109]:
from sklearn.linear_model import LogisticRegression

# Multiclass logistic regression on the standardized features.
logr = LogisticRegression().fit(X_train,y_train)

y_train_pred = logr.predict(X_train)
y_test_pred = logr.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric, but classification_report is not: with the arguments swapped the
# precision and recall columns trade places and "support" counts predictions
# instead of true labels.
print("Train-Set accuracy:", metrics.accuracy_score(y_train, y_train_pred)*100)
print("Test-Set accuracy:", metrics.accuracy_score(y_test, y_test_pred)*100)
print("Classification Report:\n", metrics.classification_report(y_test, y_test_pred))

# Decision Tree

In [111]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with default hyper-parameters. random_state is fixed (same seed
# as the train/test split) so that the scores below are reproducible on a
# Restart & Run All; without it tie-breaking between splits is randomized.
tree = DecisionTreeClassifier(random_state=42).fit(X_train,y_train)

# Mean accuracy on the training rows, then on the held-out rows.
print(tree.score(X_train,y_train))
print(tree.score(X_test,y_test))

# Random Forest

In [118]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 200 trees. Bootstrapping and feature sub-sampling are
# random, so random_state is fixed (same seed as the train/test split) to make
# the scores below reproducible on a Restart & Run All.
rftree = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train,y_train)

# Mean accuracy on the training rows, then on the held-out rows.
print(rftree.score(X_train,y_train))
print(rftree.score(X_test,y_test))

# SVM

In [122]:
from sklearn.svm import SVC

# Support-vector classifier with a linear kernel. The kernel name must be one
# of scikit-learn's known kernels; a stray pasted suffix on the string makes
# the fit fail with an invalid-parameter error.
svc = SVC(kernel="linear").fit(X_train,y_train)

y_train_pred = svc.predict(X_train)
y_test_pred = svc.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order; swapping them in
# classification_report exchanges precision with recall and reports support
# for the predictions rather than the true labels.
print("Train-Set accuracy:", metrics.accuracy_score(y_train, y_train_pred)*100)
print("Test-Set accuracy:", metrics.accuracy_score(y_test, y_test_pred)*100)
print("Classification Report:\n", metrics.classification_report(y_test, y_test_pred))

# XGBoost

In [137]:
from xgboost import XGBClassifier

# Gradient-boosted trees (100 boosting rounds) on the standardized features.
xgbc = XGBClassifier(n_estimators=100).fit(X_train,y_train)

y_train_pred = xgbc.predict(X_train)
y_test_pred = xgbc.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order; swapping them in
# classification_report exchanges precision with recall and reports support
# for the predictions rather than the true labels.
print("Train-Set accuracy:", metrics.accuracy_score(y_train, y_train_pred)*100)
print("Test-Set accuracy:", metrics.accuracy_score(y_test, y_test_pred)*100)
print("Classification Report:\n", metrics.classification_report(y_test, y_test_pred))