In [4]:
# model.ipynb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Read the raw table from disk
data = pd.read_csv("dataset.csv")

# Append three derived columns in a single step:
#   pixel_density = px_width * px_height
#   screen_area   = sc_w * sc_h
#   camera_total  = pc + fc
data = data.assign(
    pixel_density=data['px_width'] * data['px_height'],
    screen_area=data['sc_w'] * data['sc_h'],
    camera_total=data['pc'] + data['fc'],
)

# Model inputs: columns taken straight from the CSV, followed by the derived
# ones. The resulting order fixes the column order of X, so keep it stable.
raw_columns = [
    'battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
    'int_memory', 'mobile_wt', 'n_cores', 'pc', 'ram', 'sc_h', 'sc_w',
    'talk_time', 'three_g', 'touch_screen', 'wifi',
]
derived_columns = ['pixel_density', 'screen_area', 'camera_total']
features = raw_columns + derived_columns
target = 'price_range'

# Design matrix and label vector
X = data[features]
y = data[target]

# Train-test split: hold out 30% of the rows; the fixed seed keeps the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# The frames returned by train_test_split are row subsets of X. Take explicit
# copies so the column assignments below write to independent frames rather
# than to a possible view (which makes pandas emit SettingWithCopyWarning).
X_train = X_train.copy()
X_test = X_test.copy()

# Scale numerical features
# Only the columns listed in num_cols are standardised; every other feature
# column is passed to the model unchanged.
scaler = StandardScaler()
num_cols = ['battery_power', 'ram', 'pixel_density', 'screen_area',
            'int_memory', 'camera_total']
# Fit the scaler on the training rows only, then reuse those statistics for
# the test rows so nothing about the held-out data leaks into preprocessing.
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

# Configure the XGBoost classifier: multi-class softmax objective with four
# classes, seeded for repeatable training.
xgb_settings = {
    'objective': 'multi:softmax',
    'num_class': 4,
    'random_state': 42,
}
model = XGBClassifier(**xgb_settings)

# Fit on the training partition
model.fit(X_train, y_train)

# Score the fitted model on the held-out partition
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {test_accuracy:.4f}")
report_text = classification_report(y_test, y_pred)
print(report_text)

# Persist the fitted estimator and the fitted scaler side by side so both can
# be reloaded together later.
for artifact, filename in ((model, 'phone_price_model.pkl'),
                           (scaler, 'scaler.pkl')):
    joblib.dump(artifact, filename)
print("Model and scaler saved successfully")

# Pair each feature name with the importance the model reports for it,
# then rank from most to least important.
importance_table = pd.DataFrame({
    'Feature': features,
    'Importance': model.feature_importances_,
})
feature_importances = importance_table.sort_values(
    by='Importance', ascending=False
)
print("\nFeature Importances:")
print(feature_importances)

Accuracy: 0.8967
              precision    recall  f1-score   support

           0       0.96      0.93      0.94       151
           1       0.85      0.90      0.87       146
           2       0.86      0.83      0.85       148
           3       0.92      0.92      0.92       155

    accuracy                           0.90       600
   macro avg       0.90      0.90      0.90       600
weighted avg       0.90      0.90      0.90       600

Model and scaler saved successfully

Feature Importances:
          Feature  Importance
10            ram    0.422818
0   battery_power    0.137564
17  pixel_density    0.108736
12           sc_w    0.031733
7       mobile_wt    0.025984
6      int_memory    0.025679
18    screen_area    0.023035
4              fc    0.021307
8         n_cores    0.020753
13      talk_time    0.020611
19   camera_total    0.020259
9              pc    0.019961
3        dual_sim    0.019313
15   touch_screen    0.017890
11           sc_h    0.017251
5         