In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
import warnings
from sklearn.exceptions import DataConversionWarning

# Suppress the specific warning
warnings.filterwarnings("ignore")

In [3]:
# Load the dataset
df = pd.read_csv('dataset.csv')

In [4]:
# Perform Exploratory Data Analysis (EDA)
print(df.info())
print(df.describe())
print(df.isnull().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        2000 non-null   int64  
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_sc

In [5]:
# Split features and target variable
X = df.drop('price_range', axis=1)
y = df['price_range']


In [6]:
# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [8]:
# Hyperparameter tuning with GridSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [9]:
grid_search = GridSearchCV(RandomForestClassifier(random_state=42, class_weight='balanced'), param_grid, cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 81 candidates, totalling 405 fits


In [10]:
# Train the best model
best_model = grid_search.best_estimator_

In [11]:
# Make predictions
y_pred = best_model.predict(X_test)

In [12]:
# Evaluate the model
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%')

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Accuracy: 89.00%


In [13]:
# Save the best model
with open('price_model.pkl', 'wb') as f:
    pickle.dump((best_model, scaler), f)

In [14]:
def predict_price(user_input):
    """Predict mobile price category given user input parameters in dictionary format."""
    with open('price_model.pkl', 'rb') as f:
        model, scaler = pickle.load(f)
    
    feature_order = X.columns.tolist()
    user_data = np.zeros(len(feature_order))
    
    # Fill missing values with column mean
    df_mean = df.mean()
    for i, feature in enumerate(feature_order):
        user_data[i] = user_input.get(feature, df_mean[feature])
    
    user_data_scaled = scaler.transform([user_data])
    prediction = model.predict(user_data_scaled)[0]
    probabilities = model.predict_proba(user_data_scaled)[0]
    
    categories = ['Low', 'Medium', 'High', 'Very High']
    print(f'Prediction Probabilities: {dict(zip(categories, probabilities))}')
    return categories[prediction]

In [15]:
# Example usage
user_input = {
    'battery_power': 1021, 'blue': 1, 'clock_speed': 0.5, 'dual_sim': 0,
    'fc': 0, 'four_g': 1, 'int_memory': 53, 'm_dep': 0.7, 'mobile_wt': 136
}  # Incomplete input

# Attractive output formatting
predicted_category = predict_price(user_input)
print("\n" + "―" * 50)
print("📱 Mobile Price Prediction 📱".center(50))
print("―" * 50)
print(f"\n✨ Predicted Price Category: {predicted_category} ✨")

Prediction Probabilities: {'Low': np.float64(0.128151959755877), 'Medium': np.float64(0.6026382477414967), 'High': np.float64(0.2132742348867092), 'Very High': np.float64(0.05593555761591732)}

――――――――――――――――――――――――――――――――――――――――――――――――――
           📱 Mobile Price Prediction 📱            
――――――――――――――――――――――――――――――――――――――――――――――――――

✨ Predicted Price Category: Medium ✨
