In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import warnings

# NOTE(review): this silences every warning for the rest of the session,
# including pandas / scikit-learn deprecation and data-validation notices.
# Consider narrowing it to specific categories once the pipeline is stable.
warnings.filterwarnings('ignore')

# 1. Load dataset
df = pd.read_csv('E-commerce Dataset.csv')

# 2. Drop irrelevant columns
df = df.drop(['Order_Date', 'Time', 'Customer_Id'], axis=1)

# 3. Handle missing values
# Rows without a label cannot be used for supervised training. Drop them
# before the blanket fill; otherwise the target column would mix category
# strings with the integer 0 and LabelEncoder would reject it.
df = df.dropna(subset=['Product_Category'])
df = df.fillna(0)

# 4. Encode categorical columns
# One fitted encoder is kept per column so that new samples can be encoded
# with the same string -> integer mapping at prediction time.
label_encoders = {}
categorical_cols = ['Gender', 'Device_Type', 'Customer_Login_type',
                    'Product', 'Order_Priority', 'Payment_method']

for col in categorical_cols:
    le = LabelEncoder()
    # Cast to str so that the 0 used to fill missing values becomes the
    # category '0' instead of mixing ints and strings in one column.
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

# NOTE(review): 'Product' is used as a feature while 'Product_Category' is
# the target. Presumably each product belongs to exactly one category, which
# would make the task trivial (target leakage) -- confirm against the data
# and consider removing 'Product' from the features.

# 5. Encode target
target_le = LabelEncoder()
df['Product_Category'] = target_le.fit_transform(df['Product_Category'])

# 6. Separate features and target
X = df.drop('Product_Category', axis=1)
y = df['Product_Category']

# 7. Train/test split
# The split happens BEFORE scaling so that the scaler never sees test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Work on explicit copies so the column assignments below never write
# through to (or warn about) the frame the splits were taken from.
X_train = X_train.copy()
X_test = X_test.copy()

# 8. Scale numeric columns
# Fit the scaler on the training rows only, then apply the same transform
# everywhere. X is transformed as well so that it stays in the same feature
# space as the data the model is trained on.
numeric_cols = ['Sales', 'Quantity', 'Discount', 'Profit', 'Shipping_Cost', 'Aging']
scaler = StandardScaler()
scaler.fit(X_train[numeric_cols])
for frame in (X, X_train, X_test):
    frame[numeric_cols] = scaler.transform(frame[numeric_cols])

# 9. Train better model (RandomForest)
# class_weight='balanced' reweights classes inversely to their frequency.
model = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

# 10. Evaluate model
y_pred = model.predict(X_test)
print("\n Accuracy:", round(accuracy_score(y_test, y_pred) * 100, 2), "%")
print("\n Classification Report:\n")
print(classification_report(y_test, y_pred, target_names=target_le.classes_))

# 11. Helper: Safe encoder
def safe_transform(encoder, value, default=0):
    """Encode ``value`` with a fitted encoder, falling back for unseen labels.

    Args:
        encoder: Fitted encoder exposing ``classes_`` and ``transform``.
        value: Raw label to encode.
        default: Integer code returned when ``value`` is not among
            ``encoder.classes_``. It is also used as an index into
            ``encoder.classes_`` to name the fallback label in the warning.

    Returns:
        The first element of ``encoder.transform([value])`` when the label is
        known, otherwise ``default``.
    """
    known_labels = encoder.classes_

    # Guard clause: unseen labels are reported and mapped to the fallback code.
    if value not in known_labels:
        print(f" Warning: '{value}' not seen during training. Using fallback '{encoder.classes_[default]}'")
        return default

    encoded = encoder.transform([value])
    return encoded[0]

# 12. Predict a new sample
# Raw, human-readable description of the order to classify. Categorical
# fields hold their original string labels; numeric fields hold plain numbers.
raw_sample = {
    'Aging': 5,
    'Gender': 'Male',
    'Device_Type': 'Mobile',
    'Customer_Login_type': 'Guest',
    'Product': 'Laptop',
    'Sales': 3000,
    'Quantity': 1,
    'Discount': 0.10,
    'Profit': 500,
    'Shipping_Cost': 50,
    'Order_Priority': 'High',
    'Payment_method': 'Credit Card',
}

# Encode every field that has a fitted encoder; other fields pass through
# unchanged. Each value is wrapped in a list so the dict describes one row.
new_data = {
    col: [safe_transform(label_encoders[col], value) if col in label_encoders else value]
    for col, value in raw_sample.items()
}

example = pd.DataFrame(new_data)
example = example.fillna(0)

# Put the columns in exactly the order the model was trained on, rather than
# relying on the dict literal above matching the CSV's column order. A
# missing feature raises KeyError here instead of producing a bad prediction.
example = example[list(X.columns)]

# Apply the scaler fitted during training (transform only, never re-fit).
example[numeric_cols] = scaler.transform(example[numeric_cols])

# Predict
prediction = model.predict(example)
# Map the integer class back to its original category label.
predicted_category = target_le.inverse_transform(prediction)[0]
print("\n Predicted Product Category:", predicted_category)



 Accuracy: 100.0 %

 Classification Report:

                    precision    recall  f1-score   support

Auto & Accessories       1.00      1.00      1.00      1501
        Electronic       1.00      1.00      1.00       540
           Fashion       1.00      1.00      1.00      5129
  Home & Furniture       1.00      1.00      1.00      3088

          accuracy                           1.00     10258
         macro avg       1.00      1.00      1.00     10258
      weighted avg       1.00      1.00      1.00     10258


 Predicted Product Category: Electronic
