In [1]:
import pandas as pd
import numpy as np

# Load the WooCommerce sample product export and preview its first rows.
csv_path = 'Divi-Engine-WooCommerce-Sample-Products.csv'
df = pd.read_csv(csv_path)
df.head()


Unnamed: 0,ID,Type,SKU,Name,Published,Is featured?,Visibility in catalog,Short description,Description,Date sale price starts,...,Attribute 2 name,Attribute 2 value(s),Attribute 2 visible,Attribute 2 global,Attribute 2 default,Attribute 3 name,Attribute 3 value(s),Attribute 3 visible,Attribute 3 global,Attribute 3 default
0,66,simple,,Divi Engine String Bag (Big Logo),1,0,visible,This fashionable string bag is made of 100% co...,,,...,,,,,,,,,,
1,70,simple,,Divi Engine String Bag (Small Logos),1,0,visible,This fashionable string bag is made of 100% co...,,,...,,,,,,,,,,
2,74,variable,,Brand Buttons,1,0,visible,"Represent your favorite CMS, eCommerce Platfor...",,,...,,,,,,,,,,
3,75,variation,,Brand Buttons - Divi,1,0,visible,,Rep your love for Divi!,,...,,,,,,,,,,
4,76,variation,,Brand Buttons - Divi Engine,1,0,visible,,Rep your love for Divi Engine!,,...,,,,,,,,,,


In [2]:
# Part 1: Data Preprocessing
# Clean the dataset and handle missing values.
#
# Each filled Series is assigned back to its column. Calling
# df[col].fillna(..., inplace=True) is chained assignment: it works on a
# temporary selection, raises a FutureWarning in pandas 2.x and leaves `df`
# unchanged when Copy-on-Write is enabled.
df['Regular price'] = df['Regular price'].fillna(df['Regular price'].median())  # Missing regular prices -> column median
df['Sale price'] = df['Sale price'].fillna(df['Regular price'])  # Missing sale prices -> the same row's regular price

# Handle categorical data (Categories)
df['Categories'] = df['Categories'].fillna('Uncategorized')  # Missing categories get a default label
df['Categories'] = df['Categories'].astype('category').cat.codes  # Replace each label with its integer category code

# Handle stock: missing stock values are filled with 0
df['Stock'] = df['Stock'].fillna(0)

# Select the columns used for modelling
df_relevant = df[['Regular price', 'Sale price', 'Categories', 'Stock']]

# Display the cleaned dataset
df_relevant.head()

Unnamed: 0,Regular price,Sale price,Categories,Stock
0,19.99,19.99,0,0.0
1,19.99,19.99,0,0.0
2,14.99,14.99,0,0.0
3,9.99,9.99,3,0.0
4,9.99,9.99,3,0.0


In [4]:
# Model Building and Training
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Separate the predictors (X) from the value being predicted (y).
# NOTE(review): missing 'Sale price' values were filled from 'Regular price'
# during preprocessing, so a near-perfect score may largely reflect that copy
# rather than learned pricing behaviour — confirm before relying on the metrics.
target_column = 'Sale price'
X = df_relevant.drop(target_column, axis=1)
y = df_relevant[target_column]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Fit the gradient-boosted regressor on the training portion.
model = XGBRegressor(
    max_depth=5,
    learning_rate=0.1,
    n_estimators=100,
)
model.fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 0.4151504774806876
R-squared: 0.9927120919313736


In [6]:
# Dynamic Pricing Prediction
# Column order the model is given at prediction time.
feature_order = ['Regular price', 'Categories', 'Stock']

# A single hypothetical product described by example values.
new_product = pd.DataFrame({
    'Regular price': [25],  # example regular price
    'Categories': [2],      # example category code; it should match the encoding used for training
    'Stock': [100],         # example stock quantity
})

# Select the columns explicitly so their order is fixed before predicting.
new_product = new_product.loc[:, feature_order]

# Predict the price for the example product and report the first (only) value.
predicted_price = model.predict(new_product)
print(f"Predicted Dynamic Price: {predicted_price[0]}")


Predicted Dynamic Price: 19.989530563354492
