In [1]:
import pandas as pd
import psycopg2
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [2]:
# Database Configuration
DB_CONFIG = {
    'host': 'localhost',
    'database': 'SQLTEST',
    'user': 'postgres',
    'password': 'Admin'
}

# Step 1: Establish a database connection
connection = psycopg2.connect(**DB_CONFIG)


In [3]:

# Step 2: Define the SQL query
sql_query = """
WITH last_order_date AS (
    SELECT 
        MAX(created_at) AS max_date
    FROM 
        public.orders
)
SELECT 
    p.id AS product_id,
    pn.name AS product_name,
    c.name AS category_name,
    pvs.stock AS current_stock,
    pvp.price AS current_price,
    COALESCE(SUM(o.total_amount), 0) AS total_sales_last_week,
    COALESCE(AVG(pr.rating), 0) AS average_rating,
    COUNT(DISTINCT o.id) AS total_orders_last_week,
    (SELECT COUNT(*) 
     FROM public.orders o2 
     WHERE o2.created_at >= (SELECT max_date FROM last_order_date) - INTERVAL '30 days' 
     AND o2.id IN (SELECT pr2.order_id FROM public.product_ratings pr2 WHERE pr2.product_id = p.id)) AS total_orders_last_month,
    (SELECT COUNT(*) 
     FROM public.orders o3 
     WHERE o3.created_at >= (SELECT max_date FROM last_order_date) - INTERVAL '7 days' 
     AND o3.id IN (SELECT pr3.order_id FROM public.product_ratings pr3 WHERE pr3.product_id = p.id)) AS total_orders_last_7_days,
    pv.weight AS product_weight,
    pv.status AS variation_status,
    -- Assuming you have a way to identify if a product sold out in the next week
    CASE WHEN pvs.stock = 0 THEN 1 ELSE 0 END AS sold_out_next_week
FROM 
    public.products p
JOIN 
    public.product_names pn ON p.name_id = pn.id
JOIN 
    public.categories c ON pn.category_id = c.id
JOIN 
    public.product_variations pv ON p.id = pv.product_id
JOIN 
    public.product_variation_stocks pvs ON pv.id = pvs.product_variation_id
LEFT JOIN 
    public.product_variation_prices pvp ON pv.id = pvp.product_variation_id
LEFT JOIN 
    public.product_ratings pr ON pr.product_id = p.id
LEFT JOIN 
    public.orders o ON o.id = pr.order_id AND o.created_at >= (SELECT max_date FROM last_order_date) - INTERVAL '7 days'
GROUP BY 
    p.id, pn.name, c.name, pvs.stock, pvp.price, pv.weight, pv.status;
"""

In [4]:

# Step 3: Load dataset using the SQL query
data = pd.read_sql(sql_query, connection)


  data = pd.read_sql(sql_query, connection)


In [5]:
# Step 4: Prepare features and target
X = data[["current_stock", "current_price", "total_sales_last_week", "average_rating", "product_weight"]]
y = data["sold_out_next_week"]  # Binary target variable


In [6]:
# Step 5: Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
# Step 6: Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
# Step 7: Train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

In [9]:
# Step 8: Make predictions
y_pred = model.predict(X_test_scaled)

In [11]:
print(y_pred)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0]


In [10]:
# Step 9: Evaluate the model
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[47]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        47

    accuracy                           1.00        47
   macro avg       1.00      1.00      1.00        47
weighted avg       1.00      1.00      1.00        47

