<a href="https://colab.research.google.com/github/andreydesousa92-byte/ML_models/blob/main/Intermitent_Sales_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setting up environment

In [None]:
!pip install ipython-sql sqlalchemy pandas
import pandas as pd

# Importing databases

In [None]:
import io

# Upload CSV File
from google.colab import files

# Opens the Colab upload widget; the result is indexed by uploaded file and
# holds each file's raw content.
uploaded = files.upload()

# A cancelled or empty upload would otherwise surface as a bare IndexError,
# so fail early with a message that says what is missing.
if not uploaded:
    raise ValueError("No file was uploaded; please select the sales orders CSV file.")

# Load CSV into DataFrame
# Only the first uploaded file is read; any additional files are ignored.
first_file_content = next(iter(uploaded.values()))
df_sales = pd.read_csv(io.BytesIO(first_file_content))

# Summary
df_sales.head()

# Transform
The first step is to take our sales orders database and transform it into features that will be useful for our model.
We use sales orders rather than billed sales because orders reflect the real demand requested by customers.

In [None]:
!pip install duckdb
import duckdb

duckdb.query("""
CREATE OR REPLACE TEMP VIEW salesFormat AS
WITH calendar AS (
    SELECT
        MIN(Date) AS Date,
        year AS sales_year,
        month AS sales_month
    FROM calendar
    where Date > '2024-01-01' AND Date < current_date
    group by
        year,
        month
),
parts_orgs AS (
    SELECT DISTINCT
        Part_Number,
        Pack_Size_Group_Desc,
        Sales_Organization,
        CONCAT(Sales_Organization, Part_Number) AS keyId
    FROM salesordermaster
    WHERE Order_Qty_BuOM > 0 -- AND Sales_Organization = 'UKS1'
),
sales AS (
    SELECT
        YEAR(Standard_Offer_Delivery_Date)  AS sales_year,
        MONTH(Standard_Offer_Delivery_Date) AS sales_month,
        Part_Number,
        Sales_Organization,
        CONCAT(Sales_Organization, Part_Number) AS keyId,
        SUM(Order_Qty_BuOM) AS Sales_EA,
        COUNT(*) AS Num_Sales_Orders,
        COUNT (DISTINCT Payer_Code) AS Num_Of_Customers
    FROM salesordermaster
    GROUP BY
        YEAR(Standard_Offer_Delivery_Date),
        MONTH(Standard_Offer_Delivery_Date),
        Part_Number,
        Sales_Organization
    HAVING SUM(Order_Qty_BuOM) > 0
)
SELECT
    g.Date,
    g.sales_year,
    g.sales_month,
    g.keyId,
    COALESCE(s.Sales_EA, 0) AS Sales_EA,

    -- CAPTURES NEXT MONTH SALES
    LEAD(COALESCE(s.Sales_EA, 0)) OVER (
        PARTITION BY g.keyId
        ORDER BY g.sales_year, g.sales_month
    ) AS Next_Month_Sales_EA,

    -- BINARY TO INDICATE IF NEXT MONTH THERE WILL BE SALES OR NOT
    CASE
        WHEN
        LEAD(COALESCE(s.Sales_EA, 0)) OVER (
            PARTITION BY g.keyId
            ORDER BY g.sales_year, g.sales_month
        ) > 0
        THEN 1
        ELSE 0
    END AS Have_Sales_NMonth,

    -- SIN AND COS ARE RELEVANT FOR SEASONALITY
    SIN(2 * PI() * g.sales_month / 12) AS month_sin,
    COS(2 * PI() * g.sales_month / 12) AS month_cos,

    -- SALES LAGS
    LAG(COALESCE(s.Sales_EA, 0), 1) OVER (
        PARTITION BY g.keyId
        ORDER BY g.sales_year, g.sales_month
    ) AS Sales_Lag_1,
    LAG(COALESCE(s.Sales_EA, 0), 2) OVER (
        PARTITION BY g.keyId
        ORDER BY g.sales_year, g.sales_month
    ) AS Sales_Lag_2,
    LAG(COALESCE(s.Sales_EA, 0), 3) OVER (
        PARTITION BY g.keyId
        ORDER BY g.sales_year, g.sales_month
    ) AS Sales_Lag_3,

    -- ROLLING SUMS
    SUM(COALESCE(s.Sales_EA, 0)) OVER (
        PARTITION BY g.keyId
        ORDER BY g.sales_year, g.sales_month
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS Sum_Last_3_Months,

    -- MONTHS SINCE LAST SALES
    CASE
        WHEN MAX(CASE WHEN COALESCE(s.Sales_EA, 0) > 0 THEN (g.sales_year * 12 + g.sales_month) END)
             OVER (PARTITION BY g.keyId ORDER BY g.sales_year, g.sales_month
                   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) IS NULL
        THEN 999
        ELSE (g.sales_year * 12 + g.sales_month) -
             MAX(CASE WHEN COALESCE(s.Sales_EA, 0) > 0 THEN (g.sales_year * 12 + g.sales_month) END)
                 OVER (PARTITION BY g.keyId ORDER BY g.sales_year, g.sales_month
                       ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
    END AS months_since_last_sale,

    -- L2M WITHOUT SALES RULE
    CASE
        WHEN
            LAG(COALESCE(s.Sales_EA, 0), 1) OVER (
                PARTITION BY g.keyId
                ORDER BY g.sales_year, g.sales_month
            ) = 0 AND
            LAG(COALESCE(s.Sales_EA, 0), 2) OVER (
                PARTITION BY g.keyId
                ORDER BY g.sales_year, g.sales_month
            ) =0
        THEN 1
        ELSE 0
    END AS no_sales_L2M,

    -- SALES TREND
    COALESCE(LAG(s.Sales_EA,1) OVER (PARTITION BY g.keyId ORDER BY g.sales_year, g.sales_month),0) -
    COALESCE(LAG(s.Sales_EA,2) OVER (PARTITION BY g.keyId ORDER BY g.sales_year, g.sales_month),0)
    AS Lag_Trend_1_2,

    COALESCE(LAG(s.Sales_EA,2) OVER (PARTITION BY g.keyId ORDER BY g.sales_year, g.sales_month),0) -
    COALESCE(LAG(s.Sales_EA,3) OVER (PARTITION BY g.keyId ORDER BY g.sales_year, g.sales_month),0)
    AS Lag_Trend_2_3,

    -- PERCENTAGE OF MONTHS WITH SALES
    SUM(
        CASE WHEN
            COALESCE(s.Sales_EA,0) > 0
        THEN 1 ELSE 0 END
    ) OVER (PARTITION BY g.keyId)
    /
    COUNT(*) OVER (PARTITION BY g.keyId)
    AS Product_Activity_Ratio,

    -- STANDARD DEVIATION OF LAST 6 MONTHS
    STDDEV(COALESCE(s.Sales_EA,0)) OVER (
        PARTITION BY g.keyId
        ORDER BY g.sales_year, g.sales_month
        ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
    ) AS Sales_Std_6M,

    -- 6M ACITIVY RATE
    SUM(
        CASE WHEN COALESCE(s.Sales_EA,0) > 0 THEN 1 ELSE 0 END
    ) OVER (
        PARTITION BY g.keyId
        ORDER BY g.sales_year, g.sales_month
        ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
    ) / 6.0 AS Activity_Rate_6M,

    -- 3M ACITIVY RATE
    SUM(
        CASE WHEN COALESCE(s.Sales_EA,0) > 0 THEN 1 ELSE 0 END
    ) OVER (
        PARTITION BY g.keyId
        ORDER BY g.sales_year, g.sales_month
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) / 3.0 AS Activity_Rate_3M,

    -- RECENCY INTERACTION
    Product_Activity_Ratio * months_since_last_sale AS Activity_Recency_Interaction,

    -- SEASONALITY INTERACTION
    month_sin * Product_Activity_Ratio AS Season_Activity_Interaction,

    -- NUMBER OF CUSTOMERS ON LAST 6M
    SUM(COALESCE(s.Num_Of_Customers,0)) OVER (
        PARTITION BY g.keyId
        ORDER BY g.sales_year, g.sales_month
        ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
    ) AS Num_Customers_L6M,

    -- NUMBER OF SALES ORDERS
    SUM(COALESCE(s.Num_Sales_Orders,0)) OVER (
        PARTITION BY g.keyId
        ORDER BY g.sales_year, g.sales_month
        ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
    ) AS Num_SalesOrders_L6M,

    -- LOW MONTHS SALES
    CASE
        WHEN
            g.sales_month = '8' OR g.sales_month = '12'
        THEN 1 ELSE 0
    END AS Low_Sales_Months,

    -- BULK SALE
    CASE
        WHEN
            g.Pack_Size_Group_Desc='Bulk'
        THEN 1 ELSE 0
    END AS Bulk_Sale

FROM (
    SELECT
        c.Date,
        c.sales_year,
        c.sales_month,
        p.keyId,
        p.Pack_Size_Group_Desc
    FROM calendar c
    CROSS JOIN parts_orgs p
) g
LEFT JOIN sales s
    ON g.sales_year = s.sales_year
   AND g.sales_month = s.sales_month
   AND g.keyId = s.keyId
ORDER BY
    g.keyId,
    g.sales_year,
    g.sales_month;
""")