In [1]:
import pandas as pd 
import numpy as np 

In [2]:
# Load the sales fact table ("date" parsed as datetime) and the product dimension.
# NOTE(review): both paths are absolute and machine-specific; the notebook will
# only run where this exact directory layout exists.
sales_csv = "C:/Users/User/Desktop/dashboard/PROJECTS_NOT_ON_GITHUB/Retail/data/processed/fact_sales_daily.csv"
product_csv = "C:/Users/User/Desktop/dashboard/PROJECTS_NOT_ON_GITHUB/Retail/data/processed/dim_product.csv"

fact_sales = pd.read_csv(sales_csv, parse_dates=["date"])
dim_product = pd.read_csv(product_csv)

In [3]:
# Identify slow-moving products
# Mean of quantity_sold over each product's rows in fact_sales.
avg_units_by_product = fact_sales.groupby("product_id")["quantity_sold"].mean()
product_velocity = avg_units_by_product.rename("avg_daily_units").reset_index()

# A product is a slow mover when its average is strictly below the
# 40th percentile of all product averages.
velocity_cutoff = product_velocity["avg_daily_units"].quantile(0.4)
is_slow = product_velocity["avg_daily_units"] < velocity_cutoff
slow_movers = product_velocity.loc[is_slow, "product_id"]


In [4]:
# Promotion day selection
# Candidate days: every distinct date present in the sales fact table.
date_range = fact_sales["date"].unique()

# Use a dedicated, seeded generator instead of the global unseeded RNG so a
# fresh kernel (Restart & Run All) always selects the same promotion days.
# The seed matches the random_state=42 used for product sampling in this notebook.
day_rng = np.random.default_rng(42)

promo_days = day_rng.choice(
    date_range,
    size=int(len(date_range) * 0.25),  # promotions on 25% of days
    replace=False,  # each day is picked at most once
)


In [5]:
# Generate PROMOTION rows
promotion_rows = []

# The sample uses a fixed random_state, so it returns the same products on every
# call; draw it once instead of once per promo day.
# NOTE(review): as a consequence the same ~30% of slow movers is promoted on
# every promo day -- confirm this is intended rather than a fresh sample per day.
promo_products = slow_movers.sample(frac=0.3, random_state=42)

# Seeded generator so the discounts and promotion types are reproducible across
# kernel restarts (the global np.random state is unseeded in this notebook).
attr_rng = np.random.default_rng(42)

for date in promo_days:
    for product_id in promo_products:
        # Independent uniform draws per (date, product) pair.
        discount = attr_rng.choice([10, 15, 20, 25, 30])
        promo_type = attr_rng.choice(["Clearance", "Seasonal", "Flash"])

        # One record per promoted (date, product); collected as dicts so the
        # DataFrame can be built in a single call afterwards.
        promotion_rows.append({
            "date": date,
            "product_id": product_id,
            "discount_percentage": discount,
            "promotion_type": promo_type,
            "is_promotion": 1
        })


In [6]:
# Build the promotions frame from the collected records. The columns are listed
# explicitly so the frame keeps its schema even when promotion_rows is empty
# (no promo days or no sampled products); a column-less frame would make the
# later merge on ["date", "product_id"] fail with a KeyError.
fact_promotions = pd.DataFrame(
    promotion_rows,
    columns=[
        "date",
        "product_id",
        "discount_percentage",
        "promotion_type",
        "is_promotion",
    ],
)
# Ensure a datetime key even for an empty frame (where the column would
# otherwise be object dtype); this is a no-op when dates are already datetime.
fact_promotions["date"] = pd.to_datetime(fact_promotions["date"])


In [7]:
# Generate NON-PROMOTION rows
# Base grid: every distinct (date, product_id) pair that appears in sales.
all_product_dates = fact_sales.loc[:, ["date", "product_id"]].drop_duplicates()

# Left join keeps every pair from the grid; pairs without a generated promotion
# end up with NaN in the promotion columns.
fact_promotions = pd.merge(
    all_product_dates,
    fact_promotions,
    how="left",
    on=["date", "product_id"],
)

In [8]:
# Normalise dtypes and replace the NaNs left by the left join with explicit
# "no promotion" defaults (0 for the numeric columns, a label for the type).
fact_promotions = fact_promotions.assign(
    date=lambda frame: pd.to_datetime(frame["date"]),
    is_promotion=lambda frame: frame["is_promotion"].fillna(0).astype(int),
    discount_percentage=lambda frame: frame["discount_percentage"].fillna(0).astype(int),
    promotion_type=lambda frame: frame["promotion_type"].fillna("No Promotion"),
)


In [9]:
# Attach product attributes from the product dimension.
product_attrs = ["category", "brand"]

fact_promotions = (
    fact_promotions
    # Drop attributes left over from a previous run of this cell; without this,
    # re-running the cell merges them a second time and produces
    # category_x / category_y (and brand_x / brand_y) columns.
    .drop(columns=product_attrs, errors="ignore")
    .merge(
        dim_product[["product_id", *product_attrs]],
        on="product_id",
        how="left",
        # Raise if dim_product repeats a product_id; such duplicates would
        # otherwise silently multiply rows in the fact table.
        validate="many_to_one",
    )
)


In [10]:
# Human-readable label for the is_promotion indicator; values other than
# 0 or 1 are not in the mapping and would become NaN.
flag_labels = {0: "No Promotion", 1: "Promotion"}
fact_promotions["promotion_flag"] = fact_promotions["is_promotion"].map(flag_labels)


In [11]:
# Persist the promotions fact table as CSV, without the row index.
promotions_csv = "C:/Users/User/Desktop/dashboard/PROJECTS_NOT_ON_GITHUB/Retail/data/processed/fact_promotions.csv"
fact_promotions.to_csv(promotions_csv, index=False)


In [12]:
# Sanity check: number of distinct is_promotion values (NaN excluded).
fact_promotions.loc[:, "is_promotion"].nunique(dropna=True)

2