In [1]:
import sys
import os

project_root = os.path.abspath("..")
if project_root not in sys.path:
    sys.path.append(project_root)


In [2]:
import pandas as pd
import numpy as np


In [3]:
daily_kpis = pd.read_csv("../data/kpi/daily_kpis.csv", parse_dates=["date"])
incidents = pd.read_csv("../data/kpi/revenue_incidents.csv", parse_dates=["date"])
transactions = pd.read_csv("../data/processed/cleaned_transactions.csv", parse_dates=["InvoiceDate"])

daily_kpis.head(), incidents.head()


(        date   revenue  orders  customers  quantity         aov
 0 2010-12-01  46376.49     121         95     24215  383.276777
 1 2010-12-02  47316.53     137         99     31142  345.376131
 2 2010-12-03  23921.71      57         50     11839  419.679123
 3 2010-12-05  31771.60      87         75     16449  365.190805
 4 2010-12-06  31215.64      94         82     16291  332.081277,
         date    revenue  revenue_zscore incident_type
 0 2011-05-01    6973.66       -2.040549      NEGATIVE
 1 2011-05-12   59901.56        2.116806      POSITIVE
 2 2011-07-28   54198.37        2.072513      POSITIVE
 3 2011-12-09  184349.28        2.177567      POSITIVE)

In [4]:
incident_date = incidents.iloc[0]["date"]
incident_date


Timestamp('2011-05-01 00:00:00')

In [5]:
PRE_DAYS = 7
POST_DAYS = 1

before_start = incident_date - pd.Timedelta(days=PRE_DAYS)
before_end = incident_date - pd.Timedelta(days=1)

after_start = incident_date
after_end = incident_date + pd.Timedelta(days=POST_DAYS)


In [6]:
revenue_before = daily_kpis[
    (daily_kpis["date"] >= before_start) &
    (daily_kpis["date"] <= before_end)
]["revenue"].mean()

revenue_after = daily_kpis[
    (daily_kpis["date"] >= after_start) &
    (daily_kpis["date"] <= after_end)
]["revenue"].mean()

revenue_change = revenue_after - revenue_before

revenue_before, revenue_after, revenue_change


(np.float64(22166.966666666664),
 np.float64(6973.66),
 np.float64(-15193.306666666664))

In [7]:
transactions["date"] = transactions["InvoiceDate"].dt.date
transactions["date"] = pd.to_datetime(transactions["date"])

before_df = transactions[
    (transactions["date"] >= before_start) &
    (transactions["date"] <= before_end)
]

after_df = transactions[
    (transactions["date"] >= after_start) &
    (transactions["date"] <= after_end)
]


In [8]:
product_before = (
    before_df.groupby("StockCode")["Revenue"]
    .sum()
    .reset_index(name="revenue_before")
)

product_after = (
    after_df.groupby("StockCode")["Revenue"]
    .sum()
    .reset_index(name="revenue_after")
)

product_contrib = product_before.merge(
    product_after,
    on="StockCode",
    how="outer"
).fillna(0)

product_contrib["revenue_change"] = (
    product_contrib["revenue_after"] -
    product_contrib["revenue_before"]
)

product_contrib.sort_values(
    "revenue_change"
).head()


Unnamed: 0,StockCode,revenue_before,revenue_after,revenue_change
597,22423,3390.0,63.75,-3326.25
444,22084,1245.9,0.0,-1245.9
268,21556,933.3,0.0,-933.3
1239,85123A,907.5,35.4,-872.1
1041,47566,791.6,94.05,-697.55


In [9]:
product_contrib["contribution_pct"] = (
    product_contrib["revenue_change"] / revenue_change
) * 100

top_negative_products = product_contrib.sort_values(
    "revenue_change"
).head(10)

top_negative_products


Unnamed: 0,StockCode,revenue_before,revenue_after,revenue_change,contribution_pct
597,22423,3390.0,63.75,-3326.25,21.892864
444,22084,1245.9,0.0,-1245.9,8.200322
268,21556,933.3,0.0,-933.3,6.142837
1239,85123A,907.5,35.4,-872.1,5.740028
1041,47566,791.6,94.05,-697.55,4.591166
642,22499,654.1,0.0,-654.1,4.305185
1114,84077,563.52,0.0,-563.52,3.709002
445,22087,654.55,106.2,-548.35,3.609155
2,15036,544.92,19.92,-525.0,3.455469
726,22655,500.0,0.0,-500.0,3.290923


In [10]:
from src.root_cause import attribute_revenue_change

product_rc = attribute_revenue_change(
    before_df,
    after_df,
    group_col="StockCode"
)

product_rc.head()


Unnamed: 0,StockCode,before,after,change
597,22423,3390.0,63.75,-3326.25
444,22084,1245.9,0.0,-1245.9
268,21556,933.3,0.0,-933.3
1239,85123A,907.5,35.4,-872.1
1041,47566,791.6,94.05,-697.55


In [11]:
product_rc.to_csv(
    f"../data/kpi/product_root_cause_{incident_date.date()}.csv",
    index=False
)
