In [1]:
import sys
import os

# Put the notebook's parent directory on the import path (once), so modules
# that live there can be imported from this notebook.
project_root = os.path.abspath("..")
already_listed = project_root in sys.path
if not already_listed:
    sys.path.append(project_root)


In [3]:
import pandas as pd
import numpy as np
# Load the three KPI tables the same way: each CSV has a `date` column that is
# parsed into datetimes on read.
daily_kpis, incidents, forecast = (
    pd.read_csv(csv_path, parse_dates=["date"])
    for csv_path in (
        "../data/kpi/daily_kpis.csv",
        "../data/kpi/revenue_incidents.csv",
        "../data/kpi/revenue_forecast.csv",
    )
)

# Preview the first rows of every table as the cell's output.
daily_kpis.head(), incidents.head(), forecast.head()


(        date   revenue  orders  customers  quantity         aov
 0 2010-12-01  46376.49     121         95     24215  383.276777
 1 2010-12-02  47316.53     137         99     31142  345.376131
 2 2010-12-03  23921.71      57         50     11839  419.679123
 3 2010-12-05  31771.60      87         75     16449  365.190805
 4 2010-12-06  31215.64      94         82     16291  332.081277,
         date    revenue  revenue_zscore incident_type
 0 2011-05-01    6973.66       -2.040549      NEGATIVE
 1 2011-05-12   59901.56        2.116806      POSITIVE
 2 2011-07-28   54198.37        2.072513      POSITIVE
 3 2011-12-09  184349.28        2.177567      POSITIVE,
         date  forecast_revenue    upper_bound   lower_bound  risk_flag
 0 2011-12-10      67665.542857  174834.437389 -39503.351674       True
 1 2011-12-11      67665.542857  174834.437389 -39503.351674       True
 2 2011-12-12      67665.542857  174834.437389 -39503.351674       True
 3 2011-12-13      67665.542857  174834.43738

In [4]:
def classify_risk(z_score, forecast_risk, high_threshold=3, medium_threshold=2):
    """Translate a revenue z-score and a forecast risk flag into a risk label.

    Parameters
    ----------
    z_score : number
        Signed z-score of the observation; only its magnitude is used.
    forecast_risk : bool
        When truthy, the result is "HIGH" regardless of the z-score.
    high_threshold : number, default 3
        Magnitudes strictly greater than this are labelled "HIGH".
    medium_threshold : number, default 2
        Magnitudes strictly greater than this (but not above
        ``high_threshold``) are labelled "MEDIUM".

    Returns
    -------
    str
        One of "HIGH", "MEDIUM" or "LOW".

    Notes
    -----
    Both comparisons are strict, so a magnitude exactly equal to a threshold
    falls into the lower band. A NaN z-score fails both comparisons and is
    therefore labelled "LOW" unless ``forecast_risk`` is truthy.
    """
    magnitude = abs(z_score)

    if magnitude > high_threshold or forecast_risk:
        return "HIGH"
    if magnitude > medium_threshold:
        return "MEDIUM"
    return "LOW"


In [5]:
def generate_insight(row, baseline):
    """Build a short narrative sentence describing one revenue incident.

    Parameters
    ----------
    row : mapping
        Must provide ``"revenue_zscore"`` and ``"revenue"`` (numbers) and
        ``"date"`` (an object exposing ``.date()``).
    baseline : number
        Reference revenue level; a second sentence is added when the row's
        revenue is strictly below it.

    Returns
    -------
    str
        The insight text. Every sentence, including the last, ends with a
        trailing space.
    """
    # A negative z-score reads as a decrease; zero or positive as an increase.
    went_down = row["revenue_zscore"] < 0
    if went_down:
        direction = "decrease"
    else:
        direction = "increase"

    sentences = [
        f"Revenue showed an abnormal {direction} on {row['date'].date()} "
        f"with a z-score of {row['revenue_zscore']:.2f}. "
    ]

    if row["revenue"] < baseline:
        sentences.append("Revenue fell below historical baseline levels. ")

    return "".join(sentences)


In [6]:
# Historical reference level: mean daily revenue across the KPI table.
baseline_revenue = daily_kpis["revenue"].mean()

# Forecast risk flag: true when the lowest forecast lower bound is more than
# 10% under the baseline. It does not depend on the incident being processed,
# so it is computed once here instead of on every loop iteration.
# NOTE(review): because this flag is global, a single pessimistic forecast
# bound marks every incident below as "HIGH" (as the displayed output shows) —
# confirm that this is the intended rule.
future_risk = forecast["lower_bound"].min() < baseline_revenue * 0.9

insight_rows = []

# One record per incident: its date, a risk label and a narrative sentence.
for _, row in incidents.iterrows():
    insight_rows.append({
        "date": row["date"],
        "risk_level": classify_risk(row["revenue_zscore"], future_risk),
        "insight": generate_insight(row, baseline_revenue)
    })

# Naming the columns explicitly keeps the table's schema stable even when
# `incidents` is empty and no records were collected.
insights_df = pd.DataFrame(insight_rows, columns=["date", "risk_level", "insight"])
insights_df


Unnamed: 0,date,risk_level,insight
0,2011-05-01,HIGH,Revenue showed an abnormal decrease on 2011-05...
1,2011-05-12,HIGH,Revenue showed an abnormal increase on 2011-05...
2,2011-07-28,HIGH,Revenue showed an abnormal increase on 2011-07...
3,2011-12-09,HIGH,Revenue showed an abnormal increase on 2011-12...


In [7]:
# NOTE(review): this import rebinds `classify_risk` and `generate_insight`, so
# every cell executed after this one uses the versions from
# `src.insight_engine` rather than the functions defined earlier in this
# notebook. Presumably the module holds the same logic — confirm.
from src.insight_engine import classify_risk, generate_insight

# Smoke-check the imported `generate_insight` on the first incident row, using
# the baseline revenue computed earlier in the notebook.
test_row = incidents.iloc[0]
test_insight = generate_insight(test_row, baseline_revenue)
# Bare expression so the generated text is shown as the cell output.
test_insight


'Revenue showed an abnormal decrease on 2011-05-01 with a z-score of -2.04. Revenue fell below historical baseline levels. '

In [8]:
# Persist the insight table as CSV, leaving out the DataFrame index column.
insights_df.to_csv("../data/kpi/decision_insights.csv", index=False)
