In [6]:
import os

os.makedirs("data/raw", exist_ok=True)
os.makedirs("data/scenarios", exist_ok=True)


In [7]:
from google.colab import files
uploaded = files.upload()  # Burada bilgisayardan train.csv ve store.csv se√ß

import shutil

shutil.move("train.csv", "data/raw/train.csv")
shutil.move("store.csv", "data/raw/store.csv")


Saving store.csv to store.csv
Saving train.csv to train.csv


'data/raw/store.csv'

In [10]:
!pip install pandas numpy statsmodels




In [11]:
%%writefile demand_model.py
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

class DemandModel:
    """
    Log-elasticity demand model for retail sales forecasting.
    Designed for Rossmann-style data: train.csv + store.csv.
    """

    def __init__(self):
        self.model = None
        self.fitted_ = False

    def preprocess(self, df: pd.DataFrame) -> pd.DataFrame:
        df = df.copy()
        df["Date"] = pd.to_datetime(df["Date"])

        if "Open" in df.columns:
            df = df[df["Open"] == 1]

        df = df[df["Sales"] > 0]
        df = df[df["Customers"] > 0]

        df["Footfall"] = df["Customers"]
        df["ABV"] = df["Sales"] / df["Footfall"]
        df["Median_ABV"] = df.groupby("Store")["ABV"].transform("median")
        df["Median_ABV"].replace(0, np.nan, inplace=True)
        df["Median_ABV"].fillna(df["Median_ABV"].median(), inplace=True)
        df["PriceIndex"] = df["ABV"] / df["Median_ABV"]

        eps = 1e-6
        df["Log_Sales"] = np.log(df["Sales"] + 1.0)
        df["Log_PriceIndex"] = np.log(df["PriceIndex"].clip(lower=eps))
        df["Log_Footfall"] = np.log(df["Footfall"] + 1.0)

        df["Availability"] = 1.0
        if "Promo" not in df.columns:
            df["Promo"] = 0

        return df

    def fit(self, df: pd.DataFrame):
        df_prep = self.preprocess(df)

        formula = """
            Log_Sales ~ Log_PriceIndex + Promo + Log_Footfall + Availability
        """

        self.model = smf.ols(formula=formula, data=df_prep).fit()
        self.fitted_ = True
        return df_prep, self.model

    def predict_base(self, df_prep: pd.DataFrame) -> pd.DataFrame:
        if not self.fitted_:
            raise RuntimeError("Model is not fitted. Call .fit() first.")

        df = df_prep.copy()
        df["Log_Sales_Pred"] = self.model.predict(df)
        df["Sales_Pred"] = np.exp(df["Log_Sales_Pred"]) - 1.0
        return df

    def summary(self):
        if not self.fitted_:
            raise RuntimeError("Model is not fitted.")
        return self.model.summary()


Writing demand_model.py


In [12]:
%%writefile scenario_simulator.py
import pandas as pd
import numpy as np
from typing import Optional
from statsmodels.regression.linear_model import RegressionResultsWrapper


def simulate_scenario(
    df_prep: pd.DataFrame,
    model: RegressionResultsWrapper,
    price_change: float = 0.0,
    footfall_change: float = 0.0,
    promo_override: Optional[int] = None,
) -> pd.DataFrame:
    """
    Run a demand scenario on preprocessed data.

    Parameters
    ----------
    df_prep : pd.DataFrame
        Preprocessed DataFrame from DemandModel.preprocess().
    model : statsmodels RegressionResultsWrapper
        Fitted model.
    price_change : float
        Relative change in PriceIndex.
    footfall_change : float
        Relative change in Footfall.
    promo_override : 0, 1 or None
        Override Promo column if not None.

    Returns
    -------
    scen : pd.DataFrame
        Scenario results with predicted sales & revenue.
    """

    scen = df_prep.copy()

    # Price scenario
    scen["PriceIndex_Scn"] = scen["PriceIndex"] * (1.0 + price_change)
    scen["PriceIndex_Scn"] = scen["PriceIndex_Scn"].clip(lower=1e-6)
    scen["Log_PriceIndex"] = np.log(scen["PriceIndex_Scn"])

    # Footfall scenario
    scen["Footfall_Scn"] = scen["Footfall"] * (1.0 + footfall_change)
    scen["Footfall_Scn"] = scen["Footfall_Scn"].clip(lower=0.0)
    scen["Log_Footfall"] = np.log(scen["Footfall_Scn"] + 1.0)

    # Promo scenario
    if promo_override is not None:
        scen["Promo_Scn"] = promo_override
    else:
        scen["Promo_Scn"] = scen["Promo"]

    scen["Promo"] = scen["Promo_Scn"]

    # Predict for scenario
    scen["Log_Sales_Pred_Scn"] = model.predict(scen)
    scen["Sales_Pred_Scn"] = np.exp(scen["Log_Sales_Pred_Scn"]) - 1.0
    scen["Revenue_Scn"] = scen["Sales_Pred_Scn"]

    return scen


Writing scenario_simulator.py


In [13]:
%%writefile scenario_generator.py
import pandas as pd
import numpy as np
from pathlib import Path

from demand_model import DemandModel
from scenario_simulator import simulate_scenario


def main():
    # Paths
    data_raw_dir = Path("data/raw")
    scenarios_dir = Path("data/scenarios")
    scenarios_dir.mkdir(parents=True, exist_ok=True)

    train_path = data_raw_dir / "train.csv"
    store_path = data_raw_dir / "store.csv"
    out_path = scenarios_dir / "model2_scenarios.csv"

    # Load
    print("Loading dataset...")
    # DtypeWarning'dan kurtulmak i√ßin:
    train = pd.read_csv(train_path, low_memory=False)
    store = pd.read_csv(store_path)
    df = train.merge(store, on="Store", how="left")

    print(f"Base shape: {df.shape}")

    # üîπ HIZ ƒ∞√áƒ∞N: T√úM VERƒ∞ YERƒ∞NE SADECE 50.000 SATIR
    if len(df) > 50000:
        df = df.sample(n=50000, random_state=0)
    print(f"Sampled shape: {df.shape}")

    # Train model
    dm = DemandModel()
    print("Training Model 2 (Log-Elasticity)...")
    df_prep, model = dm.fit(df)
    print("Model fitted.")

    # ‚úÖ K√ú√á√úK SCENARIO GRID (TOPLAM 6 SENARYO)
    price_changes = np.array([-0.10, 0.00, 0.10])   # -10%, 0, +10
    footfall_changes = np.array([0.0])              # footfall sabit
    promo_options = [0, 1]                          # promo off / on

    scenario_rows = []
    scenario_id = 1

    print("Generating scenario permutations...")

    for pc in price_changes:
        for fc in footfall_changes:
            for po in promo_options:
                print(f"Scenario {scenario_id}: PC={pc}, FC={fc}, Promo={po}")

                scen_df = simulate_scenario(
                    df_prep=df_prep,
                    model=model,
                    price_change=pc,
                    footfall_change=fc,
                    promo_override=po,
                )

                scen_df["ScenarioID"] = scenario_id
                scen_df["PriceChange"] = pc
                scen_df["FootfallChange"] = fc
                scen_df["PromoOverride"] = po

                scenario_rows.append(
                    scen_df[
                        [
                            "ScenarioID",
                            "Date",
                            "Store",
                            "PriceChange",
                            "FootfallChange",
                            "PromoOverride",
                            "Sales_Pred_Scn",
                            "Revenue_Scn",
                        ]
                    ]
                )

                scenario_id += 1

    full_scenarios = pd.concat(scenario_rows, ignore_index=True)

    full_scenarios = full_scenarios.rename(
        columns={
            "Sales_Pred_Scn": "Sales_Pred",
            "Revenue_Scn": "Revenue_Pred",
        }
    )

    print(full_scenarios.head())
    print(f"Final scenarios shape: {full_scenarios.shape}")

    print("Saving final CSV...")
    full_scenarios.to_csv(out_path, index=False)
    print(f"Saved: {out_path}")


if __name__ == "__main__":
    main()


Writing scenario_generator.py


In [14]:
!python scenario_generator.py


Loading dataset...
Base shape: (1017209, 18)
Sampled shape: (50000, 18)
Training Model 2 (Log-Elasticity)...
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Median_ABV"].replace(0, np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Median_ABV"].fillna(df["Median_ABV"].median(), inplace=True)
Model fi

In [15]:
from google.colab import files
files.download("data/scenarios/model2_scenarios.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>