In [None]:
# scripts/transform.py

import pandas as pd
from extract import extract_data

def transform_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean and enrich an orders DataFrame.

    Steps, in order:
    - Parses "Order Date" and "Ship Date" with ``pd.to_datetime``.
    - Adds "Profit Margin" as ``Profit / Sales``. Where ``Sales`` is zero
      the margin is left missing instead of becoming +/-inf.
    - Drops every row that has a missing value in any column.

    The frame is modified in place and the same object is returned.

    Args:
        df: Frame containing the "Order Date", "Ship Date", "Profit" and
            "Sales" columns.

    Returns:
        The same ``df`` object, transformed.

    Raises:
        KeyError: If one of the columns listed above is absent.
        Any error raised by ``pd.to_datetime`` for unparseable dates is
        propagated unchanged.
    """
    print("🔄 Transforming data...")

    # Convert date columns
    for date_column in ("Order Date", "Ship Date"):
        df[date_column] = pd.to_datetime(df[date_column])

    # Calculate profit margin.
    # A zero Sales value makes the ratio undefined: 0/0 already yields NaN,
    # but x/0 yields +/-inf, which dropna() does not treat as missing.
    # Masking on Sales != 0 makes both cases NaN so they are dropped alike.
    sales = df["Sales"]
    df["Profit Margin"] = (df["Profit"] / sales).where(sales != 0)

    # Drop rows with missing values (in place, so callers holding a
    # reference to the original frame see the cleaned data too).
    df.dropna(inplace=True)

    print("✅ Data transformation complete.")
    return df

if __name__ == "__main__":
    # Local import: only the script entry point needs it.
    import os

    # Step 1: Extract data from raw CSV
    df = extract_data("data/superstore.csv")

    # NOTE(review): a None result presumably signals a failed extraction;
    # the script then ends without writing anything — confirm against
    # extract_data.
    if df is not None:
        # Step 2: Transform the data
        transformed_df = transform_data(df)

        # Step 3: Save to standardized output location
        output_path = "output/exports/transformed_superstore.csv"
        # to_csv does not create missing parent directories, so make sure
        # the export folder exists (e.g. on a fresh checkout).
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        transformed_df.to_csv(output_path, index=False)
        print(f"📁 Transformed data saved to: {output_path}")
