# In [None]:
"""
data_preprocessing.py
----------------------
Reads traffic.csv, cleans, and prepares data for analysis.
"""

import pandas as pd
import numpy as np


def load_and_clean_data(filepath: str, *, z_threshold: float = 3.0) -> pd.DataFrame:
    """
    Load traffic data and perform cleaning.

    Steps, in order: parse ``timestamp`` (unparseable values become NaT),
    drop rows missing ``timestamp`` or ``location_id``, fill missing numeric
    values with the column median, drop duplicate rows, then remove rows
    whose z-score in any numeric column is ``z_threshold`` or more.

    Args:
        filepath (str): Path to traffic.csv
        z_threshold (float): Rows whose absolute z-score in a numeric column
            is at or above this value are removed. Defaults to 3.0.

    Returns:
        pd.DataFrame: Cleaned traffic dataset
    """
    # Load CSV
    df = pd.read_csv(filepath)

    # Convert timestamp to datetime
    if 'timestamp' in df.columns:
        df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')

    # Drop rows with missing timestamps or location_id. Only the key columns
    # that actually exist are used, so a file lacking one of them does not
    # fail with a KeyError (consistent with the optional-timestamp check above).
    key_cols = [col for col in ('timestamp', 'location_id') if col in df.columns]
    if key_cols:
        df = df.dropna(subset=key_cols)

    # Work on an explicit copy so the column assignment below never writes
    # into a view of an intermediate frame.
    df = df.copy()

    # Fill missing numeric values with median
    num_cols = df.select_dtypes(include=np.number).columns
    if len(num_cols) > 0:
        df[num_cols] = df[num_cols].fillna(df[num_cols].median())

    # Remove duplicates
    df = df.drop_duplicates()

    # Remove outliers column by column. Each column's mean/std is computed on
    # the rows that survived the previous columns' filters.
    # NOTE(review): every numeric column is filtered, which would include
    # location_id if it is stored as a number - confirm that is intended.
    for col in num_cols:
        std = df[col].std()
        # A constant column (std == 0), a single remaining row or an all-NaN
        # column (std is NaN) has no meaningful z-score; the division would
        # yield NaN and the comparison would silently drop every row.
        if pd.isna(std) or std == 0:
            continue
        z_scores = ((df[col] - df[col].mean()) / std).abs()
        df = df[z_scores < z_threshold]

    print(f"Data cleaned: {len(df)} rows, {len(df.columns)} columns.")
    return df


if __name__ == "__main__":
    # Script entry point: clean the raw traffic file and save the result.
    from pathlib import Path

    output_path = Path("data/processed/traffic_cleaned.csv")
    # to_csv does not create missing directories, so make sure the target
    # folder exists before writing.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    data = load_and_clean_data("data/raw/traffic.csv")
    data.to_csv(output_path, index=False)
    print("✅ Cleaned data saved to data/processed/traffic_cleaned.csv")


# In [None]:
"""
feature_prioritization.py
--------------------------
Scores features using RICE and classifies them using MoSCoW.
"""

import pandas as pd


def calculate_rice_score(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate RICE score for features.
    Expects columns: 'Feature', 'Reach', 'Impact', 'Confidence', 'Effort'

    RICE = (Reach * Impact * Confidence) / Effort

    The input frame is left untouched; a new frame with an added 'RICE'
    column, sorted from highest to lowest score, is returned.

    Args:
        df (pd.DataFrame): Feature table with the columns listed above.

    Returns:
        pd.DataFrame: Copy of ``df`` with a 'RICE' column, sorted descending.

    Raises:
        ValueError: If any 'Effort' value is zero or negative, which would
            produce an infinite or sign-flipped score.
    """
    # Dividing by a zero effort yields inf, which would sort to the top and
    # silently outrank every real feature; reject it explicitly instead.
    if (df['Effort'] <= 0).any():
        raise ValueError("Effort must be greater than zero to compute a RICE score.")

    # assign() returns a new DataFrame, so the caller's frame is not mutated.
    scored = df.assign(
        RICE=(df['Reach'] * df['Impact'] * df['Confidence']) / df['Effort']
    )
    return scored.sort_values(by='RICE', ascending=False)


def assign_moscow_priority(rice_score: float) -> str:
    """
    Map a RICE score onto its MoSCoW bucket.

    Scores of 80 and above are "Must Have", 50 and above "Should Have",
    20 and above "Could Have"; anything else is "Won't Have".
    """
    # Tiers are ordered from the highest floor down; the first floor the
    # score reaches determines the label.
    tiers = (
        (80, "Must Have"),
        (50, "Should Have"),
        (20, "Could Have"),
    )
    for floor, label in tiers:
        if rice_score >= floor:
            return label
    return "Won't Have"


if __name__ == "__main__":
    # Script entry point: score a small example backlog, label each feature
    # with its MoSCoW priority, print the table and save it as CSV.
    from pathlib import Path

    # Example feature scoring dataset
    data = {
        "Feature": ["GPS Tracking", "Dynamic Ticketing", "Traffic Forecasting", "Modal Shift Campaign"],
        "Reach": [90, 85, 70, 60],
        "Impact": [5, 4, 5, 3],
        "Confidence": [0.9, 0.8, 0.85, 0.7],
        "Effort": [2, 3, 5, 2]
    }

    df = pd.DataFrame(data)

    # Calculate RICE
    df = calculate_rice_score(df)

    # Assign MoSCoW
    df['MoSCoW'] = df['RICE'].apply(assign_moscow_priority)

    print("📊 Feature Prioritization:")
    print(df[['Feature', 'RICE', 'MoSCoW']])

    # Save results. to_csv does not create missing directories, so make sure
    # the output folder exists first.
    output_path = Path("results/feature_prioritization.csv")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    print("✅ Results saved to results/feature_prioritization.csv")
