In [None]:
"""
Flood Report Street Frequency Analysis

Loads raw street names and raw flood reports from a CSV file, uses regex to normalize the text (lowercases it, removes
punctuation, and strips common street-suffix abbreviations), and tallies how often each street is mentioned in the reports
to help identify the most flood-prone areas in the data.
"""

import re
from collections import Counter
import pandas as pd

In [19]:
def normalize_data(data: str) -> str:
    """
    Normalizes a Street Name or Report String.
    - Lowercase
    - Remove Punctuation (anything that is not a word character or whitespace)
    - Strip Common Street Suffixes (ave, blvd, ct, dr, ln, pkwy, pl, rd, st)
    - Collapse Runs of Whitespace and Strip Leading/Trailing Whitespace

    Args:
    data (str): Input string. Missing values (None / NaN) are accepted, and
        non-string scalars are converted with str() before normalizing.

    Returns:
    str: Normalized string, or "" when the input is missing.
    """
    if pd.isna(data):
        return ""

    # Cells read from a CSV are not guaranteed to be strings (pandas may parse a
    # column as numeric), so coerce before calling string methods.
    text = str(data).lower()
    text = re.sub(r"[^\w\s]", "", text)
    # Suffixes are removed wherever they appear as a whole word, not only at the end.
    text = re.sub(r"\b(ave|blvd|ct|dr|ln|pkwy|pl|rd|st)\b", "", text)
    # Removing a word from the middle of the text leaves a doubled space behind;
    # split/join collapses every whitespace run to one space and trims both ends.
    return " ".join(text.split())

In [None]:
def count_street_mentions(csv_path: str, str_col: str, rep_col: str) -> pd.DataFrame:
    """
    Counts how many reports mention each normalized street.

    Every distinct normalized street name is searched for, as a whole word or
    phrase, in every normalized report. A report adds at most one to a street's
    tally, no matter how many times the street is named in it.

    Args:
    csv_path (str): Path to CSV file containing raw street and report columns.
    str_col (str): Name of the column holding the raw street names.
    rep_col (str): Name of the column holding the raw flood reports.

    Returns:
    pd.DataFrame: Table with columns "Street_Normalized", "Frequency" and "Street"
        (an original spelling of the street), sorted by "Frequency" in descending
        order. Streets that are never mentioned are not listed.

    Raises:
    KeyError: If str_col or rep_col is not a column of the CSV file.
    """
    # Load dataset
    df = pd.read_csv(csv_path)

    # Build dictionary: normalized -> original. When several raw names share one
    # normalized form, the last one encountered is the spelling that is kept.
    street_map = {
        normalize_data(street): street
        for street in df[str_col].dropna().unique()
    }

    # One pre-compiled whole-word pattern per distinct normalized street.
    # - Iterating the dictionary keys de-duplicates the street list, so a street
    #   repeated in the column is not tallied several times for a single report.
    # - The empty key is skipped: its pattern would be r"\b\b", which matches any
    #   report containing a word and would produce a meaningless row.
    patterns = {
        street: re.compile(rf"\b{re.escape(street)}\b")
        for street in street_map
        if street
    }

    # Normalize reports
    reports = df[rep_col].dropna().apply(normalize_data).tolist()

    # Count mentions: +1 for every street found at least once in the report
    counts = Counter()
    for report in reports:
        counts.update(
            street for street, pattern in patterns.items() if pattern.search(report)
        )

    # Convert results to DataFrame
    results = pd.DataFrame(list(counts.items()), columns=["Street_Normalized", "Frequency"])
    # A stable sort keeps tied streets in a reproducible order between runs.
    results = results.sort_values(
        by="Frequency", ascending=False, kind="mergesort"
    ).reset_index(drop=True)

    # Add back original names using dictionary
    results["Street"] = results["Street_Normalized"].map(street_map)

    return results

In [None]:
if __name__ == "__main__":
    # Demo run: rank the streets by how many flood reports mention them.
    results = count_street_mentions(
        csv_path="Luling_Demo.csv",
        str_col="LULING STREETS",
        rep_col="LULING REPORTS",
    )
    # Heading and table are written as two lines; the index column is omitted.
    print("Flood Event Mentions by Street:", results.to_string(index=False), sep="\n")

Flood Event Mentions by Street (Ranked):
Street_Normalized  Frequency                 Street
           barton         99             BARTON AVE
         monsanto         54           MONSANTO AVE
         lakewood         46            LAKEWOOD DR
             wade         42                WADE ST
            river         42               RIVER RD
          milling         35            MILLING AVE
              oak         28                 OAK LN
              ivy         28                 IVY LN
             post         28                POST DR
            davis         25               DAVIS DR
           mimosa         21             MIMOSA AVE
           kinler         20              KINLER ST
       willowdale         18        WILLOWDALE BLVD
        courville         17           COURVILLE DR
             west         15                WEST CT
       santa cruz         14          SANTA CRUZ CT
         maryland         14            MARYLAND DR
           desoto      