In [47]:
import re
import pandas as pd
# Read the raw NOAA billion-dollar-disasters CSV into a DataFrame.
# NOTE(review): this is an absolute, user-specific path, so the notebook only
# runs unchanged on the machine that has this directory layout.
data = pd.read_csv(
    filepath_or_buffer='/Users/acsoteldo/Desktop/datasets/Data Project 12 Dec 2025/data/NOAABillionDollarDisasters_dataset.csv',
)

In [48]:
# Quick look at the raw table: first rows, schema / non-null counts, and
# summary statistics of the numeric columns.
# NOTE(review): a cell only renders its last expression, so the frame returned
# by `data.head()` is computed and then discarded; `data.info()` prints its
# report directly, and `data.describe()` is what gets displayed.
data.head()
data.info()
data.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 403 entries, 0 to 402
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Name               403 non-null    object 
 1   Disaster           403 non-null    object 
 2   Begin Date         403 non-null    int64  
 3   End Date           403 non-null    int64  
 4   CPI-Adjusted Cost  403 non-null    float64
 5   Unadjusted Cost    403 non-null    float64
 6   Deaths             403 non-null    int64  
dtypes: float64(2), int64(3), object(2)
memory usage: 22.2+ KB


Unnamed: 0,Begin Date,End Date,CPI-Adjusted Cost,Unadjusted Cost,Deaths
count,403.0,403.0,403.0,403.0,403.0
mean,20100850.0,20101030.0,7237.871216,5111.904963,41.980149
std,120975.7,120870.1,18086.543452,13540.309989,195.234594
min,19800410.0,19800420.0,1063.8,400.0,0.0
25%,20020850.0,20021020.0,1627.0,1104.75,0.5
50%,20130910.0,20131120.0,2456.9,1738.5,5.0
75%,20200760.0,20200810.0,4923.2,3235.65,25.0
max,20241010.0,20241230.0,201297.5,125029.5,2981.0


In [49]:
# Remove rows with missing values, then exact duplicate rows.
# `dropna()` and `drop_duplicates()` both return a NEW DataFrame and leave the
# original untouched, so the result has to be assigned back; otherwise every
# later cell keeps working on the uncleaned table.
data = data.dropna().drop_duplicates()

# Show the cleaned frame as the cell output.
data

Unnamed: 0,Name,Disaster,Begin Date,End Date,CPI-Adjusted Cost,Unadjusted Cost,Deaths
0,Southern Severe Storms and Flooding (April 1980),Flooding,19800410,19800417,2749.4,706.8,7
1,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2236.2,590.0,13
2,Central/Eastern Drought/Heat Wave (Summer-Fall...,Drought,19800601,19801130,40681.2,10020.0,1260
3,Florida Freeze (January 1981),Freeze,19810112,19810114,2076.4,572.0,0
4,"Severe Storms, Flash Floods, Hail, Tornadoes (...",Severe Storm,19810505,19810510,1409.1,401.4,20
...,...,...,...,...,...,...,...
398,Hurricane Debby (August 2024),Tropical Cyclone,20240805,20240809,2476.0,2476.0,17
399,Hurricane Francine (September 2024),Tropical Cyclone,20240911,20240912,1291.0,1291.0,0
400,Hurricane Helene (September 2024),Tropical Cyclone,20240924,20240929,78721.0,78721.0,219
401,Hurricane Milton (October 2024),Tropical Cyclone,20241009,20241010,34250.0,34250.0,32


In [50]:
# Switch the source headers to snake_case names; the two cost columns get a
# `_musd` suffix.
data = data.rename(
    {
        "Name": "event_name",
        "Disaster": "disaster_type",
        "Begin Date": "begin_date",
        "End Date": "end_date",
        "CPI-Adjusted Cost": "cost_cpi_musd",
        "Unadjusted Cost": "cost_nominal_musd",
        "Deaths": "deaths",
    },
    axis="columns",
)

In [51]:
# Both boundary columns hold dates encoded as YYYYMMDD integers, so they are
# parsed the same way. Values that do not fit the format become NaT rather
# than raising.
for date_col in ("begin_date", "end_date"):
    data[date_col] = pd.to_datetime(
        data[date_col].astype(str), format="%Y%m%d", errors="coerce"
    )

In [52]:
# Columns that must be numeric for any later aggregation.
num_cols = ["cost_cpi_musd", "cost_nominal_musd", "deaths"]

# Re-coerce each one; anything unparseable turns into NaN instead of raising.
data = data.assign(
    **{col: pd.to_numeric(data[col], errors="coerce") for col in num_cols}
)

In [53]:
# Source disaster labels -> the grouping label used in this notebook.
# Only "Flooding" and "Tropical Cyclone" are actually renamed; the remaining
# entries map a label to itself.
type_map = {
    "Flooding": "Flood",
    "Tropical Cyclone": "Hurricane",
    "Severe Storm": "Severe Storm",
    "Drought": "Drought",
    "Freeze": "Freeze",
    "Winter Storm": "Winter Storm",
    "Wildfire": "Wildfire",
    "Heat Wave": "Heat Wave",
}

# Look each label up in the map and keep the original label when it has no
# entry, so unknown disaster types pass through unchanged.
data["disaster_group"] = data["disaster_type"].map(
    lambda label: type_map.get(label, label)
)

In [54]:
# Tier A: Explicit South Florida ONLY (regex patterns, matched case-insensitively)
sf_only_keywords = [
    r"\bSouth Florida\b",
    r"\bFort Lauderdale\b",
    r"\bMiami\b",
    r"\bMiami-Dade\b",
    r"\bBroward\b",
    r"\bPalm Beach\b",
    r"\bMonroe\b",
    r"\bFlorida Keys\b",
    r"\bKey West\b",
    r"\bEverglades\b",
]

# Tier B: Known storms that clearly affected SoFlo
# (curated + defensible)
sf_affected_allowlist = [
    "Hurricane Andrew",
    "Hurricane Wilma",
    "Hurricane Irma",
    "Hurricane Ian",
    "Hurricane Nicole",
    "Hurricane Idalia",
    "Tropical Storm Eta",
    "Tropical Storm Elsa",
    "Fort Lauderdale Flash Flood",
    "South Florida Flooding",
]

# Explicit NON-Florida regions
# NOTE(review): these are matched as plain substrings, so "Kansas" also hits
# names containing "Arkansas". Confirm that is acceptable before relying on it.
non_fl_markers = [
    "Midwest", "Plains", "North Central", "Central Plains",
    "Rockies", "Mountain West",
    "California", "Arizona", "Colorado",
    "Texas", "Oklahoma", "Kansas",
    "Ohio", "Pennsylvania", "Michigan", "Minnesota",
    "New England", "Northeast",
    "Missouri River", "Tennessee",
    "Hawaii", "Alaska", "Typhoon"
]

# Florida-inclusive regional labels (counts as affected, not only)
fl_inclusive_regions = [
    "Southeast",
    "Gulf States",
    "East Coast",
    "South and Southeast",
    "Southeastern"
]

def classify_south_florida(event_name: str, disaster_type: str) -> str:
    """Label a disaster event by its relevance to South Florida.

    The tiers are evaluated in order and the first match wins:

    A. name matches a pattern in ``sf_only_keywords``      -> "south_florida_only"
    B. name contains an entry of ``sf_affected_allowlist`` -> "south_florida_affected"
    C. name contains an entry of ``non_fl_markers``        -> "not_relevant"
    D. ``disaster_type`` is exactly "Tropical Cyclone"     -> "south_florida_affected"
    E. name contains an entry of ``fl_inclusive_regions``  -> "south_florida_affected"
    F. anything else (including vague "Southern" / "Central" / "Eastern"
       wording)                                            -> "not_relevant"

    Tiers B, C and E are case-insensitive substring checks.

    Args:
        event_name: Event name. A missing or non-string value (``None``,
            float NaN, ``pd.NA``) is treated as an empty name instead of
            raising.
        disaster_type: Raw disaster type label, compared exactly against
            "Tropical Cyclone".

    Returns:
        One of "south_florida_only", "south_florida_affected", "not_relevant".
    """
    # Missing names arrive from pandas as float NaN, and `NaN or ""` is still
    # NaN (truthy), so guard on the type rather than on truthiness.
    name = event_name.strip() if isinstance(event_name, str) else ""
    name_lower = name.lower()  # lowered once, reused by every substring tier

    def mentions_any(terms) -> bool:
        """Return True if any term occurs in the name, ignoring case."""
        return any(term.lower() in name_lower for term in terms)

    # Tier A: Explicit South Florida
    if any(re.search(pat, name, flags=re.IGNORECASE) for pat in sf_only_keywords):
        return "south_florida_only"

    # Tier B: Curated known SoFlo impacts
    if mentions_any(sf_affected_allowlist):
        return "south_florida_affected"

    # Tier C: Explicit non-Florida geography
    if mentions_any(non_fl_markers):
        return "not_relevant"

    # Tier D: Tropical cyclones not excluded by Tier C
    # NOTE(review): this tags every remaining tropical cyclone as affecting
    # South Florida regardless of its track. Confirm that is intended.
    if disaster_type == "Tropical Cyclone":
        return "south_florida_affected"

    # Tier E: Florida-inclusive regional wording
    if mentions_any(fl_inclusive_regions):
        return "south_florida_affected"

    # Tier F: vague "Southern / Central / Eastern" names get the same label as
    # the fall-through, so no separate check is needed.
    return "not_relevant"


# Tag every row by passing its event name and raw disaster type to the
# classifier, in row order.
data["south_florida_relevance"] = [
    classify_south_florida(name, kind)
    for name, kind in zip(data["event_name"], data["disaster_type"])
]

In [56]:
# Tally how many events received each relevance flag (NaN is counted too).
relevance_tally = data["south_florida_relevance"].value_counts(dropna=False)

# Turn the tally into a two-column table: flag value and its count.
flag_counts = relevance_tally.rename_axis("south_florida_relevance").reset_index(
    name="count"
)

flag_counts

Unnamed: 0,south_florida_relevance,count
0,not_relevant,299
1,south_florida_affected,102
2,south_florida_only,2


In [58]:
# Keep only the events flagged as South Florida-only or South Florida-affected
# and write them to the cleaned CSV, without the index column.
is_sf_relevant = data["south_florida_relevance"].isin(
    ["south_florida_only", "south_florida_affected"]
)
data.loc[is_sf_relevant].to_csv(
    path_or_buf="/Users/acsoteldo/Desktop/datasets/Data Project 12 Dec 2025/data/NOAABillionDollarDisasters_dataset CLEANED.csv",
    index=False,
)