## What Is The Effect Of The Earth's Temperature on Cyclonic Storms?

- https://berkeleyearth.org/data/
- https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.ncdc:C01552
- https://www.spc.noaa.gov/wcm/#data

        - https://www.spc.noaa.gov/wcm/data/SPC_severe_database_description.pdf

- https://www.ncei.noaa.gov/sites/g/files/anmtlf171/files/2025-04/IBTrACS_version4r01_Technical_Details.pdf
- https://www.ncei.noaa.gov/sites/default/files/2021-07/IBTrACS_v04_column_documentation.pdf

> Basin - All storms that have at least one position in that basin. This allows analysis of a
given basin but also means that different basin files should not be combined since some
storms will be in both files.

>SID* A unique storm identifier (SID) assigned by IBTrACS algorithm

### Earth's Temperature

In [14]:
import pandas as pd
import numpy as np
import re

NH_PATH = "NH.Ts+dSST.csv"
SH_PATH = "SH.Ts+dSST.csv"

MONTHS = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]
MONTH_NUM = {m:i+1 for i, m in enumerate(MONTHS)}

def read_and_long(path, hemisphere: str) -> pd.DataFrame:
    df = pd.read_csv(path, skip_blank_lines=True, header=1)
    # Standardize headers and drop auto Unnamed cols
    df.columns = [c.strip() for c in df.columns]
    df = df.loc[:, ~df.columns.str.match(r"^Unnamed")]

    # Find a year column, fallback to first column
    year_col = next((c for c in ["Year","year","YEAR","Date","date"] if c in df.columns), df.columns[0])

    # Map many header variants to Jan..Dec
    month_alias = {
        "JAN":"Jan","FEB":"Feb","MAR":"Mar","APR":"Apr","MAY":"May","JUN":"Jun",
        "JUL":"Jul","AUG":"Aug","SEP":"Sep","OCT":"Oct","NOV":"Nov","DEC":"Dec",
        "Jan":"Jan","Feb":"Feb","Mar":"Mar","Apr":"Apr","May":"May","Jun":"Jun",
        "Jul":"Jul","Aug":"Aug","Sep":"Sep","Oct":"Oct","Nov":"Nov","Dec":"Dec",
        "JAN.":"Jan","FEB.":"Feb","MAR.":"Mar","APR.":"Apr","MAY.":"May","JUN.":"Jun",
        "JUL.":"Jul","AUG.":"Aug","SEP.":"Sep","OCT.":"Oct","NOV.":"Nov","DEC.":"Dec",
    }

    rename = {}
    month_cols = []
    for c in df.columns:
        if c == year_col:
            continue
        token = re.sub(r"[^A-Za-z]", "", c)  # strip digits, dots, asterisks
        if token in month_alias:
            rename[c] = month_alias[token]
            month_cols.append(c)

    # If detection failed, fall back to literal Jan..Dec if present
    if not month_cols:
        for m in MONTHS:
            if m in df.columns:
                month_cols.append(m)
                rename[m] = m

    # Rename and keep only Year + months
    if rename:
        df = df.rename(columns=rename)
    keep = [year_col] + [m for m in MONTHS if m in df.columns]
    df = df[keep]

    # Numeric coercion
    df[year_col] = pd.to_numeric(df[year_col], errors="coerce")
    for m in keep[1:]:
        df[m] = pd.to_numeric(df[m], errors="coerce")

    # Long format
    long_df = df.melt(id_vars=[year_col], var_name="month", value_name="temp")
    long_df = long_df.dropna(subset=[year_col, "month"]).copy()
    long_df["hemisphere"] = hemisphere

    # Build a month midpoint date
    year_int = long_df[year_col].apply(lambda x: int(np.floor(x)) if pd.notna(x) else np.nan)
    long_df["date"] = pd.to_datetime(
        {"year": year_int, "month": long_df["month"].map(MONTH_NUM), "day": 15},
        errors="coerce"
    )

    long_df = long_df.rename(columns={year_col: "year"})
    long_df = long_df.sort_values(["hemisphere", "year", "month"], kind="mergesort").reset_index(drop=True)
    return long_df[["hemisphere","year","month","date","temp"]]

# Build outputs
nh_long = read_and_long(NH_PATH, "NH")
sh_long = read_and_long(SH_PATH, "SH")
temperature_df = pd.concat([nh_long, sh_long], ignore_index=True)

In [15]:
temperature_df

Unnamed: 0,hemisphere,year,month,date,temp
0,NH,1880,Apr,1880-04-15,-0.31
1,NH,1880,Aug,1880-08-15,-0.28
2,NH,1880,Dec,1880-12-15,-0.41
3,NH,1880,Feb,1880-02-15,-0.52
4,NH,1880,Jan,1880-01-15,-0.37
...,...,...,...,...,...
3499,SH,2025,Mar,2025-03-15,0.88
3500,SH,2025,May,2025-05-15,0.94
3501,SH,2025,Nov,2025-11-15,
3502,SH,2025,Oct,2025-10-15,


### Hurricanes & Tropical Cyclones

In [20]:
df = pd.read_csv("noaa_hurricanes.csv")

# Helper to find likely column names regardless of exact casing/naming
def find_col(candidates, columns):
    lower = {c.lower(): c for c in columns}
    for cand in candidates:
        if cand in lower:
            return lower[cand]
    for c in columns:
        lc = c.lower()
        if any(cand in lc for cand in candidates):
            return c
    return None

iso_col = find_col(["iso_time", "iso time", "isotime", "time_iso", "time"], df.columns)
lat_col = find_col(["latitude", "lat"], df.columns)
lon_col = find_col(["longitude", "lon", "lng"], df.columns)

if iso_col is None:
    raise ValueError("ISO time column not found")
if lat_col is None:
    raise ValueError("Latitude column not found")
if lon_col is None:
    raise ValueError("Longitude column not found")

# Parse to datetime
df["_parsed_dt"] = pd.to_datetime(df[iso_col], errors="coerce", utc=True)

# Snap each record to the 15th of its month
month_start = df["_parsed_dt"].dt.to_period("M").dt.to_timestamp()
df["month_15"] = month_start + pd.Timedelta(days=14)  # 1st + 14 days = 15th

# Optional string version like YYYY-MM-15
df["iso_time_15th"] = df["month_15"].dt.strftime("%Y-%m-15")

# Hemisphere: NH for lat >= 0, SH for lat < 0
df["hemisphere"] = df[lat_col].apply(
    lambda x: "NH" if pd.notna(x) and float(x) >= 0 else ("SH" if pd.notna(x) else pd.NA)
)

hurricanes_df = df.copy()

  month_start = df["_parsed_dt"].dt.to_period("M").dt.to_timestamp()


In [21]:
hurricanes_df

Unnamed: 0,season,sid,basin,latitude,longitude,iso_time,usa_sshs,usa_wind,_parsed_dt,month_15,iso_time_15th,hemisphere
0,1990,1989349S08065,SI,-10.8700,55.4851,1989-12-20 21:00:00 UTC,4,135.0,1989-12-20 21:00:00+00:00,1989-12-15,1989-12-15,SH
1,1990,1989349S08065,SI,-13.6008,56.8324,1989-12-24 21:00:00 UTC,1,82.0,1989-12-24 21:00:00+00:00,1989-12-15,1989-12-15,SH
2,1990,1990031S09070,SI,-16.1083,67.0982,1990-02-04 09:00:00 UTC,2,92.0,1990-02-04 09:00:00+00:00,1990-02-15,1990-02-15,SH
3,1990,1990208N18132,WP,28.8612,139.3840,1990-08-02 09:00:00 UTC,1,82.0,1990-08-02 09:00:00+00:00,1990-08-15,1990-08-15,NH
4,1990,1990236N13314,,16.9425,-58.1225,1990-08-27 15:00:00 UTC,2,92.0,1990-08-27 15:00:00+00:00,1990-08-15,1990-08-15,NH
...,...,...,...,...,...,...,...,...,...,...,...,...
224989,2015,2015117S12115,SI,-17.8000,109.9250,2015-04-30 06:00:00 UTC,4,120.0,2015-04-30 06:00:00+00:00,2015-04-15,2015-04-15,SH
224990,2015,2015263N14148,WP,24.2088,122.0160,2015-09-28 09:00:00 UTC,4,120.0,2015-09-28 09:00:00+00:00,2015-09-15,2015-09-15,NH
224991,2016,2016278N23300,,30.6000,-66.2000,2016-10-13 06:00:00 UTC,4,120.0,2016-10-13 06:00:00+00:00,2016-10-15,2016-10-15,NH
224992,2018,2018073S09129,SI,-18.1025,106.1640,2018-03-22 15:00:00 UTC,4,120.0,2018-03-22 15:00:00+00:00,2018-03-15,2018-03-15,SH


### Tornadoes

In [22]:
# Load
df = pd.read_csv("1950-2024_actual_tornadoes.csv")

# Ensure types
df['slat'] = pd.to_numeric(df.get('slat'), errors='coerce')
df['slon'] = pd.to_numeric(df.get('slon'), errors='coerce')
df['date'] = pd.to_datetime(df.get('date'), errors='coerce', infer_datetime_format=True)

# Mid-month column set to the 15th of each record's month
month_start = df['date'].dt.to_period('M').dt.start_time
df['mid_month'] = month_start + pd.offsets.Day(14)  # 1st + 14 days = 15th

# Hemisphere from latitude
# NH if latitude > 0, SH if latitude < 0, Equator if exactly 0, NA if missing
df['hemisphere'] = np.where(
    df['slat'].isna(), pd.NA,
    np.where(df['slat'] > 0, 'NH',
             np.where(df['slat'] < 0, 'SH', 'Equator'))
)

tornadoes_df = df.copy()


  df['date'] = pd.to_datetime(df.get('date'), errors='coerce', infer_datetime_format=True)


In [23]:
tornadoes_df

Unnamed: 0,om,yr,mo,dy,date,time,tz,st,stf,stn,...,ns,sn,sg,f1,f2,f3,f4,fc,mid_month,hemisphere
0,192,1950,10,1,1950-10-01,21:00:00,3,OK,40,23,...,1,1,1,25,0,0,0,0,1950-10-15,NH
1,193,1950,10,9,1950-10-09,02:15:00,3,NC,37,9,...,1,1,1,47,0,0,0,0,1950-10-15,NH
2,195,1950,11,20,1950-11-20,02:20:00,3,KY,21,1,...,1,1,1,177,0,0,0,0,1950-11-15,NH
3,196,1950,11,20,1950-11-20,04:00:00,3,KY,21,2,...,1,1,1,209,0,0,0,0,1950-11-15,NH
4,197,1950,11,20,1950-11-20,07:30:00,3,MS,28,14,...,1,1,1,101,0,0,0,0,1950-11-15,NH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71808,624887,2024,9,7,2024-09-07,05:58:00,3,IN,18,0,...,1,1,1,127,0,0,0,0,2024-09-15,NH
71809,624888,2024,9,9,2024-09-09,14:19:00,3,WY,56,0,...,1,1,1,17,0,0,0,0,2024-09-15,NH
71810,624889,2024,9,9,2024-09-09,14:33:00,3,NY,36,0,...,1,1,1,29,0,0,0,0,2024-09-15,NH
71811,624890,2024,9,9,2024-09-09,15:15:00,3,NY,36,0,...,1,1,1,45,0,0,0,0,2024-09-15,NH
