First, set the time range to fetch PIREPs for:

In [30]:
import datetime as dt

date_s = dt.datetime(2024, 11, 16, 0, 0, 0, tzinfo=dt.UTC)
date_e = dt.datetime(2024, 11, 17, 0, 0, 0, tzinfo=dt.UTC)

Construct the URL to fetch PIREPs from:

In [31]:
from urllib import parse

base_url = "https://mesonet.agron.iastate.edu/cgi-bin/request/gis/pireps.py?"
params = {
    "sts": date_s.isoformat().replace("+00:00", "Z"),
    "ets": date_e.isoformat().replace("+00:00", "Z"),
    "artcc": "_ALL",
    "fmt": "csv",
}
url = base_url + parse.urlencode(params)
url

'https://mesonet.agron.iastate.edu/cgi-bin/request/gis/pireps.py?sts=2024-11-16T00%3A00%3A00Z&ets=2024-11-17T00%3A00%3A00Z&artcc=_ALL&fmt=csv'

Next, read the CSV file from the URL:

In [32]:
import pandas as pd

df = pd.read_csv(url)

In [33]:
df = df[["VALID", "REPORT"]]
df = df.rename(columns={"VALID": "Timestamp", "REPORT": "Report"})
df["Timestamp"] = pd.to_datetime(df["Timestamp"], format=f"%Y%m%d%H%M")

Clean up the columns and extract the relevant data:

In [34]:
FLAGS = {
    "Station": r"^(\w{3,4})",
    "Urgency": r"(UAA?)",
    "Location": r"/OV\s?(.*?)\s?/TM",
    "Flight Level": r"/FL\s?(\d{3})\s?/TP",
    "Aircraft Type": r"/TP\s?(.*?)\s?/|$",
    "Turbulence": r"/TB\s?(.*?)\s?/|$",
}

In [35]:
for flag in FLAGS:
    df[flag] = df["Report"].str.extract(FLAGS[flag])

In [36]:
# Replace unknown stations
df["Station"] = df["Station"].fillna("None")

In [37]:
# Replace unknown flight levels
# TODO: Revisit this
df["Flight Level"] = pd.to_numeric(df["Flight Level"].fillna(value=0)) * 100

In [38]:
# Replace urgencies
df["Urgency"] = df["Urgency"].replace({"UA": "Routine", "UAA": "Urgent"})

In [39]:
# Replace unknown turbulence by assuming no turbulence
# TODO: Revisit this
df["Turbulence"] = df["Turbulence"].fillna("NEG")
df["Turbulence"] = df["Turbulence"].replace(["SMOOTH", "NONE"], "NEG")

In [40]:
df["Turbulence"].value_counts()

Turbulence
NEG                          2017
MOD                            46
LGT CHOP                       44
OCNL LGT CHOP                  32
MOD CHOP                       25
                             ... 
CONS LGT-MOD CHOP 195-220       1
LGT CONS 260-280                1
MOD 010                         1
CONS CHOP                       1
INTMT MOD CONS LGT              1
Name: count, Length: 174, dtype: int64

In [41]:
df["Turbulence Duration"] = df["Turbulence"].str.extract(r"((?:INT(?:ER)?M(?:ITTEN)?T)|(?:OC(?:C?ASS?IO)?NA?L)|(?:CON(?:TINUOU)?S(?:TANT)?))")
df["Turbulence Intensity"] = df["Turbulence"].str.extract(r"((?:NEG)|(?:LI?G?H?T)|(?:MOD)|(?:SEV)|(?:EXTRE?ME?)(?:-(?:LI?G?H?T)|(?:MOD)|(?:SEV)|(?:EXTRE?ME?))?)")
df["Turbulence Type"] = df["Turbulence"].str.extract(r"((?:CAT)|(?:CHOP))")
df["Turbulence Altitude"] = df["Turbulence"].str.extract(r"((?:(?:BE?LOW?)|(?:ABO?VE?))?\s\d{3}(?:-\d{3})?)")

In [47]:
non_neg = df[df["Turbulence"] != "NEG"]
non_neg = non_neg[non_neg[["Turbulence Duration", "Turbulence Intensity", "Turbulence Type", "Turbulence Altitude"]].isna().all(axis=1)]
non_neg

Unnamed: 0,Timestamp,Report,Station,Urgency,Location,Flight Level,Aircraft Type,Turbulence,Turbulence Duration,Turbulence Intensity,Turbulence Type,Turbulence Altitude
1311,2024-11-16 17:07:00,ACY UA /OV KACY/TM 1707/FL005/TP C172/TB /TB L...,ACY,Routine,KACY,500,C172,,,,,
1841,2024-11-16 19:50:00,PAE UUA /OV PAE/TM 1950/FL015/TP C172/TB CLOM/...,PAE,Routine,PAE,1500,C172,CLOM,,,,
1961,2024-11-16 20:18:00,AMA UA /OV PNH180025/TM 2018/FLDURD/TP BE9L/TB...,AMA,Routine,PNH180025,0,BE9L,L-M,,,,
2341,2024-11-16 22:52:00,DLH UA /OV DLH/TM 2252/FL018/TP SR20/TB +/- 8/...,DLH,Routine,DLH,1800,SR20,+,,,,
2367,2024-11-16 23:01:00,MZZ UA /OV MZZ/TM 2301/FL250/TP B350/TB M18/IC...,MZZ,Routine,MZZ,25000,B350,M18,,,,


In [50]:
df = df.dropna(how="all", subset=["Turbulence Duration", "Turbulence Intensity", "Turbulence Type", "Turbulence Altitude"])

Parse locations: