First, set the time range to fetch PIREPs for:

In [17]:
import datetime as dt

date_s = dt.datetime(2024, 11, 16, 0, 0, 0, tzinfo=dt.timezone.utc)
date_e = dt.datetime(2024, 11, 17, 0, 0, 0, tzinfo=dt.timezone.utc)

Construct the URL to fetch PIREPs from:

In [18]:
from urllib import parse

base_url = "https://mesonet.agron.iastate.edu/cgi-bin/request/gis/pireps.py?"
params = {
    "sts": date_s.isoformat().replace("+00:00", "Z"),
    "ets": date_e.isoformat().replace("+00:00", "Z"),
    "artcc": "_ALL",
    "fmt": "csv",
}
url = base_url + parse.urlencode(params)
url

'https://mesonet.agron.iastate.edu/cgi-bin/request/gis/pireps.py?sts=2024-11-16T00%3A00%3A00Z&ets=2024-11-17T00%3A00%3A00Z&artcc=_ALL&fmt=csv'

Next, read the CSV file from the URL:

In [19]:
import pandas as pd

df = pd.read_csv(url)

In [20]:
df = df[["VALID", "REPORT"]]
df = df.rename(columns={"VALID": "Timestamp", "REPORT": "Report"})
df["Timestamp"] = pd.to_datetime(df["Timestamp"], format=f"%Y%m%d%H%M")

Clean up the columns and extract the relevant data:

In [None]:
FLAGS = {
    "Station": r"^(\w{3,4})",
    "Urgency": r"(UAA?)",
    "Location": r"/OV\s?(.*?)\s?/TM",
    "Flight Level": r"/FL\s?(\d{3})\s?/TP",
    "Aircraft Type": r"/TP\s?(.*?)\s?/|$",
    "Turbulence": r"/TB\s?(.*?)\s?/|$",
}

In [22]:
for flag in FLAGS:
    df[flag] = df["Report"].str.extract(FLAGS[flag])

In [23]:
# Replace unknown stations
df["Station"] = df["Station"].fillna("None")

In [24]:
# Replace unknown flight levels
# TODO: Revisit this
df["Flight Level"] = pd.to_numeric(df["Flight Level"].fillna(value=0)) * 100

In [25]:
# Replace urgencies
df["Urgency"] = df["Urgency"].replace({"UA": "Routine", "UAA": "Urgent"})

In [26]:
# Replace unknown turbulence by assuming no turbulence
# TODO: Revisit this
df["Turbulence"] = df["Turbulence"].fillna("NEG")
df["Turbulence"] = df["Turbulence"].replace(["SMOOTH", "NONE"], "NEG")

In [27]:
df["Turbulence"].value_counts()

Turbulence
NEG                          2017
MOD                            46
LGT CHOP                       44
OCNL LGT CHOP                  32
MOD CHOP                       25
                             ... 
CONS LGT-MOD CHOP 195-220       1
LGT CONS 260-280                1
MOD 010                         1
CONS CHOP                       1
INTMT MOD CONS LGT              1
Name: count, Length: 174, dtype: int64

In [28]:
df["Turbulence Duration"] = df["Turbulence"].str.extract(r"((?:INT(?:ER)?M(?:ITTEN)?T)|(?:OC(?:C?ASS?IO)?NA?L)|(?:CON(?:TINUOU)?S(?:TANT)?))")
df["Turbulence Intensity"] = df["Turbulence"].str.extract(r"((?:NEG)|(?:LI?G?H?T)|(?:MOD)|(?:SEV)|(?:EXTRE?ME?)(?:-(?:LI?G?H?T)|(?:MOD)|(?:SEV)|(?:EXTRE?ME?))?)")
df["Turbulence Type"] = df["Turbulence"].str.extract(r"((?:CAT)|(?:CHOP))")
df["Turbulence Altitude"] = df["Turbulence"].str.extract(r"((?:(?:BE?LOW?)|(?:ABO?VE?))?\s\d{3}(?:-\d{3})?)")

In [29]:
non_neg = df[df["Turbulence"] != "NEG"]
non_neg = non_neg[non_neg[["Turbulence Duration", "Turbulence Intensity", "Turbulence Type", "Turbulence Altitude"]].isna().all(axis=1)]
non_neg

Unnamed: 0,Timestamp,Report,Station,Urgency,Location,Flight Level,Aircraft Type,Turbulence,Turbulence Duration,Turbulence Intensity,Turbulence Type,Turbulence Altitude
1311,2024-11-16 17:07:00,ACY UA /OV KACY/TM 1707/FL005/TP C172/TB /TB L...,ACY,Routine,KACY,500,C172,,,,,
1841,2024-11-16 19:50:00,PAE UUA /OV PAE/TM 1950/FL015/TP C172/TB CLOM/...,PAE,Routine,PAE,1500,C172,CLOM,,,,
1961,2024-11-16 20:18:00,AMA UA /OV PNH180025/TM 2018/FLDURD/TP BE9L/TB...,AMA,Routine,PNH180025,0,BE9L,L-M,,,,
2341,2024-11-16 22:52:00,DLH UA /OV DLH/TM 2252/FL018/TP SR20/TB +/- 8/...,DLH,Routine,DLH,1800,SR20,+,,,,
2367,2024-11-16 23:01:00,MZZ UA /OV MZZ/TM 2301/FL250/TP B350/TB M18/IC...,MZZ,Routine,MZZ,25000,B350,M18,,,,


In [30]:
df = df.dropna(how="all", subset=["Turbulence Duration", "Turbulence Intensity", "Turbulence Type", "Turbulence Altitude"])

Parse aircraft types:

In [31]:
aircraft = pd.read_csv("AircraftTypeDesignators.csv")
aircraft = aircraft[["Type Designator", "WTC"]]
aircraft = aircraft.rename(columns={
    "Type Designator": "Aircraft Type", 
    "WTC": "Size Class",
})
aircraft["Size Class"] = aircraft["Size Class"].replace({
    "L": "Light",
    "L/M": "Light/Medium",
    "M": "Medium",
    "H": "Heavy",
    "J": "Heavy",
})
aircraft = aircraft.drop_duplicates()
aircraft

Unnamed: 0,Aircraft Type,Size Class
0,J328,Medium
1,UL45,Light
2,TR55,Light
6,CE22,Light
7,ELSP,Light
...,...,...
7261,CH30,Light
7266,CH62,Light
7267,CH64,Light
7283,EDGE,Light


In [32]:
df.loc[:, "Aircraft Type"] = df["Aircraft Type"].replace(aircraft["Aircraft Type"].values, aircraft["Size Class"].values)
df

Unnamed: 0,Timestamp,Report,Station,Urgency,Location,Flight Level,Aircraft Type,Turbulence,Turbulence Duration,Turbulence Intensity,Turbulence Type,Turbulence Altitude
0,2024-11-16 00:00:00,SAW UA /OV KSAW/TM 0000/FL036/TP B190/SK OVC03...,SAW,Routine,KSAW,3600,Medium,NEG,,NEG,,
1,2024-11-16 00:00:00,CRG UA /OV CRG/TM 0000/FL370/TP B738/TB NEG,CRG,Routine,CRG,37000,Medium,NEG,,NEG,,
2,2024-11-16 00:02:00,MTJ UA /OV MTJ045010/TM 0002/FL180/TP EA50/TB ...,MTJ,Routine,MTJ045010,18000,Light,MOD 200-180,,MOD,,200-180
3,2024-11-16 00:02:00,PGA UA /OV TBC080040/TM 0002/FL200/TP PC12/TB ...,PGA,Routine,TBC080040,20000,Light,NEG,,NEG,,
4,2024-11-16 00:03:00,ORD UA /OV CGT330020/TM 0003/FL060/TP B738/SK ...,ORD,Routine,CGT330020,6000,Medium,NEG,,NEG,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2465,2024-11-16 23:57:00,PLU UA /OV SEA160016 /TM 2357 /FL040 /TP C172 ...,PLU,Routine,SEA160016,4000,Light,NEG,,NEG,,
2466,2024-11-16 23:58:00,UA /OV 5430N11900W /TM 2358 /FL310 /TP B77L /T...,,Routine,5430N11900W,31000,Heavy,NEG,,NEG,,
2467,2024-11-16 23:58:00,MKK UA /OV BAMBO/TM 2358/FL130/TP A333/SK TOPS...,MKK,Routine,BAMBO,13000,Heavy,NEG,,NEG,,
2468,2024-11-16 23:59:00,CID UA /OV CID/TM 2359/FL300/TP B738/TB CONS L...,CID,Routine,CID,30000,Medium,NEG,,NEG,,


Parse locations:

In [None]:
import re

FORMAT_LATLON = re.compile(
    (
        r"\s?(?P<lat>[0-9]{2,4})(?P<latsign>[NS])"
        r"\s?(?P<lon>[0-9]{2,5})(?P<lonsign>[EW])"
    )
)
FORMAT_LOCDIR = re.compile(
    r".*?(?P<loc>[A-Z0-9]{3,4})\s?(?P<dir>[0-9]{3})(?P<dist>[0-9]{3})"
)
FORMAT_TWOLOC = re.compile(r"(?P<loc1>[A-Z0-9]{3,4})\s?-\s?(?P<loc2>[A-Z0-9]{3,4})")
FORMAT_OFFSET = re.compile(
    (
        r"(?P<dist>[0-9]{1,3})\s?"
        "(?P<dir>NORTH|EAST|SOUTH|WEST|N|NNE|NE|ENE|E|ESE|"
        r"SE|SSE|S|SSW|SW|WSW|W|WNW|NW|NNW)\s+(OF )?(?P<loc>[A-Z0-9]{3,4})"
    )
)