-
Notifications
You must be signed in to change notification settings - Fork 18
/
pdo.py
52 lines (42 loc) · 1.85 KB
/
pdo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import datetime
from pathlib import Path
import pandas as pd
from wsfr_read.config import DATA_ROOT
# Path components of the default PDO data file; they are joined onto DATA_ROOT when no
# explicit path is passed to the reader.
FILE_PATH_PARTS = ("teleconnections", "pdo.txt")
def read_pdo_data(
    issue_date: str | datetime.date | pd.Timestamp,
    path: Path | None = None,
) -> pd.DataFrame:
    """Loads the Pacific Decadal Oscillation (PDO) index data appropriately subset for issue_date.

    Only rows for months strictly before the month of issue_date are returned; the issue
    month itself is excluded.

    Args:
        issue_date (str | datetime.date | pd.Timestamp): Issue date of forecast. This is used to
            subset the data returned so that the forecast does not use future data. Must be
            something pandas.to_datetime can parse, e.g., the string "2021-03-15".
        path (Path | None, optional): Path to data file. Default of None will use a default
            path relative to the data root directory.

    Returns:
        pd.DataFrame: Long-format dataframe with the columns "year", "month", "pdo_index",
            sorted by year and then month. "month" holds the month number (1-12).

    Raises:
        ValueError: If a column of the data file other than "Year" is not an English
            three-letter month abbreviation.
    """
    issue_date = pd.to_datetime(issue_date)

    # Melt to get long format: one row per (year, month) pair
    df = pd.melt(
        frame=_read_full_pdo_data(path=path),
        id_vars=("Year",),
        var_name="month",
        value_name="pdo_index",
    )
    df = df.rename(columns={"Year": "year"})

    # Convert month abbreviation to number value. An explicit English lookup table is used
    # instead of strptime("%b") because "%b" matches the *locale's* month names, so parsing
    # could fail whenever the process runs under a non-English LC_TIME locale. Matching is
    # case-insensitive, as it is with strptime.
    month_abbreviations = (
        "jan", "feb", "mar", "apr", "may", "jun",
        "jul", "aug", "sep", "oct", "nov", "dec",
    )
    month_numbers = {abbr: number for number, abbr in enumerate(month_abbreviations, start=1)}
    month_labels = df["month"].astype(str)
    months = month_labels.str.lower().map(month_numbers)
    unrecognized = months.isna()
    if unrecognized.any():
        bad_labels = sorted(set(month_labels[unrecognized]))
        raise ValueError(f"Unrecognized month column(s) in PDO data: {bad_labels}")
    df["month"] = months.astype("int64")

    df = df.sort_values(["year", "month"], ignore_index=True)

    # Subset by issue_date: keep earlier years, plus earlier months of the issue year
    is_earlier_year = df["year"] < issue_date.year
    is_earlier_month_same_year = (df["year"] == issue_date.year) & (
        df["month"] < issue_date.month
    )
    return df[is_earlier_year | is_earlier_month_same_year]
def _read_full_pdo_data(path: Path | None = None) -> pd.DataFrame:
"""Loads the full PDO index dataframe. You should use the `read_pdo_data` function instead to
properly subset by time."""
path = path or DATA_ROOT.joinpath(*FILE_PATH_PARTS)
return pd.read_fwf(path, widths=(5,) + (6,) * 12, skiprows=1, na_values=("99.99",))