# Unique filenames for Climate Forecast System (CFS)

[NCEP Description](https://www.nco.ncep.noaa.gov/pmb/products/cfs/)

## Time Series Data

In [39]:
import requests
from bs4 import BeautifulSoup
import polars as pl

# URL of the web directory
url = "https://nomads.ncep.noaa.gov/pub/data/nccf/com/cfs/prod/cfs.20241229/06/time_grib_01/"

# Step 1: Fetch the directory listing
response = requests.get(url)
response.raise_for_status()  # Raise an error if the request failed

# Step 2: Parse the HTML content
soup = BeautifulSoup(response.text)

# Step 3: Extract file names and additional metadata if available
files = []
for link in soup.find_all("a"):
    href = link.get("href")
    if href and not href.startswith("?") and not href.endswith("/"):
        files.append({"filename": href})

# Step 4: Convert to a Polars DataFrame
df = pl.DataFrame(files)

# Only grib files (not index files)
df = df.filter(pl.col("filename").str.ends_with(".grb2"))

df

filename
str
"""chi200.01.2024122906.daily.grb…"
"""chi850.01.2024122906.daily.grb…"
"""cprat.01.2024122906.daily.grb2"""
"""csdlf.01.2024122906.daily.grb2"""
"""csdsf.01.2024122906.daily.grb2"""
…
"""z200.01.2024122906.daily.grb2"""
"""z500.01.2024122906.daily.grb2"""
"""z700.01.2024122906.daily.grb2"""
"""z850.01.2024122906.daily.grb2"""


In [50]:
a = df.select(pl.col("filename").str.split(".")).select(
    a=pl.col("filename")
    .list.slice(0, pl.col("filename").list.len() - 4)
    .list.join("."),
    b=pl.col("filename").list.get(-4),
    c=pl.col("filename").list.get(-3),
    d=pl.col("filename").list.get(-2),
    e=pl.col("filename").list.get(-1),
)
a

a,b,c,d,e
str,str,str,str,str
"""chi200""","""01""","""2024122906""","""daily""","""grb2"""
"""chi850""","""01""","""2024122906""","""daily""","""grb2"""
"""cprat""","""01""","""2024122906""","""daily""","""grb2"""
"""csdlf""","""01""","""2024122906""","""daily""","""grb2"""
"""csdsf""","""01""","""2024122906""","""daily""","""grb2"""
…,…,…,…,…
"""z200""","""01""","""2024122906""","""daily""","""grb2"""
"""z500""","""01""","""2024122906""","""daily""","""grb2"""
"""z700""","""01""","""2024122906""","""daily""","""grb2"""
"""z850""","""01""","""2024122906""","""daily""","""grb2"""


In [59]:
for i in a.columns:
    print(i, a[i].unique().sort().to_numpy())


a ['chi200' 'chi850' 'cprat' 'csdlf' 'csdsf' 'csusf' 'dlwsfc' 'dswsfc'
 'gflux' 'icecon' 'icethk' 'ipv450' 'ipv550' 'ipv650' 'lhtfl' 'nddsf'
 'ocndt10c' 'ocndt15c' 'ocndt2.5c' 'ocndt20c' 'ocndt25c' 'ocndt28c'
 'ocndt5c' 'ocnheat' 'ocnmld' 'ocnsal15' 'ocnsal5' 'ocnsild' 'ocnslh'
 'ocnsst' 'ocnt15' 'ocntchp' 'ocnu15' 'ocnu5' 'ocnv15' 'ocnv5' 'ocnvv55'
 'prate' 'pressfc' 'prmsl' 'psi200' 'psi850' 'pwat' 'q2m' 'q500' 'q700'
 'q850' 'q925' 'runoff' 'shtfl' 'snohf' 'soilm1' 'soilm2' 'soilm3'
 'soilm4' 'soilt1' 't1000' 't2' 't200' 't250' 't50' 't500' 't700' 't850'
 'tcdcclm' 'tmax' 'tmin' 'tmp2m' 'tmphy1' 'tmpsfc' 'ulwsfc' 'ulwtoa'
 'uswsfc' 'uswtoa' 'vddsf' 'vvel500' 'weasd' 'wnd1000' 'wnd10m' 'wnd200'
 'wnd250' 'wnd500' 'wnd700' 'wnd850' 'wnd925' 'wndstrs' 'z1000' 'z200'
 'z500' 'z700' 'z850']
b ['01']
c ['2024122906']
d ['daily']
e ['grb2']


## 6 hour mean

In [62]:
import requests
from bs4 import BeautifulSoup
import polars as pl

# URL of the web directory
url = "https://nomads.ncep.noaa.gov/pub/data/nccf/com/cfs/prod/cfs.20241229/06/6hrly_grib_02/"

# Step 1: Fetch the directory listing
response = requests.get(url)
response.raise_for_status()  # Raise an error if the request failed

# Step 2: Parse the HTML content
soup = BeautifulSoup(response.text)

# Step 3: Extract file names and additional metadata if available
files = []
for link in soup.find_all("a"):
    href = link.get("href")
    if href and not href.startswith("?") and not href.endswith("/"):
        files.append({"filename": href})

# Step 4: Convert to a Polars DataFrame
df = pl.DataFrame(files)

# Only grib files (not index files)
df = df.filter(pl.col("filename").str.ends_with(".grb2"))

df

filename
str
"""flxf2024122906.02.2024122906.g…"
"""flxf2024122912.02.2024122906.g…"
"""flxf2024122918.02.2024122906.g…"
"""flxf2024123000.02.2024122906.g…"
"""flxf2024123006.02.2024122906.g…"
…
"""pgbf2025021112.02.2024122906.g…"
"""pgbf2025021118.02.2024122906.g…"
"""pgbf2025021200.02.2024122906.g…"
"""pgbf2025021206.02.2024122906.g…"


In [63]:
a = df.select(pl.col("filename").str.split(".")).select(
    a=pl.col("filename")
    .list.slice(0, pl.col("filename").list.len() - 4)
    .list.join("."),
    b=pl.col("filename").list.get(-4),
    c=pl.col("filename").list.get(-3),
    d=pl.col("filename").list.get(-2),
    e=pl.col("filename").list.get(-1),
)
a


a,b,c,d,e
str,str,str,str,str
"""""","""flxf2024122906""","""02""","""2024122906""","""grb2"""
"""""","""flxf2024122912""","""02""","""2024122906""","""grb2"""
"""""","""flxf2024122918""","""02""","""2024122906""","""grb2"""
"""""","""flxf2024123000""","""02""","""2024122906""","""grb2"""
"""""","""flxf2024123006""","""02""","""2024122906""","""grb2"""
…,…,…,…,…
"""""","""pgbf2025021112""","""02""","""2024122906""","""grb2"""
"""""","""pgbf2025021118""","""02""","""2024122906""","""grb2"""
"""""","""pgbf2025021200""","""02""","""2024122906""","""grb2"""
"""""","""pgbf2025021206""","""02""","""2024122906""","""grb2"""


In [64]:
for i in a.columns:
    print(i, a[i].unique().sort().to_numpy())


a ['']
b ['flxf2024122906' 'flxf2024122912' 'flxf2024122918' 'flxf2024123000'
 'flxf2024123006' 'flxf2024123012' 'flxf2024123018' 'flxf2024123100'
 'flxf2024123106' 'flxf2024123112' 'flxf2024123118' 'flxf2025010100'
 'flxf2025010106' 'flxf2025010112' 'flxf2025010118' 'flxf2025010200'
 'flxf2025010206' 'flxf2025010212' 'flxf2025010218' 'flxf2025010300'
 'flxf2025010306' 'flxf2025010312' 'flxf2025010318' 'flxf2025010400'
 'flxf2025010406' 'flxf2025010412' 'flxf2025010418' 'flxf2025010500'
 'flxf2025010506' 'flxf2025010512' 'flxf2025010518' 'flxf2025010600'
 'flxf2025010606' 'flxf2025010612' 'flxf2025010618' 'flxf2025010700'
 'flxf2025010706' 'flxf2025010712' 'flxf2025010718' 'flxf2025010800'
 'flxf2025010806' 'flxf2025010812' 'flxf2025010818' 'flxf2025010900'
 'flxf2025010906' 'flxf2025010912' 'flxf2025010918' 'flxf2025011000'
 'flxf2025011006' 'flxf2025011012' 'flxf2025011018' 'flxf2025011100'
 'flxf2025011106' 'flxf2025011112' 'flxf2025011118' 'flxf2025011200'
 'flxf2025011206' 'flxf20

In [None]:
# <name>f<forecastdatehour>.<member>.<validdatecycle>.grb2

2024122906
2024122906