## Global Forecast System (GFS)

GFS is a global numerical weather prediction system operated by the National Centers for Environmental Prediction (NCEP) in the United States. It provides forecasts at a global scale with relatively coarse resolution (about 0.25 degrees) but can still offer useful information for regional forecasts, including Argentina.

#### GFS -Operational Real Time Forecast 

In [1]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
from datetime import datetime, timedelta
import tarfile

In [14]:
# Define the start and end times
start_time = datetime.now() #'2024-06-17' #'2024-06-01T00:00:00'
end_time = start_time + timedelta(days=10) #'2024-07-01' #2024-06-02T00:00:00'

# URL for GFS forecast data from Unidata THREDDS Data Server
url = 'https://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg/Best'

# Open the GFS dataset using xarray
dataset = xr.open_dataset(url)

# Define location
lat_location = -38.5
lon_location = -62.75
normalize_longitude = (lon_location + 360) % 360

# Define heights
height_80m = 80
height_100m = 100

# Filter the dataset for the specific time range and location
dataset_filtered = dataset.sel(
    time=slice(start_time, end_time),
    lat=lat_location,
    lon=normalize_longitude
)


# Extract specific variables at given heights
specific_humidity_80m = dataset_filtered['Specific_humidity_height_above_ground'].sel(height_above_ground5=height_80m)
v_wind_100m = dataset_filtered['v-component_of_wind_height_above_ground'].sel(height_above_ground2=height_100m)
u_wind_100m = dataset_filtered['u-component_of_wind_height_above_ground'].sel(height_above_ground2=height_100m)
temperature_100m = dataset_filtered['Temperature_height_above_ground'].sel(height_above_ground3=height_100m)
#pressure_80m = dataset_filtered['Pressure_height_above_ground'].sel(height_above_ground=height_80m)


# Convert xarray DataArrays to pandas DataFrames
df_specific_humidity_80m = specific_humidity_80m.to_dataframe(name='Specific_Humidity_80m').reset_index()
df_v_wind_100m = v_wind_100m.to_dataframe(name='V_Wind_100m').reset_index()
df_u_wind_100m = u_wind_100m.to_dataframe(name='U_Wind_100m').reset_index()
df_temperature_100m = temperature_100m.to_dataframe(name='Temperature_100m').reset_index()
df_pressure_80m = pressure_80m.to_dataframe(name='pressure_80m').reset_index()


In [15]:
# Merge DataFrames on the common columns (e.g., 'time', 'latitude', 'longitude')
# dependiendo el rango de fecha consultado la columna time, debe ser reemplazado por time1
#time_column = 'time'
time_column = 'time1'

df_merged = pd.merge(df_specific_humidity_80m[[time_column,'Specific_Humidity_80m']], df_v_wind_100m[[time_column,'V_Wind_100m']], on=[time_column])
df_merged = pd.merge(df_merged, df_u_wind_100m[[time_column,'U_Wind_100m']], on=[time_column])
df_merged = pd.merge(df_merged, df_temperature_100m[[time_column,'Temperature_100m']], on=[time_column])
df_merged = pd.merge(df_merged, df_pressure_80m[[time_column,'pressure_80m']], on=[time_column])

In [16]:
df_merged.tail()

Unnamed: 0,time1,Specific_Humidity_80m,V_Wind_100m,U_Wind_100m,Temperature_100m,pressure_80m
182,2024-09-08 18:00:00,0.007626,-6.86918,0.138794,299.645203,98743.585938
183,2024-09-08 21:00:00,0.008644,-7.510286,0.134998,299.250031,98697.648438
184,2024-09-09 00:00:00,0.009595,-11.081324,-2.837847,295.074982,98763.765625
185,2024-09-09 03:00:00,0.010021,-5.010908,5.425535,292.614136,98893.539062
186,2024-09-09 06:00:00,0.008277,0.887576,6.357952,290.908386,98934.023438


##### Convert UTC(0) to UTC(-3)

In [33]:
# Define the time difference (3 hours) UTC(0) to UTC(-3)
time_difference = np.timedelta64(3, 'h')

# Subtract the time difference from the time coordinate
df_merged['time'] = df_merged['time'] - time_difference

df_merged.rename(columns={'time': 'FechaHora'}, inplace=True)

##### Calculate Wind Speed

In [34]:
# Calculate wind speed from u and v components (magnitude of the vector)
df_merged['ws100'] = np.sqrt(df_merged['U_Wind_100m']**2 + df_merged['V_Wind_100m']**2)

##### Calculate Wind Direction

In [35]:
# Calculate wind dir from u and v components (magnitude of the vector)

# Calculate the angle in radians
theta = np.arctan2(df_merged['V_Wind_100m'], df_merged['U_Wind_100m'])

# Convert the angle from radians to degrees
theta_deg = np.degrees(theta)

df_merged['dir100'] = (270 - theta_deg) % 360

##### Convert Temoerature to °C

In [36]:
# Convert temperature from Kelvin to Celsius
df_merged['temp'] = df_merged['Temperature_100m'] - 273.15

##### Calculate Air density

In [37]:
# Constants
R_dry = 287.058  # Specific gas constant for dry air, J/(kg·K)
R_vapor = 461.495  # Specific gas constant for water vapor, J/(kg·K)
epsilon = R_dry / R_vapor

# Given data (example values, replace with actual data)
T_100m = df_merged['Temperature_100m']  # Temperature at 100 meters (K)
p80 = df_merged['pressure_80m']  # Pressure at 80 meters (Pa)
q80 = df_merged['Specific_Humidity_80m']  # Specific humidity at 80mts (kg/kg)


# Calculate air density
e = q80 * p80 / (epsilon + q80)  # Partial pressure of water vapor (Pa)
P_dry = p80 - e  # Partial pressure of dry air (Pa)
rho_dry = P_dry / (R_dry * T_100m)  # Density of dry air (kg/m³)
rho_vapor = e / (R_vapor * T_100m)  # Density of water vapor (kg/m³)

df_merged['air_density'] = rho_dry + rho_vapor  # Total air density (kg/m³)


##### Calculate Relation Thrust Force T (Fuerza de Empuje) / Coefficient Ct (Coeficiente de Empuje) 

En el contexto de las turbinas eólicas, la fuerza de empuje es la fuerza aerodinámica que el viento ejerce sobre las palas del rotor de una turbina.
Esta fuerza afecta tanto el rendimiento de la turbina como la distribución de cargas en su estructura, lo que es crucial para el diseño y la operación eficientes.

T / Ct = 0.5 * air_density * Area_rotor * wind_speed^2

Ct= es un coeficiente dado por el fabricante



In [38]:
# rotor diameter in meters 
D = 125 # Turbine Acciona AW 125/3150

# Calculate rotor swept area (m²)
A = np.pi * (D / 2) ** 2

# Calculate relation between Thrust Force / Thrust Coefficient C_T
df_merged['rel_Tct'] = 0.5 * df_merged['air_density'] * df_merged['ws100']**2

##### Filter Main Variables

In [39]:
df_gfs_fc = df_merged [['FechaHora','ws100','dir100','temp','air_density','rel_Tct']].copy()

In [40]:
df_gfs_fc.head()

Unnamed: 0,FechaHora,ws100,dir100,temp,air_density,rel_Tct
0,2024-07-12 15:00:00,5.725431,307.596497,8.751892,1.243437,20.380274
1,2024-07-12 18:00:00,5.791431,322.454346,8.789337,1.242727,20.840948
2,2024-07-12 21:00:00,7.346884,338.205719,6.962128,1.25097,33.76162
3,2024-07-13 00:00:00,9.226981,340.597809,5.75,1.255641,53.450871
4,2024-07-13 03:00:00,9.97006,341.805481,4.735809,1.259554,62.601173


##### Resample de 3h a 1h (fill gaps coping values)

In [41]:
# Set 'FechaHora' as the index
df_gfs_fc.set_index('FechaHora', inplace=True)

In [42]:
# Resample the dataframe to hourly frequency 
df_gfs_fc = df_gfs_fc.resample('h').asfreq()

In [None]:
# Linear interpolation
df_gfs_fc = df_gfs_fc.interpolate(method='linear')

In [43]:
# Reset index to make 'time' a column
df_gfs_fc.reset_index(inplace=True)

##### Rename columns

In [44]:
# Rename the columns
df_gfs_fc = df_gfs_fc.rename(columns={
    'ws100': 'ws100_gfs',
    'dir100': 'dir100_gfs',
    'temp': 'temp_gfs',
    'air_density': 'airden_gfs',
    'rel_Tct': 'reltct_gfs'
})

In [45]:
# Find the minimum 'FechaHora'
min_fecha_hora = df_gfs_fc['FechaHora'].min()

# Convert the minimum 'FechaHora' to string (date only)
min_fecha_hora_date_str = min_fecha_hora.strftime('%Y-%m-%d')

# Define the file path with the date string
file_path = f'D:\\Documents\\MMA\\1.0 Tesis\\Datos\\GFS\\ds_fc_gfs_{min_fecha_hora_date_str}.csv'

# Save the DataFrame to a CSV file with the corrected file path
df_gfs_fc.to_csv(file_path, sep=';', index=False, decimal=',')

## ______________________________________________________________________________________________________

### GFS - Hitorical Data


Research Data Archive RDA: 
The Research Data Archive (RDA) is a comprehensive resource for climate and weather data, managed by the National Center for Atmospheric Research (NCAR).

Provide the: GFS (Global Forecast System): Operational weather forecast data produced by NCEP. 

The National Centers for Environmental Prediction (NCEP) is a part of the National Oceanic and Atmospheric Administration (NOAA) within the United States
link: https://rda.ucar.edu/datasets/ds084.1/

Ds084.1: The NCEP operational Global Forecast System analysis and forecast grids are on a 0.25 by 0.25 global latitude longitude grid. Grids include analysis and forecast time steps at a 3 hourly interval from 0 to 240, and a 12 hourly interval from 240 to 384. Model forecast runs occur at 00, 06, 12, and 18 UTC daily.








https://github.com/NCAR/rda-apps-clients/tree/main#rda-apps-clients

NCEP GDAS/FNL 0.25 Degree Global Tropospheric Analyses and Forecast Grids 
ds083.3
https://rda.ucar.edu/datasets/ds083.3/
Description:
Rights: Freely Available
summary: These NCEP FNL (Final) operational global analysis and forecast data are on 0.25-degree by 0.25-degree grids prepared operational**ly every six ho**urs. This product is from the Global Data Assimilation System (GDAS), which continuously collects observational data from the Global Telecommunications System (GTS), and other sources, for many analyse

s. The FNLs are made with the same model which NCEP uses in the Global Forecast System (GFS), but the FNLs are prepared about an hour or so after the GFS is initialized. The FNLs are delayed so that more observational data can be used. The GFS is run earlier in support of time critical forecast needs, and uses the FNL from the previous 6 hour cycle as part of its initializati
on.
summary: The analyses are available on the surface, at 26 mandatory (and other pressure) levels from 1000 millibars to 10 millibars, in the surface boundary layer and at some sigma layers, the tropopause and a few others. Parameters include surface pressure, sea level pressure, geopotential height, temperature, sea surface temperature, soil values, ice cover, relative humidity, u- and v- winds, vertical motion, vorticity and ozone.
summary: The archive time series is continuously extended to a near-current date. It is not maintained in real-time.
NCEP GDAS/FNL 0.25 Degree Global Tropospheric Analyses and Forecast Gridss (ds083.3)
NCEP Model docum








## RDA Request Workflow

In [1]:
import rdams_client as rc
import time
import sys, os
import requests
import xarray as xr
import pandas as pd
import glob
import numpy as np
from datetime import timedelta
import tarfile

**Function to keep checking if a request is ready.**

In [2]:
def check_ready(rqst_id, wait_interval=120):
    """Checks if a request is ready."""
    for i in range(100): # 100 is arbitrary. This would wait 200 minutes for request to complete
        res = rc.get_status(rqst_id)
        request_status = res['data']['status']
        if request_status == 'Completed':
            return True
        print(request_status)
        print('Not yet available. Waiting ' + str(wait_interval) + ' seconds.' )
        time.sleep(wait_interval)
    return False

**Control dict for subsetting.**

In [7]:
control = { 
         'dataset' : 'ds084.1',
         'date':'202401010000/to/202406150000', #conviene descargar lote de archivos por año
         'datetype':'init',
         'param':'V GRD/U GRD/TMP/PRES/SPF H',
         'level':'HTGL:100/80',
         'oformat': 'netCDF', 
         'nlat':-38.5, 
         'slat':-38.5, 
         'elon':-62.75,
         'wlon':-62.75,
         'time': 0,
         'product': '/3-hour Forecast/6-hour Forecast/9-hour Forecast/12-hour Forecast/15-hour Forecast/18-hour Forecast/21-hour Forecast/24-hour Forecast'
         } 

**Submit a request and check if it went through without an error.**

In [8]:
response = rc.submit_json(control)
assert response['http_response'] == 200
rqst_id = response['data']['request_id']

print(response)

{'status': 'ok', 'http_response': 200, 'error_messages': [], 'data': {'request_id': '746296'}, 'contact': 'rdahelp@ucar.edu'}


**Checks if the request is ready. When it is, it will start to download the files**

En el dashboard se puede ver el estado del request, boton Show Request

https://rda.ucar.edu/ajax/#!cgi-bin/dashboard 

In [None]:

#rqst_id=746296
check_ready(rqst_id)
download_response = rc.download(rqst_id)

Downloading gfs.0p25.2024010100.f003-25.2024061500.f024.grib2.nc.tar
100.000 % Completed


**Download the files**

In [13]:
# URL of the file to download
file_url = download_response['data']['web_files'][0]['web_path']

# Directory to save the file
save_dir = 'D:\\Documents\\MMA\\1.0 Tesis\\Datos\\GFS\\Archive'

# Ensure the directory exists
os.makedirs(save_dir, exist_ok=True)

# Extract the filename from the URL
filename = os.path.basename(file_url)

# Path to save the file locally
save_path = os.path.join(save_dir, filename)

# Download the file
print('Downloading', filename)
req = requests.get(file_url, allow_redirects=True)
with open(save_path, 'wb') as file:
    file.write(req.content)

Downloading gfs.0p25.2024010100.f003-25.2024061500.f024.grib2.nc.tar


In [14]:
# Path to the TAR file
tar_file_path = save_path

# Directory to extract the files
extract_dir = 'D:\\Documents\\MMA\\1.0 Tesis\\Datos\\GFS\\Files_3h'

# Ensure the directory exists
os.makedirs(extract_dir, exist_ok=True)

# Open the TAR file for reading
with tarfile.open(tar_file_path, 'r') as tar:
    # Extract all files to the extract_dir
    tar.extractall(path=extract_dir)

**Purge the request. Purge will occur automatically after 10 days**

In [None]:
# Optionally purge request
#rqst_id='743254'
rc.purge_request(rqst_id)

## Transformacion de Archivos Descargados

In [15]:
# Step 1: Define the path to the files
file_directory = 'D:\\Documents\\MMA\\1.0 Tesis\\Datos\\GFS\\Files_3h'
file_pattern = os.path.join(file_directory, 'gfs.0p25.*00.*grib2.nc')
files = glob.glob(file_pattern)

# Step 2: Open multiple files and store them in a list of Datasets
datasets = [xr.open_dataset(file) for file in files]

In [16]:
# Step 3: Extract the required variables and combine the datasets
data_vars = ['U_GRD_L103', 'V_GRD_L103','TMP_L103','SPF_H_L103','PRES_L103']
combined_dataset = xr.concat(datasets, dim='time')

# Step 4: Create a DataFrame with time as the index and the required variables as columns
df100 = combined_dataset[data_vars].to_dataframe()

In [17]:
# Reset the index to flatten the DataFrame
df100.reset_index(inplace=True)

In [18]:
#df100.head(50)

In [19]:
# Group by 'level0' and sum the specified columns
grouped_df = df100.groupby('level0')[['SPF_H_L103', 'PRES_L103']].sum().reset_index()

print(grouped_df)

   level0  SPF_H_L103     PRES_L103
0    80.0   83.950104  1.554682e+09
1   100.0   83.950104  1.554682e+09


In [20]:
# Keep records with 'level0' == 100
df100 = df100[df100['level0'] == 100] 

In [21]:
# Set the time as the index
df100.set_index('time', inplace=True)

# Drop any unwanted columns (lat, lon if they are not needed)
df100.drop(columns=['lat', 'lon','level0'], inplace=True)

In [22]:
df100.tail()

Unnamed: 0_level_0,U_GRD_L103,V_GRD_L103,TMP_L103,SPF_H_L103,PRES_L103
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-06-15 12:00:00,7.507061,-1.691675,284.145081,0.004459,99436.828125
2024-06-15 15:00:00,6.050413,-2.742749,286.968323,0.004779,99562.890625
2024-06-15 18:00:00,8.108137,-3.907915,289.842224,0.004545,99407.359375
2024-06-15 21:00:00,5.115747,-3.477144,289.970764,0.004867,99470.304688
2024-06-16 00:00:00,3.710671,-3.511648,287.762146,0.00465,99613.585938


In [23]:
# Convert the index to DateTime to ensure consistency
df100.index = pd.to_datetime(df100.index)

#Reset Index
df100.reset_index(inplace=True)

# Rename the index column to 'FechaHora'
df100.rename(columns={'time': 'FechaHora'}, inplace=True)

##### Convert UTC(0) to UTC(-3)

In [40]:
df100.tail()

Unnamed: 0,FechaHora,U_GRD_L103,V_GRD_L103,TMP_L103,SPF_H_L103,PRES_L103,ws100,dir100,temp,air_density,rel_Tct
15667,2024-06-15 09:00:00,7.507061,-1.691675,284.145081,0.004459,99436.828125,7.695305,282.699158,10.995087,1.215815,35.998894
15668,2024-06-15 12:00:00,6.050413,-2.742749,286.968323,0.004779,99562.890625,6.643054,294.385559,13.818329,1.205149,26.591703
15669,2024-06-15 15:00:00,8.108137,-3.907915,289.842224,0.004545,99407.359375,9.00076,295.73288,16.69223,1.191502,48.263992
15670,2024-06-15 18:00:00,5.115747,-3.477144,289.970764,0.004867,99470.304688,6.18558,304.203705,16.82077,1.191498,22.794186
15671,2024-06-15 21:00:00,3.710671,-3.511648,287.762146,0.00465,99613.585938,5.10889,313.421509,14.612152,1.202529,15.69345


In [25]:
# Define the time difference (3 hours) UTC(0) to UTC(-3)
time_difference = np.timedelta64(3, 'h')

# Subtract the time difference from the time coordinate
df100['FechaHora'] = df100['FechaHora'] - time_difference


##### Calculate Wind Speed

In [26]:
# Calculate wind speed from u and v components (magnitude of the vector)
df100['ws100'] = np.sqrt(df100['U_GRD_L103']**2 + df100['V_GRD_L103']**2)

##### Calculate Wind Direction

In [27]:
# Calculate wind dir from u and v components (magnitude of the vector)

# Calculate the angle in radians
theta = np.arctan2(df100['V_GRD_L103'], df100['U_GRD_L103'])

# Convert the angle from radians to degrees
theta_deg = np.degrees(theta)

df100['dir100'] = (270 - theta_deg) % 360

##### Convert Temoerature to °C

In [28]:
# Convert temperature from Kelvin to Celsius
df100['temp'] = df100['TMP_L103'] - 273.15

##### Calculate Air density

In [29]:
# Constants
R_dry = 287.058  # Specific gas constant for dry air, J/(kg·K)
R_vapor = 461.495  # Specific gas constant for water vapor, J/(kg·K)
epsilon = R_dry / R_vapor

# Given data (example values, replace with actual data)
T_100m = df100['TMP_L103']  # Temperature at 100 meters (K)
p80 = df100['PRES_L103']  # Pressure (Pa)
q80 = df100['SPF_H_L103']  # Specific humidity (kg/kg)


# Calculate air density
e = q80 * p80 / (epsilon + q80)  # Partial pressure of water vapor (Pa)
P_dry = p80 - e  # Partial pressure of dry air (Pa)
rho_dry = P_dry / (R_dry * T_100m)  # Density of dry air (kg/m³)
rho_vapor = e / (R_vapor * T_100m)  # Density of water vapor (kg/m³)

df100['air_density'] = rho_dry + rho_vapor  # Total air density (kg/m³)


##### Calculate Relation Thrust Force T (Fuerza de Empuje) / Coefficient Ct (Coeficiente de Empuje) 

En el contexto de las turbinas eólicas, la fuerza de empuje es la fuerza aerodinámica que el viento ejerce sobre las palas del rotor de una turbina.
Esta fuerza afecta tanto el rendimiento de la turbina como la distribución de cargas en su estructura, lo que es crucial para el diseño y la operación eficientes.

T / Ct = 0.5 * air_density * Area_rotor * wind_speed^2

Ct= es un coeficiente dado por el fabricante



In [30]:
# rotor diameter in meters 
D = 125 # Turbine Acciona AW 125/3150

# Calculate rotor swept area (m²)
A = np.pi * (D / 2) ** 2

# Calculate relation between Thrust Force / Thrust Coefficient C_T
df100['rel_Tct'] = 0.5 * df100['air_density'] * df100['ws100']**2

##### Filter Main Variables

In [31]:
# Filter records where 'FechaHora' > '2019-02-01', since the plant is in production
df100 = df100[df100['FechaHora'] > '2019-02-01']

In [32]:
df_histo_gfs = df100[['FechaHora','ws100','dir100','temp','air_density','rel_Tct']].copy()

In [33]:
df_histo_gfs.head()

Unnamed: 0,FechaHora,ws100,dir100,temp,air_density,rel_Tct
1,2019-02-01 03:00:00,5.552909,148.340744,21.855316,1.162925,17.929274
2,2019-02-01 06:00:00,6.809348,113.632187,20.449982,1.169856,27.121483
3,2019-02-01 09:00:00,6.788195,176.223724,21.749969,1.168998,26.933479
4,2019-02-01 12:00:00,5.256702,191.177643,25.550018,1.156605,15.980191
5,2019-02-01 15:00:00,7.059056,216.527283,28.449982,1.144593,28.517687


##### Resample de 3h a 1h (fill gaps coping values)

In [34]:
# Set 'FechaHora' as the index
df_histo_gfs.set_index('FechaHora', inplace=True)

In [None]:
# Resample the dataframe to hourly frequency 
df_histo_gfs = df_histo_gfs.resample('h').asfreq()

In [None]:
# Linear interpolation
df_histo_gfs = df_histo_gfs.interpolate(method='linear')

In [36]:
# Reset index to make 'time' a column
df_histo_gfs.reset_index(inplace=True)

##### Rename columns

In [None]:
# Rename the columns
df_histo_gfs = df_histo_gfs.rename(columns={
    'ws100': 'ws100_gfs',
    'dir100': 'dir100_gfs',
    'temp': 'temp_gfs',
    'air_density': 'airden_gfs',
    'rel_Tct': 'reltct_gfs'
})

##### Save the dataset to a csv

In [37]:
# Find the maximum 'FechaHora'
max_fecha_hora = df_histo_gfs['FechaHora'].max()

# Convert maximum 'FechaHora' to string format (date only)
max_fecha_hora_str = max_fecha_hora.strftime('%Y-%m-%d')



In [38]:
max_fecha_hora_str

'2024-06-15'

In [39]:
df_histo_gfs.to_csv(f'D:\\Documents\\MMA\\1.0 Tesis\\Datos\\GFS\\ds_histo_gfs_{max_fecha_hora_str}.csv', sep=';', index=False, decimal=',')



#### ---------------------  **FIN** ---------------------

## __________________________________________________________________________________________________________

# Auxiliar 

Exploracion de parametros de dataset y como armar templates de descagas con rdams_client 

### Retrive Files using rdams_client

In [None]:
# Make sure this is in the same directory as this notebook or is in your PATH environment variable.
import rdams_client as rc

# file rdams_token.txt has to by in the same directory

In [None]:
dsid = 'ds084.1'
param_response = rc.query(['-get_param_summary', dsid, '-np'])

In [None]:
result = rc.query(['-get_summary', 'ds084.1'])

In [None]:
# get_param_summary() returns an RDA response object that has more info that we need, so we'll filter it out
param_data = param_response['data']['data']

# Next we will just pull out the descriptions or long name of each of the parameters.
params = list(map(lambda x: x['param_description'], param_data))
print('\n'.join(params))

#### Available Paramaters 

In [None]:
param_map = {}
for _param in param_data:
    long_name = _param['param_description']
    short_name = _param['param']
    param_map[long_name] = short_name

for k,v in param_map.items(): print('{:7} : {}'.format(v, k))
    

In [None]:
# For large datasets this may take a while as you're pulling all the metadata.
#metadata_response = rc.query(['-get_metadata', dsid])

#### Filter by Param, Date and Level 

In [None]:
# List of dicts representing a variable
_vars = metadata_response['data']['data']

# Get climate variables
TMP_variables = list(filter(lambda v: v['param'] == 'V GRD',_vars)) 

# Let's say we're only interested in 2019
TMP_2010_variables = list(filter(
        lambda v: v['start_date'] < 201901010000 and v['end_date'] > 202001010000 ,TMP_variables
        )) 

# We only should have 1 variable
#assert len(TMP_2010_variables) == 1
my_var = TMP_2010_variables[0]

# Now let's look at the levels available:
for lev in my_var['levels']:
    print('{:6} {:10} {}'.format(lev['level'], lev['level_value'],lev['level_description']))

#### Specified height above ground

In [None]:
HTGL_levels = set()
HTGL_levels_printed = []
for lev in my_var['levels']:
    if lev['level_description'] == 'Specified height above ground' \
            and float(lev['level_value']) <= 200:
        HTGL_levels.add(lev['level_value'])
        HTGL_levels_printed.append(lev)
for lev in HTGL_levels_printed:
    print('{:6} {:10} {}'.format(lev['level'], lev['level_value'],lev['level_description']))

#### Make a request

In [None]:
dsid='ds083.3'
response = rc.get_control_file_template(dsid)

In [None]:
response

In [None]:
dsid='ds084.1'
response2 = rc.get_control_file_template(dsid)

In [None]:
response2

In [None]:
dsid='ds083.3'
# Let's get a template. 
response = rc.get_control_file_template(dsid)
template = response['data']['template'] # Template string

# Parse the string
template_dict = rc.read_control_file(template)

template_dict

In [None]:
# Insert our TMP param
template_dict['param'] = 'TMP/SPF H/V GRD/U GRD/PRES'
template_dict['level'] = 'HTGL:' + '/'.join(HTGL_levels)
template_dict['nlat'] = -38.5
template_dict['slat'] = -38.5
template_dict['elon'] = -62.75
template_dict['wlon'] = -62.75
template_dict

In [None]:
# Now We can submit a request
response = rc.submit_json(template_dict)
assert response['http_response'] == 200
print(response)

print("Success!")

In [None]:
#TEMPLATE

#dataset=dsnnn.n                              # Required, use '-get_metadata' field 'dataset'
#date=YYYYMMDDHHMN/to/YYYYMMDDHHMM            # Required, use '-get_metadata' fields 'startdate' and 'enddate' as bounds
#datetype=init                                # Optional, use if you would like the date range to include data based on model initialization date/time instead of valid date/time
#param=SSSS/SSSS/SSSS                         # Required, use '-get_metadata' field 'param' or 'param_description'.  Separate multiple parameters with "/".
#level=SSSS:NNN/NNN;SSSS:NNN;SSSS:NNN/NNN     # Optional, use '-get_metadata' field 'level' or 'level_description' for 'SSSS'.
                                             # Use '-get_metadata' field 'levelvalue' for 'NNN'.  Separate multiple level values with "/".
#oformat=SSSS                                 # Optional but required if spatial subsetting is requested on select datasets. Current options are netCDF or csv for single grid point extraction. 
#nlat=NN                                      # Optional, use for spatial subset requests 90 to -90
#slat=NN                                      # Optional, use for spatial subset requests 90 to -90
#wlon=NN                                      # Optional, use for spatial subset requests -180 to 180
#elon=NN                                      # Optional, use for spatial subset requests -180 to 180
					     # To extract a single grid point at lat=yy.y,lon=xxx.x, set both nlat and slat=yy.y, and both elon and wlon = xxx.x
#product=SSSS/SSSS/SSSS                       # Optional, use '-get_metadata' field 'product'.  Separate multiple products with "/".
#gridproj=SSSS                                # Optional, use '-get_metadata' field 'gridproj'
#griddef=SSSS                                 # Optional, use '-get_metadata' field 'griddef'
#groupindex=NN                                # Optional, use '-get_summary' field 'groupindex' if available
#compression=NN                               # Optional, use 'GZ' for gzip, 'Z' for unix compress, 'BZ2' for bzip2, or 'ZIP' for Zip, for external users only
#targetdir=SSSS                               # Optional, request output will be created in current working directory if 'targetdir' is not set to a desired output directory.  This option is only available for NCAR HPC users.



# ------------------------------------------------------------------------------------------

### Retrive Archive Data using OPeNDAP

When using OPeNDAP to open datasets with xarray, the library does not download the entire file initially. Instead, it downloads only the metadata and the specific data subset that you request. This means that when you select a specific variable and subset it by latitude and longitude, only that subset of data is transferred over the network. This is efficient and allows you to work with large datasets without having to download the entire file

In [None]:
import pandas as pd
import xarray as xr

# Define the base URL for the FNL dataset
base_url = "https://thredds.rda.ucar.edu/thredds/dodsC/files/g/ds083.3/"
# Guardar es la url de OpenData: https://thredds.rda.ucar.edu/thredds/dodsC/files/g/ds083.3/2019/201901/gdas1.fnl0p25.2019010100.f00.grib2.html


# Define the dataset parameters
start_date = "2019-01-01"
end_date = "2019-12-31"
variables = [
    'v-component_of_wind_height_above_ground',
    'u-component_of_wind_height_above_ground',
#    'Pressure_height_above_ground',
#    'Temperature_height_above_ground',
#    'Relative_humidity_height_above_ground',
#    'Specific_humidity_height_above_ground'
]

# Heights mapping for each variable
variable_height_mapping = {
    'v-component_of_wind_height_above_ground': ('height_above_ground1', 100),
    'u-component_of_wind_height_above_ground': ('height_above_ground1', 100),
#    'Pressure_height_above_ground': ('height_above_ground', 100),
#    'Temperature_height_above_ground': ('height_above_ground4', 100),
#    'Relative_humidity_height_above_ground': ('height_above_ground3', 100),
#    'Specific_humidity_height_above_ground': ('height_above_ground2', 100)
}

lat_location = -38.5
lon_location = -62.75

# Create a function to generate the URL for each file
def generate_file_url(date):
    year = date[:4]
    month = date[5:7]
    day = date[8:10]
    hour = date[11:13]
    return f"{base_url}{year}/{year}{month}/gdas1.fnl0p25.{year}{month}{day}{hour}.f00.grib2"

# Define the date range
dates = pd.date_range(start_date, end_date, freq='6h')  # Adjust frequency to match data availability

# Initialize an empty dictionary to store data arrays for each variable
data_arrays = {var: [] for var in variables}

# Loop over each date and fetch the data
for date in dates:
    file_url = generate_file_url(date.strftime('%Y-%m-%d %H:%M:%S'))
    try:
        # Open the dataset using xarray directly
        ds = xr.open_dataset(file_url)
        for var in variables:
            height_coord, height_value = variable_height_mapping[var]
            # Select the variable and the specific location
            variable_data = ds[var].sel({height_coord: height_value, 'lat': lat_location, 'lon': lon_location}, method='nearest')
            #variable_data = ds[var].sel({'lat': lat_location, 'lon': lon_location}, method='nearest')
            # Append to the list for this variable
            data_arrays[var].append(variable_data)
    except Exception as e:
        print(f"Failed to fetch data for {date}: {e}")

# Combine all the data arrays into a single dataset
combined_data = xr.Dataset()

for var in variables:
    if data_arrays[var]:
        combined_data[var] = xr.concat(data_arrays[var], dim="time")
    else:
        print(f"No data was fetched for {var}.")

# Define the output filename dynamically
output_filename = f"FNL_all_variables_{start_date.replace('-', '')}_{end_date.replace('-', '')}.nc"

# Save the combined dataset to a NetCDF file with the dynamic filename
combined_data.to_netcdf(output_filename)
print(f"Data successfully saved to {output_filename}")