# Instructions:

Before running this notebook to run the AI model, please complete the steps below:

1. Git clone the following repository: `git clone https://github.com/jejjohnson/ddf.git`
2. Add the path where you cloned the repository: `sys.path.append('path/to/ddf/installation')`
3. Run the following code in the order provided and modify the simulation configuration as you like.

In [1]:
import sys
sys.path.append('/home/bernatj/ddf')

import numpy as np
from pathlib import Path
import xarray as xr
import os
import datetime
from earth2mip import inference_ensemble, registry
from earth2mip.networks import get_model
from earth2mip.initial_conditions import cds
from earth2mip.inference_ensemble import run_basic_inference
from ddf._src.data.local.xrda import LocalDataSourceXArray

### Choose the model and the GPU device you want to run it on

In [2]:
# Name of the model to run; replace with another model name (e.g. "pangu_6") to switch models
model_name = "fcnv2_sm"
# Model identifier passed to get_model: the "e2mip://" prefix followed by the model name
model = "e2mip://" + model_name
# Device on which the model is loaded
device = "cuda:0"

### We load the model on the GPU

In [3]:
# Instantiate the selected model on the chosen device
time_loop = get_model(model=model, device=device)
# Channel names exposed by the model as `in_channel_names`
# (presumably the variables it expects as input; verify against earth2mip)
channel_names = time_loop.in_channel_names

### **Configuration Section**

In [4]:
# --- Configuration -----------------------------------------------------------
# Format of the initial-condition files: 'grib' or 'netcdf'
file_format = 'grib'
# Prefix used in input and output file names: 'fcnv2' or 'pangu'
ai_model = 'fcnv2'

# Initialisation times: first and last date, and the spacing between them in hours
t0_i = datetime.datetime(2018, 8, 8, 18)
t0_f = datetime.datetime(2018, 8, 8, 18)
delta_h = 6

# Number of forecast steps: 4 steps per day (6 h intervals) for 15 days
num_steps = 4 * 15

# Directory holding the initial-condition data; adapt to your own setup
root = f'/home/bernatj/Data/ai-forecasts/input/{file_format}'
# Tag inserted in the file names; set to 'PGW_multimodel_' to run the PGW simulations
ic_type = ''
# Directory where the forecasts are written
outputdir = "/home/bernatj/Data/ai-forecasts/fcst/"

# Variables to save (one output file per variable)
vars_to_save = [
    't2m',
    'msl',
    'v100',
    'u100',
    'u850',
    'v850',
    't850',
    'z500',
    'tcwv',
]


In [5]:
# Generate the initialisation times from t0_i to t0_f (inclusive), spaced delta_h hours apart.
# A non-positive step would never advance current_time past t0_f, so reject it up front.
if delta_h <= 0:
    raise ValueError(f"delta_h must be a positive number of hours, got {delta_h!r}")

init_times = []
current_time = t0_i
while current_time <= t0_f:
    init_times.append(current_time)
    current_time += datetime.timedelta(hours=delta_h)

# File extension and reader engine depend on the chosen file format.
if file_format == 'grib':
    ending = 'grib'
    engine = 'cfgrib'
elif file_format == 'netcdf':
    ending = 'nc'
    engine = 'netcdf4'
else:
    # Fail here rather than with a NameError on `ending`/`engine` in a later cell.
    raise ValueError(
        f"Unsupported file_format {file_format!r}; expected 'grib' or 'netcdf'"
    )

- We define a function that runs the AI weather model for any initialisation date

In [6]:
def do_forecast(channel_names, file_paths, pressure_name='isobaricInhPa', engine='netcdf4',
                init_time=None, forecast_model=None, n_steps=None):
    """
    Perform an AI weather forecast using specified atmospheric data channels and files.

    This function initializes a data source using the provided channel names and file paths,
    then runs a basic inference model to produce a forecast.

    Parameters:
    - channel_names (list of str): A list of names of the data channels to be used in the forecast.
    - file_paths (list of str): A list of file paths to the data files that contain the
      initial conditions for the forecast.
    - pressure_name (str, optional): The name used to identify the pressure levels in the data.
      Defaults to 'isobaricInhPa'.
    - engine (str, optional): The engine to be used for reading the data files.
      Defaults to 'netcdf4'.
    - init_time (optional): Initialisation time passed to the inference as `time`.
      Defaults to the notebook-level variable `t0`.
    - forecast_model (optional): The loaded model passed to the inference.
      Defaults to the notebook-level variable `time_loop`.
    - n_steps (int, optional): Number of forecast steps.
      Defaults to the notebook-level variable `num_steps`.

    Returns:
    - forecast: The forecast result generated by the model.

    Raises:
    - NameError: If an optional argument is omitted and the corresponding notebook-level
      variable (`t0`, `time_loop` or `num_steps`) has not been defined yet.
    """
    # Fall back to the notebook globals so existing calls that rely on them keep working;
    # passing the values explicitly makes the function independent of cell execution order.
    if init_time is None:
        init_time = t0
    if forecast_model is None:
        forecast_model = time_loop
    if n_steps is None:
        n_steps = num_steps

    # Get the initial data
    data_source_xr = LocalDataSourceXArray(
        channel_names=channel_names,
        file_paths=file_paths,
        pressure_name=pressure_name,
        name_convention="short_name",
        engine=engine,
    )

    # Run the model
    forecast = run_basic_inference(
        forecast_model,
        n=n_steps,
        data_source=data_source_xr,
        time=init_time
    )

    return forecast


### Example of how we can quickly run the model for a series of initialization dates

In [7]:
# Run one forecast per initialisation time and store the selected variables.
for t0 in init_times:

    yyyymmddhh = t0.strftime('%Y%m%d%H')

    # Initial-condition files for this init time: one "sl" and one "pl" file
    file_paths = [
        f"{root}/{yyyymmddhh}/{ai_model}_sl_{ic_type}{yyyymmddhh}.{ending}",
        f"{root}/{yyyymmddhh}/{ai_model}_pl_{ic_type}{yyyymmddhh}.{ending}",
    ]

    # run one forecast (do_forecast reads the current t0 from the notebook namespace)
    forecast = do_forecast(channel_names, file_paths, engine=engine)

    # Store the data in the outputdir, one file per variable.
    # The same joined path is used for creating the directory and for writing the files,
    # so it works whether or not outputdir ends with a path separator.
    run_dir = os.path.join(outputdir, yyyymmddhh)
    os.makedirs(run_dir, exist_ok=True)
    for var in vars_to_save:
        print(var)
        out_file = os.path.join(run_dir, f'{var}_{ai_model}_{ic_type}{yyyymmddhh}.nc')
        (
            forecast.sel(channel=var)
            .squeeze()
            .drop_vars('channel')
            .to_dataset(name=var)
            .to_netcdf(out_file)
        )

    print(f'finished forecast for init {t0}')

t2m
msl
v100
u100
u850
v850
t850
z500
tcwv
finished forecast for init 2018-08-08 18:00:00
