In [16]:
import xarray as xr
import netCDF4
import pandas as pd
import numpy as np
import pathlib
import matplotlib.pyplot as plt
import calendar
from datetime import datetime, timedelta,date
from tqdm import tqdm
import cftime

In [17]:
# Every (variable, model, scenario) combination to export.
# Keys are the stringified positions '0'..'7'; each value is the list
# [variable, model, scenario] used to pick the matching files by name.
MY_KEYWORDS = {
    str(position): [variable, model, scenario]
    for position, (model, variable, scenario) in enumerate(
        (model, variable, scenario)
        for model in ("MOHC-HadGEM2-ES", "MPI-M-MPI-ESM-LR")
        for variable in ("pr", "tas")
        for scenario in ("historical", "rcp85")
    )
}

# Parent directory holding the <variable>/<model>/<scenario>/<file> tree.
MY_ROOT = pathlib.Path("EAS-22")

# Target location.
# For most of the domain, CORDEX data is stored on a rotated latitude/longitude
# grid, so the coordinates of the target location must first be converted to
# rotated latitude and longitude. The values below are used to select the
# nearest grid cell along the rlat/rlon coordinates.
# Conversion tool: https://agrimetsoft.com/Cordex%20Coordinate%20Rotation
MY_LAT = 3.91
MY_LON = -30.05

In [18]:
def generate_file_paths(root):
    '''
    Collect the paths of all files stored three directory levels below ``root``.

    Args:
        root: Parent directory (``pathlib.Path`` or ``str``) where all the
            folders are located.

    Returns:
        A sorted list of ``pathlib.Path`` objects, one per file found.

    Requirement:
        The folders must be arranged as a tree:
        ``root/<climatic variable>/<model>/<scenario>/<file>``.
        Anything that is not a directory at the first three levels, and
        anything that is not a regular file at the last level, is ignored.
    '''
    root = pathlib.Path(root)
    file_path_collection = [
        file
        for climatic_var in root.iterdir() if climatic_var.is_dir()
        for model in climatic_var.iterdir() if model.is_dir()
        for scenario in model.iterdir() if scenario.is_dir()
        for file in scenario.iterdir() if file.is_file()
    ]
    # iterdir() yields entries in arbitrary order; sorting makes the result
    # deterministic so files of one series are later processed in name order.
    return sorted(file_path_collection)

In [19]:
# Unit conversions applied to the raw values, keyed by climatic variable.
# 'pr' is multiplied by the number of seconds in a day (presumably turning a
# per-second precipitation flux into a daily total -- confirm against the
# file's `units` attribute); 'tas' is shifted by 273.15 (Kelvin -> Celsius).
_UNIT_CONVERSIONS = {
    'pr': lambda raw_values: raw_values * 86400,
    'tas': lambda raw_values: raw_values - 273.15,
}


def _is_360_day_calendar(nc_dates):
    '''Return True when every time stamp belongs to the 360-day calendar.'''
    # NOTE(review): the `calendar` attribute check is an addition for cftime
    # releases that (as far as I recall) decode to plain `cftime.datetime`
    # objects instead of the `Datetime360Day` subclass -- confirm.
    return all(
        isinstance(stamp, cftime.Datetime360Day)
        or getattr(stamp, 'calendar', None) == '360_day'
        for stamp in nc_dates
    )


def _drop_non_gregorian_days(nc_dates, values):
    '''
    Convert 360-day time stamps to ``datetime.date`` objects, discarding the
    days that do not exist in the regular calendar (e.g. 30 February).
    '''
    dates, kept_values = [], []
    for stamp, value in zip(nc_dates, values):
        last_day_of_month = calendar.monthrange(stamp.year, stamp.month)[1]
        if stamp.day <= last_day_of_month:
            dates.append(date(stamp.year, stamp.month, stamp.day))
            kept_values.append(value)
    return dates, kept_values


def nc_to_txt_specific_location(root,file_path_collection,file_keywords,lat,lon):
    '''
    Extract the time series of one climatic variable at one grid cell from a
    collection of NetCDF files and save it as a comma-separated text file.

    Args:
        root: Directory in which the output file is written.
        file_path_collection: Iterable of candidate file paths.
        file_keywords: Sequence ``[variable, model, scenario]``. A path is
            processed only if every keyword occurs as a substring of the full
            path. ``variable`` must be ``'pr'`` or ``'tas'``.
        lat: Target value along the ``rlat`` coordinate of the files.
        lon: Target value along the ``rlon`` coordinate of the files.

    Raises:
        ValueError: If ``file_keywords[0]`` is not a supported variable.

    Output:
        ``<root>/<variable>_<model>_<scenario>.txt`` with the columns ``Date``
        and ``Climate Variable``. If no path matches, only the header row is
        written.
    '''
    variable_name = file_keywords[0]
    try:
        convert_units = _UNIT_CONVERSIONS[variable_name]
    except KeyError:
        # Fail before any file is opened instead of printing and crashing later.
        raise ValueError(
            'Error! Please select appropriate climatic variable '
            f"(got {variable_name!r}, expected 'pr' or 'tas')"
        ) from None

    # Sorted so the rows are written in file-name order regardless of the
    # order in which the paths were collected.
    filtered_paths = sorted(
        (a_path for a_path in file_path_collection
         if all(keyword in str(a_path) for keyword in file_keywords)),
        key=str,
    )

    frames = []
    for a_path in filtered_paths:
        # The context manager closes the file once the values are in memory.
        with xr.open_dataset(a_path) as whole_nc:
            target_nc = whole_nc.sel(rlat=lat, rlon=lon, method='nearest')
            nc_dates = target_nc['time'].values
            values = convert_units(target_nc[variable_name].values)

        if _is_360_day_calendar(nc_dates):
            # One climate model stores its dates in the 360-day calendar;
            # convert them to regular dates before saving.
            dates, values = _drop_non_gregorian_days(nc_dates, values)
        else:
            # Dates are already in a regular format (e.g. datetime64), so no
            # preprocessing is needed before saving them in the dataframe.
            dates = nc_dates

        frames.append(pd.DataFrame({'Date': dates, 'Climate Variable': values}))

    if frames:
        # Concatenate once instead of growing a dataframe inside the loop.
        combined_df = pd.concat(frames, axis=0, ignore_index=True)
    else:
        combined_df = pd.DataFrame(columns=['Date', 'Climate Variable'])

    output_name = f"{file_keywords[0]}_{file_keywords[1]}_{file_keywords[2]}.txt"
    combined_df.to_csv(pathlib.Path(root) / output_name, index=False)

In [20]:
# Collect every file path once, then write one text file per
# [variable, model, scenario] entry configured in MY_KEYWORDS.
file_path_collection = generate_file_paths(root=MY_ROOT)
# Iterate over the configured entries directly so the loop does not depend on
# the dictionary keys being consecutive stringified integers.
for file_keywords in tqdm(MY_KEYWORDS.values(),desc='Processing files'):
    print(f" Files relevant to {file_keywords} being extracted to text format for Latitude:{MY_LAT}, Longitude:{MY_LON}")
    nc_to_txt_specific_location(root=MY_ROOT,file_path_collection=file_path_collection,file_keywords=file_keywords,lat= MY_LAT, lon = MY_LON)
    
    
    

Processing files:   0%|          | 0/8 [00:00<?, ?it/s]

 Files relevant to ['pr', 'MOHC-HadGEM2-ES', 'historical'] being extracted to text format for Latitde:3.91, Longitude:-30.05


Processing files:  12%|█▎        | 1/8 [00:59<06:53, 59.13s/it]

 Files relevant to ['pr', 'MOHC-HadGEM2-ES', 'rcp85'] being extracted to text format for Latitde:3.91, Longitude:-30.05


Processing files:  25%|██▌       | 2/8 [03:44<12:08, 121.34s/it]

 Files relevant to ['tas', 'MOHC-HadGEM2-ES', 'historical'] being extracted to text format for Latitde:3.91, Longitude:-30.05


Processing files:  38%|███▊      | 3/8 [04:23<06:59, 83.87s/it] 

 Files relevant to ['tas', 'MOHC-HadGEM2-ES', 'rcp85'] being extracted to text format for Latitde:3.91, Longitude:-30.05


Processing files:  50%|█████     | 4/8 [06:16<06:22, 95.58s/it]

 Files relevant to ['pr', 'MPI-M-MPI-ESM-LR', 'historical'] being extracted to text format for Latitde:3.91, Longitude:-30.05


Processing files:  62%|██████▎   | 5/8 [07:11<04:02, 80.88s/it]

 Files relevant to ['pr', 'MPI-M-MPI-ESM-LR', 'rcp85'] being extracted to text format for Latitde:3.91, Longitude:-30.05


Processing files:  75%|███████▌  | 6/8 [09:55<03:38, 109.04s/it]

 Files relevant to ['tas', 'MPI-M-MPI-ESM-LR', 'historical'] being extracted to text format for Latitde:3.91, Longitude:-30.05


Processing files:  88%|████████▊ | 7/8 [10:34<01:26, 86.21s/it] 

 Files relevant to ['tas', 'MPI-M-MPI-ESM-LR', 'rcp85'] being extracted to text format for Latitde:3.91, Longitude:-30.05


Processing files: 100%|██████████| 8/8 [12:20<00:00, 92.54s/it]
