# Preprocess U10 Series


In [1]:
import xarray as xr
import numpy as np
import os
import pandas as pd
import scipy
import geopandas as gpd
import shapely.vectorized
from itertools import product

### Functions to find, read, and concatenate data

In [2]:
#GCM FUNCTIONS
def yearcheck(file, year, month):
    """Report whether a dataset has any time step in the given year and month.

    Parameters
    ----------
    file : str or path-like
        Path of a dataset readable by ``xr.open_dataset`` that has a ``time``
        coordinate.
    year : int
        Calendar year to look for.
    month : int
        Calendar month number to look for.

    Returns
    -------
    bool
        True when at least one entry of the ``time`` coordinate falls in
        ``month`` of ``year``; False otherwise, including when the time
        coordinate is empty.
    """
    with xr.open_dataset(file) as ds:
        time = ds.coords['time'].values

    # An empty time axis cannot match; bail out before indexing time[0].
    if len(time) == 0:
        return False

    if isinstance(time[0], np.datetime64):
        # datetime64 scalars carry no .year/.month attributes, so compare
        # through a pandas DatetimeIndex instead.
        stamps = pd.to_datetime(time)
        return bool(((stamps.year == year) & (stamps.month == month)).any())

    # Any other time object is expected to expose .year and .month itself
    # (presumably cftime dates for non-standard calendars -- TODO confirm).
    return any(t.year == year and t.month == month for t in time)

def extract_gcm_data(ncfile, var, year, month):
    """Load one variable for a single year/month together with its grid axes.

    Parameters
    ----------
    ncfile : str or path-like
        Dataset path passed to ``xr.open_dataset``.
    var : str
        Name of the data variable to read.
    year, month : int
        Only time steps whose year and month match these values are kept.

    Returns
    -------
    var_field : numpy.ndarray
        Values of ``var`` for the selected time steps.
    x : numpy.ndarray
        Longitude values, with negative entries shifted up by 360.
    y : numpy.ndarray
        Latitude values.
    dt : numpy.ndarray
        The selected entries of the ``time`` coordinate.

    Raises
    ------
    ValueError
        If the dataset has neither a ``lon``/``lat`` nor a
        ``longitude``/``latitude`` variable pair.
    """
    # Accepted (longitude, latitude) variable names, in order of preference.
    coord_name_pairs = (('lon', 'lat'), ('longitude', 'latitude'))

    with xr.open_dataset(ncfile) as dataset:
        in_year = dataset.sel(time=dataset.time.dt.year.isin([year]))
        subset = in_year.sel(time=in_year.time.dt.month.isin([month]))
        dt = np.array(subset.coords['time'][:])

        for lon_name, lat_name in coord_name_pairs:
            if lon_name in subset.variables and lat_name in subset.variables:
                x = np.array(subset.variables[lon_name][:])
                y = np.array(subset.variables[lat_name][:])
                break
        else:
            raise ValueError("Unable to find latitude and longitude variables in the dataset.")

        # Shift negative longitudes up by 360 so every value is non-negative.
        x = np.where(x < 0, x + 360, x)
        var_field = np.array(subset.variables[var][:])

    return var_field, x, y, dt


def concatenate_wind(root1, var1, root2, var2, year0, yearn, month):
    """Build a per-year series of 90th-percentile wind speed for one month.

    For every year in ``[year0, yearn)`` the two wind components are read for
    ``month``, combined into a speed ``sqrt(c1**2 + c2**2)``, and reduced to
    the 90th percentile (NaNs ignored) along axis 0 of the field
    (presumably the time axis -- TODO confirm against the input files).

    Parameters
    ----------
    root1, root2 : str
        Directory trees searched (via ``find_gcm_file``) for the files that
        hold the first and second wind component.
    var1, var2 : str
        Variable names of the two components inside those files.
    year0 : int
        First year of the series (inclusive).
    yearn : int
        End year of the series (exclusive).
    month : int
        Month number to extract from every year.

    Returns
    -------
    series : numpy.ndarray
        One percentile field per year, stacked along a new leading axis.
    dt : numpy.ndarray
        All time stamps read for ``month`` from the second component's files,
        concatenated over the years. Its length therefore generally differs
        from ``series.shape[0]``.
    x, y : numpy.ndarray
        Longitude and latitude axes as returned by ``extract_gcm_data`` for
        the last year processed.

    Raises
    ------
    ValueError
        If the year range is empty.
    FileNotFoundError
        If no file covering a requested year/month exists under a root.
    """
    years = np.arange(year0, yearn, 1)
    if len(years) == 0:
        raise ValueError(f'No years to process: year0={year0}, yearn={yearn}.')

    yearly_fields = []
    time_chunks = []
    for year in years:
        file1 = find_gcm_file(root1, year, month)
        file2 = find_gcm_file(root2, year, month)
        # find_gcm_file reports "not found" with an empty (falsy) value; fail
        # here with a clear message instead of inside the dataset reader.
        for root, found in ((root1, file1), (root2, file2)):
            if not found:
                raise FileNotFoundError(
                    f'No file under {root!r} contains year {year}, month {month}.'
                )

        comp1, x, y, _ = extract_gcm_data(file1, var1, year, month)
        comp2, x, y, temp_dt = extract_gcm_data(file2, var2, year, month)

        speed = ((comp1**2) + (comp2**2))**0.5
        yearly_fields.append(np.nanpercentile(speed, 90, axis=0))
        time_chunks.append(temp_dt)

    # Stack once at the end rather than growing the arrays on every iteration.
    series = np.stack(yearly_fields)
    dt = np.concatenate(time_chunks)
    return series, dt, x, y

def find_gcm_file(root_directory, year, month):
    """Search a directory tree for a file with data for the given year/month.

    Every file below ``root_directory`` is tested with ``yearcheck`` and the
    first one that matches is returned immediately, so a match can no longer
    be overwritten while the walk continues into other directories.

    Parameters
    ----------
    root_directory : str or path-like
        Top of the directory tree to search.
    year : int
        Year the file must contain.
    month : int
        Month number the file must contain within ``year``.

    Returns
    -------
    str or list
        Path of the first matching file. When nothing matches (including
        when ``root_directory`` does not exist or holds no files) a message
        is printed once and an empty list is returned.
    """
    for root, dirs, files in os.walk(root_directory):
        # Sort in place so the traversal order, and hence the file chosen when
        # several match, does not depend on the filesystem's listing order.
        dirs.sort()
        for file in sorted(files):
            candidate = os.path.join(root, file)
            if yearcheck(candidate, year, month):
                return candidate

    print(f'A file which contained the year {year} was not found!')
    return []



### Define the function to take regional averages

In [3]:
def sea_aves(field, seas, x, y):
    """Average a gridded field over the grid nodes inside each region polygon.

    Parameters
    ----------
    field : numpy.ndarray
        Array indexed as ``field[n, iy, ix]``, where ``iy``/``ix`` follow the
        order of ``y``/``x``.
    seas : table with ``iterrows()``
        One row per region, with a ``name`` column and a ``geometry`` column
        holding the region polygon.
    x, y : numpy.ndarray
        1-D longitude and latitude axes of the grid. Longitudes above 180 are
        shifted down by 360 before the point-in-polygon test.

    Returns
    -------
    sea_sum : numpy.ndarray
        NaN-ignoring mean of ``field`` over each region's nodes. Shape is
        ``(n_regions, field.shape[0])`` for two or more regions, 1-D of
        length ``field.shape[0]`` for exactly one region (kept for backward
        compatibility), and ``(0, field.shape[0])`` for no regions.
    sea_names : list
        Region names in row order.
    """
    if np.nanmax(x) > 180:
        x = np.where(x > 180, x - 360, x)
    xv, yv = np.meshgrid(x, y)

    sea_names = []
    sea_means = []
    # Accumulate by position: the table's index labels are not guaranteed to
    # start at 0, so they must not decide which row is "first".
    for _, row in seas.iterrows():
        sea_names.append(row['name'])
        inside = shapely.vectorized.contains(row['geometry'], xv, yv)
        node_rows, node_cols = np.where(inside)
        sea_means.append(np.nanmean(field[:, node_rows, node_cols], axis=1))

    if not sea_means:
        return np.empty((0, np.shape(field)[0])), sea_names
    if len(sea_means) == 1:
        # A single region has always been returned as a 1-D array.
        return sea_means[0], sea_names
    return np.vstack(sea_means), sea_names

# Derive the time series, iterating over models

In [None]:
# Driver: for every climate model, build the regional 90th-percentile wind
# series for the selected months and write one CSV per model.
months = [7, 9, 11]
# Display names used in the CSV column labels, in the same order as `months`.
# Kept separate so `months` still holds month numbers for every model.
month_names = ['July', 'September', 'November']
year0 = 2020
yearn = 2070
# np.arange excludes the end point, so the series stops at yearn - 1.
years = np.arange(year0, yearn, 1)

fn = 'Regions.shp'
seas = gpd.GeoDataFrame.from_file(fn)

# Loop over the climate models' data
for model_name in ['CNRM', 'ECEARTH', 'MPI', 'MRI']:

    root1 = f'WIND_ANALYSIS/{model_name}/uas'
    root2 = f'WIND_ANALYSIS/{model_name}/vas'

    # One block of regional means per month, stacked month-major so the rows
    # line up with product(month_names, regions) below.
    monthly_blocks = []
    for month in months:
        u10, dt, x, y = concatenate_wind(root1, 'uas', root2, 'vas', year0, yearn, month)
        u10_temp, regions = sea_aves(u10, seas, x, y)
        monthly_blocks.append(u10_temp)
    seas_u10 = np.vstack(monthly_blocks)

    cols = [
        month_name + str(' ') + region
        for (month_name, region) in product(month_names, regions)
    ]

    df = pd.DataFrame(seas_u10.T, columns=cols)
    # NOTE(review): the tuple label likely ends up in the CSV header as its
    # string form rather than plain "year"; left unchanged because other code
    # may read that header -- confirm before changing it.
    df.insert(0, ('year', ''), years)
    df.to_csv(f'{model_name}_u10_90_series.csv', index=False)
    print(f"{model_name}_u10_90_series.csv file saved successfully.")
