In [1]:
import gstools as gs
import numpy as np

import pandas as pd

from tqdm import tqdm
import xarray as xr
from glob import glob
from multiprocess import Pool
import os 
# Run the notebook from the project root so the relative ./data paths used
# below resolve. Set SEMIVARIOGRAM_ROOT to point at another checkout without
# editing this cell; the default keeps the original location.
os.chdir(os.environ.get("SEMIVARIOGRAM_ROOT", "/g/data/k10/dl6968/Semi-variogram_AU/"))

In [2]:
from gstools import CovModel

class Stab(CovModel):
    """Covariance model with an exponential-type variogram.

    The curve rises from ``nugget`` and has covered ``1 - exp(-3)`` (about
    95 %) of its rise once the lag equals ``len_scale``.
    """

    def variogram(self, r):
        """Evaluate the variogram at lag distance(s) ``r``.

        gamma(r) = nugget + sill * (1 - exp(-3 * r / len_scale))
        """
        # NOTE(review): GSTools documents ``sill`` as ``var + nugget``; if that
        # holds here the plateau is ``nugget + sill`` whenever the nugget is
        # non-zero. The fits in this notebook fix nugget=0 -- confirm intent.
        decay = np.exp(-(3 * r) / self.len_scale)
        return self.nugget + self.sill * (1 - decay)

In [3]:
from scipy.optimize import curve_fit

# Define the piecewise function
def custom_curve(h, c, b, alpha):
    """Piecewise exponential curve that is exactly zero at lag zero.

    For ``h != 0`` the value is ``c + b * (1 - exp(-3 * h / alpha))``; for
    ``h == 0`` it is ``0``.

    Parameters
    ----------
    h : scalar or array-like
        Lag distance(s). Lists and tuples are accepted as well as arrays.
    c : float
        Offset added at every non-zero lag.
    b : float
        Amplitude of the exponential rise.
    alpha : float
        Lag at which the exponential term has decayed to ``exp(-3)``.

    Returns
    -------
    scalar or numpy.ndarray
        A scalar for scalar ``h``, otherwise an array shaped like ``h``.
    """
    if np.isscalar(h):
        if h == 0:
            return 0
        return c + b * (1 - np.exp(-3 * h / alpha))

    # Convert first: on a plain list ``h == 0`` is a single bool and
    # ``-3 * h`` is an empty list, so the arithmetic below would fail.
    lags = np.asarray(h)
    return np.where(lags == 0, 0, c + b * (1 - np.exp(-3 * lags / alpha)))

def fit_sci_curve(h_values, y_values,bins):
    """Fit ``custom_curve`` to observed points and evaluate the fit on ``bins``.

    Parameters
    ----------
    h_values : array-like
        Lag distances of the observed points.
    y_values : array-like
        Observed values at ``h_values``.
    bins : scalar or array
        Lag(s) at which the fitted curve is evaluated.

    Returns
    -------
    tuple
        ``(y_fit, alpha_fitted)``: the fitted curve on ``bins`` and the fitted
        ``alpha`` parameter.
    """
    # Optimiser starting point, ordered as custom_curve's (c, b, alpha).
    start = [0, 0.5, 5]

    # The parameter covariance returned by curve_fit is not needed here.
    fitted, _ = curve_fit(custom_curve, h_values, y_values, p0=start)
    offset, amplitude, alpha_fitted = fitted

    return custom_curve(bins, offset, amplitude, alpha_fitted), alpha_fitted

In [4]:
# Module-level Stab instance (constructed with dim=2) that generate_scale_file
# refits once per day via fit_model.fit_variogram and reads len_scale from.
fit_model = Stab(dim=2)


In [5]:

def _select_fit_points(bins, gamma, n11, n10, min_pairs=2):
    """Return one day's (bins, gamma) pairs that are usable for fitting.

    Bins whose ``n11 + n10`` count does not exceed ``min_pairs`` are dropped,
    the first bin is replaced by the origin ``(0, 0)``, and every pair in
    which either value is NaN is removed, so both outputs have equal length.
    """
    # np.array copies, so the caller's data (possibly read-only pandas
    # buffers, possibly integer typed) is never written to.
    bins_fit = np.array(bins, dtype=float)
    gamma_fit = np.array(gamma, dtype=float)

    sparse = (np.asarray(n11) + np.asarray(n10)) <= min_pairs
    bins_fit[sparse] = np.nan
    gamma_fit[sparse] = np.nan

    # Anchor the curve at the origin.
    bins_fit[0] = 0
    gamma_fit[0] = 0

    # One joint mask keeps bins and gamma aligned even if only one of them
    # is NaN at a given position.
    keep = ~(np.isnan(bins_fit) | np.isnan(gamma_fit))
    return bins_fit[keep], gamma_fit[keep]


def generate_scale_file(bins_file):
    """Fit a length scale to each day's binned variogram and save them as CSV.

    Reads ``bins_file`` (columns used: ``Day``, ``Date``, ``Bins``, ``gamma``,
    ``N11``, ``N10``), fits the module-level ``fit_model`` to every day
    ``0 .. max(Day)`` and writes a table with columns ``extreme_dates`` and
    ``scale`` to the path obtained by replacing ``"bins"`` with ``"scale"``
    in ``bins_file`` (every occurrence in the path string is replaced).

    For each day the fit is tried with the nugget fixed at 0 and the sill
    fixed at 0.50, then at 0.51; if both raise ``RuntimeError`` the scale is
    taken from ``fit_sci_curve`` instead. Days without any positive gamma
    among the usable bins keep a scale of 0.

    Parameters
    ----------
    bins_file : str
        Path of the bins CSV.

    Returns
    -------
    None
        The result is written to disk.
    """
    df_bins = pd.read_csv(bins_file)
    n_days = df_bins["Day"].max() + 1
    scale_arr = np.zeros(n_days)

    for day in range(n_days):
        rows = df_bins[df_bins["Day"] == day]
        bins_day = rows["Bins"].to_numpy()
        bins_fit, gamma_fit = _select_fit_points(
            bins_day,
            rows["gamma"].to_numpy(),
            rows["N11"].to_numpy(),
            rows["N10"].to_numpy(),
        )

        # Nothing to fit when no usable bin has a positive gamma.
        if not np.any(gamma_fit > 0):
            continue

        length_scale = None
        # The second attempt nudges the fixed sill slightly, which can be
        # enough for the optimiser to converge.
        for sill in (0.50, 0.51):
            try:
                fit_model.fit_variogram(bins_fit, gamma_fit, nugget=0, sill=sill)
            except RuntimeError:
                continue
            length_scale = fit_model.len_scale
            break

        if length_scale is None:
            # Last resort: plain least-squares fit; only alpha is used.
            _, length_scale = fit_sci_curve(bins_fit, gamma_fit, bins_day)

        scale_arr[day] = length_scale

    # NOTE(review): dates are paired with days by position, which assumes one
    # distinct "Date" per "Day" and that sorted dates follow Day order -- confirm.
    df_scale = pd.DataFrame.from_dict(
        {"extreme_dates": np.unique(df_bins["Date"]), "scale": scale_arr}
    )
    df_scale.to_csv(bins_file.replace("bins", "scale"))

In [6]:
# Bins CSVs under ./data/all_AU_p90, sorted by path; each one is passed to
# generate_scale_file by the pool below.
files = sorted(glob("./data/all_AU_p90/*_bins_list_all_events.csv"))

In [7]:
# Upper bound on worker processes. The pool never starts more workers than
# there are files, and always at least one because Pool rejects a size of 0.
max_pool = 14
n_workers = max(1, min(max_pool, len(files)))

with Pool(n_workers) as p:
    # generate_scale_file writes its result to disk and returns None, so
    # pool_outputs only records that every file was processed.
    pool_outputs = list(
        tqdm(
            p.imap(generate_scale_file, files),
            total=len(files),
            position=0,
            leave=True,
        )
    )
    # All results are in: let the workers exit normally before the context
    # manager terminates the pool.
    p.close()
    p.join()

100%|██████████| 2/2 [00:12<00:00,  6.01s/it]


removed 49084, 78046, 78047