In [1]:
import xarray as xr
import numpy as np
import os
from glob import glob
from tqdm import tqdm
import pandas as pd
from geopy.distance import geodesic 
import math
from sklearn.metrics.pairwise import haversine_distances
from math import radians
from multiprocess import Pool
import warnings

In [2]:
from gstools import CovModel

class Stab(CovModel):
    """Custom gstools covariance model with an exponential-type variogram.

    The variogram is ``nugget + sill * (1 - exp(-3 * r / len_scale))``.

    NOTE(review): gstools documents ``sill`` as ``var + nugget``, so a
    non-zero nugget would enter this expression twice. The fits in this
    notebook pass ``nugget=0``, where it makes no difference -- confirm
    before using the model with a free nugget.
    """

    def variogram(self, r):
        """Return the model variogram evaluated at lag distance(s) ``r``."""
        decay = np.exp(-(3 * r) / self.len_scale)
        return self.nugget + self.sill * (1 - decay)

In [3]:
from scipy.optimize import curve_fit

# Define the piecewise function
def custom_curve(h, c, b, alpha):
    """Piecewise curve: 0 at ``h == 0``, else ``c + b * (1 - exp(-3 * h / alpha))``.

    Parameters
    ----------
    h : scalar or numpy array
        Point(s) at which the curve is evaluated.
    c, b, alpha : float
        Offset, amplitude and decay-length parameters of the curve.

    Returns
    -------
    The literal ``0`` or a numpy float for scalar ``h``; otherwise an array
    built with ``np.where`` that is 0 wherever ``h == 0``.
    """
    def _rising_part(lag):
        # Shared formula for every non-zero lag.
        return c + b * (1 - np.exp(-3 * lag / alpha))

    if not np.isscalar(h):
        # Array input: evaluate everywhere, then zero out the h == 0 entries.
        return np.where(h == 0, 0, _rising_part(h))
    # Scalar input: short-circuit so the formula is never evaluated at h == 0.
    return 0 if h == 0 else _rising_part(h)

def fit_sci_curve(h_values, y_values,bins):
    """Fit ``custom_curve`` to the data with scipy and evaluate it on ``bins``.

    Parameters
    ----------
    h_values, y_values : array-like
        Sample points and observed values handed to ``curve_fit``.
    bins : scalar or numpy array
        Points at which the fitted curve is evaluated.

    Returns
    -------
    (y_fit, alpha_fitted) : the fitted curve on ``bins`` and the fitted
    ``alpha`` parameter.

    Any exception raised by ``curve_fit`` propagates to the caller.
    """
    # Starting point for the optimiser, in the order (c, b, alpha).
    start = [0, 0.5, 5]

    # Only the optimal parameters are needed; the covariance matrix is discarded.
    (c_hat, b_hat, alpha_hat), _ = curve_fit(custom_curve, h_values, y_values, p0=start)

    return custom_curve(bins, c_hat, b_hat, alpha_hat), alpha_hat

In [10]:
def calc_ratio_gamma(N11,N10):
    """Return ``0.5 * N10 / (N10 + N11)`` element-wise, NaN where the sum is 0.

    Parameters
    ----------
    N11, N10 : scalar or array-like of numbers
        Counts per bin (presumably pair counts -- confirm against the code
        that produces the CSV columns). They are broadcast against each other.

    Returns
    -------
    numpy.ndarray of float
        Same (broadcast) shape as the inputs; 0-d for scalar inputs.

    The division is only carried out where the denominator is non-zero, so
    no RuntimeWarning is emitted and the process-wide warnings filter does
    not need to be touched. Python scalars and lists are accepted as well as
    numpy arrays.
    """
    n11 = np.asarray(N11, dtype=float)
    n10 = np.asarray(N10, dtype=float)
    total = n10 + n11
    has_pairs = total != 0
    # Substitute a harmless denominator where total == 0; those entries are
    # replaced by NaN in the final np.where anyway.
    safe_total = np.where(has_pairs, total, 1.0)
    return np.where(has_pairs, 0.5 * (n10 / safe_total), np.nan)

In [47]:
def calc_alpha(gamma_fit,bins_fit,bins):
    """Fit a variogram to ``(bins_fit, gamma_fit)`` and evaluate it on ``bins``.

    Strategy, in order:

    1. ``fit_model.fit_variogram`` with ``nugget=0, sill=0.5``.
    2. The same call with ``sill=0.51`` (a small nudge so the fit can pass
       when the first attempt raises ``RuntimeError``).
    3. ``fit_sci_curve`` (scipy ``curve_fit``) as a last resort.

    If the last resort also raises ``RuntimeError``, a ``RuntimeWarning`` is
    issued and zeros are returned. Previously that error escaped and would
    abort the whole run.

    Parameters
    ----------
    gamma_fit, bins_fit : array-like
        Empirical variogram values and their distances, already filtered.
    bins : array-like
        Distances at which the fitted curve is evaluated.

    Returns
    -------
    (y_fit, scale) : fitted curve on ``bins`` and the fitted length scale
    (``fit_model.len_scale`` or the ``alpha`` from ``fit_sci_curve``);
    ``(zeros, 0)`` when no fit succeeded.

    Uses the module-level ``fit_model`` instance, which is refitted on every
    call.
    """
    for sill in (0.5, 0.51):
        try:
            fit_model.fit_variogram(bins_fit, gamma_fit, nugget=0, sill=sill)
            return fit_model.variogram(bins), fit_model.len_scale
        except RuntimeError:
            # Fall through to the next sill value / the scipy fallback.
            continue

    try:
        return fit_sci_curve(bins_fit, gamma_fit, bins)
    except RuntimeError as err:
        warnings.warn(f"variogram fit failed, returning zeros: {err}", RuntimeWarning)
        return np.zeros_like(bins, dtype=float), 0

In [34]:

def calc_pair_alpha(gamma, N11, N10, distance_bins=None):
    """Filter one set of per-bin values and fit a variogram to what remains.

    Parameters
    ----------
    gamma : array-like of float
        Empirical variogram value per distance bin.
    N11, N10 : array-like
        Counts per distance bin; bins with ``N11 + N10 <= 2`` are excluded
        from the fit.
    distance_bins : array-like, optional
        Distances matching ``gamma`` element for element. Defaults to the
        module-level ``bins``.

    Returns
    -------
    (y_fit, scale) : the fitted curve evaluated on every bin and the fitted
    length scale. When two or fewer usable bins remain, the result is
    ``(zeros shaped like the bins, 0)``.

    A single mask is applied to both ``gamma`` and the distances, so the two
    arrays handed to ``calc_alpha`` always have the same length, even when
    ``gamma`` holds a NaN in a bin that passes the count threshold.
    """
    all_bins = np.asarray(bins if distance_bins is None else distance_bins)
    gamma_arr = np.asarray(gamma, dtype=float)
    pair_counts = np.asarray(N11) + np.asarray(N10)

    # Keep a bin only if it has more than 2 pairs AND a finite-or-inf gamma.
    usable = ~(pair_counts <= 2) & ~np.isnan(gamma_arr)
    gamma_fit = gamma_arr[usable]
    bins_fit = all_bins[usable].astype(float)

    # Too few points to fit anything meaningful.
    if len(gamma_fit) <= 2:
        return np.zeros_like(all_bins), 0

    return calc_alpha(gamma_fit, bins_fit, all_bins)

In [7]:
## Working directory that contains ./data. Defaults to my own directory;
## set the SEMIVARIOGRAM_AU_DIR environment variable to run elsewhere.
os.chdir(os.environ.get("SEMIVARIOGRAM_AU_DIR", "/g/data/k10/dl6968/Semi-variogram_AU/"))


In [8]:
# Shared model instance: calc_alpha calls fit_variogram on it and then reads
# its variogram() and len_scale. The trailing comment records an earlier alternative.
fit_model = Stab(dim=2)#gs.Stable(dim=1)


In [9]:
# Distance bins shared by every fit: 5, 15, ..., 355 (36 values).
bins = np.arange(start=5, stop=360, step=10)


In [43]:
# Input CSVs, matched relative to the current working directory and sorted so
# the processing order is deterministic. Each path is handed to fix_gamma.
files = sorted(glob("./data/all_AU_p90/*_pair_bins_list_all_events.csv"))

In [44]:
def fix_gamma(file):
    """Recompute gamma, the fitted curve and the length scale for one file pair.

    Reads the CSV at ``file`` and its companion whose path is ``file`` with
    ``"bins"`` replaced by ``"scale"``. It then:

    * overwrites the ``gamma`` column from the ``N11`` / ``N10`` columns,
    * fits every ``Day`` separately via ``calc_pair_alpha``,
    * stores the fitted curve in ``y_fit`` (bins file) and the fitted length
      scale in ``scale`` (scale file),
    * writes both frames back over the files they were read from.

    Parameters
    ----------
    file : str
        Path of the bins CSV.

    Returns
    -------
    None

    ``y_fit`` values are written to the exact rows of the day they belong to,
    so the result no longer depends on the rows being stored contiguously in
    sorted ``Day`` order. Each day is also located once instead of filtering
    the whole frame three times.

    Assumes the scale file holds one row per distinct ``Day`` in sorted
    order -- TODO confirm; the assignment below is positional.
    """
    scale_file = file.replace("bins", "scale")
    df_bins = pd.read_csv(file)
    df_scale = pd.read_csv(scale_file)

    n11_all = df_bins["N11"].values
    n10_all = df_bins["N10"].values
    gamma_all = calc_ratio_gamma(n11_all, n10_all)
    df_bins.loc[:, "gamma"] = gamma_all

    # Map each day to the positional row numbers that belong to it.
    rows_by_day = df_bins.groupby("Day").indices
    y_fit_all = np.full(len(df_bins), np.nan)
    scale_list = []
    for day in sorted(rows_by_day):
        rows = rows_by_day[day]
        y_fit, scale = calc_pair_alpha(gamma_all[rows], n11_all[rows], n10_all[rows])
        # Row-aligned write: the curve lands on this day's rows only.
        y_fit_all[rows] = y_fit
        scale_list.append(scale)

    df_bins.loc[:, "y_fit"] = y_fit_all
    df_scale.loc[:, "scale"] = scale_list

    # NOTE(review): to_csv writes the index as an extra unnamed column while
    # read_csv above does not consume one, so each re-run adds a column.
    # Left unchanged because downstream readers may rely on the layout.
    df_bins.to_csv(file)
    df_scale.to_csv(scale_file)

In [48]:
# Number of worker processes used to process the files in parallel.
max_pool = 28
with Pool(max_pool) as p:
    # imap yields results in input order; wrapping it in list() drains every
    # task before the with-block exits, and tqdm shows per-file progress.
    # fix_gamma has no return statement, so pool_outputs is a list of None.
    pool_outputs = list(
        tqdm(
            p.imap(fix_gamma,
                   files),
            total=len(files),
            position=0, leave=True
        )
    )
# NOTE(review): leaving the with-block should already shut the pool down
# (multiprocessing semantics -- confirm for multiprocess), so this join is
# expected to return immediately.
p.join()

100%|██████████| 9841/9841 [26:49<00:00,  6.11it/s]
