In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt


folder_path = 'C:/1.1 Asia frequency data'

# os.listdir() returns entries in arbitrary order, but everything below
# addresses the frames by position. Sorting makes that mapping deterministic.
# NOTE(review): assumes the file names sort alphabetically as
# Australia, Indonesia, Malaysia01, Malaysia02, Singapore -- confirm.
file_names = sorted(os.listdir(folder_path))

dataframes = []
for file_name in file_names:
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path, sep=';')
    # Rescale the second column: value / 1000 + 50
    # (presumably a mHz deviation converted to absolute Hz around 50 Hz -- TODO confirm).
    # The column is replaced rather than written through .iloc so that an
    # integer-typed raw column can become float without an incompatible-dtype
    # warning/error on recent pandas versions.
    df[df.columns[1]] = df.iloc[:, 1] / 1000 + 50
    dataframes.append(df)

# The positional bookkeeping below only makes sense for exactly five input files.
if len(dataframes) != 5:
    raise ValueError(
        f"Expected 5 data files in {folder_path!r}, found {len(dataframes)}: {file_names}"
    )

# dataframes[0] is Australia, [1] is Indonesia, [2] is Malaysia01, [3] is Malaysia02, [4] is Singapore

# Combine the two Malaysia frames (dataframes[2] and dataframes[3]) into one
combined_df = pd.concat([dataframes[2], dataframes[3]], ignore_index=True)

# Replace dataframes[2] and dataframes[3] with combined_df in the dataframes list
dataframes[2:4] = [combined_df]

# dataframes[0] is Australia
# dataframes[1] is Indonesia
# dataframes[2] is Malaysia
# dataframes[3] is Singapore


# Keep only the rows whose quality indicator QI equals 0
flt_dataframes = []
for df in dataframes:
    flt_df = df[df['QI'] == 0]
    flt_dataframes.append(flt_df)

region_dict = {0: 'Australia', 1: 'Indonesia', 2: 'Malaysia', 3: 'Singapore'}

# Group each filtered frame under its region name (one group per frame).
# The name is bound as a default argument so each key function carries its own label.
region_groups = [
    df.groupby(lambda _idx, _name=region_dict[i]: _name)
    for i, df in enumerate(flt_dataframes)
]

In [3]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

def _phase_randomized_surrogate(series, rng):
    """Return a real-valued surrogate with the amplitude spectrum of `series` and random phases."""
    n = len(series)
    # The one-sided transform guarantees a real inverse without discarding an
    # imaginary part, so the amplitude spectrum is preserved exactly.
    spectrum = np.fft.rfft(series)
    phases = rng.uniform(0, 2 * np.pi, size=len(spectrum))
    randomized = np.abs(spectrum) * np.exp(1j * phases)
    # The DC bin carries the mean and must stay untouched; for even n the
    # Nyquist bin must stay real as well.
    randomized[0] = spectrum[0]
    if n % 2 == 0:
        randomized[-1] = spectrum[-1]
    return np.fft.irfft(randomized, n=n)


def LTtest(data, tau=None, seed=None):
    """Compute the LT1 and LT2 lagged statistics for `data` and for a phase-randomized surrogate.

    Parameters
    ----------
    data : array-like of float
        One-dimensional series. NaN entries are removed before any processing.
    tau : array-like of int, optional
        Lags (in samples) at which the statistics are evaluated.
        Defaults to ``np.arange(1, 82801, 900)``.
    seed : int, optional
        Seed for the surrogate's random phases. When omitted, NumPy's global
        random state is used.

    Returns
    -------
    res_1, surr_1 : ndarray
        LT1 statistic, ``mean(x_t**2 * x_tau) - mean(x_t * x_tau**2)``, for the
        data and for the surrogate, one value per lag.
    res_2, surr_2 : ndarray
        LT2 statistic, ``mean((x_t - x_tau)**3) / mean((x_t - x_tau)**2)``, for
        the data and for the surrogate, one value per lag.
    rmse_lt1, rmse_lt2 : float
        Root-mean-square difference between data and surrogate curves, with
        NaN entries treated as 0.

    Lags that are not shorter than the (NaN-free) series yield NaN in all four
    arrays.

    Raises
    ------
    ValueError
        If `data` contains no non-NaN samples.
    """
    # Remove NaNs *before* the FFT: a single NaN would otherwise turn the whole
    # spectrum (and hence the surrogate) into NaN and leave the surrogate with a
    # different length than the filtered data.
    data = np.asarray(data, dtype=float)
    data = data[~np.isnan(data)]
    L = len(data)
    if L == 0:
        raise ValueError("LTtest needs at least one non-NaN sample")

    if tau is None:
        tau = np.arange(1, 82801, 900)
    tau = np.atleast_1d(np.asarray(tau, dtype=int))

    rng = np.random if seed is None else np.random.default_rng(seed)
    surrogate = _phase_randomized_surrogate(data, rng)

    res_1 = np.full(len(tau), np.nan)
    res_2 = np.full(len(tau), np.nan)
    surr_1 = np.full(len(tau), np.nan)
    surr_2 = np.full(len(tau), np.nan)

    for i in range(len(tau)):
        lag = int(tau[i])
        # No overlapping samples for this lag; leave NaN. Without this guard a
        # lag between L and 2L produces slices of different lengths and a
        # broadcasting error.
        if lag >= L:
            continue

        x_t = data[0:L - lag]
        x_tau = data[lag:L]
        y_t = surrogate[0:L - lag]
        y_tau = surrogate[lag:L]

        # First method LT1
        res_1[i] = np.mean(x_t**2 * x_tau) - np.mean(x_t * x_tau**2)
        surr_1[i] = np.mean(y_t**2 * y_tau) - np.mean(y_t * y_tau**2)

        # Second method LT2
        res_2[i] = np.mean((x_t - x_tau)**3) / np.mean((x_t - x_tau)**2)
        surr_2[i] = np.mean((y_t - y_tau)**3) / np.mean((y_t - y_tau)**2)

    # RMSE between data and surrogate curves (NaN -> 0 first)
    rmse_lt1 = np.sqrt(np.mean((np.nan_to_num(res_1) - np.nan_to_num(surr_1))**2))
    rmse_lt2 = np.sqrt(np.mean((np.nan_to_num(res_2) - np.nan_to_num(surr_2))**2))

    return res_1, surr_1, res_2, surr_2, rmse_lt1, rmse_lt2

In [4]:
# Run the LT test on the NaN-free f50 column of each filtered frame, in list
# order (0..3), and unpack the six results per region into named variables.
(
    (aus_res_1, aus_surr_1, aus_res_2, aus_surr_2, aus_rmse_lt1, aus_rmse_lt2),
    (idn_res_1, idn_surr_1, idn_res_2, idn_surr_2, idn_rmse_lt1, idn_rmse_lt2),
    (mys_res_1, mys_surr_1, mys_res_2, mys_surr_2, mys_rmse_lt1, mys_rmse_lt2),
    (sgp_res_1, sgp_surr_1, sgp_res_2, sgp_surr_2, sgp_rmse_lt1, sgp_rmse_lt2),
) = [
    LTtest(flt_dataframes[region_idx]["f50"].dropna().values)
    for region_idx in range(4)
]

In [5]:
asia_rmse_lt1=[aus_rmse_lt1,idn_rmse_lt1,mys_rmse_lt1,sgp_rmse_lt1]
asia_rmse_lt2=[aus_rmse_lt2,idn_rmse_lt2,mys_rmse_lt2,sgp_rmse_lt2]
%store asia_rmse_lt1
%store asia_rmse_lt2

Stored 'asia_rmse_lt1' (list)
Stored 'asia_rmse_lt2' (list)
