In [1]:
import os

import pandas as pd

# Set the directory path and filenames
dir_path = 'C:/1. Power grid frequency data/'
file_names = ['ES_PM01.csv', 'IS02.csv', 'IRL01.csv']

# Load the CSV files into dataframes, one per entry of `file_names` and in
# the same order.
dfs = []
for file_name in file_names:
    # os.path.join inserts a separator only when `dir_path` does not already
    # end with one, so the path stays valid if the trailing '/' is dropped.
    file_path = os.path.join(dir_path, file_name)
    # The files are ';'-separated. Only the first three columns are read, and
    # header=0 together with `names` replaces each file's own header row with
    # the column names given here.
    df = pd.read_csv(
        file_path,
        sep=';',
        usecols=[0, 1, 2],
        names=['Time', 'f50', 'QI'],
        header=0,
    )
    dfs.append(df)


# Keep only the rows whose quality indicator QI equals 0; the result has one
# filtered dataframe per entry of `dfs`, in the same order.
flt_dfs = [frame[frame['QI'] == 0] for frame in dfs]
    
# Region label for the dataframe at the same position in `flt_dfs`.
region_dict = {0: 'Balearic', 1: 'Iceland', 2: 'Ireland'}

# Group the dataframes by region: every frame becomes a GroupBy holding a
# single group whose key is the region name. The key is supplied as a constant
# Series aligned on the frame's index instead of a callable, because a callable
# is invoked once per index value and the former lambda additionally closed
# over the comprehension variable `i`.
region_groups = [
    df.groupby(pd.Series(region_dict[i], index=df.index))
    for i, df in enumerate(flt_dfs)
]

In [3]:
# Per-region summary statistics of the frequency column `f50`. Each list has
# one entry per element of `region_groups`; every entry is a Series indexed by
# the group key(s) of that GroupBy.
skewness = [g.skew(numeric_only=True)['f50'] for g in region_groups]
# pandas' kurtosis follows Fisher's definition (a normal distribution gives 0).
kurtosis = [g['f50'].apply(pd.Series.kurtosis) for g in region_groups]
std_devs = [g.std(numeric_only=True)['f50'] for g in region_groups]
means = [g.mean(numeric_only=True)['f50'] for g in region_groups]

# Divisor used for the moments below: the number of groups in each GroupBy.
# Iterating a GroupBy yields one (key, group) pair per group, so `ngroups` is
# the same number that `len(list(g['f50']))` produced, without materialising
# every group.
# NOTE(review): this is the group count, not the number of samples -- confirm
# that dividing by it is what was intended.
group_counts = [g.ngroups for g in region_groups]

# Third and fourth moments around the mean for each region, computed as
# skewness * std**3 and kurtosis * std**4, each divided by the group count.
# NOTE(review): multiplying by std**k removes the standardisation that the
# variable names suggest, and `kurtosis` is excess kurtosis; verify both
# against the intended definition before relying on these values.
std_third_moments = [
    (skew * std**3) / n_groups
    for skew, std, n_groups in zip(skewness, std_devs, group_counts)
]
std_fourth_moments = [
    (kurt * std**4) / n_groups
    for kurt, std, n_groups in zip(kurtosis, std_devs, group_counts)
]

# Square of the third moment for each region
square_std_third_moments = [third**2 for third in std_third_moments]

# Difference between the fourth moment and the squared third moment for each region
euro_differences = [
    fourth - third_sq
    for fourth, third_sq in zip(std_fourth_moments, square_std_third_moments)
]

In [4]:
import diptest

euro_dip=[]
for i, df in enumerate (flt_dfs):
    dip,pval = diptest.diptest(df['f50'].astype(float))
    region=region_dict[i]
    euro_dip.append((dip))
    
%store euro_dip