In [1]:
import sys
import os
import pandas as pd
import seaborn as sns

# Put the project checkout on sys.path so the `duplexity` imports that follow
# can resolve (presumably the package lives under this directory — confirm).
# The location can be overridden with the DUPLEXITY_PATH environment variable;
# when it is unset the original hard-coded path is used, so existing setups
# behave exactly as before.
path = os.environ.get('DUPLEXITY_PATH', '/home/lexi/WeatherValidation/WeatherValidation/')
# Guard against appending the same entry again when the cell is re-run.
if path not in sys.path:
    sys.path.append(path)
from duplexity.deterministic_score import DeterministicScore,CategoricalScore
from duplexity.metric_map import grid_point_calculate,plot_metrics_map

from datetime import datetime, timedelta
import time
import numpy as np
import tqdm

import matplotlib.pyplot as plt



### Load raw data

In [2]:
# Directory holding the .npy input files.
data_dir = "/mnt/local/ERA5_example/"
# Collect every .npy file in the directory. os.listdir() returns entries in
# arbitrary order, so the paths are sorted to keep the sample order (and thus
# the first axis of the stacked arrays built later) identical across runs.
file_list = sorted(
    os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith('.npy')
)
m = len(file_list)
print(f"Number of files found: {m}")


Number of files found: 3600


In [3]:
# Per-file accumulators: the array read from disk and a synthetic "output"
# derived from it.
observed_list = []
output_list = []

# Seeded generator so the synthetic noise, and every metric computed from it,
# is reproduced exactly on a fresh Restart & Run All. The original cell drew
# from the unseeded global NumPy RNG, so its results changed on every run.
rng = np.random.default_rng(42)

# Load data from each file
for file in file_list:
    observed = np.load(file)
    # Synthetic output: the observed field plus Gaussian noise (mean 0, std 0.1)
    # with the same shape as the observed array.
    output = observed + rng.normal(0, 0.1, observed.shape)
    observed_list.append(observed)
    output_list.append(output)

# Stack the per-file arrays along a new leading axis (one entry per file).
observed_data = np.array(observed_list)
output_data = np.array(output_list)

print(f"Loaded observed data shape: {observed_data.shape}")
print(f"Loaded output data shape: {output_data.shape}")


Loaded observed data shape: (3600, 192, 144)
Loaded output data shape: (3600, 192, 144)


In [4]:
# Threshold passed to CategoricalScore for the binary classification.
threshold = 0.5

# Both scorers are built from the same observed/output arrays, so the shared
# keyword arguments are collected once and unpacked into each constructor.
score_inputs = {"observed": observed_data, "output": output_data}
det_score = DeterministicScore(**score_inputs)
cat_score = CategoricalScore(**score_inputs, threshold=threshold)


In [5]:

def _print_metrics(heading, metrics):
    """Print `heading`, then one `name: value` line per entry of `metrics`."""
    print(heading)
    for name, value in metrics.items():
        print(f"{name}: {value}")


# Continuous metrics: compute, then report.
det_metrics = det_score.calculate_metrics()
_print_metrics("Continuous Metrics:", det_metrics)

# Categorical metrics: compute, then report (heading is preceded by a blank line).
cat_metrics = cat_score.calculate_metrics()
_print_metrics("\nCategorical Metrics:", cat_metrics)


Continuous Metrics:
MSE: 0.00999814198170731
RMSE: 0.09999070947696746
MAE: 0.07978075964320808
Bias: -1.2706922263116837e-05
Correlation: 0.9040225044740102

Categorical Metrics:
Accuracy: 0.9982894181616513
CSI: 0.8855780138602896
ETS: 0.8840372480110107
FAR: 0.07017507959446338
POD: 0.9490054668893464
GSS: 0.8840372480110107
HSS: 0.938449862330796
PSS: 0.9479921523968502
SEDI: 0.9892472837877483


### Metric map

In [5]:
# Presumably computes the metrics at each grid point (going by the function
# name) from the per-file observed/output lists — TODO confirm expected inputs.
# NOTE(review): in the recorded run this call failed with
#   TypeError: DeterministicScore.mae() takes 1 positional argument but 3 were given
# The message is about how DeterministicScore.mae() is invoked, not about the
# data passed here — presumably the faulty call is inside grid_point_calculate;
# confirm against the duplexity source and fix it there.
metrics = grid_point_calculate(observed_list, output_list)


TypeError: DeterministicScore.mae() takes 1 positional argument but 3 were given

In [None]:
# Draw one metric from `metrics` with plot_metrics_map.
# The original cell held the function's signature pasted in as a call
# (`metric_name: str, title: str, ...`), which is a SyntaxError; this is an
# actual call using the two required arguments from that signature.
# Optional keywords shown in that signature and left at their defaults here:
# save_path, vminvmax, camp, land_mask.
# NOTE(review): "MAE" is assumed to be a key present in `metrics` — confirm
# against what grid_point_calculate returns and adjust if needed.
plot_metrics_map(metrics, metric_name="MAE", title="MAE")
