Skip to content

Commit

Permalink
Feature 384 agg stat total dir (#389)
Browse files Browse the repository at this point in the history
* issue #384 tests for get_met_version code

* Added code to calculate the total_dir and to determine the MET version

* Fixed import of utils module for get_met_version

* Update the vl1l2 data that was produced by the MET stat_analysis tool on vl1l2 data with the total_dir column

* updated config file for vl1l2 total_dir column addition

* modify test_vl1l2 for new column, TOTAL_DIR in linetype

* skip tests for val1l2 and vcnt until new data is being used

* Delete test/data/stat_analysis/point_stat_GRIB1_NAM_GDAS_120000L_20120409_120000V_vl1l2.txt

out of date now that TOTAL_DIR column has been added

* Delete test/vl1l2_agg_stat.yaml

out of date with addition of the TOTAL_DIR column

* Delete test/data/stat_analysis/met_vl1l2_agg.txt

out of date, no longer relevant

* Updated data with TOTAL_DIR column

* updated VL1L2 data with TOTAL_DIR column

* skip test_total_dir test, not yet finished

* Fix directory path

* Fixed more incorrect data paths

* skip the test_total_dir test, data needs to be updated and old files were removed, causing error messages about no files found

* Delete test/data/point_stat/met_v12/point_stat_GRIB2_NAM_NDAS_120000L_20120409_120000V_vcnt.txt

incorrect file

* updates for supporting the TOTAL_DIR column

* Modify path for input data

* updated VAL1L2 data aggregated via MET stat_analysis, contains the TOTAL_DIR column

* support for new TOTAL_DIR column in VAL1L2 linetype for calculating the associated ME, MAE, and MSE stats

* Updates to test for VAL1L2 linetype with new TOTAL_DIR column

* updated VAL1L2 data with TOTAL_DIR column

* Delete test/data/stat_analysis/point_stat_GRIB1_NAM_GDAS_120000L_20120409_120000V_val1l2.txt

out of date

* Delete test/data/stat_analysis/met_val1l2_stat_anal.txt

created with out of date MET data

* update calculation of the dir_xyz stats to use the TOTAL_DIR data

* updated data and tests for VCNT linetype with TOTAL_DIR column

* Updated VCNT data with new TOTAL_DIR column

* Delete Data/dummy_vl1l2.txt

not used

* Delete test/data/stat_analysis/point_stat_GRIB1_NAM_GDAS_120000L_20120409_120000V_vcnt.txt

out of date

* Delete test/data/stat_analysis/met_vcnt_from_vl1l2_aggstat.txt

out of date
  • Loading branch information
bikegeek committed Jun 28, 2024
1 parent 08e24ee commit d7631db
Show file tree
Hide file tree
Showing 22 changed files with 304 additions and 92 deletions.
50 changes: 40 additions & 10 deletions metcalcpy/agg_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import yaml
import pandas


from metcalcpy import GROUP_SEPARATOR, DATE_TIME_REGEX
from metcalcpy.bootstrap import bootstrap_and_value, BootstrapResults
from metcalcpy.util.ctc_statistics import *
Expand All @@ -54,6 +55,7 @@
from metcalcpy.util.pstd_statistics import *
from metcalcpy.util.rps_statistics import *
from metcalcpy.util.mcts_statistics import *
from metcalcpy.util.utils import get_met_version

from metcalcpy.util.utils import is_string_integer, get_derived_curve_name, \
calc_derived_curve_value, intersection, is_derived_point, parse_bool, \
Expand Down Expand Up @@ -460,39 +462,67 @@ def _prepare_grad_data(self, data_for_prepare):

def _prepare_vl1l2_data(self, data_for_prepare):
    """Prepares vl1l2 data.

    Scales, in place, every column needed for the requested statistic by a
    per-row weight: the 'total_dir' column for MET version 12.0 and above,
    otherwise the 'total' column.

    Args:
        data_for_prepare: column-addressable table of VL1L2 rows (accessed as
            ``data_for_prepare[column].values``, i.e. a pandas DataFrame).
            It is handed to get_met_version to determine the MET version and
            must contain the selected weight column.
    """
    # The MET version is a property of the whole data set, so determine it
    # (and the weight column it implies) once rather than once per column.
    major = int(get_met_version(data_for_prepare).major)

    if self.statistic not in self.STATISTIC_TO_FIELDS:
        return

    # TOTAL_DIR is only used for MET v12.0 and above.
    weight_column = 'total_dir' if major >= 12 else 'total'
    weights = data_for_prepare[weight_column].values

    # Each column is scaled exactly once.
    for column in self.STATISTIC_TO_FIELDS[self.statistic]:
        data_for_prepare[column] = data_for_prepare[column].values * weights

def _prepare_val1l2_data(self, data_for_prepare):
    """Prepares val1l2 data.

    Scales, in place, every column needed for the requested statistic by a
    per-row weight: the 'total_dir' column for MET version 12.0 and above,
    otherwise the 'total' column.

    Args:
        data_for_prepare: column-addressable table of VAL1L2 rows (accessed as
            ``data_for_prepare[column].values``, i.e. a pandas DataFrame).
            It is handed to get_met_version to determine the MET version and
            must contain the selected weight column.
    """
    # The MET version is a property of the whole data set, so determine it
    # (and the weight column it implies) once rather than once per column.
    major = int(get_met_version(data_for_prepare).major)

    if self.statistic not in self.STATISTIC_TO_FIELDS:
        return

    # TOTAL_DIR is only used for MET v12.0 and above.
    weight_column = 'total_dir' if major >= 12 else 'total'
    weights = data_for_prepare[weight_column].values

    # Each column is scaled exactly once.
    for column in self.STATISTIC_TO_FIELDS[self.statistic]:
        data_for_prepare[column] = data_for_prepare[column].values * weights

def _prepare_vcnt_data(self, data_for_prepare):
    """Prepares vcnt data.

    Scales, in place, every column needed for the requested statistic by a
    per-row weight: the 'total_dir' column for MET version 12.0 and above,
    otherwise the 'total' column.

    Args:
        data_for_prepare: column-addressable table of VCNT rows (accessed as
            ``data_for_prepare[column].values``, i.e. a pandas DataFrame).
            It is handed to get_met_version to determine the MET version and
            must contain the selected weight column.
    """
    # The MET version is a property of the whole data set, so determine it
    # (and the weight column it implies) once rather than once per column.
    major = int(get_met_version(data_for_prepare).major)

    if self.statistic not in self.STATISTIC_TO_FIELDS:
        return

    # TOTAL_DIR is only used for MET v12.0 and above.
    weight_column = 'total_dir' if major >= 12 else 'total'
    weights = data_for_prepare[weight_column].values

    # Each column is scaled exactly once.
    for column in self.STATISTIC_TO_FIELDS[self.statistic]:
        data_for_prepare[column] = data_for_prepare[column].values * weights

def _prepare_ecnt_data(self, data_for_prepare):
"""Prepares ecnt data.
Expand Down
83 changes: 83 additions & 0 deletions metcalcpy/util/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import pandas as pd
from pandas import DataFrame
import warnings
from dataclasses import make_dataclass
import re

from scipy import stats
from scipy.stats import t, nct
Expand Down Expand Up @@ -454,6 +456,31 @@ def get_total_values(input_data, columns_names, aggregation):
total = sum_column_data_by_name(input_data, columns_names, 'total')
return total

def get_total_dir_values(input_data, columns_names, aggregation):
    """Return the divisor taken from the TOTAL_DIR column instead of TOTAL.

    Counterpart of get_total_values for the VL1L2, VAL1L2 and VCNT linetypes
    (MET v12.0 and beyond); used by the me/mae/mse calculations in the
    corresponding <linetype>_statistics modules.

    Args:
        input_data: 2-dimensional numpy array with data for the calculation
            1st dimension - the row of data frame
            2nd dimension - the column of data frame
        columns_names: names of the columns for the 2nd dimension as Numpy array
        aggregation: whether the aggregation on fields was performed

    Returns:
        1 when no aggregation was performed; otherwise whatever
        sum_column_data_by_name yields for the 'total_dir' column
        (presumably the column sum - confirm against that helper).
    """
    # Without aggregation there is nothing to normalise by.
    if not aggregation:
        return 1
    return sum_column_data_by_name(input_data, columns_names, 'total_dir')

def aggregate_field_values(series_var_val, input_data_frame, line_type):
"""Finds and aggregates statistics for fields with values containing ';'.
Expand Down Expand Up @@ -1390,3 +1417,59 @@ def autocor_coef(data: list) -> Union[None, float]:

n = len(data_valid)
return sx * sy / (sx - (n - 1) * sxx) + sxy / (sxx - sx * sx / (n - 1))


# Immutable (frozen=True) record holding the major, minor, and bugfix parts of
# a MET version. Built once at module level so that results from separate
# get_met_version calls share one class and compare equal field by field.
_MetVersion = make_dataclass("Version", ["major", "minor", "bugfix"], frozen=True)

# 'V<major>[.<minor>[.<bugfix>]]'; the separators are literal dots.
_MET_VERSION_PATTERN = re.compile(r'V(\d+)\.?(\d*)\.?(\d*)')


def get_met_version(input_data: Union[pd.DataFrame, np.ndarray],
                    column_names: list = None) -> "_MetVersion":
    """
        Determines the version of MET for this data

        Args:
           @param input_data: The numpy array or pandas dataframe representation of the MET .stat or .text file of data
                              (e.g. point-stat, grid-stat, stat-analysis, etc.)
                              NOTE: when a dataframe is passed, its column labels are converted to
                              lower case in place (existing behavior, kept for current callers).
           @param column_names: An optional list of the column names corresponding to the input_data when the input
                                data is a numpy array.

        Returns:
           version: a frozen dataclass with the attributes major, minor, and bugfix, parsed from the
                    'version' value of the first row. Components present in the version text are
                    returned as strings of digits; a missing minor or bugfix component is returned
                    as the integer 0. Wrap the attributes in int() for numeric comparisons.

        Raises:
           ValueError: if input_data is a numpy array without column_names, if input_data is neither
                       a numpy array nor a pandas dataframe, or if the version text cannot be parsed.
    """

    if isinstance(input_data, np.ndarray):
        if column_names is None:
            raise ValueError("numpy array input requires a list of column names.")
        lc_column_names = [cur_col.lower() for cur_col in column_names]
        df = pd.DataFrame(input_data, index=None, columns=lc_column_names)
    elif isinstance(input_data, pd.DataFrame):
        df = input_data
        # Convert the column names to lower case (modifies the caller's dataframe)
        df.columns = [cur_col.lower() for cur_col in df.columns.to_list()]
    else:
        raise ValueError("input data must be either a numpy array or pandas dataframe")

    # Get the version from the data (the first row); only that single value is needed
    full_version = df['version'].iloc[0]

    # Parse out the major, minor, and bugfix portions of the version.
    # str() guards against non-text cell values (e.g. NaN), which then fail the match below.
    match = _MET_VERSION_PATTERN.match(str(full_version))
    if match is None:
        # Fail with a clear message instead of an unbound-variable error at the return
        raise ValueError(f"unable to parse a MET version from {full_version!r}")

    major = match.group(1)
    # An empty capture means the component is absent from the version text
    minor = match.group(2) if match.group(2) else 0
    bugfix = match.group(3) if match.group(3) else 0

    return _MetVersion(major, minor, bugfix)


29 changes: 24 additions & 5 deletions metcalcpy/util/val1l2_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
import warnings
import numpy as np

from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values
from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values, get_met_version, \
get_total_dir_values

__author__ = 'Tatiana Burek'
__version__ = '0.1.0'
Expand Down Expand Up @@ -91,6 +92,23 @@ def calculate_val1l2_total(input_data, columns_names):
total = sum_column_data_by_name(input_data, columns_names, 'total')
return round_half_up(total, PRECISION)

def calculate_val1l2_total_dir(input_data, columns_names):
    """Performs calculation of the TOTAL_DIR value: the total number of matched
        pairs with well-defined forecast and observation wind directions.

        Args:
            input_data: 2-dimensional numpy array with data for the calculation
                1st dimension - the row of data frame
                2nd dimension - the column of data frame
            columns_names: names of the columns for the 2nd dimension as Numpy array

        Returns:
            the value obtained from sum_column_data_by_name for the 'total_dir'
            column, rounded with round_half_up to PRECISION
    """
    return round_half_up(
        sum_column_data_by_name(input_data, columns_names, 'total_dir'),
        PRECISION)



def calculate_val1l2_dira_me(input_data, columns_names, aggregation=False):
"""Performs calculation of DIRA_ME
Expand All @@ -103,7 +121,7 @@ def calculate_val1l2_dira_me(input_data, columns_names, aggregation=False):
dira_me
"""
try:
total = get_total_values(input_data, np.array(columns_names), aggregation)
total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
result = sum_column_data_by_name(input_data, np.array(columns_names), 'dira_me') / total

result = round_half_up(result, PRECISION)
Expand All @@ -125,7 +143,7 @@ def calculate_val1l2_dira_mae(input_data, columns_names, aggregation=False):
dira_mae statistic
"""
try:
total = get_total_values(input_data, np.array(columns_names), aggregation)
total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
result = sum_column_data_by_name(input_data, np.array(columns_names), 'dira_mae') / total

result = round_half_up(result, PRECISION)
Expand All @@ -146,12 +164,13 @@ def calculate_val1l2_dira_mse(input_data, columns_names, aggregation=False):
dira_mse statistic
"""
try:
total = get_total_values(input_data, np.array(columns_names), aggregation)
total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
result = sum_column_data_by_name(input_data, np.array(columns_names), 'dira_mse') / total

result = round_half_up(result, PRECISION)

except (TypeError, ZeroDivisionError, Warning, ValueError):
result = None
warnings.filterwarnings('ignore')
return result
return result

11 changes: 6 additions & 5 deletions metcalcpy/util/vcnt_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
import numpy as np

from metcalcpy.util.met_stats import calc_direction, calc_speed
from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values
from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values, \
get_total_dir_values

__author__ = 'Tatiana Burek'
__version__ = '0.1.0'
Expand Down Expand Up @@ -63,7 +64,7 @@ def calculate_vcnt_obar(input_data, columns_names, aggregation=False):
"""
warnings.filterwarnings('error')
try:
total = get_total_values(input_data, columns_names, aggregation)
total = get_total_dir_values(input_data, columns_names, aggregation)
result = sum_column_data_by_name(input_data, columns_names, 'o_speed_bar') / total
result = round_half_up(result, PRECISION)
except (TypeError, ZeroDivisionError, Warning, ValueError):
Expand Down Expand Up @@ -563,7 +564,7 @@ def calculate_vcnt_anom_corr_uncntr(input_data, columns_names):
def calculate_vcnt_dir_me(input_data, columns_names, aggregation=False):
warnings.filterwarnings('error')
try:
total = get_total_values(input_data, np.array(columns_names), aggregation)
total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
result = sum_column_data_by_name(input_data,np.array(columns_names), 'dir_me') / total

result = round_half_up(result, PRECISION)
Expand All @@ -576,7 +577,7 @@ def calculate_vcnt_dir_me(input_data, columns_names, aggregation=False):
def calculate_vcnt_dir_mae(input_data, columns_names, aggregation=False):
warnings.filterwarnings('error')
try:
total = get_total_values(input_data, np.array(columns_names), aggregation)
total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_mae') / total

result = round_half_up(result, PRECISION)
Expand All @@ -590,7 +591,7 @@ def calculate_vcnt_dir_mae(input_data, columns_names, aggregation=False):
def calculate_vcnt_dir_mse(input_data, columns_names, aggregation=False):
warnings.filterwarnings('error')
try:
total = get_total_values(input_data, np.array(columns_names), aggregation)
total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_mse') / total

result = round_half_up(result, PRECISION)
Expand Down
33 changes: 18 additions & 15 deletions metcalcpy/util/vl1l2_statistics.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
# ============================*
# ** Copyright UCAR (c) 2020
# ** University Corporation for Atmospheric Research (UCAR)
# ** National Center for Atmospheric Research (NCAR)
# ** Research Applications Lab (RAL)
# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA
# ============================*



# ** Copyright UCAR (c) 2020
# ** University Corporation for Atmospheric Research (UCAR)
# ** National Center for Atmospheric Research (NCAR)
# ** Research Applications Lab (RAL)
# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA
# ============================*


"""
Program Name: vl1l2_statistics.py
"""
import warnings
import numpy as np

from metcalcpy.util.met_stats import calc_speed
from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values
from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values, \
get_total_dir_values

__author__ = 'Tatiana Burek'
__version__ = '0.1.0'
Expand Down Expand Up @@ -250,8 +250,9 @@ def calculate_vl1l2_total(input_data, columns_names):
total = sum_column_data_by_name(input_data, columns_names, 'total')
return round_half_up(total, PRECISION)


def calculate_vl1l2_dir_me(input_data, columns_names, aggregation=False):
"""Performs calculation of DIR_ME
"""Performs calculation of DIR_ME, which was added in MET v12.0
Args:
input_data: 2-dimensional numpy array with data for the calculation
1st dimension - the row of data frame
Expand All @@ -261,7 +262,7 @@ def calculate_vl1l2_dir_me(input_data, columns_names, aggregation=False):
dir_me
"""
try:
total = get_total_values(input_data, np.array(columns_names), aggregation)
total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_me') / total

result = round_half_up(result, PRECISION)
Expand All @@ -283,7 +284,7 @@ def calculate_vl1l2_dir_mae(input_data, columns_names, aggregation=False):
dir_mae statistic
"""
try:
total = get_total_values(input_data, np.array(columns_names), aggregation)
total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_mae') / total

result = round_half_up(result, PRECISION)
Expand All @@ -293,6 +294,7 @@ def calculate_vl1l2_dir_mae(input_data, columns_names, aggregation=False):
warnings.filterwarnings('ignore')
return result


def calculate_vl1l2_dir_mse(input_data, columns_names, aggregation=False):
"""Performs calculation of DIR_MSE
Args:
Expand All @@ -304,12 +306,13 @@ def calculate_vl1l2_dir_mse(input_data, columns_names, aggregation=False):
dir_mse statistic
"""
try:
total = get_total_values(input_data, np.array(columns_names), aggregation)
total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_mse') / total

result = round_half_up(result, PRECISION)

except (TypeError, ZeroDivisionError, Warning, ValueError):
result = None
warnings.filterwarnings('ignore')
return result
return result

Loading

0 comments on commit d7631db

Please sign in to comment.