Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 306 pstd roc auc #307

Merged
merged 5 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion metcalcpy/agg_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,7 +1181,8 @@ def calculate_stats_and_ci(self):
type=argparse.FileType('r'),
default=sys.stdin)
ARGS = PARSER.parse_args()
PARAMS = yaml.load(ARGS.parameters_file, Loader=yaml.FullLoader)
with ARGS.parameters_file as parameters_file:
PARAMS = yaml.load(parameters_file, Loader=yaml.FullLoader)

AGG_STAT = AggStat(PARAMS)
AGG_STAT.calculate_stats_and_ci()
4 changes: 2 additions & 2 deletions metcalcpy/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,8 @@ def _get_confidence_interval_and_value(bootstrap_dist, stat_val, alpha, ci_metho
high = None
else:
bd = bootstrap_dist[bootstrap_dist != _np.array([None])]
low = _np.percentile(bd, 100 * (alpha / 2.), interpolation='linear')
high = _np.percentile(bd, 100 * (1 - alpha / 2.), interpolation='linear')
low = _np.percentile(bd, 100 * (alpha / 2.), method='linear')
high = _np.percentile(bd, 100 * (1 - alpha / 2.), method='linear')
val = stat_val
else:
low = None
Expand Down
10 changes: 9 additions & 1 deletion metcalcpy/util/pstd_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def calculate_pstd_brier(input_data, columns_names):
o_bar_table = df_pct_perm['oy_i'].sum() / t_table
o_bar = input_data[0, get_column_index_by_name(columns_names, 'o_bar')]

t_table.reset_index(inplace=True, drop=True)

reliability = calc_reliability(t_table, df_pct_perm)
resolution = calc_resolution(t_table, df_pct_perm, o_bar)
uncertainty = calc_uncertainty(o_bar_table)
Expand Down Expand Up @@ -74,6 +76,7 @@ def calculate_pstd_bss_smpl(input_data, columns_names):
o_bar_table = df_pct_perm['oy_i'].sum() / t_table
o_bar = input_data[0, get_column_index_by_name(columns_names, 'o_bar')]

t_table.reset_index(inplace=True, drop=True)
reliability = calc_reliability(t_table, df_pct_perm)
resolution = calc_resolution(t_table, df_pct_perm, o_bar)
uncertainty = calc_uncertainty(o_bar_table)
Expand Down Expand Up @@ -158,7 +161,7 @@ def calculate_pstd_resolution(input_data, columns_names):
df_pct_perm = _calc_common_stats(columns_names, input_data)
o_bar = input_data[0, get_column_index_by_name(columns_names, 'o_bar')]
t_table = df_pct_perm['n_i'].sum()

t_table.reset_index(inplace=True, drop=True)
resolution = calc_resolution(t_table, df_pct_perm, o_bar)
result = round_half_up(resolution, PRECISION)
except (TypeError, ZeroDivisionError, Warning, ValueError):
Expand Down Expand Up @@ -274,6 +277,7 @@ def calculate_pstd_roc_auc(input_data, columns_names):
{'thresh': 0, 'n11': 0, 'n10': 0, 'n01': 0, 'n00': 0, 'pody': 0, 'pofd': 0},
index=[0]) ])

final_roc.reset_index(inplace=True, drop=True)
roc_auc = 0
for index, row in final_roc.iterrows():
if index != 0:
Expand Down Expand Up @@ -352,6 +356,8 @@ def _calc_common_stats(columns_names, input_data):
pct_perm['thresh_i'].append(input_data[0, index])
# calculate vectors and constants to use below
df_pct_perm = pd.DataFrame(pct_perm)
df_pct_perm.reset_index(inplace=True, drop=True)

n_i = [row.oy_i + row.on_i for index, row in df_pct_perm.iterrows()]
df_pct_perm['n_i'] = n_i

Expand Down Expand Up @@ -401,6 +407,8 @@ def _calc_pct_roc(data):
df_roc.loc[df_roc.index[df_roc["thresh"] == thresh], 'n01'] = sum(data[is_less]['oy_i'])
df_roc.loc[df_roc.index[df_roc["thresh"] == thresh], 'n00'] = sum(data[is_less]['on_i'])

df_roc.reset_index(inplace=True, drop=True)

# generate the pody and pofd scores from the contingency tables
df_roc['pody'] = [row.n11 / (row.n11 + row.n01) for index, row in df_roc.iterrows()]
df_roc['pofd'] = [row.n10 / (row.n10 + row.n00) for index, row in df_roc.iterrows()]
Expand Down
Loading