#510 #511

Merged: 5 commits merged on May 1, 2024
Changes from 4 commits
29 changes: 20 additions & 9 deletions chainladder/core/base.py

@@ -256,8 +256,7 @@ def _to_datetime(data, fields, period_end=False, format=None):
def _development_lag(origin, valuation):
    """For tabular format, this will convert the origin/valuation
    difference to a development lag"""
-   return ((valuation - origin) / (365.25/12)).dt.round('1d').dt.days
+   return ((valuation - origin) / (365.25 / 12)).dt.round("1d").dt.days
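As a quick sanity check (not part of the diff) of the month-lag arithmetic: dividing the date difference by the average month length of 365.25 / 12 days scales the timedelta so that rounding to whole days and reading .dt.days yields the lag in months.

    import pandas as pd

    # Standalone check of the idiom above: Timedelta / float scales the
    # timedelta, so rounding to whole days then reading .dt.days yields
    # the development lag in months.
    origin = pd.Series(pd.to_datetime(["2020-01-01", "2020-01-01"]))
    valuation = pd.Series(pd.to_datetime(["2020-03-31", "2021-12-31"]))
    lag = ((valuation - origin) / (365.25 / 12)).dt.round("1d").dt.days
    print(lag.tolist())  # [3, 24]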

@staticmethod
def _get_grain(dates, trailing=False, kind="origin"):
@@ -274,9 +273,19 @@ def _get_grain(dates, trailing=False, kind="origin"):
months = dates.dt.month.unique()
diffs = np.diff(np.sort(months))
if len(dates.unique()) == 1:
-   grain = "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A"
+   grain = (
+       "Y"
+       if version.Version(pd.__version__) >= version.Version("2.2.0")
+       else "A"
+   )

Collaborator review comment: Should this be <= not >=?

elif len(months) == 1:
-   grain = "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A"
+   grain = (
+       "Y"
+       if version.Version(pd.__version__) >= version.Version("2.2.0")
+       else "A"
+   )
elif np.all(diffs == 6):
grain = "2Q"
elif np.all(diffs == 3):
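For reference, a minimal standalone sketch of the packaging-based version gate introduced here, assuming the intent is the pandas 2.2 rename of the annual frequency alias "A" to "Y" (the review comment above questions the direction of the comparison):

    import pandas as pd
    from packaging import version

    # Version-gated alias choice: pandas 2.2 deprecated the annual
    # frequency alias "A" in favour of "Y".
    grain = (
        "Y"
        if version.Version(pd.__version__) >= version.Version("2.2.0")
        else "A"
    )
    print(pd.__version__, "->", grain)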
@@ -402,7 +411,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
return obj
else:
raise NotImplementedError()

def _interchange_dataframe(self, data):
"""
Convert an object supporting the __dataframe__ protocol to a pandas DataFrame.
@@ -420,12 +429,14 @@ def _interchange_dataframe(self, data):
        # Check if pandas version is greater than 1.5.2
        if version.parse(pd.__version__) >= version.parse("1.5.2"):
            return pd.api.interchange.from_dataframe(data)
        else:
            # Raise an error prompting the user to upgrade pandas
-           raise NotImplementedError("Your version of pandas does not support the DataFrame interchange API. "
-                                     "Please upgrade pandas to a version greater than 1.5.2 to use this feature.")
+           raise NotImplementedError(
+               "Your version of pandas does not support the DataFrame interchange API. "
+               "Please upgrade pandas to a version greater than 1.5.2 to use this feature."
+           )
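A hedged usage sketch of the interchange path this method wraps; polars is only one example of a library exposing __dataframe__, and is assumed to be installed:

    import pandas as pd
    import polars as pl  # any __dataframe__-capable library works

    pl_df = pl.DataFrame({"origin": [2020, 2021], "paid": [100.0, 150.0]})

    # pandas >= 1.5.2 converts any interchange-protocol object; this is
    # the call _interchange_dataframe delegates to.
    pd_df = pd.api.interchange.from_dataframe(pl_df)
    print(type(pd_df))  # <class 'pandas.core.frame.DataFrame'>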

def __array_function__(self, func, types, args, kwargs):
from chainladder.utils.utility_functions import concat

94 changes: 27 additions & 67 deletions chainladder/core/correlation.py

@@ -8,11 +8,7 @@

from scipy.special import comb

-from scipy.stats import (
-    binom,
-    norm,
-    rankdata
-)
+from scipy.stats import binom, norm, rankdata

from typing import TYPE_CHECKING

@@ -52,11 +48,7 @@
to be significant.
"""

-   def __init__(
-       self,
-       triangle,
-       p_critical: float = 0.5
-   ):
+   def __init__(self, triangle, p_critical: float = 0.5):
self.p_critical = p_critical

# Check that critical value is a probability
@@ -69,19 +61,15 @@
m1 = triangle.link_ratio

# Rank link ratios by development period, assigning a score of 1 for the lowest
-       m1_val = xp.apply_along_axis(
-           func1d=rankdata,
-           axis=2,
-           arr=m1.values
-       ) * (m1.values * 0 + 1)
+       m1_val = xp.apply_along_axis(func1d=rankdata, axis=2, arr=m1.values) * (
+           m1.values * 0 + 1
+       )

# Remove the last element from each column, and then rank again
m2 = triangle[triangle.valuation < triangle.valuation_date].link_ratio
-       m2.values = xp.apply_along_axis(
-           func1d=rankdata,
-           axis=2,
-           arr=m2.values
-       ) * (m2.values * 0 + 1)
+       m2.values = xp.apply_along_axis(func1d=rankdata, axis=2, arr=m2.values) * (
+           m2.values * 0 + 1
+       )

m1 = m2.copy()
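An illustration (not from the diff) of the "* (values * 0 + 1)" idiom used in both rankings; the simplified one-dimensional variant below applies nan_to_num before ranking so NaN handling is deterministic across scipy versions:

    import numpy as np
    from scipy.stats import rankdata

    # (arr * 0 + 1) is 1.0 where arr is finite and NaN where arr is NaN,
    # so multiplying restores the NaN pattern after ranking discards it.
    arr = np.array([1.4, 0.9, np.nan, 1.1])
    ranks = rankdata(np.nan_to_num(arr)) * (arr * 0 + 1)
    print(ranks)  # [ 4.  2. nan  3.]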

@@ -122,33 +110,20 @@
self.t_variance = 2 / ((I - 2) * (I - 3))

# array of t values
-       self.t = pd.DataFrame(
-           self.t[0, 0, ...],
-           columns=k,
-           index=["T_k"]
-       )
+       self.t = pd.DataFrame(self.t[0, 0, ...], columns=k, index=["T_k"])

# array of weights
-       self.weights = pd.DataFrame(
-           weight[0, 0, ...],
-           columns=k,
-           index=["I-k-1"]
-       )
+       self.weights = pd.DataFrame(weight[0, 0, ...], columns=k, index=["I-k-1"])

# final big T
        self.t_expectation = pd.DataFrame(
-           t_expectation[..., 0, 0],
-           columns=triangle.vdims,
-           index=idx
+           t_expectation[..., 0, 0], columns=triangle.vdims, index=idx
        )

# table of Spearman's rank coefficients Tk, can be used to verify consistency with paper
-       self.corr = pd.concat([
-           self.t,
-           self.weights
-       ])
+       self.corr = pd.concat([self.t, self.weights])

-       self.corr.columns.names = ['k']
+       self.corr.columns.names = ["k"]

# construct confidence interval based on selection of p_critical
self.confidence_interval = (
@@ -198,18 +173,9 @@
The variance value of Z.
"""

-   def __init__(
-       self,
-       triangle: Triangle,
-       p_critical: float = 0.1,
-       total: bool = True
-   ):
+   def __init__(self, triangle: Triangle, p_critical: float = 0.1, total: bool = True):

-       def pZlower(
-           z: int,
-           n: int,
-           p: float = 0.5
-       ) -> float:
+       def pZlower(z: int, n: int, p: float = 0.5) -> float:
return min(1, 2 * binom.cdf(z, n, p))

self.p_critical = p_critical
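A standalone sketch (needing nothing beyond scipy) of what pZlower computes, a two-sided binomial tail probability:

    from scipy.stats import binom

    def pZlower(z: int, n: int, p: float = 0.5) -> float:
        # Two-sided p-value: probability of a count at least as extreme
        # as z out of n trials under a fair split.
        return min(1, 2 * binom.cdf(z, n, p))

    print(pZlower(2, 10))  # 0.109375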
@@ -223,31 +189,27 @@
lr = triangle.link_ratio

# Rank link ratios for each column
-       m1 = xp.apply_along_axis(
-           func1d=rankdata,
-           axis=2,
-           arr=lr.values) * (lr.values * 0 + 1)
-
-       med = xp.nanmedian(
-           a=m1,
-           axis=2,
-           keepdims=True
-       )
+       m1 = xp.apply_along_axis(func1d=rankdata, axis=2, arr=lr.values) * (
+           lr.values * 0 + 1
+       )
+
+       med = xp.nanmedian(a=m1, axis=2, keepdims=True)
# print("med:\n", med)

m1large = (xp.nan_to_num(m1) > med) + (lr.values * 0)
m1small = (xp.nan_to_num(m1) < med) + (lr.values * 0)
m2large = triangle.link_ratio
m2large.values = m1large
m2small = triangle.link_ratio
m2small.values = m1small
-       S = xp.nan_to_num(m2small.dev_to_val().sum(axis=2).set_backend('numpy').values)
-       L = xp.nan_to_num(m2large.dev_to_val().sum(axis=2).set_backend('numpy').values)
+       S = xp.nan_to_num(m2small.dev_to_val().sum(axis=2).set_backend("numpy").values)
+       L = xp.nan_to_num(m2large.dev_to_val().sum(axis=2).set_backend("numpy").values)
z = xp.minimum(L, S)
n = L + S
m = xp.floor((n - 1) / 2)
c = comb(n - 1, m)
-       EZ = (n / 2) - c * n / (2 ** n)
-       VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2 ** n) + EZ - EZ ** 2
+       EZ = (n / 2) - c * n / (2**n)
+       VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2**n) + EZ - EZ**2
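A worked instance (not from the diff; the counts are illustrative) of the moments above, which follow Mack's test for calendar-year effects:

    import numpy as np
    from scipy.special import comb

    # Z = min(L, S) counts the smaller of the "large" and "small" link
    # ratio groups on a diagonal; its moments under the null are:
    #   E[Z]   = n/2 - C(n-1, m) * n / 2**n
    #   Var[Z] = n(n-1)/4 - C(n-1, m) * n(n-1) / 2**n + E[Z] - E[Z]**2
    L, S = 4.0, 2.0
    n = L + S
    m = np.floor((n - 1) / 2)
    c = comb(n - 1, m)
    EZ = (n / 2) - c * n / (2**n)
    VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2**n) + EZ - EZ**2
    print(min(L, S), EZ, VarZ)  # 2.0 2.0625 0.62109375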
if not self.total:
T = []
for i in range(0, xp.max(m1large.shape[2:]) + 1):
@@ -296,9 +258,7 @@
)


-def validate_critical(
-    p_critical: float
-) -> None:
+def validate_critical(p_critical: float) -> None:
"""
Checks whether value passed to the p_critical parameter in ValuationCorrelation or DevelopmentCorrelation
classes is a percentage, that is, between 0 and 1.
@@ -311,4 +271,4 @@
if 0 <= p_critical <= 1:
pass
else:
-       raise ValueError('p_critical must be between 0 and 1.')
+       raise ValueError("p_critical must be between 0 and 1.")

27 changes: 17 additions & 10 deletions chainladder/core/triangle.py

@@ -6,6 +6,7 @@
import numpy as np
import copy
import warnings
+from packaging import version
from chainladder.core.base import TriangleBase
from chainladder.utils.sparse import sp
from chainladder.core.slice import VirtualColumns
@@ -125,7 +126,7 @@ def __init__(
return
elif not isinstance(data, pd.DataFrame) and hasattr(data, "__dataframe__"):
data = self._interchange_dataframe(data)

index, columns, origin, development = self._input_validation(
data, index, columns, origin, development
)
@@ -276,7 +277,7 @@ def __init__(
self.ddims = obj.ddims
self.values = obj.values
self.valuation_date = pd.Timestamp(options.ULT_VAL)

@staticmethod
def _split_ult(data, index, columns, origin, development):
"""Deal with triangles with ultimate values"""
@@ -330,17 +331,21 @@ def origin(self):
if self.is_pattern and len(self.odims) == 1:
return pd.Series(["(All)"])
else:
freq = {"Y": "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A",
"S": "2Q", "H": "2Q"}.get(
self.origin_grain, self.origin_grain
)
freq = {
"Y": "Y" if version.Version(pd.__version__) else "A",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the conditional be completed?

"S": "2Q",
"H": "2Q",
}.get(self.origin_grain, self.origin_grain)
freq = freq if freq == "M" else freq + "-" + self.origin_close
return pd.DatetimeIndex(self.odims, name="origin").to_period(freq=freq)

@origin.setter
def origin(self, value):
self._len_check(self.origin, value)
freq = {"Y": "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A", "S": "2Q"}.get(self.origin_grain, self.origin_grain)
freq = {
"Y": "Y" if float(".".join(pd.__version__.split(".")[:-1])) < 2.2 else "A",
"S": "2Q",
}.get(self.origin_grain, self.origin_grain)
freq = freq if freq == "M" else freq + "-" + self.origin_close
value = pd.PeriodIndex(list(value), freq=freq)
self.odims = value.to_timestamp().values
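A small sketch (not from the diff) of the frequency strings both the getter and setter build: the yearly alias is joined with the closing month, with the alias gated on the pandas version as in the code above:

    import pandas as pd
    from packaging import version

    # e.g. "Y-DEC" / "A-DEC" for annual origins closing in December.
    alias = "Y" if version.Version(pd.__version__) >= version.Version("2.2.0") else "A"
    odims = pd.to_datetime(["2020-01-01", "2021-01-01"])
    origin = pd.DatetimeIndex(odims, name="origin").to_period(freq=alias + "-DEC")
    print(origin)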
@@ -693,9 +698,11 @@ def grain(self, grain="", trailing=False, inplace=False):
obj.origin_close = origin_period_end
d_start = pd.Period(
obj.valuation[0],
-           freq=dgrain_old
-           if dgrain_old == "M"
-           else dgrain_old + obj.origin.freqstr[-4:],
+           freq=(
+               dgrain_old
+               if dgrain_old == "M"
+               else dgrain_old + obj.origin.freqstr[-4:]
+           ),
).to_timestamp(how="s")
if len(obj.ddims) > 1 and obj.origin.to_timestamp(how="s")[0] != d_start:
addl_ts = (
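Finally, a sketch (not from the diff, with an assumed quarterly development grain) of the d_start construction in the grain hunk: the first valuation date is snapped to the start of its period at the old grain:

    import pandas as pd

    # Period at the old grain (here "Q-DEC"), then the period's start.
    d_start = pd.Period(pd.Timestamp("2021-05-31"), freq="Q-DEC").to_timestamp(how="s")
    print(d_start)  # 2021-04-01 00:00:00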