From a4caceb721a958f9b9ae7c37d6a7fc053cbeae90 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Wed, 13 Dec 2023 18:33:11 -0800 Subject: [PATCH 01/17] Added labels --- chainladder/core/triangle.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py index 25213529..9bea5e47 100644 --- a/chainladder/core/triangle.py +++ b/chainladder/core/triangle.py @@ -128,6 +128,9 @@ def __init__( data, index, columns, origin, development ) + self.columns_label = columns + self.origin_label = origin + # Handle any ultimate vectors in triangles separately data, ult = self._split_ult(data, index, columns, origin, development) # Conform origins and developments to datetimes and determine lowest grains @@ -170,6 +173,7 @@ def __init__( # Deal with labels if not index: index = ["Total"] + self.index_label = index data_agg[index[0]] = "Total" self.kdims, key_idx = self._set_kdims(data_agg, index) @@ -670,8 +674,8 @@ def grain(self, grain="", trailing=False, inplace=False): obj = self.dev_to_val() if ograin_new != ograin_old: freq = {"Y": "A", "S": "2Q"}.get(ograin_new, ograin_new) - if trailing or (obj.origin.freqstr[-3:] != "DEC" and ograin_old != 'M'): - origin_period_end = self.origin[-1].strftime("%b").upper() + if trailing or (obj.origin.freqstr[-3:] != "DEC" and ograin_old != "M"): + origin_period_end = self.origin[-1].strftime("%b").upper() else: origin_period_end = "DEC" indices = ( @@ -685,12 +689,16 @@ def grain(self, grain="", trailing=False, inplace=False): obj = obj.groupby(groups, axis=2).sum() obj.origin_close = origin_period_end d_start = pd.Period( - obj.valuation[0], - freq=dgrain_old if dgrain_old == 'M' else dgrain_old + obj.origin.freqstr[-4:] - ).to_timestamp(how='s') - if (len(obj.ddims) > 1 and obj.origin.to_timestamp(how='s')[0] != d_start): + obj.valuation[0], + freq=dgrain_old + if dgrain_old == "M" + else dgrain_old + obj.origin.freqstr[-4:], + ).to_timestamp(how="s") + if len(obj.ddims) > 1 and obj.origin.to_timestamp(how="s")[0] != d_start: addl_ts = ( - pd.period_range(obj.odims[0], obj.valuation[0], freq=dgrain_old)[:-1] + pd.period_range(obj.odims[0], obj.valuation[0], freq=dgrain_old)[ + :-1 + ] .to_timestamp() .values ) From f8cef11e80fab79f6d64414ac04f8c7f4caeddf7 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Sat, 16 Dec 2023 10:12:51 -0800 Subject: [PATCH 02/17] Tracing --- chainladder/development/base.py | 341 +++++++++++++++++++------------- 1 file changed, 203 insertions(+), 138 deletions(-) diff --git a/chainladder/development/base.py b/chainladder/development/base.py index 2bf8d7a9..095109a6 100644 --- a/chainladder/development/base.py +++ b/chainladder/development/base.py @@ -12,17 +12,17 @@ class DevelopmentBase(BaseEstimator, TransformerMixin, EstimatorIO, Common): - - def fit(self,X,y=None,sample_weight=None): + def fit(self, X, y=None, sample_weight=None): average_ = self._validate_assumption(y, self.average, axis=3) self.average_ = average_.flatten() exponent = self.xp.array( - [{"regression": 0, "volume": 1, "simple": 2}[x] - for x in average_[0, 0, 0]] + [{"regression": 0, "volume": 1, "simple": 2}[x] for x in average_[0, 0, 0]] ) exponent = self.xp.nan_to_num(exponent * (y * 0 + 1)) w = num_to_nan(sample_weight / (X ** (exponent))) - self.params_ = WeightedRegression(axis=2, thru_orig=True, xp=self.xp).fit(X, y, w) + self.params_ = WeightedRegression(axis=2, thru_orig=True, xp=self.xp).fit( + X, y, w + ) return self def _set_fit_groups(self, X): @@ -74,11 +74,13 @@ def _assign_n_periods_weight_int(X, n_periods): xp = X.get_array_module() dict_map = { - item: _assign_n_periods_weight_int(X, item) for item in set(n_periods.flatten()) + item: _assign_n_periods_weight_int(X, item) + for item in set(n_periods.flatten()) } conc = [ - dict_map[item][..., num : num + 1] for num, item in enumerate(n_periods.flatten()) + dict_map[item][..., num : num + 1] + for num, item in enumerate(n_periods.flatten()) ] return xp.concatenate(tuple(conc), -1) @@ -92,8 +94,9 @@ def _drop_adjustment(self, X, link_ratio): weight = weight * self._drop_valuation(X) if (self.drop_high is not None) | (self.drop_low is not None): - n_periods_ = self._validate_assumption( - X, self.n_periods, axis=3)[0, 0, 0, :-1] + n_periods_ = self._validate_assumption(X, self.n_periods, axis=3)[ + 0, 0, 0, :-1 + ] w_ = self._assign_n_periods_weight(X, n_periods_) w_ = w_.astype("float") @@ -118,7 +121,7 @@ def _drop_adjustment(self, X, link_ratio): # for drop_high and drop_low def _drop_n(self, drop_high, drop_low, X, link_ratio, preserve): - #this is safe because each triangle by index and column has + # this is safe because each triangle by index and column has link_ratios_len = link_ratio.shape[3] def drop_array_helper(drop_type): @@ -147,42 +150,69 @@ def drop_array_helper(drop_type): return drop_type_array - #explicitly setting up 3D arrays for drop parameters to avoid broadcasting bugs - drop_high_array = np.zeros((link_ratio.shape[0],link_ratio.shape[1],link_ratios_len)) - drop_high_array[:,:,:] = drop_array_helper(drop_high) - drop_low_array = np.zeros((link_ratio.shape[0],link_ratio.shape[1],link_ratios_len)) - drop_low_array[:,:,:] = drop_array_helper(drop_low) - n_period_array = np.zeros((link_ratio.shape[0],link_ratio.shape[1],link_ratios_len)) - n_period_array[:,:,:] = drop_array_helper(self.n_periods) - preserve_array = np.zeros((link_ratio.shape[0],link_ratio.shape[1],link_ratios_len)) - preserve_array[:,:,:] = drop_array_helper(preserve) - - #operationalizing the -1 option for n_period + # explicitly setting up 3D arrays for drop parameters to avoid broadcasting bugs + drop_high_array = np.zeros( + (link_ratio.shape[0], link_ratio.shape[1], link_ratios_len) + ) + drop_high_array[:, :, :] = drop_array_helper(drop_high) + drop_low_array = np.zeros( + (link_ratio.shape[0], link_ratio.shape[1], link_ratios_len) + ) + drop_low_array[:, :, :] = drop_array_helper(drop_low) + n_period_array = np.zeros( + (link_ratio.shape[0], link_ratio.shape[1], link_ratios_len) + ) + n_period_array[:, :, :] = drop_array_helper(self.n_periods) + preserve_array = np.zeros( + (link_ratio.shape[0], link_ratio.shape[1], link_ratios_len) + ) + preserve_array[:, :, :] = drop_array_helper(preserve) + + # operationalizing the -1 option for n_period n_period_array = np.where(n_period_array == -1, link_ratios_len, n_period_array) - #ranking factors by itself and volume - link_ratio_ranks = np.lexsort((X.values[...,:-1],link_ratio),axis = 2).argsort(axis=2) + # ranking factors by itself and volume + link_ratio_ranks = np.lexsort((X.values[..., :-1], link_ratio), axis=2).argsort( + axis=2 + ) - #setting up default return - weights = ~np.isnan(link_ratio.transpose((0,1,3,2))) + # setting up default return + weights = ~np.isnan(link_ratio.transpose((0, 1, 3, 2))) - #counting valid factors + # counting valid factors ldf_count = weights.sum(axis=3) - #applying n_period - ldf_count_n_period = np.where(ldf_count > n_period_array, n_period_array, ldf_count) + # applying n_period + ldf_count_n_period = np.where( + ldf_count > n_period_array, n_period_array, ldf_count + ) - #applying drop_high and drop_low + # applying drop_high and drop_low max_rank_unpreserve = ldf_count_n_period - drop_high_array min_rank_unpreserve = drop_low_array - #applying preserve + # applying preserve warning_flag = np.any(max_rank_unpreserve - min_rank_unpreserve < preserve) - max_rank = np.where(max_rank_unpreserve - min_rank_unpreserve < preserve, ldf_count_n_period, max_rank_unpreserve) - min_rank = np.where(max_rank_unpreserve - min_rank_unpreserve < preserve, 0, min_rank_unpreserve) + max_rank = np.where( + max_rank_unpreserve - min_rank_unpreserve < preserve, + ldf_count_n_period, + max_rank_unpreserve, + ) + min_rank = np.where( + max_rank_unpreserve - min_rank_unpreserve < preserve, 0, min_rank_unpreserve + ) - index_array_weights = (link_ratio_ranks.transpose((0,1,3,2)) < max_rank.reshape(max_rank.shape[0],max_rank.shape[1],max_rank.shape[2],1)) & ( - link_ratio_ranks.transpose((0,1,3,2)) > min_rank.reshape(min_rank.shape[0],min_rank.shape[1],min_rank.shape[2],1) - 1 + index_array_weights = ( + link_ratio_ranks.transpose((0, 1, 3, 2)) + < max_rank.reshape( + max_rank.shape[0], max_rank.shape[1], max_rank.shape[2], 1 + ) + ) & ( + link_ratio_ranks.transpose((0, 1, 3, 2)) + > min_rank.reshape( + min_rank.shape[0], min_rank.shape[1], min_rank.shape[2], 1 + ) + - 1 ) weights = index_array_weights @@ -203,11 +233,11 @@ def drop_array_helper(drop_type): ) warnings.warn(warning) - return weights.transpose((0,1,3,2)) + return weights.transpose((0, 1, 3, 2)) # for drop_above and drop_below def _drop_x(self, drop_above, drop_below, X, link_ratio, preserve): - #this is safe because each triangle by index and column has + # this is safe because each triangle by index and column has link_ratios_len = link_ratio.shape[3] def drop_array_helper(drop_type, default_value): @@ -226,31 +256,41 @@ def drop_array_helper(drop_type, default_value): return drop_type_array - #explicitly setting up 3D arrays for drop parameters to avoid broadcasting bugs - drop_above_array = np.zeros((link_ratio.shape[0],link_ratio.shape[1],link_ratios_len)) - drop_above_array[:,:,:] = drop_array_helper(drop_above, np.inf) - drop_below_array = np.zeros((link_ratio.shape[0],link_ratio.shape[1],link_ratios_len)) - drop_below_array[:,:,:] = drop_array_helper(drop_below, 0.0) - preserve_array = np.zeros((link_ratio.shape[0],link_ratio.shape[1],link_ratios_len)) - preserve_array[:,:,:] = drop_array_helper(preserve, preserve) + # explicitly setting up 3D arrays for drop parameters to avoid broadcasting bugs + drop_above_array = np.zeros( + (link_ratio.shape[0], link_ratio.shape[1], link_ratios_len) + ) + drop_above_array[:, :, :] = drop_array_helper(drop_above, np.inf) + drop_below_array = np.zeros( + (link_ratio.shape[0], link_ratio.shape[1], link_ratios_len) + ) + drop_below_array[:, :, :] = drop_array_helper(drop_below, 0.0) + preserve_array = np.zeros( + (link_ratio.shape[0], link_ratio.shape[1], link_ratios_len) + ) + preserve_array[:, :, :] = drop_array_helper(preserve, preserve) - #transposing - link_ratio_T = link_ratio.transpose((0,1,3,2)) + # transposing + link_ratio_T = link_ratio.transpose((0, 1, 3, 2)) - #setting up default return + # setting up default return weights = ~np.isnan(link_ratio_T) - #dropping - index_array_weights = (link_ratio_T < drop_above_array[...,None]) & ( - link_ratio_T > drop_below_array[...,None] + # dropping + index_array_weights = (link_ratio_T < drop_above_array[..., None]) & ( + link_ratio_T > drop_below_array[..., None] ) - #counting remaining factors + # counting remaining factors ldf_count = index_array_weights.sum(axis=3) - #applying preserve + # applying preserve warning_flag = np.any(ldf_count < preserve_array) - weights = np.where(ldf_count[...,None] < preserve_array[...,None], weights, index_array_weights) + weights = np.where( + ldf_count[..., None] < preserve_array[..., None], + weights, + index_array_weights, + ) if warning_flag: if preserve == 1: @@ -268,9 +308,10 @@ def drop_array_helper(drop_type, default_value): ) warnings.warn(warning) - return weights.transpose((0,1,3,2)) + return weights.transpose((0, 1, 3, 2)) def _drop_valuation(self, X): + print("=== in _drop_valuation ===") xp = X.get_array_module() if type(self.drop_valuation) is not list: drop_valuation = [self.drop_valuation] @@ -301,9 +342,9 @@ def _drop(self, X): ] = 0 return arr[:, :-1] - def _param_array_helper(self,size, param, default_value): + def _param_array_helper(self, size, param, default_value): # setting default - param_array = pd.Series(size * [default_value]).astype('object') + param_array = pd.Series(size * [default_value]).astype("object") # only a single parameter is provided if isinstance(param, list): param_array[range(len(param))] = np.array(param) @@ -316,7 +357,7 @@ def _param_array_helper(self,size, param, default_value): param_array = param_array.astype(type(default_value)) return param_array.to_numpy() - def _set_weight_func(self,factor,secondary_rank=None): + def _set_weight_func(self, factor, secondary_rank=None): w = (~np.isnan(factor.values)).astype(float) w = w * self._assign_n_periods_weight_func(factor) if self.drop is not None: @@ -329,17 +370,17 @@ def _set_weight_func(self,factor,secondary_rank=None): w = w * self._drop_x_func(factor) if (self.drop_high is not None) | (self.drop_low is not None): - w = w * self._drop_n_func(factor * num_to_nan(w),secondary_rank) + w = w * self._drop_n_func(factor * num_to_nan(w), secondary_rank) w_tri = factor.copy() w_tri.values = num_to_nan(w) return w_tri - def _assign_n_periods_weight_func(self,factor): + def _assign_n_periods_weight_func(self, factor): """Used to apply the n_periods weight""" - #getting dimensions of factor for various manipulation + # getting dimensions of factor for various manipulation factor_len = factor.shape[3] - #putting n_periods into array - n_periods_array = self._param_array_helper(factor_len,self.n_periods,-1) + # putting n_periods into array + n_periods_array = self._param_array_helper(factor_len, self.n_periods, -1) def _assign_n_periods_weight_int(X, n_periods): xp = X.get_array_module() @@ -362,83 +403,101 @@ def _assign_n_periods_weight_int(X, n_periods): xp = factor.get_array_module() dict_map = { - item: _assign_n_periods_weight_int(factor, item) for item in set(n_periods_array) + item: _assign_n_periods_weight_int(factor, item) + for item in set(n_periods_array) } conc = [ - dict_map[item][..., num : num + 1] for num, item in enumerate(n_periods_array) + dict_map[item][..., num : num + 1] + for num, item in enumerate(n_periods_array) ] return xp.concatenate(tuple(conc), -1) - def _drop_func(self,factor): - #get the appropriate backend for nan_triangle and nan_to_num + def _drop_func(self, factor): + # get the appropriate backend for nan_triangle and nan_to_num xp = factor.get_array_module() - #turn single drop_valuation parameter to list if necessary - drop_list = self.drop if isinstance(self.drop,list) else [self.drop] - #get an starting array of weights + # turn single drop_valuation parameter to list if necessary + drop_list = self.drop if isinstance(self.drop, list) else [self.drop] + # get an starting array of weights arr = factor.nan_triangle.copy() - #accommodate ldf triangle as factor, where the dimensions are '12-24' - dev_list = factor.development.str.split("-",expand=True)[0] if factor.development.dtype == object else factor.development.astype("string") - #create ndarray of drop_list for further operation in numpy + # accommodate ldf triangle as factor, where the dimensions are '12-24' + dev_list = ( + factor.development.str.split("-", expand=True)[0] + if factor.development.dtype == object + else factor.development.astype("string") + ) + # create ndarray of drop_list for further operation in numpy drop_np = np.asarray(drop_list) - #find indices of drop_np - origin_ind = np.where(np.array([factor.origin.astype("string")]) == drop_np[:,[0]])[1] - dev_ind = np.where(np.array([dev_list]) == drop_np[:,[1]])[1] - #set weight of dropped factors to 0 - arr[(origin_ind,dev_ind)] = 0 - return xp.nan_to_num(arr)[None,None] - - def _drop_valuation_func(self,factor): - #get the appropriate backend for nan_to_num + # find indices of drop_np + origin_ind = np.where( + np.array([factor.origin.astype("string")]) == drop_np[:, [0]] + )[1] + dev_ind = np.where(np.array([dev_list]) == drop_np[:, [1]])[1] + # set weight of dropped factors to 0 + arr[(origin_ind, dev_ind)] = 0 + return xp.nan_to_num(arr)[None, None] + + def _drop_valuation_func(self, factor): + # get the appropriate backend for nan_to_num xp = factor.get_array_module() - #turn single drop_valuation parameter to list if necessary - if isinstance(self.drop_valuation,list): + # turn single drop_valuation parameter to list if necessary + if isinstance(self.drop_valuation, list): drop_valuation_list = self.drop_valuation else: drop_valuation_list = [self.drop_valuation] - #turn drop_valuation to same valuation freq as factor - v = pd.PeriodIndex(drop_valuation_list, freq=factor.development_grain).to_timestamp(how="e") - #warn that some drop_valuation are outside of factor + # turn drop_valuation to same valuation freq as factor + v = pd.PeriodIndex( + drop_valuation_list, freq=factor.development_grain + ).to_timestamp(how="e") + # warn that some drop_valuation are outside of factor if np.any(~v.isin(factor.valuation)): warnings.warn("Some valuations could not be dropped.") - #return triangle of 0/1 where dropped factors have 0 - b = xp.nan_to_num(factor.iloc[0,0][~factor.valuation.isin(v)].values * 0 + 1) - #check to make sure some factors are still left + # return triangle of 0/1 where dropped factors have 0 + b = xp.nan_to_num(factor.iloc[0, 0][~factor.valuation.isin(v)].values * 0 + 1) + # check to make sure some factors are still left if b.sum() == 0: raise Exception("The entire triangle has been dropped via drop_valuation.") return b - def _drop_x_func(self,factor): - #getting dimensions of factor for various manipulation + def _drop_x_func(self, factor): + # getting dimensions of factor for various manipulation factor_val = factor.values.copy() factor_len = factor_val.shape[3] indices = factor_val.shape[0] columns = factor_val.shape[1] - #explicitly setting up 3D arrays for drop parameters to avoid broadcasting bugs - drop_above_array = np.zeros((indices,columns,factor_len)) - drop_above_array[:,:,:] = self._param_array_helper(factor_len,self.drop_above, np.inf)[None,None] - drop_below_array = np.zeros((indices,columns,factor_len)) - drop_below_array[:,:,:] = self._param_array_helper(factor_len,self.drop_below, 0.0)[None,None] - preserve_array = np.zeros((indices,columns,factor_len)) - preserve_array[:,:,:] = self._param_array_helper(factor_len,self.preserve,self.preserve)[None,None] - #transposing so columns of factors (same dev age) are in the last index. - #not sure if this is really necessary. will leave for a better dev to find out - factor_val_T = factor_val.transpose((0,1,3,2)) - - #setting up starting array of weights + # explicitly setting up 3D arrays for drop parameters to avoid broadcasting bugs + drop_above_array = np.zeros((indices, columns, factor_len)) + drop_above_array[:, :, :] = self._param_array_helper( + factor_len, self.drop_above, np.inf + )[None, None] + drop_below_array = np.zeros((indices, columns, factor_len)) + drop_below_array[:, :, :] = self._param_array_helper( + factor_len, self.drop_below, 0.0 + )[None, None] + preserve_array = np.zeros((indices, columns, factor_len)) + preserve_array[:, :, :] = self._param_array_helper( + factor_len, self.preserve, self.preserve + )[None, None] + # transposing so columns of factors (same dev age) are in the last index. + # not sure if this is really necessary. will leave for a better dev to find out + factor_val_T = factor_val.transpose((0, 1, 3, 2)) + + # setting up starting array of weights w = ~np.isnan(factor_val_T) - #dropping - index_array_weights = (factor_val_T < drop_above_array[...,None]) & ( - factor_val_T > drop_below_array[...,None] + # dropping + index_array_weights = (factor_val_T < drop_above_array[..., None]) & ( + factor_val_T > drop_below_array[..., None] ) - #counting remaining factors + # counting remaining factors ldf_count = index_array_weights.sum(axis=3) - #applying preserve + # applying preserve warning_flag = np.any(ldf_count < preserve_array) - w = np.where(ldf_count[...,None] < preserve_array[...,None], w, index_array_weights) + w = np.where( + ldf_count[..., None] < preserve_array[..., None], w, index_array_weights + ) if warning_flag: if self.preserve == 1: @@ -456,17 +515,17 @@ def _drop_x_func(self,factor): ) warnings.warn(warning) - return w.transpose((0,1,3,2)).astype(float) + return w.transpose((0, 1, 3, 2)).astype(float) # for drop_high and drop_low - def _drop_n_func(self,factor,secondary_rank=None): - #getting dimensions of factor for various manipulation + def _drop_n_func(self, factor, secondary_rank=None): + # getting dimensions of factor for various manipulation factor_val = factor.values.copy() - #secondary rank is the optional triangle that breaks ties in factor - #the original use case is for dropping the link ratio of 1 with the lowest loss value - #(pass in a reverse rank of loss to drop link of ratio of 1 with the highest loss value) - #leaving to user to ensure that secondary rank is the same dimensions as factor - #also leaving to user to pick whether to trim head or tail + # secondary rank is the optional triangle that breaks ties in factor + # the original use case is for dropping the link ratio of 1 with the lowest loss value + # (pass in a reverse rank of loss to drop link of ratio of 1 with the highest loss value) + # leaving to user to ensure that secondary rank is the same dimensions as factor + # also leaving to user to pick whether to trim head or tail if secondary_rank is None: sec_rank_val = factor_val.copy() else: @@ -475,36 +534,42 @@ def _drop_n_func(self,factor,secondary_rank=None): indices = factor_val.shape[0] columns = factor_val.shape[1] - #explicitly setting up 3D arrays for drop parameters to avoid broadcasting bugs - drop_high_array = np.zeros((indices,columns,factor_len)) - drop_high_array[:,:,:] = self._param_array_helper(factor_len,self.drop_high,0)[None,None] - drop_low_array = np.zeros((indices,columns,factor_len)) - drop_low_array[:,:,:] = self._param_array_helper(factor_len,self.drop_low,0)[None,None] - preserve_array = np.zeros((indices,columns,factor_len)) - preserve_array[:,:,:] = self._param_array_helper(factor_len,self.preserve,self.preserve)[None,None] - - #ranking factors by itself and secondary rank - factor_ranks = np.lexsort((sec_rank_val,factor_val),axis = 2).argsort(axis=2) - - #setting up starting weights - w = ~np.isnan(factor_val.transpose((0,1,3,2))) - - #counting valid factors + # explicitly setting up 3D arrays for drop parameters to avoid broadcasting bugs + drop_high_array = np.zeros((indices, columns, factor_len)) + drop_high_array[:, :, :] = self._param_array_helper( + factor_len, self.drop_high, 0 + )[None, None] + drop_low_array = np.zeros((indices, columns, factor_len)) + drop_low_array[:, :, :] = self._param_array_helper( + factor_len, self.drop_low, 0 + )[None, None] + preserve_array = np.zeros((indices, columns, factor_len)) + preserve_array[:, :, :] = self._param_array_helper( + factor_len, self.preserve, self.preserve + )[None, None] + + # ranking factors by itself and secondary rank + factor_ranks = np.lexsort((sec_rank_val, factor_val), axis=2).argsort(axis=2) + + # setting up starting weights + w = ~np.isnan(factor_val.transpose((0, 1, 3, 2))) + + # counting valid factors ldf_count = w.sum(axis=3) - #getting max index after drop high + # getting max index after drop high max_rank_unpreserve = ldf_count - drop_high_array - #applying preserve + # applying preserve preserve_trigger = (max_rank_unpreserve - drop_low_array) < preserve_array warning_flag = np.any(preserve_trigger) max_rank = np.where(preserve_trigger, ldf_count, max_rank_unpreserve) min_rank = np.where(preserve_trigger, 0, drop_low_array) - #dropping - index_array_weights = (factor_ranks.transpose((0,1,3,2)) < max_rank[...,None]) & ( - factor_ranks.transpose((0,1,3,2)) > min_rank[...,None] - 1 - ) + # dropping + index_array_weights = ( + factor_ranks.transpose((0, 1, 3, 2)) < max_rank[..., None] + ) & (factor_ranks.transpose((0, 1, 3, 2)) > min_rank[..., None] - 1) if warning_flag: if self.preserve == 1: @@ -522,4 +587,4 @@ def _drop_n_func(self,factor,secondary_rank=None): ) warnings.warn(warning) - return index_array_weights.transpose((0,1,3,2)).astype(float) + return index_array_weights.transpose((0, 1, 3, 2)).astype(float) From 8fcd403c68b5492187a0b2c6e756fcac06037f8f Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Thu, 21 Mar 2024 18:52:31 -0700 Subject: [PATCH 03/17] Investigating the bug --- chainladder/utils/utility_functions.py | 36 +++++++++++++++++++------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index 96e3410d..f1ec7035 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -161,22 +161,36 @@ def parallelogram_olf( if not end_date: end_date = "{}-12-31".format(date.max().year) start_date = pd.to_datetime(start_date) - pd.tseries.offsets.DateOffset(days=1) + print("start_date:", start_date) + print("end_date", end_date) date_idx = pd.date_range( - start_date - pd.tseries.offsets.DateOffset(years=1), end_date + start_date - pd.tseries.offsets.DateOffset(years=1), + end_date, + freq="MS", ## TO DO, remove FREQ ) - y = pd.Series(np.array(values), np.array(date)) - y = y.reindex(date_idx, fill_value=0) - idx = np.cumprod(y.values + 1) - idx = idx[-1] / idx - y = pd.Series(idx, y.index) - y = y[~((y.index.day == 29) & (y.index.month == 2))] + rate_changes = pd.Series(np.array(values), np.array(date)) + # print("rate_changes:\n", rate_changes) + # y = rate_changes + rate_changes = rate_changes.reindex(date_idx, fill_value=0) + # print("y changes:\n", y) + cum_rate_changes = np.cumprod(1 + rate_changes.values) + crl = cum_rate_changes[-1] + print("crl", crl) + cum_rate_changes = pd.Series(cum_rate_changes, rate_changes.index) + y = cum_rate_changes + # y = y[~((y.index.day == 29) & (y.index.month == 2))] if not vertical_line: - y = y.rolling(365).mean() + y = y.rolling(12).mean() y = (y + y.shift(1).values) / 2 - y = y.iloc[366:] + print("first y\n", y) + y = y.iloc[12:] + print("y 2:\n", y) + print("y groupby:\n", y.groupby(y.index.to_period(grain)).mean().reset_index()) y = y.groupby(y.index.to_period(grain)).mean().reset_index() y.columns = ["Origin", "OLF"] y["Origin"] = y["Origin"].astype(str) + y["OLF"] = crl / y["OLF"] + print("y final:\n", y) return y.set_index("Origin") @@ -377,7 +391,9 @@ def model_diagnostics(model, name=None, groupby=None): latest = obj.X_.sum("development") run_off = obj.full_expectation_.iloc[..., :-1].dev_to_val().cum_to_incr() run_off = run_off[run_off.development > str(obj.X_.valuation_date)] - run_off = run_off.iloc[..., : {"M": 12, "S": 6, "Q": 4, "Y": 1}[obj.X_.development_grain]] + run_off = run_off.iloc[ + ..., : {"M": 12, "S": 6, "Q": 4, "Y": 1}[obj.X_.development_grain] + ] triangles = [] for col in obj.ultimate_.columns: From e6a1c98299626b47d226d08df6da91d51e9c889e Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Sat, 23 Mar 2024 18:12:25 -0700 Subject: [PATCH 04/17] Added approximation_grain --- chainladder/adjustments/parallelogram.py | 11 ++- chainladder/utils/utility_functions.py | 101 ++++++++++++++++++----- 2 files changed, 88 insertions(+), 24 deletions(-) diff --git a/chainladder/adjustments/parallelogram.py b/chainladder/adjustments/parallelogram.py index bdb53b47..4bf6bed7 100644 --- a/chainladder/adjustments/parallelogram.py +++ b/chainladder/adjustments/parallelogram.py @@ -36,11 +36,17 @@ class ParallelogramOLF(BaseEstimator, TransformerMixin, EstimatorIO): """ def __init__( - self, rate_history=None, change_col="", date_col="", vertical_line=False + self, + rate_history=None, + change_col="", + date_col="", + approximation_grain="M", + vertical_line=False, ): self.rate_history = rate_history self.change_col = change_col self.date_col = date_col + self.approximation_grain = approximation_grain self.vertical_line = vertical_line def fit(self, X, y=None, sample_weight=None): @@ -77,6 +83,7 @@ def fit(self, X, y=None, sample_weight=None): end_date=X.origin[-1].to_timestamp(how="e"), grain=X.origin_grain, vertical_line=self.vertical_line, + approximation_grain=self.approximation_grain, ) if len(groups) > 0: @@ -105,7 +112,7 @@ def fit(self, X, y=None, sample_weight=None): return self def transform(self, X, y=None, sample_weight=None): - """ If X and self are of different shapes, align self to X, else + """If X and self are of different shapes, align self to X, else return self. Parameters diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index f1ec7035..b2530806 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -152,7 +152,13 @@ def read_json(json_str, array_backend=None): def parallelogram_olf( - values, date, start_date=None, end_date=None, grain="M", vertical_line=False + values, + date, + start_date=None, + end_date=None, + grain="Y", + approximation_grain="M", + vertical_line=False, ): """Parallelogram approach to on-leveling.""" date = pd.to_datetime(date) @@ -162,36 +168,87 @@ def parallelogram_olf( end_date = "{}-12-31".format(date.max().year) start_date = pd.to_datetime(start_date) - pd.tseries.offsets.DateOffset(days=1) print("start_date:", start_date) - print("end_date", end_date) + print("end_date:", end_date) + print("approximation_grain:", approximation_grain) + + date_freq = { + "M": "MS", + "D": "D", + } + try: + date_freq[approximation_grain] + except: + print("grain must be " "M" " or " "D" "") + date_idx = pd.date_range( start_date - pd.tseries.offsets.DateOffset(years=1), end_date, - freq="MS", ## TO DO, remove FREQ + freq=date_freq[approximation_grain], ) + rate_changes = pd.Series(np.array(values), np.array(date)) - # print("rate_changes:\n", rate_changes) - # y = rate_changes rate_changes = rate_changes.reindex(date_idx, fill_value=0) - # print("y changes:\n", y) + cum_rate_changes = np.cumprod(1 + rate_changes.values) - crl = cum_rate_changes[-1] - print("crl", crl) cum_rate_changes = pd.Series(cum_rate_changes, rate_changes.index) - y = cum_rate_changes - # y = y[~((y.index.day == 29) & (y.index.month == 2))] + + crl = cum_rate_changes[-1] + if not vertical_line: - y = y.rolling(12).mean() - y = (y + y.shift(1).values) / 2 - print("first y\n", y) - y = y.iloc[12:] - print("y 2:\n", y) - print("y groupby:\n", y.groupby(y.index.to_period(grain)).mean().reset_index()) - y = y.groupby(y.index.to_period(grain)).mean().reset_index() - y.columns = ["Origin", "OLF"] - y["Origin"] = y["Origin"].astype(str) - y["OLF"] = crl / y["OLF"] - print("y final:\n", y) - return y.set_index("Origin") + rolling_num = { + "M": 12, + "D": 365, + } + + cum_avg_rate_non_leaps = cum_rate_changes.rolling( + rolling_num[approximation_grain] + ).mean() + cum_avg_rate_non_leaps = ( + cum_avg_rate_non_leaps + cum_avg_rate_non_leaps.shift(1).values + ) / 2 + + cum_avg_rate_leaps = cum_rate_changes.rolling( + rolling_num[approximation_grain] + 1 + ).mean() + cum_avg_rate_leaps = ( + cum_avg_rate_leaps + cum_avg_rate_leaps.shift(1).values + ) / 2 + + dropdates_num = { + "M": 12, + "D": 366, + } + cum_avg_rate_non_leaps = cum_avg_rate_non_leaps.iloc[ + dropdates_num[approximation_grain] : + ] + cum_avg_rate_leaps = cum_avg_rate_leaps.iloc[ + dropdates_num[approximation_grain] + 1 : + ] + + fcrl_non_leaps = ( + cum_avg_rate_non_leaps.groupby(cum_avg_rate_non_leaps.index.to_period(grain)) + .mean() + .reset_index() + ) + fcrl_non_leaps.columns = ["Origin", "OLF"] + fcrl_non_leaps["Origin"] = fcrl_non_leaps["Origin"].astype(str) + fcrl_non_leaps["OLF"] = crl / fcrl_non_leaps["OLF"] + print("fcrl_non_leaps final:\n", fcrl_non_leaps) + + fcrl_leaps = ( + cum_avg_rate_leaps.groupby(cum_avg_rate_leaps.index.to_period(grain)) + .mean() + .reset_index() + ) + fcrl_leaps.columns = ["Origin", "OLF"] + fcrl_leaps["Origin"] = fcrl_leaps["Origin"].astype(str) + fcrl_leaps["OLF"] = crl / fcrl_leaps["OLF"] + print("fcrl_leaps final:\n", fcrl_leaps) + + master = fcrl_non_leaps.join(fcrl_leaps, lsuffix="_non_leaps", rsuffix="_leaps") + print("msater", master) + + return fcrl_non_leaps.set_index("Origin") def set_common_backend(objs): From b24b15bed7737a55d9dba03be46e5793ac4b1573 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Mon, 25 Mar 2024 22:25:48 -0700 Subject: [PATCH 05/17] Clean up debugger --- chainladder/utils/utility_functions.py | 30 +++++++++++++++++--------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index b2530806..b7596197 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -167,14 +167,12 @@ def parallelogram_olf( if not end_date: end_date = "{}-12-31".format(date.max().year) start_date = pd.to_datetime(start_date) - pd.tseries.offsets.DateOffset(days=1) - print("start_date:", start_date) - print("end_date:", end_date) - print("approximation_grain:", approximation_grain) date_freq = { "M": "MS", "D": "D", } + try: date_freq[approximation_grain] except: @@ -188,10 +186,8 @@ def parallelogram_olf( rate_changes = pd.Series(np.array(values), np.array(date)) rate_changes = rate_changes.reindex(date_idx, fill_value=0) - cum_rate_changes = np.cumprod(1 + rate_changes.values) cum_rate_changes = pd.Series(cum_rate_changes, rate_changes.index) - crl = cum_rate_changes[-1] if not vertical_line: @@ -233,7 +229,6 @@ def parallelogram_olf( fcrl_non_leaps.columns = ["Origin", "OLF"] fcrl_non_leaps["Origin"] = fcrl_non_leaps["Origin"].astype(str) fcrl_non_leaps["OLF"] = crl / fcrl_non_leaps["OLF"] - print("fcrl_non_leaps final:\n", fcrl_non_leaps) fcrl_leaps = ( cum_avg_rate_leaps.groupby(cum_avg_rate_leaps.index.to_period(grain)) @@ -243,12 +238,27 @@ def parallelogram_olf( fcrl_leaps.columns = ["Origin", "OLF"] fcrl_leaps["Origin"] = fcrl_leaps["Origin"].astype(str) fcrl_leaps["OLF"] = crl / fcrl_leaps["OLF"] - print("fcrl_leaps final:\n", fcrl_leaps) - master = fcrl_non_leaps.join(fcrl_leaps, lsuffix="_non_leaps", rsuffix="_leaps") - print("msater", master) + combined = fcrl_non_leaps.join(fcrl_leaps, lsuffix="_non_leaps", rsuffix="_leaps") + combined["is_leap"] = pd.to_datetime( + combined["Origin_non_leaps"], format="%Y" + ).dt.is_leap_year + + if approximation_grain == "M": + combined["final_OLF"] = combined["OLF_non_leaps"] + else: + combined["final_OLF"] = np.where( + combined["is_leap"], combined["OLF_leaps"], combined["OLF_non_leaps"] + ) + + combined.drop( + ["OLF_non_leaps", "Origin_leaps", "OLF_leaps", "is_leap"], + axis=1, + inplace=True, + ) + combined.columns = ["Origin", "OLF"] - return fcrl_non_leaps.set_index("Origin") + return combined.set_index("Origin") def set_common_backend(objs): From 9c741dde078652af356074f95d0755d9b589d821 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Tue, 26 Mar 2024 10:27:11 -0700 Subject: [PATCH 06/17] Added test cases --- chainladder/utils/tests/test_utilities.py | 76 +++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/chainladder/utils/tests/test_utilities.py b/chainladder/utils/tests/test_utilities.py index bf8c4f8c..201fa46e 100644 --- a/chainladder/utils/tests/test_utilities.py +++ b/chainladder/utils/tests/test_utilities.py @@ -2,6 +2,7 @@ from chainladder.utils.cupy import cp import numpy as np import copy +import pandas as pd def test_non_vertical_line(): @@ -13,6 +14,81 @@ def test_non_vertical_line(): cl.parallelogram_olf([0.20], ["7/2/2017"], grain="Y").loc["2017"].iloc[0] - 1 ) assert olf_low < true_olf < olf_high + # Monthly approximation + rate_history = pd.DataFrame( + { + "EffDate": ["2010-07-01", "2011-01-01", "2012-07-01", "2013-04-01"], + "RateChange": [0.035, 0.05, 0.10, -0.01], + } + ) + + data = pd.DataFrame( + {"Year": list(range(2006, 2016)), "EarnedPremium": [10_000] * 10} + ) + + prem_tri = cl.Triangle( + data, origin="Year", columns="EarnedPremium", cumulative=True + ) + prem_tri = cl.ParallelogramOLF( + rate_history, + change_col="RateChange", + date_col="EffDate", + approximation_grain="M", + vertical_line=False, + ).fit_transform(prem_tri) + assert ( + np.round(prem_tri.olf_.to_frame().values, 6).flatten() + == [ + 1.183471, + 1.183471, + 1.183471, + 1.183471, + 1.178316, + 1.120181, + 1.075556, + 1.004236, + 0.999684, + 1.000000, + ] + ).all() + + # Daily approximation + rate_history = pd.DataFrame( + { + "EffDate": ["2010-07-01", "2011-01-01", "2012-07-01", "2013-04-01"], + "RateChange": [0.035, 0.05, 0.10, -0.01], + } + ) + + data = pd.DataFrame( + {"Year": list(range(2006, 2016)), "EarnedPremium": [10_000] * 10} + ) + + prem_tri = cl.Triangle( + data, origin="Year", columns="EarnedPremium", cumulative=True + ) + prem_tri = cl.ParallelogramOLF( + rate_history, + change_col="RateChange", + date_col="EffDate", + approximation_grain="D", + vertical_line=False, + ).fit_transform(prem_tri) + assert ( + np.round(prem_tri.olf_.to_frame().values, 6).flatten() + == [ + 1.183471, + 1.183471, + 1.183471, + 1.183471, + 1.178231, + 1.120105, + 1.075410, + 1.004073, + 0.999693, + 1.000000, + ] + ).all() def test_vertical_line(): From 6c9ad1c6ae39926f96d9660a18d27959a2a36e1d Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Tue, 26 Mar 2024 15:15:14 -0700 Subject: [PATCH 07/17] Added assert in some test cases --- chainladder/methods/tests/test_predict.py | 84 ++++++++++++++--------- 1 file changed, 52 insertions(+), 32 deletions(-) diff --git a/chainladder/methods/tests/test_predict.py b/chainladder/methods/tests/test_predict.py index 482f8a15..9cc40217 100644 --- a/chainladder/methods/tests/test_predict.py +++ b/chainladder/methods/tests/test_predict.py @@ -9,27 +9,21 @@ def test_cc_predict(): cc = cl.CapeCod().fit(raa_1989, sample_weight=apriori_1989) - cc.predict(raa, sample_weight=apriori) + assert cc.predict(raa, sample_weight=apriori) def test_bf_predict(): - cc = cl.BornhuetterFerguson().fit(raa_1989, sample_weight=apriori_1989) - cc.predict(raa, sample_weight=apriori) + bf = cl.BornhuetterFerguson().fit(raa_1989, sample_weight=apriori_1989) + assert bf.predict(raa, sample_weight=apriori) def test_mack_predict(): mack = cl.MackChainladder().fit(raa_1989) - mack.predict(raa_1989) - # mack.predict(raa) + assert mack.predict(raa_1989) def test_bs_random_state_predict(clrd): - tri = ( - clrd - .groupby("LOB") - .sum() - .loc["wkcomp", ["CumPaidLoss", "EarnedPremNet"]] - ) + tri = clrd.groupby("LOB").sum().loc["wkcomp", ["CumPaidLoss", "EarnedPremNet"]] X = cl.BootstrapODPSample(random_state=100).fit_transform(tri["CumPaidLoss"]) bf = cl.BornhuetterFerguson(apriori=0.6, apriori_sigma=0.1, random_state=42).fit( X, sample_weight=tri["EarnedPremNet"].latest_diagonal @@ -56,20 +50,27 @@ def test_basic_transform(raa): cl.BootstrapODPSample().fit_transform(raa) cl.IncrementalAdditive().fit_transform(raa, sample_weight=raa.latest_diagonal) + def test_misaligned_index(prism): - prism = prism['Paid'] - model = cl.Chainladder().fit(cl.Development(groupby=['Line', 'Type']).fit_transform(prism)) + prism = prism["Paid"] + model = cl.Chainladder().fit( + cl.Development(groupby=["Line", "Type"]).fit_transform(prism) + ) a = model.ultimate_.loc[prism.index.iloc[:10]].sum().sum() b = model.predict(prism.iloc[:10]).ultimate_.sum().sum() assert abs(a - b) < 1e-5 def test_misaligned_index2(clrd): - clrd = clrd['CumPaidLoss'] - w = cl.load_sample('clrd')['EarnedPremDIR'].latest_diagonal - bcl = cl.Chainladder().fit(cl.Development(groupby=['LOB']).fit_transform(clrd)) - bbk = cl.Benktander().fit(cl.Development(groupby=['LOB']).fit_transform(clrd), sample_weight=w) - bcc = cl.CapeCod().fit(cl.Development(groupby=['LOB']).fit_transform(clrd), sample_weight=w) + clrd = clrd["CumPaidLoss"] + w = cl.load_sample("clrd")["EarnedPremDIR"].latest_diagonal + bcl = cl.Chainladder().fit(cl.Development(groupby=["LOB"]).fit_transform(clrd)) + bbk = cl.Benktander().fit( + cl.Development(groupby=["LOB"]).fit_transform(clrd), sample_weight=w + ) + bcc = cl.CapeCod().fit( + cl.Development(groupby=["LOB"]).fit_transform(clrd), sample_weight=w + ) a = bcl.ultimate_.iloc[:10].sum().sum() b = bcl.predict(clrd.iloc[:10]).ultimate_.sum().sum() @@ -85,20 +86,36 @@ def test_misaligned_index2(clrd): b = bcl.predict(clrd.iloc[150:153]).ultimate_.sum().sum() assert abs(a - b) < 1e-5 a = bbk.ultimate_.iloc[150:153].sum().sum() - b = bbk.predict(clrd.iloc[150:153], sample_weight=w.iloc[150:153]).ultimate_.sum().sum() + b = ( + bbk.predict(clrd.iloc[150:153], sample_weight=w.iloc[150:153]) + .ultimate_.sum() + .sum() + ) assert abs(a - b) < 1e-5 a = bcc.ultimate_.iloc[150:153].sum().sum() - b = bcc.predict(clrd.iloc[150:153], sample_weight=w.iloc[150:153]).ultimate_.sum().sum() + b = ( + bcc.predict(clrd.iloc[150:153], sample_weight=w.iloc[150:153]) + .ultimate_.sum() + .sum() + ) assert abs(a - b) < 1e-5 a = bcl.ultimate_.iloc[150:152].sum().sum() b = bcl.predict(clrd.iloc[150:152]).ultimate_.sum().sum() assert abs(a - b) < 1e-5 a = bbk.ultimate_.iloc[150:152].sum().sum() - b = bbk.predict(clrd.iloc[150:152], sample_weight=w.iloc[150:152]).ultimate_.sum().sum() + b = ( + bbk.predict(clrd.iloc[150:152], sample_weight=w.iloc[150:152]) + .ultimate_.sum() + .sum() + ) assert abs(a - b) < 1e-5 a = bcc.ultimate_.iloc[150:152].sum().sum() - b = bcc.predict(clrd.iloc[150:152], sample_weight=w.iloc[150:152]).ultimate_.sum().sum() + b = ( + bcc.predict(clrd.iloc[150:152], sample_weight=w.iloc[150:152]) + .ultimate_.sum() + .sum() + ) assert abs(a - b) < 1e-5 a = bcl.ultimate_.iloc[150].sum().sum() @@ -111,22 +128,25 @@ def test_misaligned_index2(clrd): b = bcc.predict(clrd.iloc[150], sample_weight=w.iloc[150]).ultimate_.sum().sum() assert abs(a - b) < 1e-5 + def test_align_cdfs(): - ld = cl.load_sample('raa').latest_diagonal*0+40000 - model = cl.BornhuetterFerguson().fit(cl.load_sample('raa'), sample_weight=ld) - a = model.ultimate_.iloc[..., :4, :] + ld = cl.load_sample("raa").latest_diagonal * 0 + 40000 + model = cl.BornhuetterFerguson().fit(cl.load_sample("raa"), sample_weight=ld) + a = model.ultimate_.iloc[..., :4, :] b = model.predict( - cl.load_sample('raa').dev_to_val().iloc[..., :4, -1].val_to_dev(), - sample_weight=ld.iloc[..., :4, :]).ultimate_ + cl.load_sample("raa").dev_to_val().iloc[..., :4, -1].val_to_dev(), + sample_weight=ld.iloc[..., :4, :], + ).ultimate_ assert a == b - model = cl.Chainladder().fit(cl.load_sample('raa'), sample_weight=ld) - a = model.ultimate_.iloc[..., :4, :] + model = cl.Chainladder().fit(cl.load_sample("raa"), sample_weight=ld) + a = model.ultimate_.iloc[..., :4, :] b = model.predict( - cl.load_sample('raa').dev_to_val().iloc[..., :4, -1].val_to_dev(), - sample_weight=ld.iloc[..., :4, :]).ultimate_ + cl.load_sample("raa").dev_to_val().iloc[..., :4, -1].val_to_dev(), + sample_weight=ld.iloc[..., :4, :], + ).ultimate_ assert a == b def test_check_val_tri_cl(raa): model = cl.Chainladder().fit(raa.dev_to_val()) - assert model.predict(raa.latest_diagonal).ultimate_ == model.ultimate_ \ No newline at end of file + assert model.predict(raa.latest_diagonal).ultimate_ == model.ultimate_ From fddc0a62e8a07b2120195254ab5c1297abc8fc1e Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Tue, 26 Mar 2024 15:45:09 -0700 Subject: [PATCH 08/17] Able to print an empty triangle --- chainladder/core/display.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/chainladder/core/display.py b/chainladder/core/display.py index e7fe4e87..bed3abff 100644 --- a/chainladder/core/display.py +++ b/chainladder/core/display.py @@ -13,11 +13,17 @@ class TriangleDisplay: def __repr__(self): - if (self.values.shape[0], self.values.shape[1]) == (1, 1): - data = self._repr_format() - return data.to_string() - else: - return self._summary_frame().__repr__() + try: + self.values + + if (self.values.shape[0], self.values.shape[1]) == (1, 1): + data = self._repr_format() + return data.to_string() + else: + return self._summary_frame().__repr__() + + except: + print("Triangle is empty") def _summary_frame(self): return pd.Series( @@ -33,7 +39,7 @@ def _summary_frame(self): ).to_frame() def _repr_html_(self): - """ Jupyter/Ipython HTML representation """ + """Jupyter/Ipython HTML representation""" if (self.values.shape[0], self.values.shape[1]) == (1, 1): data = self._repr_format() fmt_str = self._get_format_str(data) @@ -66,7 +72,7 @@ def _get_format_str(self, data): def _repr_format(self, origin_as_datetime=False): out = self.compute().set_backend("numpy").values[0, 0] if origin_as_datetime and not self.is_pattern: - origin = self.origin.to_timestamp(how='s') + origin = self.origin.to_timestamp(how="s") else: origin = self.origin.copy() origin.name = None @@ -85,7 +91,7 @@ def _repr_format(self, origin_as_datetime=False): return pd.DataFrame(out, index=origin, columns=development) def heatmap(self, cmap="coolwarm", low=0, high=0, axis=0, subset=None): - """ Color the background in a gradient according to the data in each + """Color the background in a gradient according to the data in each column (optionally row). Requires matplotlib Parameters @@ -134,7 +140,12 @@ def heatmap(self, cmap="coolwarm", low=0, high=0, axis=0, subset=None): else: default_output = ( data.style.format(fmt_str) - .background_gradient(cmap=cmap, low=low, high=high, axis=axis,) + .background_gradient( + cmap=cmap, + low=low, + high=high, + axis=axis, + ) .render() ) output_xnan = re.sub("", "", default_output) From d06ac9bbe01a94be8a73b6cfa0f2987a64b9bdc1 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Tue, 26 Mar 2024 15:47:09 -0700 Subject: [PATCH 09/17] Added test cases --- chainladder/core/tests/test_display.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/chainladder/core/tests/test_display.py b/chainladder/core/tests/test_display.py index 0de9ab8d..981e566c 100644 --- a/chainladder/core/tests/test_display.py +++ b/chainladder/core/tests/test_display.py @@ -3,18 +3,18 @@ def test_heatmap_render(raa): - """ The heatmap method should render correctly given the sample.""" - return raa.heatmap() + """The heatmap method should render correctly given the sample.""" + assert raa.heatmap() -def test_to_frame(raa): - try: - cl.Chainladder().fit(raa).cdf_.to_frame() - cl.Chainladder().fit(raa).cdf_.to_frame(origin_as_datetime=False) - cl.Chainladder().fit(raa).cdf_.to_frame(origin_as_datetime=True) - cl.Chainladder().fit(raa).ultimate_.to_frame() - cl.Chainladder().fit(raa).ultimate_.to_frame(origin_as_datetime=False) - cl.Chainladder().fit(raa).ultimate_.to_frame(origin_as_datetime=True) +def test_empty_triangle(): + assert cl.Triangle() + - except: - assert False +def test_to_frame(raa): + assert cl.Chainladder().fit(raa).cdf_.to_frame() + assert cl.Chainladder().fit(raa).cdf_.to_frame(origin_as_datetime=False) + assert cl.Chainladder().fit(raa).cdf_.to_frame(origin_as_datetime=True) + assert cl.Chainladder().fit(raa).ultimate_.to_frame() + assert cl.Chainladder().fit(raa).ultimate_.to_frame(origin_as_datetime=False) + assert cl.Chainladder().fit(raa).ultimate_.to_frame(origin_as_datetime=True) From 578b2376f947e1a23bd35a59ca9a47c06556a464 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Tue, 26 Mar 2024 17:07:10 -0700 Subject: [PATCH 10/17] Fixed display issues --- chainladder/core/display.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/chainladder/core/display.py b/chainladder/core/display.py index bed3abff..1cf34cd6 100644 --- a/chainladder/core/display.py +++ b/chainladder/core/display.py @@ -15,15 +15,15 @@ class TriangleDisplay: def __repr__(self): try: self.values - - if (self.values.shape[0], self.values.shape[1]) == (1, 1): - data = self._repr_format() - return data.to_string() - else: - return self._summary_frame().__repr__() - except: print("Triangle is empty") + return + + if (self.values.shape[0], self.values.shape[1]) == (1, 1): + data = self._repr_format() + return data.to_string() + else: + return self._summary_frame().__repr__() def _summary_frame(self): return pd.Series( From 1758dfd983c1d7f5729bcdd15db9d031616903cf Mon Sep 17 00:00:00 2001 From: John S Bogaardt Date: Wed, 27 Mar 2024 14:42:02 -0600 Subject: [PATCH 11/17] Update base.py --- chainladder/core/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chainladder/core/base.py b/chainladder/core/base.py index 9d8c3109..51648611 100644 --- a/chainladder/core/base.py +++ b/chainladder/core/base.py @@ -258,7 +258,7 @@ def _to_datetime(data, fields, period_end=False, format=None): def _development_lag(origin, valuation): """For tabular format, this will convert the origin/valuation difference to a development lag""" - return ((valuation - origin) / (365.25/12)).round('1d').dt.days + return ((valuation - origin) / (365.25/12)).dt.round('1d').dt.days @staticmethod From 13a1f22beeadedb75a0e7047bddd2736a36be788 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Sun, 31 Mar 2024 14:36:27 -0700 Subject: [PATCH 12/17] Addressed a calculation bug --- chainladder/utils/tests/test_utilities.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/chainladder/utils/tests/test_utilities.py b/chainladder/utils/tests/test_utilities.py index 201fa46e..e0b23ddc 100644 --- a/chainladder/utils/tests/test_utilities.py +++ b/chainladder/utils/tests/test_utilities.py @@ -6,14 +6,24 @@ def test_non_vertical_line(): - true_olf = (1 - 0.5 * ((31 + 31 + 30 + 31 + 30 + 31) / 365.25) ** 2) * 0.2 - olf_low = ( - cl.parallelogram_olf([0.20], ["7/1/2017"], grain="Y").loc["2017"].iloc[0] - 1 + true_olf = ( + 1.20 + / ( + (1 - 0.5 * ((31 + 31 + 30 + 31 + 30 + 31) / 365) ** 2) * 1.0 + + (0.5 * ((31 + 31 + 30 + 31 + 30 + 31) / 365) ** 2) * 1.2 + ) + - 1 ) - olf_high = ( - cl.parallelogram_olf([0.20], ["7/2/2017"], grain="Y").loc["2017"].iloc[0] - 1 + + result = ( + cl.parallelogram_olf([0.20], ["7/1/2017"], approximation_grain="D") + .loc["2017"] + .iloc[0] + - 1 ) - assert olf_low < true_olf < olf_high + + assert true_olf == result + # Monthly approximation rate_history = pd.DataFrame( { From 148dec1010627e997d989be1040d8c8087855b93 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Sun, 31 Mar 2024 14:36:28 -0700 Subject: [PATCH 13/17] Added debugger --- chainladder/utils/utility_functions.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index b7596197..bb9031c2 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -185,10 +185,14 @@ def parallelogram_olf( ) rate_changes = pd.Series(np.array(values), np.array(date)) + print("rate_changes:\n", rate_changes) rate_changes = rate_changes.reindex(date_idx, fill_value=0) + print("rate_changes:\n", rate_changes) cum_rate_changes = np.cumprod(1 + rate_changes.values) cum_rate_changes = pd.Series(cum_rate_changes, rate_changes.index) + # print("cum_rate_changes:\n", cum_rate_changes) crl = cum_rate_changes[-1] + # print("crl:", crl) if not vertical_line: rolling_num = { @@ -209,6 +213,8 @@ def parallelogram_olf( cum_avg_rate_leaps = ( cum_avg_rate_leaps + cum_avg_rate_leaps.shift(1).values ) / 2 + # print("cum_avg_rate_non_leaps\n", cum_avg_rate_non_leaps) + # print("cum_avg_rate_leaps\n", cum_avg_rate_leaps) dropdates_num = { "M": 12, From 92c917462eae175b05242133bd939129ef14cd61 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Sun, 31 Mar 2024 14:48:15 -0700 Subject: [PATCH 14/17] Modified test --- chainladder/utils/tests/test_utilities.py | 7 +++++-- chainladder/utils/utility_functions.py | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/chainladder/utils/tests/test_utilities.py b/chainladder/utils/tests/test_utilities.py index e0b23ddc..0fbd020a 100644 --- a/chainladder/utils/tests/test_utilities.py +++ b/chainladder/utils/tests/test_utilities.py @@ -102,8 +102,11 @@ def test_non_vertical_line(): def test_vertical_line(): - olf = cl.parallelogram_olf([0.20], ["7/1/2017"], grain="Y", vertical_line=True) - assert abs(olf.loc["2017"].iloc[0] - ((1 - 184 / 365) * 0.2 + 1)) < 0.00001 + olf = cl.parallelogram_olf( + [0.20], ["7/1/2017"], approximation_grain="D", vertical_line=True + ) + true_olf = 1.2 / ((1 - 184 / 365) * 1.0 + (184 / 365) * 1.2) + assert abs(olf.loc["2017"].iloc[0] - true_olf) < 0.00001 def test_triangle_json_io(clrd): diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index bb9031c2..2352fe0d 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -194,6 +194,9 @@ def parallelogram_olf( crl = cum_rate_changes[-1] # print("crl:", crl) + cum_avg_rate_non_leaps = cum_rate_changes + cum_avg_rate_leaps = cum_rate_changes + if not vertical_line: rolling_num = { "M": 12, From f1c70c28e8536ecbd83b2980db9e4141172474be Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Sun, 31 Mar 2024 14:48:51 -0700 Subject: [PATCH 15/17] Removed debugger --- chainladder/utils/utility_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index 2352fe0d..9a06df01 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -185,9 +185,9 @@ def parallelogram_olf( ) rate_changes = pd.Series(np.array(values), np.array(date)) - print("rate_changes:\n", rate_changes) + # print("rate_changes:\n", rate_changes) rate_changes = rate_changes.reindex(date_idx, fill_value=0) - print("rate_changes:\n", rate_changes) + # print("rate_changes:\n", rate_changes) cum_rate_changes = np.cumprod(1 + rate_changes.values) cum_rate_changes = pd.Series(cum_rate_changes, rate_changes.index) # print("cum_rate_changes:\n", cum_rate_changes) From 4777e8a7ba48501206f16fcb6aa8090a10bb1a08 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Wed, 3 Apr 2024 16:11:32 -0700 Subject: [PATCH 16/17] Remove attemps to address 491 --- chainladder/core/display.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/chainladder/core/display.py b/chainladder/core/display.py index 1cf34cd6..e0a135ee 100644 --- a/chainladder/core/display.py +++ b/chainladder/core/display.py @@ -13,11 +13,11 @@ class TriangleDisplay: def __repr__(self): - try: - self.values - except: - print("Triangle is empty") - return + # try: + # self.values + # except: + # print("Triangle is empty") + # return if (self.values.shape[0], self.values.shape[1]) == (1, 1): data = self._repr_format() From 9a0ae1fa2034c4d2b5c7ddb38c14a724941b6998 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Wed, 3 Apr 2024 16:50:44 -0700 Subject: [PATCH 17/17] Better tests --- chainladder/core/tests/test_display.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/chainladder/core/tests/test_display.py b/chainladder/core/tests/test_display.py index 981e566c..0b11b1fb 100644 --- a/chainladder/core/tests/test_display.py +++ b/chainladder/core/tests/test_display.py @@ -4,17 +4,25 @@ def test_heatmap_render(raa): """The heatmap method should render correctly given the sample.""" - assert raa.heatmap() + try: + raa.heatmap() + except: + assert False -def test_empty_triangle(): - assert cl.Triangle() + +# def test_empty_triangle(): +# assert cl.Triangle() def test_to_frame(raa): - assert cl.Chainladder().fit(raa).cdf_.to_frame() - assert cl.Chainladder().fit(raa).cdf_.to_frame(origin_as_datetime=False) - assert cl.Chainladder().fit(raa).cdf_.to_frame(origin_as_datetime=True) - assert cl.Chainladder().fit(raa).ultimate_.to_frame() - assert cl.Chainladder().fit(raa).ultimate_.to_frame(origin_as_datetime=False) - assert cl.Chainladder().fit(raa).ultimate_.to_frame(origin_as_datetime=True) + try: + cl.Chainladder().fit(raa).cdf_.to_frame() + cl.Chainladder().fit(raa).cdf_.to_frame(origin_as_datetime=False) + cl.Chainladder().fit(raa).cdf_.to_frame(origin_as_datetime=True) + cl.Chainladder().fit(raa).ultimate_.to_frame() + cl.Chainladder().fit(raa).ultimate_.to_frame(origin_as_datetime=False) + cl.Chainladder().fit(raa).ultimate_.to_frame(origin_as_datetime=True) + + except: + assert False