/
identifiability_functions.R
154 lines (151 loc) · 6.53 KB
/
identifiability_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#' @title Assess the equifinality or time spans in the input radiocarbon
#' calibration curve
#'
#' @details
#' The input calibration data frame has three columns: year_BP, uncal_year_BP,
#' and uncal_year_BP_error. A time span is equifinal if, for each date in the
#' span, there is at least one other date in the radiocarbon calibration curve
#' with the same fraction modern value. Conversely, a time span is not equifinal
#' if this is not true (all dates in the time span correspond to a unique
#' fraction modern value). Thus, the calibration curve is divided into
#' alternating equifinal and non-equifinal spans. This function identifies these
#' regions for the input calibration data frame, calib_df.
#'
#' Although calib_df uses year BP, all calculations and returned data use AD.
#'
#' A list is returned with two named variables: can_invert and inv_span_list
#' (inv = invert, which is conceptually identical to non-equifinal).
#'
#' can_invert is a boolean that indicates whether each entry (row) in the
#' calibration data frame, calib_df, is inside or outside an invertible region.
#'
#' inv_span_list is a list summarizing information about all the invertible
#' (non-equifinal) time spans. It contains the following named variables:
#'
#' ind -- Indices (rows) of calib_df following within the invertible time
#' span
#' tau_left -- Calendar date (AD) of the left (earlier) boundary of the
#' invertible time span
#' phi_left -- Fraction modern value of the left (earlier) boundary of the
#' invertible time span
#' tau_right -- Calendar date (AD) of the right (later) boundary of the
#' invertible time span
#' phi_right -- Fraction modern value of the right (later) boundary of the
#' invertible time span
#' ii_prev -- The index (row) of the calib_df earlier than the invertible
#' region with closest fraction modern value
#' ii_next -- The index (row) of the calib_df later than the invertible
#' region with closest fraction modern value
#'
#' @param calib_df The calibration data frame, with columns year_BP,
#' uncal_year_BP, and uncal_year_BP_error
#' @param equi_list (Optional) The result of a call to
#' calc_calib_curve_equif_dates
#'
#' @return A list of with named variables can_invert and inv_span_list (see
#' details)
#'
#' @export
#'
assess_calib_curve_equif <- function(calib_df, equi_list = NA) {
if (all(is.na(equi_list))) {
equi_list <- calc_calib_curve_equif_dates(calib_df)
}
can_invert <- rep(T, length(calib_df$year_BP))
tau_curve <- 1950 - calib_df$year_BP
phi_curve <- calc_calib_curve_frac_modern(calib_df)
for (equi_entry in equi_list) {
can_invert[equi_entry$ind_base] <- F
}
clust_list <- evd::clusters(can_invert, .5)
inv_span_list <- list()
for (cc in 1:length(clust_list)) {
clust <- clust_list[[cc]]
lo <- as.numeric(names(clust)[1])
hi <- as.numeric(names(clust)[length(clust)])
# Find left boundary
if (lo == 1) {
tau_left <- tau_curve[1]
phi_left <- phi_curve[1]
ii_prev <- 1
} else {
ii_prev <- which.min(abs(phi_curve[1:(lo - 1)] - phi_curve[lo]))
tau_left <- phi2tau(tau_curve, phi_curve, phi_curve[ii_prev], lo - 1, lo)
phi_left <- phi_curve[ii_prev]
}
# Find right boundary
if (hi == length(tau_curve)) {
tau_right <- tau_curve[length(tau_curve)]
phi_right <- phi_curve[length(tau_curve)]
ii_next <- length(tau_curve)
} else {
ii_next <- hi + which.min(abs(phi_curve[(hi + 1):length(phi_curve)] - phi_curve[hi]))
tau_right <- phi2tau(tau_curve, phi_curve, phi_curve[ii_next], hi, hi + 1)
phi_right <- phi_curve[ii_next]
}
inv_span <- list(ind = lo:hi, tau_left = tau_left, phi_left = phi_left,
tau_right = tau_right, phi_right = phi_right,
ii_prev = ii_prev, ii_next = ii_next)
inv_span_list[[cc]] <- inv_span
}
return(list(inv_span_list = inv_span_list, can_invert = can_invert))
}
#' @title Calculate equifinal dates for each point in the calibration curve
#'
#' @details
#' The input calibration data frame has three columns: year_BP, uncal_year_BP,
#' and uncal_year_BP_error. For each date in the column year_BP, determine all
#' other dates with the same fraction modern. These dates are equifinal since,
#' in the absence of additional information, there is no way to determine which
#' year a sample came from. Typically the actual equifinal date lies between two
#' observations in year_BP, so linear interpolation is used to estimate the
#' decimal year. A list of length nrow(calib_df) is returned with, for each
#' point in the calibration curve, the following information:
#'
#' ind_base -- The row index in calib_df of the point
#' tau_base -- The calendar date (AD) of the point
#' ind_equi -- The row index/indices in calib-df of the equifinal points
#' tau_equi -- The calendar date(s) (AD) of the equifinal points
#'
#' @param calib_df The calibration data frame, with columns year_BP,
#' uncal_year_BP, and uncal_year_BP_error
#'
#' @return A list of equifinality information for each point in the calibration
#' curve (see details)
#'
#' @export
calc_calib_curve_equif_dates <- function(calib_df) {
# For all the calendar dates in the calibration dataframe, identify points
# (other calendar dates) with the same fraction modern.
tau_curve <- 1950 - calib_df$year_BP
phi_curve <- calc_calib_curve_frac_modern(calib_df)
output_list <- list()
for (ii in 1:(length(phi_curve) - 1)) {
tau <- tau_curve[ii]
phi <- phi_curve[ii]
# Look for adjacent points that bracket tau
ind1 <- which(phi_curve[1:(length(phi_curve) - 1)] >= phi_curve[ii] &
phi_curve[2:length(phi_curve)] <= phi_curve[ii])
ind2 <- which(phi_curve[1:(length(phi_curve) - 1)] <= phi_curve[ii] &
phi_curve[2:length(phi_curve)] >= phi_curve[ii])
ind <- setdiff(c(ind1, ind2), ii)
if (length(ind) > 1) {
ind_equi <- ind
tau_equi <- rep(NA, length(ind_equi))
for (jj in 1:length(ind_equi)) {
ii_lo <- ind_equi[jj]
ii_hi <- ind_equi[jj] + 1
tau_equi[jj] <- tau_curve[ii_lo] +
(tau_curve[ii_hi] - tau_curve[ii_lo]) *
(phi_curve[ii] - phi_curve[ii_lo]) /
(phi_curve[ii_hi] - phi_curve[ii_lo])
}
new_entry <- list(ind_base = ii,
tau_base = tau_curve[ii],
ind_equi = ind_equi,
tau_equi = unique(tau_equi))
output_list[[length(output_list) + 1]] <- new_entry
}
}
return(output_list)
}