Skip to content

Commit

Permalink
Bugfix in bootstrap and simplify example
Browse files Browse the repository at this point in the history
  • Loading branch information
MaryanMorel committed Feb 7, 2018
1 parent 8497968 commit 594b316
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 55 deletions.
81 changes: 35 additions & 46 deletions examples/plot_conv_sccs_cv_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,38 +37,13 @@

coeffs = np.log(np.hstack(sim_effects))

# Plot "true" relative incidences functions
n_plots = np.sum([1 for l in n_lags if l > 0])
n_rows = int(np.ceil(n_plots / 2))
fig, axarr = plt.subplots(n_rows, 2, sharex=True, sharey=True, figsize=(10, 6))
offset = 0
c = 0
for l in n_lags:
if l == 0:
offset += 1
continue
end = int(offset + l + 1)
ax = axarr[c // 2][c % 2]
ax.plot(np.exp(coeffs[offset:end]))
offset = end
c += 1
plt.suptitle('Relative risks used for simulations')
axarr[-1][0].set_ylabel('Relative incidence')
axarr[-1][1].set_xlabel('Time after exposure start')
plt.show()

# Time drift (age effect) used for the simulations.
time_drift = lambda t: np.log(8 * np.sin(.01 * t) + 9)
plt.plot(np.exp(time_drift(np.arange(750))))
plt.title('Age effect used for simulations')
plt.xlabel('Age')
plt.ylabel('Age Relative Incidence')
plt.show()

# Simaltion of the features.
sim = SimuSCCS(n_samples, n_intervals, n_features, n_lags,
time_drift=time_drift, coeffs=coeffs, seed=seed,
n_correlations=n_corr)
n_correlations=n_corr, verbose=False)
features, censored_features, labels, censoring, coeffs = sim.simulate()

# Plot the Hawkes kernel matrix used to generate the features.
Expand All @@ -77,7 +52,7 @@
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.5)
fig.colorbar(heatmap, cax=cax)
ax.set_title('Hawkes adjacency matrix');
ax.set_title('Hawkes adjacency matrix used for the simulation');
plt.show()

## Add age_groups features to feature matrices.
Expand All @@ -95,24 +70,35 @@
n_lags = np.hstack([n_lags, np.zeros(n_agegrps)])

# Learning
start = time()
# Example code for cross validation
# start = time()
# learner = ConvSCCS(n_lags=n_lags.astype('uint64'),
# penalized_features=np.arange(n_features),
# random_state=42)
# strength_TV_range = (-4, -1)
# strength_L1_range = (-5, -2)
# _, cv_track = learner.fit_kfold_cv(features, labels, censoring,
# strength_TV_range, strength_L1_range,
# bootstrap=True, bootstrap_rep=50, n_cv_iter=50)
# elapsed_time = time() - start
# print("Elapsed time (model training): %.2f seconds \n" % elapsed_time)
# print("Best model hyper parameters: \n", cv_track.best_model['strength'])
# cv_track.plot_cv_report(35, 45)
# plt.show()

# using the parameters resulting from cross-validation
learner = ConvSCCS(n_lags=n_lags.astype('uint64'),
penalized_features=np.arange(n_features),
random_state=42)
strength_TV_range = (-4, -1)
strength_L1_range = (-5, -2)
res = learner.fit_kfold_cv(features, labels, censoring, strength_TV_range,
strength_L1_range, bootstrap=True, bootstrap_rep=50,
n_cv_iter=50)
elapsed_time = time() - start
print("Elapsed time (model training): %.2f seconds \n" % elapsed_time)
cv_track = res[1]
print("Best model hyper parameters: \n", cv_track.best_model['strength'])
cv_track.plot_cv_report(35, 45)
plt.show()
random_state=42, strength_tv=0.0036999724314638062,
strength_group_l1=0.0001917004158917066)

_, bootstrap_ci = learner.fit(features, labels, censoring,
bootstrap=True, bootstrap_rep=20)

# Plot estimated parameters
bootstrap_ci = cv_track.best_model['bootstrap_ci']
# when using cross validation output:
# bootstrap_ci = cv_track.best_model['bootstrap_ci']
# get bootstrap confidence intervals
refitted_coeffs = bootstrap_ci['refit_coeffs']
lower_bound = bootstrap_ci['lower_bound']
upper_bound = bootstrap_ci['upper_bound']
Expand All @@ -131,14 +117,15 @@
lb = lower_bound[offset:end]
ub = upper_bound[offset:end]
ax = axarr[c // 2][c % 2]
ax.plot(np.exp(coeffs[offset:end]))
ax.plot(np.exp(m))
ax.plot(np.exp(coeffs[offset:end]), label="True RI")
ax.step(np.arange(l+1), np.exp(m), label="Estimated RI")
ax.fill_between(np.arange(l + 1), np.exp(lb), np.exp(ub), alpha=.5,
color='orange')
color='orange', step='pre', label="95% boostrap CI")
offset = end
c += 1

plt.suptitle('Estimated relative risks with 95% confidence bands')
axarr[0][1].legend(loc='upper right')
axarr[0][0].set_ylabel('Relative incidence')
axarr[1][0].set_ylabel('Relative incidence')
axarr[-1][0].set_xlabel('Time after exposure start')
Expand All @@ -150,11 +137,13 @@
m = np.repeat(refitted_coeffs[offset:], 125)
lb = np.repeat(lower_bound[offset:], 125)
ub = np.repeat(upper_bound[offset:], 125)
plt.figure()
plt.plot(np.arange(n_intervals),
normalize(np.exp(time_drift(np.arange(n_intervals)))))
plt.plot(np.arange(n_intervals), normalize(np.exp(m)))
plt.step(np.arange(n_intervals), normalize(np.exp(m)))
plt.fill_between(np.arange(n_intervals), np.exp(lb) / np.exp(m).sum(),
np.exp(ub) / np.exp(m).sum(), alpha=.5, color='orange')
np.exp(ub) / np.exp(m).sum(), alpha=.5, color='orange',
step='pre')
plt.xlabel('Age')
plt.ylabel('Normalized Age Relative Incidence')
plt.title("Normalized age effect with 95% confidence bands");
Expand Down
2 changes: 1 addition & 1 deletion lib/cpp/preprocessing/longitudinal_features_lagger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ LongitudinalFeaturesLagger::LongitudinalFeaturesLagger(
if ((*n_lags)[0] >= n_intervals) {
TICK_ERROR("n_lags elements must be between 0 and (n_intervals - 1).");
}
for(ulong i(1); i < n_lags->size(); i++){
for (ulong i(1); i < n_lags->size(); i++) {
if ((*n_lags)[i] >= n_intervals) {
TICK_ERROR("n_lags elements must be between 0 and (n_intervals - 1).");
}
Expand Down
2 changes: 1 addition & 1 deletion lib/cpp/survival/model_sccs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ ModelSCCS::ModelSCCS(const SBaseArrayDouble2dPtrList1D &features,
}
col_offset = ArrayULong(n_lags->size());
col_offset.init_to_zero();
for(ulong i(1); i < n_lags->size(); i++){
for (ulong i(1); i < n_lags->size(); i++) {
if ((*n_lags)[i] >= n_intervals) {
TICK_ERROR("n_lags elements must be between 0 and (n_intervals - 1).");
}
Expand Down
15 changes: 8 additions & 7 deletions tick/survival/convolutional_sccs.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def __init__(self, n_lags: np.array,
verbose, random_state)

# Interface
def fit(self, features: np.ndarray, labels: np.array,
def fit(self, features: list, labels: list,
censoring: np.array, bootstrap: bool = False,
bootstrap_rep: int = 200,
bootstrap_confidence: float = .95):
Expand Down Expand Up @@ -378,7 +378,7 @@ def fit_kfold_cv(self, features, labels, censoring,

cv_tracker.log_best_model(self._strengths, self.coeffs.tolist(),
self.score(),
self.bootstrap_coeffs._asdict())
self.bootstrap_coeffs)

return coeffs, cv_tracker

Expand Down Expand Up @@ -450,7 +450,7 @@ def _postfit(self, p_features, p_labels, p_censoring,
bootstrap_ci = self._bootstrap(p_features, p_labels, p_censoring,
refit_coeffs, bootstrap_rep,
bootstrap_confidence)
self._set('bootstrap_coeffs', bootstrap_ci)
self._set('bootstrap_coeffs', bootstrap_ci._asdict())

return self.coeffs, self.bootstrap_coeffs

Expand All @@ -475,12 +475,12 @@ def _bootstrap(self, p_features, p_labels, p_censoring, coeffs,

bootstrap_coeffs = []
sim = SimuSCCS(self.n_cases, self.n_intervals, self.n_features,
self.n_lags)
self.n_lags, coeffs=coeffs)
# TODO later: parallelize bootstrap (everything should be pickable...)
for k in range(rep):
y = sim._simulate_multinomial_outcomes(p_features, coeffs)
self._model_obj.fit(p_features, y, p_censoring)
bootstrap_coeffs.append(self._fit(False))
bootstrap_coeffs.append(self._fit(True))

bootstrap_coeffs = np.exp(np.array(bootstrap_coeffs))
bootstrap_coeffs.sort(axis=0)
Expand Down Expand Up @@ -534,13 +534,14 @@ def _construct_model_obj(self):
return ModelSCCS(self.n_intervals, self.n_lags)

def _construct_prox_obj(self, coeffs=None, project=False):
# TODO: test this !!!
n_penalized_features = len(self.penalized_features) \
if self.penalized_features is not None else 0

if project:
# project future coeffs on the support of given coeffs
if coeffs is not None and any(self.n_lags) > 0:
if all(self.n_lags) == 0:
proxs = [ProxZero()]
elif coeffs is not None and any(self.n_lags) > 0:
prox_ranges = self._detect_support(coeffs)
proxs = [ProxEquality(0, range=r) for r in prox_ranges]
else:
Expand Down

0 comments on commit 594b316

Please sign in to comment.