def plot_timeseries(data, window, model='kaplan-meier', group_min_size=0, max_groups=100, window_min_size=1, stride=None, title=None, time=False):
    """Plot a per-group time series of a fitted conversion statistic.

    For every group, a window of length ``window`` is slid over the group's
    rows (ordered by their first field) in steps of ``stride``. The selected
    model is fit on each window and one point estimate, together with its
    95% interval, is plotted at the window's end.

    Args:
        data: Collection of 4-tuples. The second field is read as
            ``created_at`` and the fourth as ``now``; presumably the first
            field is the group key (TODO confirm against
            ``split_by_group``). It is iterated more than once, so it must
            not be a one-shot iterator.
        window: Length of each window; added to a ``created_at`` value.
        model: Key into the module-level ``_models`` registry.
        group_min_size: Forwarded to ``split_by_group``.
        max_groups: Forwarded to ``split_by_group``.
        window_min_size: A window is skipped when ``sum(B)`` of its rows is
            below this value (``B`` comes from ``get_arrays``; presumably the
            conversion indicators -- TODO confirm).
        stride: Step between consecutive window starts. Defaults to
            ``window``. NOTE(review): a zero stride would never advance the
            window start.
        title: Optional plot title.
        time: If true, plot ``predict_time`` and label the axis as time to
            conversion; otherwise plot ``predict_final`` scaled by 100 and
            label it as a conversion rate.

    Returns:
        None. The function draws on the current pyplot axes and prints one
        line per fitted window.
    """
    if stride is None:
        stride = window

    # Find limits
    t_lo = min(created_at for _, created_at, _, _ in data)
    t_hi = min(now for _, _, _, now in data)
    t_unit, t_converter = get_timescale(t_hi - t_lo)

    # Split data by group
    groups, js = split_by_group(data, group_min_size, max_groups)

    # PLOT
    colors = seaborn.color_palette('hls', len(groups))
    y_max = 0
    for group, color in zip(sorted(groups), colors):
        t1 = t_lo
        ts, ys, y_los, y_his = [], [], [], []
        # Rows must be ordered by their first field for the bisect lookups.
        js[group].sort(key=lambda x: x[0])
        created_ats = [created_at for created_at, _, _ in js[group]]
        while True:
            t2 = t1 + window
            i1 = bisect.bisect_left(created_ats, t1)
            i2 = bisect.bisect_left(created_ats, t2)
            # Stop once the window's end reaches past the last row.
            if i2 >= len(js[group]):
                break
            # Use a dedicated name instead of rebinding the `data` argument.
            window_rows = js[group][i1:i2]
            t1 += stride

            X, B, T = get_arrays(window_rows, t_converter)
            if sum(B) < window_min_size:
                continue

            p = _models[model]()
            p.fit(X, B, T)

            if time:
                y, y_lo, y_hi = p.predict_time([1], ci=0.95)
            else:
                y, y_lo, y_hi = p.predict_final([1], ci=0.95)
            # t1 has already been advanced by `stride` at this point.
            print('%30s %40s %.4f %.4f %.4f' % (group, t1, y, y_lo, y_hi))
            ts.append(t2)
            ys.append(y)
            y_los.append(y_lo)
            y_his.append(y_hi)

        # No window qualified for this group: there is nothing to draw, and
        # max() over an empty sequence below would raise ValueError.
        if not ts:
            continue

        if not time:
            ys, y_los, y_his = (100.*numpy.array(x) for x in (ys, y_los, y_his))
        pyplot.plot(ts, ys, color=color, label='%s (%d)' % (group, len(js[group])))
        pyplot.fill_between(ts, y_los, y_his, color=color, alpha=0.2)
        y_max = max(y_max, 1.1 * max(ys))

    if title:
        pyplot.title(title)
    if time:
        pyplot.ylabel('Average time to conversion (%s)' % t_unit)
    else:
        pyplot.ylabel('Conversion rate %')
    # Only pin the y-range when something was plotted; [0, 0] is degenerate.
    if y_max > 0:
        pyplot.ylim([0, y_max])
    pyplot.legend()
    pyplot.gca().grid(True)