In [None]:
def mpNumCoEvents(closeIdx, t1, molecule):
    """
    Count how many events are simultaneously active at each bar.

    Parameters:
    - closeIdx: sorted pd.Index of bar timestamps (usually a datetime index)
    - t1: pd.Series whose index is the event start time and whose value is
      the event end time; a missing end time is replaced by the last bar
    - molecule: list-like of event start times (subset of t1.index) that
      this call is responsible for

    Returns:
    - pd.Series of counts, indexed by the bars from molecule[0] through the
      latest end time among the molecule's events
    """
    # Events with no recorded end are treated as running until the last bar
    end_times = t1.fillna(closeIdx[-1])
    first_start = molecule[0]

    # Discard events that already ended before the molecule begins
    still_open = end_times >= first_start
    end_times = end_times[still_open]

    # Discard events that start after the molecule's latest end time
    end_times = end_times.loc[:end_times[molecule].max()]

    # Positions in closeIdx bracketing every surviving event
    bounds = closeIdx.searchsorted([end_times.index[0], end_times.max()])
    count = pd.Series(0, index=closeIdx[bounds[0]: bounds[1] + 1])

    # Each event adds one to every bar between its start and end (inclusive)
    for start, end in end_times.items():
        count.loc[start:end] += 1

    last_end = end_times[molecule].max()
    return count.loc[first_start: last_end]


In [50]:
import pandas as pd
import numpy as np
def getIndicatorMatrix(events, close):
    """
    Build a 0/1 indicator matrix marking which bars each event spans.

    Parameters:
    - events: pd.DataFrame indexed by event start time (t0) with a 't1'
      column holding each event's end time
    - close: pandas object whose (sorted) index holds the bar timestamps

    Returns:
    - indM: pd.DataFrame with one row per event (events.index) and one
      column per bar (close.index); a cell is 1 when the bar lies in
      [t0, t1] (both ends inclusive) and 0 otherwise

    An event whose end time is missing (NaT) is treated as open until the
    last bar, the same convention mpNumCoEvents applies.
    """
    end_times = events['t1']
    # Only touch close.index[-1] when there is actually something to fill,
    # so inputs without missing end times behave exactly as before.
    if end_times.isna().any():
        end_times = end_times.fillna(close.index[-1])

    indM = pd.DataFrame(0, index=events.index, columns=close.index)
    for t0, t1 in end_times.items():
        # Label-based slice: both t0 and t1 are included
        indM.loc[t0, t0:t1] = 1
    return indM

def getAvgUniqueness(indM):
    """
    Compute the average uniqueness of each event from an indicator matrix.

    Parameters:
    - indM: pd.DataFrame of 0/1 indicators with one row per event and one
      column per bar (the layout built by getIndicatorMatrix)

    Returns:
    - avgU: pd.Series indexed like indM.index; for each event, the mean of
      1 / concurrency over the bars that event covers. An event covering
      no bar gets NaN.
    """
    c = indM.sum(axis=0)  # concurrency: number of events active at each bar
    u = indM.div(c, axis=1)  # uniqueness: active cells become 1 / concurrency
    # Mask out inactive cells (0, or NaN where a bar has no events at all)
    # so the mean only runs over bars the event actually spans.
    avgU = u[u > 0].mean(axis=1)
    return avgU
def seqBootstrap(indM, sample_size=None):
    """
    Perform sequential bootstrap sampling based on an indicator matrix.

    Events are drawn one at a time, with replacement. Before each draw the
    probability of every event is set proportional to its average
    uniqueness given the events drawn so far, so events that overlap
    heavily with earlier draws become less likely.

    Parameters:
    - indM: pd.DataFrame of 0/1 indicators with one row per event and one
      column per bar
    - sample_size: number of draws; defaults to the number of events

    Returns:
    - list of selected sample indices (labels from indM.index, possibly
      repeated), in draw order

    Raises:
    - ValueError: if draws are requested but no event covers any bar
    """
    if sample_size is None:
        sample_size = indM.shape[0]

    indicator = indM.to_numpy() > 0  # (events, bars) boolean coverage
    span = indicator.sum(axis=1)  # number of bars each event covers
    drawable = span > 0
    if sample_size > 0 and not drawable.any():
        raise ValueError("indM has no event covering any bar; nothing to sample")

    # Per-bar count of already selected events; repeats count every time
    concurrency = np.zeros(indicator.shape[1])
    phi = []  # selected samples
    while len(phi) < sample_size:
        # Uniqueness a candidate would have at each bar if it were added
        # next: 1 / (selected events on that bar + the candidate itself).
        uniqueness = indicator / (1.0 + concurrency)
        # Events covering no bar keep weight 0 and are never drawn
        avg_uniqueness = np.zeros(len(span))
        avg_uniqueness[drawable] = uniqueness[drawable].sum(axis=1) / span[drawable]
        prob = avg_uniqueness / avg_uniqueness.sum()
        # Draw a row position, then map it back to the index label
        pos = np.random.choice(len(prob), p=prob)
        phi.append(indM.index[pos])
        concurrency += indicator[pos]
    return phi