In [1]:
import numpy as np
import pandas as pd

In [2]:
from scipy.stats import norm

In [3]:
# Standard normal CDF evaluated at z = 3 (scipy's default loc=0, scale=1).
norm.cdf(3)

0.9986501019683699

In [4]:
from scipy.stats import norm


def discrete_signal(signal, step_size):
    """Snap a signal onto a grid of `step_size` multiples, capped to [-1, 1].

    Parameters
    ----------
    signal : array-like supporting division, ``.round()`` and ``.clip()``
        For example a pandas Series or a numpy array. It is not modified.
    step_size : float
        Grid spacing; each value is rounded to the nearest multiple of it.

    Returns
    -------
    Same kind of object as ``signal / step_size``, holding the discretized
    values with everything above 1 set to 1 and everything below -1 set to -1.
    """
    n_steps = (signal / step_size).round()
    snapped = n_steps * step_size
    # Rounding can overshoot the unit interval, so cap after snapping.
    return snapped.clip(-1, 1)

def get_signal(events, step_size, prob, pred, num_classes, num_threads, **kwargs):
    """Turn predicted probabilities into averaged, discretized bet signals.

    Parameters
    ----------
    events : pandas.DataFrame
        Must contain a 't1' column (joined onto the signals). If it also has
        a 'side' column, the signal is multiplied by it.
    step_size : float
        Grid spacing forwarded to `discrete_signal`.
    prob : pandas.Series
        Probability of the predicted label, one entry per event.
    pred : pandas.Series
        Predicted label used as the sign/size multiplier of the signal.
        Presumably indexed like `prob` and `events` -- confirm with caller.
    num_classes : int
        Number of classes; 1 / num_classes is the no-information probability.
    num_threads : int
        Forwarded unchanged to `avg_active_signals`.
    **kwargs
        Accepted for call compatibility; not used here.

    Returns
    -------
    The output of `discrete_signal` applied to the averaged signals, or an
    empty float Series when `prob` is empty.
    """
    # Nothing to size: return an explicitly typed empty Series (a bare
    # pd.Series() is deprecated and yields object dtype on current pandas).
    if prob.shape[0] == 0:
        return pd.Series(dtype='float64')
    # Test statistic of prob against the 1 / num_classes baseline.
    signal0 = (prob - 1. / num_classes) / np.sqrt(prob * (1. - prob))
    # signal = predicted label * size, with size = 2 * Phi(z) - 1.
    signal0 = pred * (2 * norm.cdf(signal0) - 1)
    if 'side' in events:
        signal0 *= events.loc[signal0.index, 'side']
    # Average the signals that are active at the same time.
    df0 = signal0.to_frame('signal').join(events[['t1']], how='left')
    df0 = avg_active_signals(df0, num_threads)
    # discrete_signal's first parameter is named `signal`; the previous
    # `signal0=` keyword raised TypeError on every non-empty call.
    signal1 = discrete_signal(signal=df0, step_size=step_size)
    return signal1

def mp_avg_active_signals(signals, molecule):
    """Average the signals that are active at each requested time point.

    A row of `signals` is active at `loc` when its index value is <= `loc`
    and its 't1' is either missing or strictly greater than `loc`.

    Parameters
    ----------
    signals : pandas.DataFrame
        Indexed by signal start; has columns 'signal' and 't1' (signal end,
        may be null for signals with no end).
    molecule : iterable
        Time points to evaluate, comparable with the index and 't1' values.

    Returns
    -------
    pandas.Series
        Float Series keyed by the points in `molecule`, holding the mean of
        the active 'signal' values, or 0 where nothing is active.
    """
    # Loop-invariant pieces of the activity test.
    starts = signals.index.values
    t1 = signals['t1']
    open_ended = pd.isnull(t1)

    averages = {}
    for loc in molecule:
        # Positional boolean mask, so duplicate index labels cannot pull in
        # rows that are not actually active.
        is_act = (starts <= loc) & ((loc < t1) | open_ended).values
        active = signals.loc[is_act, 'signal']
        averages[loc] = active.mean() if len(active) > 0 else 0
    # Build the result once, with an explicit dtype (also for empty input).
    return pd.Series(averages, dtype='float64')

def avg_active_signals(signals, num_threads):
    """Compute the average active signal at every point where it can change.

    The evaluation points are the union of the non-null 't1' values and the
    index values of `signals`, in ascending order. They are handed to
    `mp_pandas_obj` together with `mp_avg_active_signals`, `num_threads` and
    `signals`; whatever that call returns is returned unchanged.
    """
    # 1) time points where the signal changes: a signal starts or ends
    end_points = set(signals['t1'].dropna().values)
    t_pnts = sorted(end_points.union(signals.index.values))
    # 2) delegate the per-point averaging to the dispatcher
    return mp_pandas_obj(
        mp_avg_active_signals,
        ('molecule', t_pnts),
        num_threads,
        signals=signals,
    )

# 10.2

In [20]:
# Draw 10,000 probabilities uniformly from [0.5, 1.0). No seed is set in the
# visible cells, so these values (and the outputs below) differ on every run.
samples = np.random.uniform(.5, 1., 10000)

In [25]:
# z-statistic of each sampled probability against 0.5, scaled by
# sqrt(p * (1 - p)) -- the same form get_signal uses with 1 / num_classes = 0.5.
zs = (samples - .5) / np.sqrt(samples * (1 - samples))
# Map z through the standard normal CDF and rescale: 2 * Phi(z) - 1.
# Since samples >= 0.5, zs >= 0 and the resulting sizes are non-negative.
bet_size = 2 * norm.cdf(zs) - 1

In [26]:
bet_size

array([0.99884328, 0.51128425, 0.99999709, ..., 0.16633489, 0.46459308,
       0.02115305])