# `mlfinlab.bet_sizing` - Module Tutorial
The following is a tutorial on how to apply the functions in the `mlfinlab.bet_sizing` module. The exercises from Chapter 10 are used as examples.

In [1]:
# imports
import numpy as np
import pandas as pd

import datetime as dt

# mlfinlab imports
from mlfinlab.bet_sizing import bet_size_probability


----
#### EXERCISE 10.2
Draw 10,000 random numbers from a uniform distribution with bounds U[.5, 1.]. (Author's note: These exercises are intended to simulate dynamic bet sizing of a long-only strategy.)

__(a)__ Compute bet sizes _m_ for $||X||=2$.

__(b)__ Assign 10,000 consecutive calendar days to the bet sizes.

__(c)__ Draw 10,000 random numbers from a uniform distribution with bounds U[1, 25].

__(d)__ Form a `pandas.Series` indexed by the dates in 2.b, and with values equal to the index shifted forward the number of days in 2.c. This is a `t1` object similar to the ones we used in Chapter 3.

__(e)__ Compute the resulting average active bets, following Section 10.4.
 

In [2]:
# Build a synthetic set of bets and size them with `bet_size_probability`.
#
# NOTE: the exercise text asks for 10,000 draws from U[.5, 1.] and holding periods
# from U[1, 25]; this cell uses 1,000 draws from U[.300001, 1.] and holding periods
# from U[1, 5]. It also adds a random side so both long and short bets appear.

# Seed numpy's global RNG so the sample is reproducible. The three draws below
# (probabilities, sides, holding periods) consume the RNG in this exact order.
np.random.seed(0)
sample_size = 1_000
P_t = np.random.uniform(low=.300001, high=1., size=sample_size)  # predicted probabilities
S_t = np.random.choice([-1, 0, 1], size=sample_size, replace=True,
                       p=[0.3, 0.2, 0.5])  # predicted side: short / flat / long

# 10.2(b) one bet per consecutive calendar day, starting 01-JAN-2000
start_date = dt.datetime(2000, 1, 1)
date_step = dt.timedelta(days=1)
dates = np.array([start_date + date_step * day for day in range(sample_size)])
prob = pd.Series(data=P_t, index=dates)
side = pd.Series(data=S_t, index=dates)

# 10.2(c) random (fractional) holding period, in days, for every bet
shift_list = np.random.uniform(low=1., high=5., size=sample_size)
shift_dt = np.array([dt.timedelta(days=n_days) for n_days in shift_list])

# 10.2(d) `t1`: bet start date -> bet end date (start + holding period)
dates_shifted = dates + shift_dt
t1 = pd.Series(data=dates_shifted, index=dates)

# Collect the three unnamed series into one events frame; concat labels the
# columns 0, 1, 2, so they are renamed and selected in a fixed order.
df_events = (
    pd.concat(objs=[t1, prob, side], axis=1)
    .rename(columns={0: 't1', 1: 'prob', 2: 'side'})
    [['t1', 'prob', 'side']]
)


print(df_events.head(10))

# Variant 1: probability plus predicted side.
df_bets_1 = bet_size_probability(
    events=df_events,
    prob=df_events.prob,
    pred=df_events.side,
    num_classes=2,
    num_threads=6,
)
# Variant 2: probability only (no `pred` passed).
df_bets_2 = bet_size_probability(
    events=df_events,
    prob=df_events.prob,
    num_classes=2,
    num_threads=6,
)
# Variant 3: as variant 1, with `average_active=True`.
df_bets_3 = bet_size_probability(
    events=df_events,
    prob=df_events.prob,
    pred=df_events.side,
    num_classes=2,
    average_active=True,
    num_threads=6,
)
# Variant 4: as variant 3, additionally passing `step_size=0.05`
# (presumably discretizes the bet size -- confirm against the mlfinlab docs).
df_bets_4 = bet_size_probability(
    events=df_events,
    prob=df_events.prob,
    pred=df_events.side,
    num_classes=2,
    step_size=0.05,
    average_active=True,
    num_threads=6,
)

print("\n\n   Printing outputs: \n")
for label, bets in (
    ("df_bets_1", df_bets_1),
    ("df_bets_2", df_bets_2),
    ("df_bets_3", df_bets_3),
    ("df_bets_4", df_bets_4),
):
    print(f"{label}\n", bets.head(10))


                                   t1      prob  side
2000-01-01 2000-01-05 05:54:20.783447  0.684170     1
2000-01-02 2000-01-04 21:42:14.625551  0.800633    -1
2000-01-03 2000-01-06 02:13:22.710141  0.721935     0
2000-01-04 2000-01-06 00:02:59.914666  0.681419     1
2000-01-05 2000-01-08 10:05:02.866610  0.596559    -1
2000-01-06 2000-01-08 05:04:43.901876  0.752126     1
2000-01-07 2000-01-10 07:25:09.355426  0.606312     1
2000-01-08 2000-01-09 16:17:20.756742  0.924241    -1
2000-01-09 2000-01-10 15:18:32.518313  0.974564    -1
2000-01-10 2000-01-12 16:02:05.478556  0.568410     1


2019-07-04 20:43:23.677941 100.0% mp_avg_active_signals done after 0.07 minutes. Remaining 0.0 minutes.
2019-07-04 20:43:27.782647 100.0% mp_avg_active_signals done after 0.06 minutes. Remaining 0.0 minutes.




   Printing outputs: 

df_bets_1
 2000-01-01    0.308039
2000-01-02   -0.548235
2000-01-03    0.000000
2000-01-04    0.302999
2000-01-05   -0.156034
2000-01-06    0.440730
2000-01-07    0.172258
2000-01-08   -0.891123
2000-01-09   -0.997423
2000-01-10    0.109853
Name: signal, dtype: float64
df_bets_2
 2000-01-01    0.308039
2000-01-02    0.548235
2000-01-03    0.379640
2000-01-04    0.302999
2000-01-05    0.156034
2000-01-06    0.440730
2000-01-07    0.172258
2000-01-08    0.891123
2000-01-09    0.997423
2000-01-10    0.109853
Name: signal, dtype: float64
df_bets_3
 2000-01-01 00:00:00.000000    0.308039
2000-01-02 00:00:00.000000   -0.120098
2000-01-03 00:00:00.000000   -0.080065
2000-01-04 00:00:00.000000    0.015701
2000-01-04 21:42:14.625551    0.203680
2000-01-05 00:00:00.000000    0.113751
2000-01-05 05:54:20.783447    0.048988
2000-01-06 00:00:00.000000    0.146924
2000-01-06 00:02:59.914666    0.094899
2000-01-06 02:13:22.710141    0.142348
dtype: float64
df_bets_4
 2000-01-