# Second Stage

In [None]:
from get_data import *
from simulation import *
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import tqdm

In [None]:
# Load the '202201' file for the target stock and split it into one frame per
# distinct value of the 'date' column, each with a fresh 0-based index.
target_code = '0050'
all_df = get_one_file(year_month='202201', stock_code=target_code)
df_dict = dict(
    (day, day_rows.reset_index(drop=True))
    for day, day_rows in all_df.groupby('date')
)

---

### 1. Count cases where the spread equals a specific number of ticks (2)

In [None]:
# Keep only the rows whose 'spread_in_tick' equals 2 and report how many
# of the loaded rows qualify.
t2_all_df = all_df[all_df['spread_in_tick'].eq(2)].reset_index(drop=True)
log_info(f'{len(t2_all_df)}/{len(all_df)} experiments to be considered')

The actual number of experiments may differ from this number, since some of the quotes are consecutive (they happen one after another). We only consider cases where the spread changes to 2 ticks and stays there until one of the following happens:

    - The order is filled at b_1 before t_m
    - The order at b_1 is replaced with one at b_star at t_m, and is filled at b_star at some t with t_m < t < t_star
    - The order at b_1 is replaced with one at b_star at t_m, waits until t_star, and then crosses the spread and is filled at a_1

---

### 2. Simulation

In [None]:
# Work on a copy of a single day's frame so the entry in df_dict stays untouched.
# Index directly instead of using .get(): a missing date then raises a KeyError
# naming the date, rather than an AttributeError from calling .copy() on None.
df = df_dict['2022-01-03'].copy()
side = 'bid'

In [None]:
# Run the simulation for the selected day with verbose output enabled.
# NOTE(review): the positional 10 and 5 are presumably ts and tm (the same values
# are passed as ts=10, tm=5 to plot_one_sim in the next cell) - confirm against
# the sim_one_day_t2 signature.
sim_df = sim_one_day_t2(df, target_code, side, 10, 5, verbose=True)

In [None]:
# Demonstrate one simulation: take the result row labelled 293, plot it against
# the day's data, then dump the row's fields for inspection.

row = sim_df.loc[293]
res = plot_one_sim(row=row, day_df=df, side=side, ts=10, tm=5)
print(row.to_dict())

---

### 3. Simulate all days with one set of parameters and calculate the score

In [None]:
# Load everything get_one_stock_data returns for the target stock and split it
# into one frame per distinct value of the 'date' column (0-based index each).
target_code = '0050'
all_df = get_one_stock_data(stock_code=target_code)
df_dict = dict(
    (day, day_rows.reset_index(drop=True))
    for day, day_rows in all_df.groupby('date')
)

In [None]:
# One fixed parameter set, used for every day in the run below.
i_ts = 5
i_tm = 1
side = 'bid'

In [None]:
# Multiprocessing: build one argument list per day and hand them to pool_run_func,
# then stack the per-day results into a single frame with a fresh 0-based index.
# NOTE(review): the trailing False presumably maps to `verbose` - confirm.
params_ls = [
    [one_df, target_code, side, i_ts, i_tm, False]
    for one_df in df_dict.values()
]
st = datetime.datetime.now()
log_info(f'Start simulation on {len(params_ls)} days')
sim_df = pd.concat(pool_run_func(sim_one_day_t2, params_ls), ignore_index=True)
log_info(f'Done simulation - {(datetime.datetime.now() - st).total_seconds():.2f}s')

In [None]:
# Keep only simulations with a strictly positive duration.
sim_df = sim_df.loc[sim_df['duration'] > 0, :].reset_index(drop=True)
# Rescale duration by 1/1000. Plain column assignment replaces the column, so an
# integer column becomes float without relying on in-place upcasting via .loc.
# NOTE: re-running this cell rescales again; re-run the simulation cell first.
sim_df['duration'] = sim_df['duration'] / 1000
# Score each row from its pnl and duration. Fields are looked up by label: integer
# keys on a label-indexed Series are a deprecated positional fallback in pandas.
sim_df['score'] = sim_df[['pnl', 'duration']].apply(
    lambda x: obj(x['pnl'], x['duration']), axis=1
)

In [None]:
# Boolean mask selecting rows whose score is below 50; used by the plots below.
filt_cond = sim_df['score'].lt(50)

In [None]:
# Histogram of the scores that pass filt_cond, in 100 bins.
fig, ax = plt.subplots(figsize=(16, 6))
ax.hist(sim_df.loc[filt_cond, 'score'], bins=100, label='score')
ax.grid(True)
ax.set_title('Score', fontsize=14)
# Label both axes so the figure can be read on its own.
ax.set_xlabel('score')
ax.set_ylabel('count')
fig.tight_layout()
plt.show()

In [None]:
# Scatter of pnl (y) against duration (x) for the rows that pass filt_cond.
fig, ax = plt.subplots(figsize=(16, 6))
ax.scatter(sim_df.loc[filt_cond, 'duration'], sim_df.loc[filt_cond, 'pnl'], label='Result')
ax.grid(True)
ax.set_title('PnL - Duration Plot', fontsize=14)
# Label both axes so it is clear which quantity is on which axis.
ax.set_xlabel('duration')
ax.set_ylabel('pnl')
fig.tight_layout()
plt.show()

---