### Ideia 
#### Buscar a porcentagem de vezes em que, após uma ação cair X% (X=5) em um dia, a mesma ação subiu Y% (Y=5) no mesmo dia ou no dia seguinte. 
#### Maximizar esta função de utilidade de forma a diminuir o risco. Ou seja, analisar a quantidade de vezes que isso deu errado.

####  Parâmetros da busca: Tamanho da queda e tamanho da recuperação.
####  Para cada evento, encontrar todos os períodos que atendem às especificações (parametrizadas) de tamanho de queda e descartar aqueles que tenham alguma interseção.
####  Testar quantos destes intervalos atendem a especificação de subida.

In [None]:
import pandas as pd
import seaborn as sns
import datetime
import numpy as np
import matplotlib.pyplot as plt
# Apply seaborn's default theme to every plot produced below.
sns.set()

In [None]:
# Spreadsheet listing the BDRs; loaded here but not used by the code in this cell.
bdr_metadata = pd.read_excel("../doc/lista_bdrs.xls")

# Consolidated quotes; keep only the columns the analysis reads.
# .copy() makes the subset an independent frame so the column assignments
# below do not operate on a slice of the raw CSV frame.
dfbdr = pd.read_csv("../data/output/consolidado/bdrs60d.csv")
dfbdr = dfbdr[['symbol', 'date', 'volume', 'close']].copy()

# Parse the timestamp strings once, then derive the "YYYY-MM-DD" day label
# from the parsed column instead of re-parsing every row with strptime.
dfbdr['date_time'] = pd.to_datetime(dfbdr['date'], format="%Y-%m-%d %H:%M:%S")
dfbdr['date'] = dfbdr['date_time'].dt.strftime("%Y-%m-%d")

# Index by (symbol, timestamp); the remaining columns are, in order,
# 'date', 'volume', 'close'.
dfbdr.set_index(["symbol", "date_time"], inplace=True)

In [None]:
def plot_cumsum(df, stock, start, decr_th, incr_th):
    """Plot the cumulative relative price change of one stock over two days.

    The change is measured against the first close of the selected window,
    which covers ``start`` and the following calendar day.  Two dashed
    horizontal lines mark the drop threshold (red) and the recovery
    threshold (green).

    Args:
        df: Frame indexed by (symbol, timestamp) with at least the columns
            'date' ("YYYY-MM-DD" strings) and 'close'.
        stock: Symbol to select from the first index level.
        start: First day of the window, formatted "YYYY-MM-DD".
        decr_th: Drop threshold as a fraction (e.g. -0.05 for -5%).
        incr_th: Recovery threshold as a fraction, same scale as ``decr_th``.

    Raises:
        ValueError: If there are no rows for ``stock`` in the window.
    """
    # NOTE(review): "next day" is the next calendar day, so a window starting
    # on the last trading day of a week only contains that single day.
    end = (datetime.datetime.strptime(start, "%Y-%m-%d") + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    df_stock = df.loc[stock]
    df_stock = df_stock[df_stock['date'].isin([start, end])]
    if df_stock.empty:
        raise ValueError(f"no data for {stock!r} on {start} or {end}")

    # Select the close price by name rather than by column position, so the
    # function does not depend on the column order of ``df``.
    close = df_stock['close']
    csum = close.diff().cumsum() / close.iloc[0]
    csum.name = "changes"
    csum.index.name = "date"
    df_csum = csum.reset_index()

    # ``height`` is the current name of the former ``size`` argument.
    g = sns.FacetGrid(df_csum, height=5, aspect=2)
    g.map(sns.lineplot, "date", "changes")
    ax = g.axes[0, 0]
    ax.axhline(decr_th, ls='--', color="red")
    ax.axhline(incr_th, ls='--', color="green")
    plt.show()

In [None]:
# Visual check for a single symbol and day, with dashed guide lines at the
# -5% drop level and the -2% recovery level.
plot_cumsum(dfbdr, stock='TSLA34.SA', start="2020-11-30", decr_th=-0.05, incr_th=-0.02)

In [None]:
def filter_cumsum(df, stock, start, decr_th, incr_th):
    """Count drop events and how many of them were followed by a recovery.

    The window covers ``start`` and the following calendar day.  The relative
    change of every sample is measured against the first close of the window.
    A *drop event* starts at a sample whose change is below ``decr_th`` while
    the previous sample was not.  An event counts as *recovered* when any
    later sample, before the next event starts, has a change above
    ``incr_th``.

    Args:
        df: Frame indexed by (symbol, timestamp) with at least the columns
            'date' ("YYYY-MM-DD" strings) and 'close'.
        stock: Symbol to select from the first index level.
        start: First day of the window, formatted "YYYY-MM-DD".
        decr_th: Drop threshold as a fraction (e.g. -0.06 for -6%).
        incr_th: Recovery threshold as a fraction, same scale as ``decr_th``.

    Returns:
        A tuple ``(percent, num_events, num_recovered)`` where ``percent`` is
        ``num_recovered / num_events``.  ``(0, 0, 0)`` is returned when the
        window has fewer than two samples or contains no drop event.
    """
    # NOTE(review): "next day" is the next calendar day, not the next trading
    # day; confirm this is the intended window around weekends.
    end = (datetime.datetime.strptime(start, "%Y-%m-%d") + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    df_stock = df.loc[stock]
    df_stock = df_stock[df_stock['date'].isin([start, end])]
    if df_stock.shape[0] < 2:
        return 0, 0, 0

    # Relative change versus the first close; the first sample is NaN (it is
    # the reference point) and is dropped further down.
    close = df_stock['close']
    change = close.diff().cumsum() / close.iloc[0]

    # A sample starts an event when it is below the threshold and the previous
    # one was not.  fill_value keeps the shifted mask boolean instead of
    # producing an object column with NaN in the first slot.
    below = change < decr_th
    event_start = below & ~below.shift(1, fill_value=False)

    change = change.iloc[1:]
    event_start = event_start.iloc[1:]

    num_events = int(event_start.sum())
    if num_events == 0:
        return 0, 0, 0

    # Running event number: 0 before the first event, k from the k-th event
    # start up to (not including) the next one.  Every sample belongs to
    # exactly one event, so a recovery can never be counted twice.
    event_id = event_start.astype(int).cumsum()

    # Recovery samples: above the recovery threshold, after at least one
    # event has started, and not the event-start sample itself.
    recovered = (change > incr_th) & ~event_start & (event_id > 0)

    num_recovered = int(event_id[recovered.values].nunique())
    percent = num_recovered / num_events
    return percent, num_events, num_recovered

In [None]:
# Evaluate every (symbol, day) pair and build the result frame in one step.
# Rows are collected in a list because growing a DataFrame row by row copies
# the whole frame on each iteration, and DataFrame.append no longer exists in
# pandas 2.x.
result_columns = ['symbol', 'date', 'percent', 'total_decr', 'num_postive_cycles']
days = dfbdr['date'].unique()  # loop-invariant, computed once
records = []

# NOTE(review): the [:-1] slice skips the last symbol of the index; confirm
# that this is intentional.
for stock in dfbdr.index.unique(level=0)[:-1]:
    for d in days:
        p, n, s = filter_cumsum(dfbdr, stock=stock, start=d, decr_th=-0.06, incr_th=-0.04)
        records.append({'symbol': stock, 'date': d, 'percent': p, 'total_decr': int(n), 'num_postive_cycles': int(s)})

resultp = pd.DataFrame(records, columns=result_columns)

In [None]:
# Make the three metric columns float so they can be plotted and summed.
for metric in ('percent', 'total_decr', 'num_postive_cycles'):
    resultp[metric] = resultp[metric].astype(float)

In [None]:
# Use a wider default figure for the line plots in the following cells.
sns.set(rc={'figure.figsize':(11.7,5.27)})
# Recovered fraction per day, one line per symbol.
ax = sns.lineplot(data=resultp, x="date", y="percent", hue="symbol")
# Remove the x ticks (presumably because one label per date would overcrowd the axis).
ax.set_xticks([])

In [None]:
# Number of recovered drop events per day, one line per symbol.
ax = sns.lineplot(data=resultp, x="date", y="num_postive_cycles", hue="symbol")
# Remove the x ticks (presumably because one label per date would overcrowd the axis).
ax.set_xticks([])

In [None]:
# Number of drop events per day, one line per symbol.
ax = sns.lineplot(data=resultp, x="date", y="total_decr", hue="symbol")
# Remove the x ticks (presumably because one label per date would overcrowd the axis).
ax.set_xticks([])

In [None]:
# Column totals over all rows of resultp: drop events and recovered events.
resultp[["total_decr", "num_postive_cycles"]].sum(axis=0)