### Ideia 
#### Buscar qual é o prazo em que sempre que uma ação desceu X% (X=5) num intervalo D (2 < D < 10) de dias, a mesma ação subiu Y% (Y=5) num prazo R (2 < R < 10). Espaço de busca: X x D x Y x R = 10 * 8 * 10 * 8 = 6400 * 365 * 3
#### Maximizar esta função de utilidade de forma a diminuir o risco, ou seja, minimizar a quantidade de vezes em que isso deu errado.

####  Parâmetros da busca: Janela deslizante. Janelas de tamanho 2 a 10, deslocadas de 1 dia.
####  Para cada tamanho de janela, encontrar todos os períodos que atendem às especificações (parametrizadas) de tamanho de queda e descartar aqueles que tenham alguma interseção.
####  Testar quantos destes intervalos atendem a especificação de subida.

In [None]:
import pandas as pd
import seaborn as sns
import datetime
import numpy as np

In [None]:
# Load the BDR metadata sheet and the consolidated price history.
bdr_metadata = pd.read_excel("../doc/lista_bdrs.xls")
df = pd.read_csv("../data/output/consolidado/bdrs_consolidado.csv")
df = df[['data_pregao', 'cod_bdi', 'cod_papel', 'nome_resum', 'preco_ultimo']]

# Keep only rows with cod_bdi == 2, one row per (date, ticker).
# The chain ends in an explicit copy so that the column assignment below
# writes to an independent frame instead of a slice of `df`.
df_bdi2 = (
    df.loc[df['cod_bdi'] == 2]
    .drop(columns='cod_bdi')
    .drop_duplicates(subset=['data_pregao', 'cod_papel'])
    .copy()
)
df_bdi2['data_pregao'] = pd.to_datetime(df_bdi2['data_pregao'], format="%Y-%m-%d %H:%M:%S")

In [None]:
# sns.lineplot(data=df_nvidia, x="data_pregao", y="preco_ultimo")

def generate_windows(start, end, lenght):
    """Return sliding date windows, one starting on each day of a range.

    Args:
        start: First window start, as a ``"%Y-%m-%d"`` string.
        end: Last window start (inclusive), as a ``"%Y-%m-%d"`` string.
        lenght: Window length in days.

    Returns:
        A list of ``(window_start, window_end)`` ``datetime`` tuples, where
        ``window_end`` is ``window_start`` plus ``lenght`` days. The list is
        empty when ``end`` is before ``start``.
    """
    # The file imports the `datetime` module, so the class must be reached
    # as `datetime.datetime`; `datetime.strptime` does not exist.
    d1 = datetime.datetime.strptime(start, "%Y-%m-%d")
    d2 = datetime.datetime.strptime(end, "%Y-%m-%d")
    span = datetime.timedelta(days=lenght)
    n_starts = (d2 - d1).days + 1
    window_starts = (d1 + datetime.timedelta(days=i) for i in range(n_starts))
    return [(begin, begin + span) for begin in window_starts]

def filter_decreasing_periods(df, plenght, decr_percent):
    """Return the periods of ``plenght`` days over which the price fell.

    Every row of ``df`` is paired with the row dated exactly ``plenght``
    calendar days earlier. Dates without such a counterpart in ``df``
    produce no period.

    In the result, the unsuffixed columns describe the END of the period
    and the ``_y`` columns describe its START:

    * ``data_pregao`` / ``preco_ultimo``: date and price at the period end.
    * ``data_pregao_y`` / ``preco_ultimo_y``: date and price at the start.
    * ``end_decr_period_y``: equal to ``data_pregao`` (the merge key).
    * ``end_decr_period``: ``data_pregao`` plus ``plenght`` days.
    * ``decr_pct``: fractional fall relative to the start price; positive
      values mean the price went down.

    Args:
        df: Price history with ``data_pregao`` (datetime) and
            ``preco_ultimo`` columns. It is not modified.
        plenght: Period length in calendar days.
        decr_percent: Minimum fall, as a positive fraction (0.05 = 5%).

    Returns:
        DataFrame with one row per period whose fall exceeds
        ``decr_percent``.
    """
    # Work on a copy so the helper column never leaks into the caller's frame.
    prices = df.copy()
    prices['end_decr_period'] = prices['data_pregao'] + datetime.timedelta(days=plenght)

    df_result = prices.merge(
        right=prices[['data_pregao', 'end_decr_period', 'preco_ultimo']],
        left_on='data_pregao',
        right_on="end_decr_period",
        suffixes=["", "_y"],
    )

    # Fall measured against the starting price. The previous version divided
    # by the end price and kept `decr_pct < decr_percent`, which selected
    # every period EXCEPT the real declines.
    start_price = df_result['preco_ultimo_y']
    end_price = df_result['preco_ultimo']
    df_result['decr_pct'] = (start_price - end_price) / start_price
    return df_result[df_result['decr_pct'] > decr_percent]

def eliminate_overlap(df):
    """Drop periods that begin strictly inside another period.

    A period runs from ``data_pregao_y`` (start) to ``data_pregao`` (end).
    A row is removed when its start lies strictly between the start and end
    of any other row of the input. Periods that merely touch (one starts
    on the day another ends) are kept.

    NOTE(review): every input row is used as a reference, including rows
    that are themselves removed. A chain of mutually overlapping periods
    therefore collapses to its first period only. This matches the
    previous behavior; confirm that it is the intended rule.

    Args:
        df: Periods with ``data_pregao_y``, ``data_pregao`` and
            ``end_decr_period_y`` columns. It is not modified.

    Returns:
        A new DataFrame with the surviving rows in their original order and
        without the ``data_pregao_y`` and ``end_decr_period_y`` columns.
    """
    starts = df['data_pregao_y']
    # Accumulate one boolean mask instead of re-slicing the frame per period.
    covered = pd.Series(False, index=df.index, dtype=bool)
    for start, end in zip(df['data_pregao_y'], df['data_pregao']):
        covered |= (starts > start) & (starts < end)

    # `drop` returns a new frame, so the caller's columns stay untouched even
    # when the input is empty (the old `del` mutated the input in that case).
    return df[~covered].drop(columns=['data_pregao_y', 'end_decr_period_y'])

def eliminate_small_gains(input_df, df_stock, mingoal, plenght):
    """Keep the decline periods that were followed by a sufficient rise.

    For each period in ``input_df`` the price ``plenght`` calendar days
    after the end of the decline is looked up in ``df_stock``. Periods
    without a quote on that exact date are dropped by the inner merge.

    Args:
        input_df: Decline periods. ``data_pregao`` and ``preco_ultimo`` are
            the date and price at the END of the decline. It is not
            modified.
        df_stock: Price history of the same stock, with ``data_pregao`` and
            ``preco_ultimo`` columns.
        mingoal: Minimum rise, as a positive fraction (0.05 = 5%).
        plenght: Length of the rise window in calendar days.

    Returns:
        DataFrame of the periods whose rise exceeds ``mingoal``, with the
        added columns ``end_incr_period``, ``data_pregao_z``,
        ``preco_ultimo_z`` (date and price at the end of the rise window)
        and ``incr_pct``.
    """
    # Work on a copy so the helper column never leaks into the caller's frame.
    periods = input_df.copy()

    # The rise window starts where the decline ended (`data_pregao`). The
    # incoming `end_decr_period` column is already `data_pregao + plenght`,
    # so adding `plenght` to it looked 2 * plenght days ahead.
    periods['end_incr_period'] = periods['data_pregao'] + datetime.timedelta(days=plenght)

    df_inc = periods.merge(
        right=df_stock[['data_pregao', 'preco_ultimo']],
        left_on='end_incr_period',
        right_on="data_pregao",
        suffixes=["", "_z"],
    )

    # Gain relative to the price at the end of the decline (`preco_ultimo`),
    # not the pre-decline price (`preco_ultimo_y`).
    bottom_price = df_inc['preco_ultimo']
    df_inc['incr_pct'] = (df_inc['preco_ultimo_z'] - bottom_price) / bottom_price
    return df_inc[df_inc['incr_pct'] > mingoal]

def analyze_all_stocks(df, stocks_col, plenght, decr_percent, mingoal):
    """Run the decline/recovery analysis for every stock in ``df``.

    Args:
        df: Price history of all stocks.
        stocks_col: Name of the column holding the stock identifier.
        plenght: Period length, forwarded to the decline and rise filters.
        decr_percent: Decline threshold, forwarded to
            ``filter_decreasing_periods``.
        mingoal: Rise threshold, forwarded to ``eliminate_small_gains``.

    Returns:
        DataFrame indexed by stock (index name ``"stocks"``) with float
        columns:

        * ``pct_total``: successful periods / rows of the stock.
        * ``pct_success``: successful periods / non-overlapping declines.
        * ``n_occurrences``: number of successful periods.
        * ``mean_price``: mean ``preco_ultimo_z`` of the successful periods.
        * ``metric``: ``n_occurrences ** 2`` / non-overlapping declines.

        For stocks with no decline period the first three columns are 0 and
        ``mean_price`` / ``metric`` stay NaN.
    """
    stocks = df[stocks_col].unique()
    result_columns = ["pct_total", "pct_success", "n_occurrences", "mean_price", "metric"]
    results = pd.DataFrame(np.nan, index=pd.Index(stocks, name="stocks"), columns=result_columns)

    for s in stocks:
        # Independent copy: the helpers may add columns to the frame they
        # receive, which must not happen on a slice of the caller's data.
        df_stock = df[df[stocks_col] == s].copy()
        df_decreasing = filter_decreasing_periods(df_stock, plenght, decr_percent)
        df_el = eliminate_overlap(df_decreasing)
        n_declines = df_el.shape[0]
        if n_declines == 0:
            results.loc[s, "pct_total"] = 0
            results.loc[s, "pct_success"] = 0
            results.loc[s, "n_occurrences"] = 0
            continue

        df_goal = eliminate_small_gains(df_el, df_stock, mingoal, plenght)
        n_hits = df_goal.shape[0]
        results.loc[s, "pct_total"] = n_hits / df_stock.shape[0]
        results.loc[s, "pct_success"] = n_hits / n_declines
        results.loc[s, "n_occurrences"] = n_hits
        results.loc[s, "mean_price"] = df_goal['preco_ultimo_z'].mean()
        # Rewards stocks that succeed both often and reliably.
        results.loc[s, "metric"] = n_hits * n_hits / n_declines

    return results

In [None]:
# Search over period lengths, keeping the run with the best mean `metric`.
lplen = range(1, 11)
# NOTE(review): `ldecr` is defined but never used below; the decline
# threshold is fixed at 0.03 in the call.
ldecr = np.arange(0.03, 0.08, 0.01)
mean_success = 0
record = None
for count, pl in enumerate(lplen, start=1):
    # Progress report every tenth iteration, printed before that run.
    if not count % 10:
        print(count, mean_success)
    df_result = analyze_all_stocks(
        df_bdi2,
        stocks_col='cod_papel',
        plenght=pl,
        decr_percent=0.03,
        mingoal=0.03,
    )
    curr_mean = df_result['metric'].mean()
    if curr_mean > mean_success:
        mean_success, record = curr_mean, df_result
        print(pl, curr_mean)

In [None]:
# Single run with a 5-day period and 3% thresholds for both decline and rise.
df_result = analyze_all_stocks(
    df_bdi2,
    stocks_col='cod_papel',
    plenght=5,
    decr_percent=0.03,
    mingoal=0.03,
)

In [None]:
# Alternative view: record[record['pct_success'] > 0.4]
# Show the 20 stocks with the highest `metric`.
df_result.sort_values('metric', ascending=False).iloc[:20]

In [None]:
# `analyze_all_stocks` names the result index "stocks", so a plain
# `reset_index()` produces no "index" column and the melts below would raise
# KeyError. Rename the axis first so the id column is always called "index".
df_long = df_result.rename_axis("index").reset_index()

# Long-format success rate per stock, joined with the BDR metadata.
df_melt = df_long.melt(id_vars=['index'], value_vars=['pct_success'])
df_merge = df_melt.merge(right=bdr_metadata, left_on="index", right_on="CÓDIGO")

# Same for the rate relative to all rows of the stock.
df_melt2 = df_long.melt(id_vars=['index'], value_vars=['pct_total'])
df_merge2 = df_melt2.merge(right=bdr_metadata, left_on="index", right_on="CÓDIGO")

In [None]:
# Box plot of `pct_success` for the 10 sectors with the most rows in df_merge.
top_sectors = df_merge['SETOR'].value_counts().head(10).index
sns.catplot(data=df_merge, kind="box", x="SETOR", y="value", order=top_sectors, aspect=3)

In [None]:
# Box plot of `pct_total` by sector.
# NOTE(review): the sector order is taken from df_merge, not df_merge2;
# presumably to match the previous plot's ordering — confirm.
sector_order = df_merge['SETOR'].value_counts().head(10).index
sns.catplot(data=df_merge2, kind="box", x="SETOR", y="value", order=sector_order, aspect=3)

In [None]:
# Price table (stock x trading day) for a hand-picked list of BDRs.
stock_list = [
    'TSLA34', 'NVDC34', 'AMZO34', 'MSFT34', 'AAPL34',
    'GOGL34', 'HOME34', 'NFLX34', 'U1BE34', 'TEXA34',
]
df_dez = pd.read_csv("../data/output/consolidado/bdrs_consolidado_dez.csv")
is_selected = df_dez['cod_papel'].isin(stock_list)
df_selected = df_dez[is_selected]
# NOTE(review): `pivot` raises if a (cod_papel, data_pregao) pair repeats;
# unlike the main dataset, this file is not de-duplicated here — confirm.
df_selected.pivot(index='cod_papel', columns='data_pregao', values='preco_ultimo')