In [19]:
from pathlib import Path
import numpy as np
import pandas as pd

In [20]:
# Load the per-code metadata table from ./data/meta.csv (relative to the current
# working directory); `first_include` is parsed as a date written as YYYY-MM-DD.
meta = pd.read_csv(
    Path.cwd() / "data" / "meta.csv",
    parse_dates=["first_include"],
    date_format="%Y-%m-%d",
)

In [21]:
# Load the monthly price statistics, discard rows with any missing value, and
# order them by code and then chronologically (sort_values is ascending by default).
historical = (
    pd.read_csv(Path.cwd() / "data" / "historical_prices_monthly_stat.csv")
    .dropna()
    .sort_values(["_code", "_year", "_month"])
    .reset_index(drop=True)
)

In [22]:
# Attach the metadata to every monthly row; codes missing from either table are dropped.
df = pd.merge(historical, meta, how="inner", on="_code")

# Month stamp (first day of the month) assembled directly from the numeric
# year/month columns. Going through strings breaks when the columns were read
# as floats (e.g. "2020.0" + "1.0"), which the "%Y%m" format cannot parse.
df["ym"] = pd.to_datetime(
    pd.DataFrame({"year": df["_year"], "month": df["_month"], "day": 1})
)

In [23]:
# Keep only the months on or after each code's `first_include` date.
df = df.loc[df["ym"].ge(df["first_include"])].reset_index(drop=True)

In [24]:
# Remove every row that still has a missing value and renumber the rows.
df = (
    df.dropna(axis="index", how="any")
    .reset_index(drop=True)
)

In [25]:
# Put each row's *next* row (within the same `_code`) beside it, with every
# shifted column prefixed "1mf_". The last row of each code has no successor
# and is removed by dropna().
# NOTE(review): shift(-1) takes the next available row, which is only the next
# calendar month if no month is missing for that code — confirm.
df = pd.concat(
    [
        df,
        df.groupby("_code", as_index=False).shift(-1).add_prefix("1mf_"),
    ],
    axis="columns",
).dropna().reset_index(drop=True)

In [49]:
# Bucket edges for the high-to-end return.
# Alternatives tried: [-np.inf, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, -0.05, -0.01, 0]
#                     [-np.inf, -0.4, -0.1, -0.01, 0]
bins = [-np.inf, -0.3, -0.1, -0.01, 0]

# One "(lower, upper]" label per pair of neighbouring edges.
labels = [f'({lower}, {upper}]' for lower, upper in zip(bins[:-1], bins[1:])]

# exp(x) - 1 is applied before bucketing (presumably turning a log return into
# a simple return — confirm). The bucket is stored as a plain string, so values
# outside every bin do not stay missing; they become a string as well.
df['monthly_high_end_rtn_category'] = pd.cut(
    np.exp(df['monthly_high_end_rtn']) - 1,
    bins=bins,
    labels=labels,
).astype(str)

In [60]:
# A single bucket covering everything up to and including zero.
bins2 = [-np.inf, 0]
labels2 = [f'({lower}, {upper}]' for lower, upper in zip(bins2[:-1], bins2[1:])]

# Same bucketing as for the high-to-end return, applied to the high-to-low return.
df['monthly_high_low_rtn_category'] = pd.cut(
    np.exp(df['monthly_high_low_rtn']) - 1,
    bins=bins2,
    labels=labels2,
    right=True,
).astype(str)

In [61]:
# Number of non-missing next-month start-to-high returns in each
# (high-low bucket, high-end bucket) cell.
ct = pd.crosstab(
    index=df['monthly_high_low_rtn_category'],
    columns=df['monthly_high_end_rtn_category'],
    values=df['1mf_monthly_start_high_rtn'],
    aggfunc='count',
)

In [64]:
# Scratch cell: flip the sign of each candidate bin edge and display the result.
x = [-0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, -0.05, -0.01]
list(map(lambda edge: -edge, x))

[0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.05, 0.01]

In [45]:
# Row-by-row flag: is the monthly return non-negative?
df['monthly_rtn'].ge(0.)

0          True
1          True
2          True
3         False
4          True
          ...  
199256    False
199257     True
199258     True
199259    False
199260     True
Name: monthly_rtn, Length: 199261, dtype: bool

In [44]:
# Placeholder check: the branch body is empty, so this cell has no effect
# whether or not every monthly return is non-negative.
if df['monthly_rtn'].ge(0.).all():
    pass

In [39]:
import plotly.graph_objects as go

# Heatmap of the crosstab counts: columns of `ct` on the x axis, rows on the y axis.
fig = go.Figure()
fig.add_trace(
    go.Heatmap(
        z=ct.values,
        x=ct.columns,
        y=ct.index,
        colorscale='Blues',
    )
)
fig.show()

In [29]:
# Average next-month start-to-high return for every combination of the two buckets.
# NOTE(review): 'monthly_start_high_rtn_category' is not created by any cell
# visible in this notebook — confirm where it comes from before re-running.
(
    df.groupby(
        ['monthly_start_high_rtn_category', 'monthly_high_end_rtn_category'],
        as_index=False,
    )['1mf_monthly_start_high_rtn']
    .agg('mean')
)

Unnamed: 0,monthly_start_high_rtn_category,monthly_high_end_rtn_category,1mf_monthly_start_high_rtn
0,"[0, 0.01)","(-0.01, 0]",0.041643
1,"[0, 0.01)","(-0.1, -0.01]",0.049917
2,"[0, 0.01)","(-0.3, -0.1]",0.074245
3,"[0, 0.01)","(-inf, -0.3]",0.176287
4,"[0.01, 0.1)","(-0.01, 0]",0.045161
5,"[0.01, 0.1)","(-0.1, -0.01]",0.052547
6,"[0.01, 0.1)","(-0.3, -0.1]",0.074767
7,"[0.01, 0.1)","(-inf, -0.3]",0.157829
8,"[0.1, 0.3)","(-0.01, 0]",0.062696
9,"[0.1, 0.3)","(-0.1, -0.01]",0.07105


In [None]:
# Average next-month return within each high-to-end bucket.
df.groupby("monthly_high_end_rtn_category")["1mf_monthly_rtn"].agg("mean")

In [None]:
# Trailing 12-row standard deviation of the monthly return, computed separately
# for each code (at least 3 observations are required, otherwise NaN).
# The grouped rolling result is indexed by (_code, original row label); only
# the `_code` level is dropped so the assignment aligns on df's own index.
# Resetting the whole index would instead assign by position, which is only
# right when df happens to be ordered exactly like the grouped output.
df['monthly_rtn_12m_std'] = (
    df.groupby('_code')['monthly_rtn']
    .rolling(12, min_periods=3)
    .std()
    .reset_index(level=0, drop=True)
)

In [None]:
# Trailing 12-row mean of the monthly return, computed separately for each
# code (at least 3 observations are required, otherwise NaN).
# The grouped rolling result is indexed by (_code, original row label); only
# the `_code` level is dropped so the assignment aligns on df's own index.
# Resetting the whole index would instead assign by position, which is only
# right when df happens to be ordered exactly like the grouped output.
df['monthly_rtn_12m_avg'] = (
    df.groupby('_code')['monthly_rtn']
    .rolling(12, min_periods=3)
    .mean()
    .reset_index(level=0, drop=True)
)

In [None]:
# Drop the rows whose rolling statistics (or any other column) are missing,
# then renumber the rows.
df = (
    df.dropna(axis="index", how="any")
    .reset_index(drop=True)
)

In [None]:
# Every row starts out labelled 'normal'; a later cell overrides the label for
# rows that meet the 1-sigma condition.
df["monthly_high_end_rtn_sigma_category"] = "normal"

In [None]:
# Label a row '1sigma' when its high-to-end return lies below
# (12-month average return - 12-month standard deviation).
df.loc[
    df['monthly_high_end_rtn'].lt(
        df['monthly_rtn_12m_avg'].sub(df['monthly_rtn_12m_std'])
    ),
    'monthly_high_end_rtn_sigma_category',
] = '1sigma'

In [None]:
# df.loc[(df['monthly_rtn_12m_avg'] - 2 * df['monthly_rtn_12m_std']) > df['monthly_high_end_rtn'], 'monthly_high_end_rtn_sigma_category'] = '2sigma'

In [None]:
# df.loc[(df['monthly_rtn_12m_avg'] - 3 * df['monthly_rtn_12m_std']) > df['monthly_high_end_rtn'], 'monthly_high_end_rtn_sigma_category'] = '3sigma'

In [None]:
# How many rolling standard deviations the high-to-end return sits below the
# rolling average: (avg - high_end_rtn) / std.
df['sigma_coef'] = (
    df['monthly_rtn_12m_avg']
    .sub(df['monthly_high_end_rtn'])
    .div(df['monthly_rtn_12m_std'])
)

In [None]:
# Discard rows with any missing value; the existing row labels are kept as they are.
df = df.dropna(axis="index", how="any")

In [None]:
# Average next-month start-to-high return within each high-to-end bucket.
df.groupby('monthly_high_end_rtn_category')['1mf_monthly_start_high_rtn'].agg('mean')

In [None]:
# Number of non-missing next-month start-to-high returns per sigma label.
df.groupby('monthly_high_end_rtn_sigma_category')['1mf_monthly_start_high_rtn'].agg('count')

In [None]:
# Row counts per (high-to-end bucket, sigma label), with the 'normal' column
# shown before '1sigma'.
pd.crosstab(
    index=df['monthly_high_end_rtn_category'],
    columns=df['monthly_high_end_rtn_sigma_category'],
).loc[:, ['normal', '1sigma']]

In [None]:
# Count, mean, median and standard deviation of the next-month start-to-high
# return for every (sigma label, high-to-end bucket) pair.
(
    df.groupby(
        ['monthly_high_end_rtn_sigma_category', 'monthly_high_end_rtn_category'],
        as_index=False,
    )['1mf_monthly_start_high_rtn']
    .agg(['count', 'mean', 'median', 'std'])
)

In [None]:
# import plotly.express as px

# fig = px.scatter(df, 
#               x="sigma_coef", 
#               y="monthly_high_end_rtn", 
# )
# fig.update_traces(marker={'size': 2})

# fig.show()

In [None]:
# bins = [-np.inf, -0.4, -0.1, -0.01, 0] # [-np.inf, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, -0.05, -0.01, 0] 
# labels = [f'({bins[i-1]}, {bins[i]}]' for i, _ in enumerate(bins) if i > 0]
# df['monthly_high_end_rtn_category'] = pd.cut(df['monthly_high_end_rtn'], bins=bins, labels=labels).astype(str)
# df['before_monthly_high_end_rtn'] = df.groupby('_code', as_index=False)['monthly_high_end_rtn'].shift(1)
# df['before_monthly_high_end_rtn_category'] = df.groupby('_code', as_index=False)['monthly_high_end_rtn_category'].shift(1)

In [None]:
# df.dropna(inplace=True)

In [None]:
# import plotly.graph_objects as go

# fig = go.Figure()
# fig.add_trace(go.Box(
#     y = df[df['before_monthly_high_end_rtn_category'] == labels[0]]['monthly_start_high_rtn'],
#     name=labels[0],
#     boxmean='sd',
#     boxpoints=False
# ))
# fig.add_trace(go.Box(
#     y = df[df['before_monthly_high_end_rtn_category'] == labels[1]]['monthly_start_high_rtn'],
#     name=labels[1],
#     boxmean='sd',
#     boxpoints=False
# ))
# fig.add_trace(go.Box(
#     y = df[df['before_monthly_high_end_rtn_category'] == labels[2]]['monthly_start_high_rtn'],
#     name=labels[2],
#     boxmean='sd',
#     boxpoints=False
# ))
# fig.add_trace(go.Box(
#     y = df[df['before_monthly_high_end_rtn_category'] == labels[3]]['monthly_start_high_rtn'],
#     name=labels[3],
#     boxmean='sd',
#     boxpoints=False
# ))
# fig.show()

In [None]:
# The previous-month columns used from here on are only produced by
# commented-out code in this notebook, so a fresh Restart & Run All would stop
# here with a KeyError. Derive whichever of them is missing: for each code,
# the previous row's high-to-end return and its bucket.
previous_month = df.groupby('_code')[
    ['monthly_high_end_rtn', 'monthly_high_end_rtn_category']
].shift(1)
for source_col in previous_month.columns:
    target_col = 'before_' + source_col
    if target_col not in df.columns:
        df[target_col] = previous_month[source_col]

# Per-month statistics of the start-to-high return over all rows ...
total_grp = df.groupby(['ym'], as_index=False)['monthly_start_high_rtn'].agg(['count', 'mean', 'std', 'median'])
total_grp['before_monthly_high_end_rtn_category'] = 'Total'

# ... and the same statistics split by the previous month's bucket. Rows whose
# previous-month bucket is missing (a code's first row) are not part of any bucket.
bins_grp = df.groupby(['before_monthly_high_end_rtn_category', 'ym'], as_index=False)['monthly_start_high_rtn'].agg(['count', 'mean', 'std', 'median'])

# Stack both tables so 'Total' can be plotted as one more category.
grp = pd.concat([total_grp, bins_grp]).reset_index(drop=True)

In [None]:
# Display the stacked per-month statistics table (overall 'Total' rows plus one
# set of rows per previous-month bucket).
grp

In [None]:
import plotly.express as px

# Monthly mean start-to-high return, one line per previous-month bucket (plus 'Total').
# The hover box lists count, mean, median and std. The dict previously repeated
# the 'mean' key, so the median computed in `grp` was never shown.
fig = px.line(
    grp,
    x="ym",
    y="mean",
    color="before_monthly_high_end_rtn_category",
    markers=True,
    hover_name="before_monthly_high_end_rtn_category",
    hover_data={
        'before_monthly_high_end_rtn_category': False,
        'ym': False,
        'count': True,
        'mean': ':.4f',
        'median': ':.4f',
        'std': ':.4f',
    },
)
fig.update_layout(hovermode="x unified")

fig.show()

In [None]:
import plotly.graph_objects as go

fig = go.Figure()

# Overall monthly mean of the start-to-high return.
overall_mean = df.groupby('ym')['monthly_start_high_rtn'].mean()
fig.add_trace(go.Scatter(x=overall_mean.index, y=overall_mean.values, name='Total'))

# One trace per previous-month bucket found in the data, in sorted label order.
# Taking the buckets from the data avoids hard-coded label strings that go
# stale (and leave a trace empty) whenever the bin edges are changed.
for category, rows in df.groupby('before_monthly_high_end_rtn_category'):
    bucket_mean = rows.groupby('ym')['monthly_start_high_rtn'].mean()
    fig.add_trace(go.Scatter(x=bucket_mean.index, y=bucket_mean.values, name=str(category)))

fig.show()

In [None]:
# Monthly mean of the start-to-high return and of the monthly return, per
# previous-month bucket.
(
    df.groupby(['before_monthly_high_end_rtn_category', 'ym'], as_index=False)
    [['monthly_start_high_rtn', 'monthly_rtn']]
    .mean()
)

In [None]:
# Display the bucket labels currently held in `labels`.
labels

In [None]:
# Display the column names currently present in `df`.
df.columns

In [None]:
# Number of non-missing start-to-high returns per previous-month bucket, with
# the bucket label as an ordinary column.
df_count = (
    df.groupby("before_monthly_high_end_rtn_category")["monthly_start_high_rtn"]
    .count()
    .reset_index()
)

In [None]:
# Correlation between the previous month's high-to-end return and this month's
# start-to-high return, restricted to the '(-inf, -0.4]' bucket.
# NOTE(review): this label has a -0.4 edge, while the active `bins` cell uses
# -0.3 — confirm the label still matches a bucket that exists in the data.
df.loc[
    df['before_monthly_high_end_rtn_category'] == '(-inf, -0.4]',
    ['before_monthly_high_end_rtn', 'monthly_start_high_rtn'],
].corr()

In [None]:
import plotly.figure_factory as ff

# One sample of start-to-high returns per previous-month bucket, taken from the
# buckets actually present in the data so labels and samples cannot drift apart
# or go stale when the bin edges change.
returns_by_bucket = df.groupby('before_monthly_high_end_rtn_category')['monthly_start_high_rtn']

# Descending label order; for the "(lower, upper]" labels used in this notebook
# that lists the most negative bucket first.
group_labels = sorted(returns_by_bucket.groups, reverse=True)
hist_data = [returns_by_bucket.get_group(label) for label in group_labels]

colors = ['#333F44', '#37AA9C', '#94F3E4', 'slategray']

# Density curves only: histogram bars and the rug are switched off, and
# curve_type is not passed, so the library default applies.
fig = ff.create_distplot(hist_data, group_labels, show_hist=False, colors=colors, show_rug=False, histnorm='probability')

# Add title
fig.update_layout(title_text='Curve and Rug Plot', width=800)
fig.show()