In [1]:
from pathlib import Path
import numpy as np
import pandas as pd

In [2]:
# Load the raw price history and derive calendar keys used for monthly grouping.
historical = pd.read_csv(
    Path.cwd() / 'data' / 'historical_prices.csv',
    parse_dates=['_date'],
    date_format='%Y-%m-%d',
)
historical['_year'] = historical['_date'].dt.year
historical['_month'] = historical['_date'].dt.month

# Keep only the study window (both bounds inclusive).
in_window = historical['_date'].between('2012-01-01', '2024-04-30')

# The monthly statistics computed later are positional (first/last row of each
# group, position of the maximum), so rows must be in chronological order.
# A stable sort on the date alone guarantees that inside every group while
# leaving the relative order of same-day rows untouched.
historical = historical[in_window].sort_values('_date', kind='stable')

In [3]:
# meta = pd.read_csv(Path.cwd() / 'data' / 'meta.csv', parse_dates=['first_include'], date_format='%Y-%m-%d')

In [4]:
# historical = historical[historical['_date'] >= historical['first_include']].drop('first_include', axis=1).reset_index(drop=True)

In [5]:
# Sparse months: (code, year, month) groups holding at most 2 non-null values.
# These are the groups that the following cell removes from `historical`.
pre = (
    historical
    .groupby(['_code', '_year', '_month'], as_index=False)['_value']
    .count()
    .loc[lambda counts: counts['_value'] <= 2]
)

In [6]:
# Remove every row that belongs to one of the sparse (code, year, month) groups
# listed in `pre`. This is a single vectorised anti-join: the previous version
# rebuilt a full-frame boolean mask and re-sliced the frame once per sparse
# group, which scales as (number of sparse groups) x (number of rows).
# Rows whose key is not in `pre` (including any with a missing key) are kept,
# and the original index of `historical` is preserved.
sparse_months = pd.MultiIndex.from_frame(pre[['_code', '_year', '_month']])
is_sparse = pd.MultiIndex.from_frame(
    historical[['_code', '_year', '_month']]
).isin(sparse_months)
historical = historical[~is_sparse]

In [7]:
def monthly_nbdays(ts: pd.Series):
    """Return the number of entries in ``ts`` (missing values are counted too)."""
    n_obs = len(ts)
    return n_obs

def monthly_rtn(ts: pd.Series):
    """Return the log ratio of the last entry of ``ts`` to its first entry.

    Positions are taken as-is, so ``ts`` is presumably expected to be in
    chronological order (verify against the caller).
    """
    first, last = ts.iloc[0], ts.iloc[-1]
    return np.log(last / first)

def monthly_start_high_nbdays(ts: pd.Series):
    """Return the 1-based position of the maximum of ``ts``.

    Equivalently, the number of entries from the first one up to and
    including the first occurrence of the maximum.
    """
    high_pos = ts.argmax()
    return high_pos + 1

def monthly_start_high_rtn(ts: pd.Series):
    """Return the log ratio of the maximum of ``ts`` to its first entry."""
    high = ts.max()
    first = ts.iloc[0]
    return np.log(high / first)

def monthly_high_low_nbdays(ts: pd.Series):
    """Return the number of entries from the maximum to the following minimum.

    The minimum is searched only from the position of the maximum onwards,
    and both endpoints are counted (a result of 1 means the maximum is also
    the lowest value of the remaining entries).
    """
    from_high = ts.iloc[ts.argmax():]
    low_pos = from_high.argmin()
    return low_pos + 1

def monthly_high_low_rtn(ts: pd.Series):
    """Return the log ratio of the post-maximum minimum of ``ts`` to its maximum.

    The minimum is taken over the entries from the position of the maximum
    onwards, so the result is never positive for finite positive inputs.
    """
    high_pos = ts.argmax()
    low_after_high = ts.iloc[high_pos:].min()
    return np.log(low_after_high / ts.max())

def monthly_high_end_nbdays(ts: pd.Series):
    """Return the number of entries from the maximum of ``ts`` to its last entry.

    Both endpoints are counted, so the result is 1 when the maximum is the
    last entry.
    """
    high_pos = ts.argmax()
    return len(ts) - high_pos

def monthly_high_end_rtn(ts: pd.Series):
    """Return the log ratio of the last entry of ``ts`` to its maximum."""
    last, high = ts.iloc[-1], ts.max()
    return np.log(last / high)

def monthly_mdd(ts: pd.Series):
    """Return the log of the worst value-to-running-peak ratio within ``ts``.

    The series is converted to a cumulative growth path starting at 1
    (the undefined first period change is treated as 0), each point is divided
    by the highest level reached so far, and the log of the smallest such
    ratio is returned. The result is 0 when the path never falls below a
    previous peak.
    """
    # Period-over-period growth factors; the first one is 1 by construction.
    growth_factors = ts.pct_change(1).fillna(0) + 1.
    wealth = growth_factors.cumprod()
    running_peak = wealth.expanding(min_periods=1).max()
    worst_ratio = (wealth / running_peak).min()
    return np.log(worst_ratio)

def monthly_vola(ts: pd.Series):
    """Return the sample standard deviation (ddof=1) of the log returns of ``ts``.

    Log returns are obtained as ``log(1 + pct_change)``; the undefined first
    change is dropped before the standard deviation is taken.
    """
    simple_returns = ts.pct_change(1).dropna()
    log_returns = np.log(simple_returns + 1.)
    return np.std(log_returns, ddof=1)

def monthly_dvola(ts: pd.Series):
    """Return the sample standard deviation (ddof=1) of downside-only log returns.

    Period changes above zero are clipped to zero (not removed) before being
    converted to log returns, so positive periods contribute a return of 0.
    """
    simple_returns = ts.pct_change(1).dropna()
    downside_returns = simple_returns.clip(upper=0.)
    downside_log_returns = np.log(downside_returns + 1.)
    return np.std(downside_log_returns, ddof=1)

In [8]:
# Apply every monthly statistic to the `_value` series of each
# (code, year, month) group; the group keys are kept as regular columns.
monthly_historical = (
    historical
    .groupby(['_code', '_year', '_month'], as_index=False)['_value']
    .agg([
        monthly_nbdays,
        monthly_rtn,
        monthly_start_high_nbdays,
        monthly_start_high_rtn,
        monthly_high_low_nbdays,
        monthly_high_low_rtn,
        monthly_high_end_nbdays,
        monthly_high_end_rtn,
        monthly_mdd,
        monthly_vola,
        monthly_dvola,
    ])
)

In [9]:
# Persist the monthly statistics under ./data (relative to the current
# working directory), omitting the DataFrame index from the file.
monthly_historical.to_csv(
    Path.cwd() / 'data' / 'historical_prices_monthly_stat.csv',
    index=False,
)

In [None]:
# Show the monthly statistics table as this cell's output.
monthly_historical

In [None]:
# Show the monthly statistics table again (same expression as the preceding cell).
monthly_historical

In [None]:
# Write the monthly statistics to CSV without the index column. The target
# path is the same file the earlier export cell writes, so it is overwritten.
monthly_historical.to_csv(
    Path.cwd() / 'data' / 'historical_prices_monthly_stat.csv',
    index=False,
)