In [52]:
from pathlib import Path
import numpy as np
import pandas as pd

In [53]:
historical = pd.read_csv(Path.cwd() / 'data' / 'historical_prices.csv', parse_dates=['_date'], date_format='%Y-%m-%d')
historical['_year'] = historical['_date'].dt.year
historical['_month'] = historical['_date'].dt.month
historical = historical[(historical['_date'] >= '2012-01-01') & (historical['_date'] <= '2024-04-30')]

In [54]:
# meta = pd.read_csv(Path.cwd() / 'data' / 'meta.csv', parse_dates=['first_include'], date_format='%Y-%m-%d')

In [55]:
# historical = historical[historical['_date'] >= historical['first_include']].drop('first_include', axis=1).reset_index(drop=True)

In [56]:
pre = historical.groupby(['_code', '_year', '_month'], as_index=False)['_value'].count()
pre = pre[pre['_value'] <= 2]

In [58]:
for v in pre.values:
    historical = historical[(historical['_code'] != v[0]) | (historical['_year'] != v[1]) | (historical['_month'] != v[2])]

In [70]:
def monthly_nbdays(ts: pd.Series):
    return len(ts)

def monthly_rtn(ts: pd.Series):
    return np.log(ts.iloc[-1] / ts.iloc[0])

def monthly_start_high_nbdays(ts: pd.Series):
    return ts.argmax() + 1

def monthly_start_high_rtn(ts: pd.Series):
    return np.log(ts.max() / ts.iloc[0])

def monthly_high_low_nbdays(ts: pd.Series):
    return ts.iloc[ts.argmax():].argmin() + 1

def monthly_high_low_rtn(ts: pd.Series):
    return np.log(ts.iloc[ts.argmax():].min() / ts.max())

def monthly_high_end_nbdays(ts: pd.Series):
    return len(ts) - ts.argmax()

def monthly_high_end_rtn(ts: pd.Series):
    return np.log(ts.iloc[-1] / ts.max())

def monthly_mdd(ts: pd.Series):
    r = ts.pct_change(1).dropna()
    comp_ret = (r+1.).cumprod()
    peak = comp_ret.expanding(min_periods=1).max()
    dd = comp_ret/peak
    return np.log(dd.min())

def monthly_vola(ts: pd.Series):
    return np.std(np.log(ts.pct_change(1).dropna() + 1.), ddof=1)

def monthly_dvola(ts: pd.Series):
    return np.std(np.log(ts.pct_change(1).dropna().clip(upper=0.) + 1.), ddof=1)

In [71]:
monthly_historical = historical.groupby(['_code', '_year', '_month'], as_index=False)['_value'].agg(
    [
        monthly_nbdays,
        monthly_rtn,
        monthly_start_high_nbdays,
        monthly_start_high_rtn,
        monthly_high_low_nbdays,
        monthly_high_low_rtn,
        monthly_high_end_nbdays,
        monthly_high_end_rtn,
        monthly_mdd,
        monthly_vola,
        monthly_dvola,
     ]
)

In [72]:
monthly_historical.to_csv(Path.cwd() / 'data' / 'historical_prices_monthly_stat.csv', index=False)

In [73]:
monthly_historical

Unnamed: 0,_code,_year,_month,monthly_nbdays,monthly_rtn,monthly_start_high_nbdays,monthly_start_high_rtn,monthly_high_low_nbdays,monthly_high_low_rtn,monthly_high_end_nbdays,monthly_high_end_rtn,monthly_mdd,monthly_vola,monthly_dvola
0,AN8068571086,2014,4,21,0.035992,16,0.042470,4,-0.009042,6,-0.006478,-0.019580,0.008682,0.004391
1,AN8068571086,2014,5,21,0.031637,21,0.031637,1,0.000000,1,0.000000,-0.023638,0.007599,0.004330
2,AN8068571086,2014,6,21,0.126736,21,0.126736,1,0.000000,1,0.000000,-0.020061,0.016541,0.004784
3,AN8068571086,2014,7,22,-0.083677,1,0.000000,22,-0.083677,22,-0.083677,-0.082658,0.009156,0.006473
4,AN8068571086,2014,8,21,0.015534,19,0.028222,3,-0.012688,3,-0.012688,-0.034886,0.010732,0.006516
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271825,US98980L1017,2023,11,21,0.122160,20,0.123928,2,-0.001768,2,-0.001768,-0.037856,0.017936,0.009081
271826,US98980L1017,2023,12,20,0.022786,17,0.054269,4,-0.031484,4,-0.031484,-0.044788,0.018821,0.010836
271827,US98980L1017,2024,1,21,-0.067909,9,0.018767,13,-0.086676,13,-0.086676,-0.086676,0.018485,0.012216
271828,US98980L1017,2024,2,20,0.086793,20,0.086793,1,0.000000,1,0.000000,-0.068993,0.022897,0.008387


In [74]:
monthly_historical['monthly_rtn_davg'] = monthly_historical['monthly_rtn'] / monthly_historical['monthly_nbdays']
monthly_historical['monthly_high_low_rtn_davg'] = monthly_historical['monthly_high_low_rtn'] / monthly_historical['monthly_high_low_nbdays']
monthly_historical['monthly_high_end_rtn_davg'] = monthly_historical['monthly_high_end_rtn'] / monthly_historical['monthly_high_end_nbdays']
monthly_historical['monthly_start_high_davg'] = monthly_historical['monthly_start_high_rtn'] / monthly_historical['monthly_start_high_nbdays']

In [75]:
monthly_historical

Unnamed: 0,_code,_year,_month,monthly_nbdays,monthly_rtn,monthly_start_high_nbdays,monthly_start_high_rtn,monthly_high_low_nbdays,monthly_high_low_rtn,monthly_high_end_nbdays,monthly_high_end_rtn,monthly_mdd,monthly_vola,monthly_dvola,monthly_rtn_davg,monthly_high_low_rtn_davg,monthly_high_end_rtn_davg,monthly_start_high_davg
0,AN8068571086,2014,4,21,0.035992,16,0.042470,4,-0.009042,6,-0.006478,-0.019580,0.008682,0.004391,0.001714,-0.002260,-0.001080,0.002654
1,AN8068571086,2014,5,21,0.031637,21,0.031637,1,0.000000,1,0.000000,-0.023638,0.007599,0.004330,0.001507,0.000000,0.000000,0.001507
2,AN8068571086,2014,6,21,0.126736,21,0.126736,1,0.000000,1,0.000000,-0.020061,0.016541,0.004784,0.006035,0.000000,0.000000,0.006035
3,AN8068571086,2014,7,22,-0.083677,1,0.000000,22,-0.083677,22,-0.083677,-0.082658,0.009156,0.006473,-0.003803,-0.003803,-0.003803,0.000000
4,AN8068571086,2014,8,21,0.015534,19,0.028222,3,-0.012688,3,-0.012688,-0.034886,0.010732,0.006516,0.000740,-0.004229,-0.004229,0.001485
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271825,US98980L1017,2023,11,21,0.122160,20,0.123928,2,-0.001768,2,-0.001768,-0.037856,0.017936,0.009081,0.005817,-0.000884,-0.000884,0.006196
271826,US98980L1017,2023,12,20,0.022786,17,0.054269,4,-0.031484,4,-0.031484,-0.044788,0.018821,0.010836,0.001139,-0.007871,-0.007871,0.003192
271827,US98980L1017,2024,1,21,-0.067909,9,0.018767,13,-0.086676,13,-0.086676,-0.086676,0.018485,0.012216,-0.003234,-0.006667,-0.006667,0.002085
271828,US98980L1017,2024,2,20,0.086793,20,0.086793,1,0.000000,1,0.000000,-0.068993,0.022897,0.008387,0.004340,0.000000,0.000000,0.004340


In [76]:
monthly_historical.to_csv(Path.cwd() / 'data' / 'historical_prices_monthly_stat.csv', index=False)

In [7]:
monthly_historical[['_code', '_year', '_month', 'monthly_rtn', 'monthly_high_low_rtn', 
'monthly_high_end_rtn', 
'monthly_start_high_rtn', 'daily_rtn_avg', 'daily_vola', 'monthly_mdd',
'monthly_rtn_to_daily', 'monthly_high_low_rtn_to_daily',
'monthly_high_end_rtn_to_daily', 'monthly_start_high_to_daily']].to_csv(Path.cwd() / 'data' / 'historical_prices_monthly_stat.csv', index=False)