In [1]:
import yfinance as yf
import pandas as pd
import pandas_ta
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the raw stock price table from CSV into `data`, then display it.
data = pd.read_csv('./data/Raw_Stock_Data.csv')
data

Unnamed: 0,Date,Index,Adj Close,Close,High,Low,Open,Volume
0,2020-01-02,FTNT,21.936001,21.936001,22.058001,21.476000,21.520000,5541000.0
1,2020-01-03,FTNT,22.242001,22.242001,22.306000,21.620001,21.684000,6641000.0
2,2020-01-06,FTNT,22.400000,22.400000,22.472000,21.906000,22.122000,7369000.0
3,2020-01-07,FTNT,22.378000,22.378000,22.502001,22.214001,22.466000,3796500.0
4,2020-01-08,FTNT,22.733999,22.733999,22.879999,22.360001,22.389999,6953000.0
...,...,...,...,...,...,...,...,...
432229,2022-07-25,AIH,2.220000,2.220000,2.360000,2.110000,2.310000,6400.0
432230,2022-07-26,AIH,2.370000,2.370000,2.370000,2.360000,2.360000,2500.0
432231,2022-07-27,AIH,2.400000,2.400000,2.400000,2.234000,2.350000,20400.0
432232,2022-07-28,AIH,2.340000,2.340000,2.370000,2.270000,2.270000,2800.0


In [3]:
# Print column dtypes and non-null counts to see how many price rows are missing.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 432234 entries, 0 to 432233
Data columns (total 8 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   Date       432234 non-null  object 
 1   Index      432234 non-null  object 
 2   Adj Close  423623 non-null  float64
 3   Close      423623 non-null  float64
 4   High       423623 non-null  float64
 5   Low        423623 non-null  float64
 6   Open       423623 non-null  float64
 7   Volume     423623 non-null  float64
dtypes: float64(6), object(2)
memory usage: 26.4+ MB


In [4]:
# The 'Index' column holds the ticker symbol of each row; count the distinct tickers.
stock_list = data['Index'].unique()
print(len(stock_list))

666


## High/Low ratio & Std of returns (windows counted in rows: 21 rows ≈ 1 month, 42 ≈ 2 months, 63 ≈ 3 months)

In [5]:
def high_low_m(df, m):
    """Return the rolling highest-High / lowest-Low ratio for every row of ``df``.

    The look-back window is counted in rows: 21 rows for ``m == 1``, 42 rows
    for ``m == 2`` and 63 rows for any other ``m``. Rows are used in their
    existing order and addressed by position, so the index labels of ``df``
    do not have to be a 0-based range.

    Args:
        df: DataFrame with numeric ``'High'`` and ``'Low'`` columns.
        m: Window selector (1, 2, or anything else for the 63-row window).

    Returns:
        A list with one entry per row of ``df``. The first ``duration - 1``
        entries are ``None`` because the window is not yet full. Every later
        entry is ``max(High) / min(Low)`` over the current row and the
        ``duration - 1`` rows before it. Missing values inside a window are
        ignored; a window without any valid High or Low yields NaN.
    """
    duration = 21 if m == 1 else 42 if m == 2 else 63

    # min_periods=1 makes the rolling reducers skip NaN instead of letting the
    # position of a NaN inside the window decide the outcome (which is what the
    # built-in max()/min() do on a sequence containing NaN).
    highest_high = df['High'].rolling(duration, min_periods=1).max().to_numpy()
    lowest_low = df['Low'].rolling(duration, min_periods=1).min().to_numpy()

    # A zero lowest-Low gives inf and an empty window gives NaN; neither should
    # abort the whole run or emit a warning per row.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratios = (highest_high / lowest_low).tolist()

    # Rows that do not yet have a full window behind them stay undefined.
    warmup = min(duration - 1, len(ratios))
    high_low_list = [None] * warmup + ratios[warmup:]

    assert len(high_low_list) == df.shape[0]
    return high_low_list


In [6]:
def std_m(df, m):
    """Return the rolling standard deviation of daily returns for every row of ``df``.

    Returns are the row-over-row percentage change of ``'Adj Close'``. The
    look-back window is counted in rows: 21 rows for ``m == 1``, 42 rows for
    ``m == 2`` and 63 rows for any other ``m``. Rows are addressed by
    position, so the index labels of ``df`` do not have to be a 0-based range.

    Args:
        df: DataFrame with a numeric ``'Adj Close'`` column, one row per period.
        m: Window selector (1, 2, or anything else for the 63-row window).

    Returns:
        A list with one entry per row of ``df``. The first ``duration``
        entries are ``None``: the first row has no return, so a window of
        ``duration`` returns only exists from position ``duration`` onwards.
        Every later entry is the population standard deviation (``ddof=0``)
        of the returns in the window, ignoring missing returns.
    """
    duration = 21 if m == 1 else 42 if m == 2 else 63

    # NOTE(review): how pct_change treats missing prices depends on the
    # installed pandas version (its default fill behaviour changed) - confirm.
    RoR_list = df['Adj Close'].pct_change()

    # ddof=0 and NaN-skipping reproduce what np.std does when handed a Series.
    rolling_std = RoR_list.rolling(duration, min_periods=1).std(ddof=0).tolist()

    # One extra warm-up row compared with a price window: the first return is undefined.
    warmup = min(duration, len(rolling_std))
    std_list = [None] * warmup + rolling_std[warmup:]

    assert len(std_list) == df.shape[0]
    return std_list

In [8]:
def process(data):
    """Add rolling high/low-ratio and return-std feature columns to ``data`` in place.

    For every distinct value of the ``'Index'`` column the matching rows are
    extracted (in their existing order), the 1-, 2- and 3-month variants of
    ``high_low_m`` and ``std_m`` are computed on them, and the results are
    written back onto exactly those rows. Rows of one ticker therefore do not
    need to be stored contiguously in ``data``.

    Args:
        data: DataFrame with ``'Index'``, ``'High'``, ``'Low'`` and
            ``'Adj Close'`` columns. It is modified in place: the columns
            ``high_low_1m``, ``high_low_2m``, ``high_low_3m``, ``std_1m``,
            ``std_2m`` and ``std_3m`` are added (or overwritten).

    Returns:
        None.
    """
    # (output column, feature function, month selector) in the order the
    # columns are appended to the frame.
    feature_specs = [
        ('high_low_1m', high_low_m, 1),
        ('high_low_2m', high_low_m, 2),
        ('high_low_3m', high_low_m, 3),
        ('std_1m', std_m, 1),
        ('std_2m', std_m, 2),
        ('std_3m', std_m, 3),
    ]
    feature_values = {
        name: np.full(data.shape[0], np.nan) for name, _, _ in feature_specs
    }

    stock_list = data['Index'].unique()
    rows_done = 0
    for i, stock in enumerate(stock_list):
        print(f'Processing stock: {stock}, {i+1}/{len(stock_list)}')
        # Positional mask of this ticker's rows; used both to extract them and
        # to put the results back on the same rows.
        row_mask = (data['Index'] == stock).to_numpy()
        # The feature helpers expect a 0-based index, hence the reset.
        df_tmp = data.loc[row_mask].reset_index(drop=True)

        for name, compute, months in feature_specs:
            values = compute(df_tmp, months)
            assert len(values) == df_tmp.shape[0]
            # Going through a float Series turns the None warm-up entries into NaN.
            feature_values[name][row_mask] = pd.Series(values, dtype='float64').to_numpy()

        rows_done += df_tmp.shape[0]

    # Every row must belong to exactly one processed ticker.
    assert rows_done == data.shape[0]

    for name, _, _ in feature_specs:
        data[name] = feature_values[name]


In [9]:
# process() returns nothing; it adds the six rolling feature columns to `data`
# in place, so `data` is displayed afterwards to inspect the new columns.
process(data)
data

Processing stock: FTNT, 1/666
Processing stock: LAND, 2/666
Processing stock: GDYN, 3/666
Processing stock: REG, 4/666
Processing stock: ODFL, 5/666
Processing stock: SYNA, 6/666
Processing stock: PRFT, 7/666
Processing stock: IMKTA, 8/666
Processing stock: METC, 9/666
Processing stock: PTSI, 10/666
Processing stock: CLMT, 11/666
Processing stock: MCBS, 12/666
Processing stock: GAIN, 13/666
Processing stock: SBNY, 14/666
Processing stock: KRUS, 15/666
Processing stock: LKQ, 16/666
Processing stock: COKE, 17/666
Processing stock: EXLS, 18/666
Processing stock: PCB, 19/666
Processing stock: INMD, 20/666
Processing stock: WIRE, 21/666
Processing stock: CCRN, 22/666
Processing stock: PAYX, 23/666
Processing stock: IRMD, 24/666
Processing stock: AEHR, 25/666
Processing stock: RELL, 26/666
Processing stock: RRR, 27/666
Processing stock: AFBI, 28/666
Processing stock: INTU, 29/666
Processing stock: CCB, 30/666
Processing stock: MSFT, 31/666
Processing stock: GOOGL, 32/666
Processing stock: TB

Processing stock: PDFS, 262/666
Processing stock: CZWI, 263/666
Processing stock: ERII, 264/666
Processing stock: PXLW, 265/666
Processing stock: SMCI, 266/666
Processing stock: NTLA, 267/666
Processing stock: BECN, 268/666
Processing stock: ITRN, 269/666
Processing stock: VIR, 270/666
Processing stock: ITOS, 271/666
Processing stock: TTEK, 272/666
Processing stock: ANAB, 273/666
Processing stock: FLGT, 274/666
Processing stock: RAIL, 275/666
Processing stock: IMNM, 276/666
Processing stock: CENT, 277/666
Processing stock: BCBP, 278/666
Processing stock: HEES, 279/666
Processing stock: PLXP, 280/666
Processing stock: LOVE, 281/666
Processing stock: KOPN, 282/666
Processing stock: FOCS, 283/666
Processing stock: CHEF, 284/666
Processing stock: BCML, 285/666
Processing stock: ZEUS, 286/666
Processing stock: MTSI, 287/666
Processing stock: UTHR, 288/666
Processing stock: HIBB, 289/666
Processing stock: NXPI, 290/666
Processing stock: BLKB, 291/666
Processing stock: PDLB, 292/666
Processin

Processing stock: DBX, 519/666
Processing stock: MBUU, 520/666
Processing stock: VIA, 521/666
Processing stock: SQFT, 522/666
Processing stock: ICUI, 523/666
Processing stock: QNCX, 524/666
Processing stock: PINC, 525/666
Processing stock: IPHA, 526/666
Processing stock: QTRX, 527/666
Processing stock: SRDX, 528/666
Processing stock: STSA, 529/666
Processing stock: LULU, 530/666
Processing stock: JRSH, 531/666
Processing stock: CTSH, 532/666
Processing stock: COLM, 533/666
Processing stock: CARG, 534/666
Processing stock: CAKE, 535/666
Processing stock: MKSI, 536/666
Processing stock: PUYI, 537/666
Processing stock: CMCO, 538/666
Processing stock: HYRE, 539/666
Processing stock: BSY, 540/666
Processing stock: RNA, 541/666
Processing stock: VRA, 542/666
Processing stock: BOXL, 543/666
Processing stock: NEOG, 544/666
Processing stock: YTEN, 545/666
Processing stock: CHUY, 546/666
Processing stock: KDNY, 547/666
Processing stock: BBBY, 548/666
Processing stock: ANTE, 549/666
Processing st

Unnamed: 0,Date,Index,Adj Close,Close,High,Low,Open,Volume,high_low_1m,high_low_2m,high_low_3m,std_1m,std_2m,std_3m
0,2020-01-02,FTNT,21.936001,21.936001,22.058001,21.476000,21.520000,5541000.0,,,,,,
1,2020-01-03,FTNT,22.242001,22.242001,22.306000,21.620001,21.684000,6641000.0,,,,,,
2,2020-01-06,FTNT,22.400000,22.400000,22.472000,21.906000,22.122000,7369000.0,,,,,,
3,2020-01-07,FTNT,22.378000,22.378000,22.502001,22.214001,22.466000,3796500.0,,,,,,
4,2020-01-08,FTNT,22.733999,22.733999,22.879999,22.360001,22.389999,6953000.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432229,2022-07-25,AIH,2.220000,2.220000,2.360000,2.110000,2.310000,6400.0,1.443066,1.641026,2.169492,0.069084,0.061110,0.061053
432230,2022-07-26,AIH,2.370000,2.370000,2.370000,2.360000,2.360000,2500.0,1.430168,1.641026,2.169492,0.070134,0.061876,0.061378
432231,2022-07-27,AIH,2.400000,2.400000,2.400000,2.234000,2.350000,20400.0,1.430168,1.641026,2.169492,0.070013,0.061869,0.060908
432232,2022-07-28,AIH,2.340000,2.340000,2.370000,2.270000,2.270000,2800.0,1.430168,1.641026,2.169492,0.070459,0.060642,0.061058


In [10]:
# Discard every row dated on or before 2020-12-31 (the ISO date strings
# compare correctly as text), write the remainder to CSV and display it.
# `data` itself is left untouched because drop() returns a new frame.
indexes = data.loc[data['Date'] <= '2020-12-31'].index
data_final = data.drop(index=indexes)
data_final.to_csv('data/data_volatility_raw.csv', index=False)
data_final

Unnamed: 0,Date,Index,Adj Close,Close,High,Low,Open,Volume,high_low_1m,high_low_2m,high_low_3m,std_1m,std_2m,std_3m
253,2021-01-04,FTNT,29.112,29.112,29.931999,28.572001,29.914000,5391000.0,1.246585,1.403221,1.436347,0.023125,0.021400,0.023501
254,2021-01-05,FTNT,28.348,28.348,28.535999,27.966000,28.312000,6130000.0,1.234939,1.394797,1.436347,0.023887,0.021925,0.023665
255,2021-01-06,FTNT,27.622,27.622,28.122000,27.462000,27.464001,6531000.0,1.234939,1.394797,1.436347,0.024728,0.021979,0.023906
256,2021-01-07,FTNT,28.650,28.650,28.724001,27.822001,27.826000,6856000.0,1.234939,1.394797,1.436347,0.025711,0.022255,0.024196
257,2021-01-08,FTNT,29.628,29.628,29.698000,28.818001,28.902000,6301000.0,1.234939,1.394797,1.436347,0.025998,0.022382,0.024500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432229,2022-07-25,AIH,2.220,2.220,2.360000,2.110000,2.310000,6400.0,1.443066,1.641026,2.169492,0.069084,0.061110,0.061053
432230,2022-07-26,AIH,2.370,2.370,2.370000,2.360000,2.360000,2500.0,1.430168,1.641026,2.169492,0.070134,0.061876,0.061378
432231,2022-07-27,AIH,2.400,2.400,2.400000,2.234000,2.350000,20400.0,1.430168,1.641026,2.169492,0.070013,0.061869,0.060908
432232,2022-07-28,AIH,2.340,2.340,2.370000,2.270000,2.270000,2800.0,1.430168,1.641026,2.169492,0.070459,0.060642,0.061058
