In [11]:
import pandas as pd
from datetime import datetime
import copy

In [7]:
def extract_intraday(symbol, api_key=None, interval='1m'):
    """Download intraday bars for a US-listed symbol from eodhistoricaldata.com.

    Parameters
    ----------
    symbol : str
        Ticker without exchange suffix (``.US`` is appended), e.g. ``'AAPL'``.
    api_key : str, optional
        API token. When omitted, the ``EODHD_API_TOKEN`` environment variable
        is used so that the credential never has to live in the notebook.
    interval : str, default '1m'
        Value sent as the ``interval`` query parameter.

    Returns
    -------
    pandas.DataFrame
        The CSV response with its first two columns dropped and indexed by
        the ``Datetime`` column.

    Raises
    ------
    ValueError
        If no token is passed and ``EODHD_API_TOKEN`` is not set.
    """
    # Function-scope imports keep this cell self-contained.
    import os
    from urllib.parse import quote

    # SECURITY: a token used to be hard-coded on this line. Anything that was
    # committed with it should be treated as compromised and rotated.
    token = api_key or os.environ.get('EODHD_API_TOKEN')
    if not token:
        raise ValueError(
            'No API token: pass api_key=... or set the EODHD_API_TOKEN '
            'environment variable.'
        )

    # Quote user-supplied pieces so odd characters cannot alter the URL.
    ticker = quote(str(symbol), safe='')
    url = (
        f'https://eodhistoricaldata.com/api/intraday/{ticker}.US'
        f'?api_token={quote(str(token), safe="")}&interval={quote(str(interval), safe="")}'
    )

    # The first two CSV columns are discarded positionally
    # (presumably epoch timestamp and GMT offset -- TODO confirm against the API).
    df = pd.read_csv(url).iloc[:, 2:].set_index('Datetime')
    return df

In [8]:
# Fetch intraday bars for AAPL (network call) and display the resulting frame.
data = extract_intraday('AAPL')
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-11-04 08:00:00,152.00,152.00,151.75,151.92,1666.0
2021-11-04 08:01:00,151.95,151.96,151.89,151.96,1356.0
2021-11-04 08:02:00,151.96,151.96,151.94,151.94,1092.0
2021-11-04 08:04:00,151.77,151.77,151.73,151.73,1153.0
2021-11-04 08:05:00,151.87,151.87,151.80,151.80,847.0
...,...,...,...,...,...
2022-03-03 00:56:00,166.33,166.33,166.30,166.30,327.0
2022-03-03 00:57:00,166.30,166.35,166.30,166.35,3435.0
2022-03-03 00:58:00,166.33,166.40,166.33,166.40,1519.0
2022-03-03 00:59:00,166.36,166.42,166.36,166.36,968.0


In [10]:
# Remove every row that contains a missing value. This mutates `data` in
# place, so the frame displayed by the earlier cell no longer matches it.
data.dropna(inplace=True)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-11-04 08:00:00,152.00,152.00,151.75,151.92,1666.0
2021-11-04 08:01:00,151.95,151.96,151.89,151.96,1356.0
2021-11-04 08:02:00,151.96,151.96,151.94,151.94,1092.0
2021-11-04 08:04:00,151.77,151.77,151.73,151.73,1153.0
2021-11-04 08:05:00,151.87,151.87,151.80,151.80,847.0
...,...,...,...,...,...
2022-03-03 00:52:00,166.28,166.33,166.28,166.33,1020.0
2022-03-03 00:56:00,166.33,166.33,166.30,166.30,327.0
2022-03-03 00:57:00,166.30,166.35,166.30,166.35,3435.0
2022-03-03 00:58:00,166.33,166.40,166.33,166.40,1519.0


In [31]:
#Feature Engineering Class
class FeatureEngineering:
    """Add technical-indicator columns to a price DataFrame.

    Every public indicator method works on a copy of its input and returns
    that copy, so the caller's frame is never modified. New columns are
    named ``<column>_<suffix>``.

    Parameters
    ----------
    stock_name : str
        Name of the column that :meth:`get_data` builds all indicators from
        (e.g. ``'Close'``).
    """

    def __init__(self, stock_name):
        self.stock_name = stock_name

    # Simple moving average
    def SMA(self, data, column='Close', period=30):
        """Return a copy of ``data`` with ``<column>_SMA<period>`` added."""
        data = data.copy()
        data[column+f'_SMA{period}'] = self.SMA_(data, column=column, period=period)
        return data

    def SMA_(self, data, column='Close', period=30):
        """Return the ``period``-row rolling mean of ``data[column]`` as a Series.

        Read-only: ``data`` is not copied or modified. The first
        ``period - 1`` values are NaN.
        """
        return data[column].rolling(period).mean()

    # Exponential moving average
    def EMA(self, data, period=20, column='Close'):
        """Return a copy of ``data`` with ``<column>_EMA<period>`` added."""
        data = data.copy()
        data[column+f'_EMA{period}'] = self.EMA_(data, period=period, column=column)
        return data

    def EMA_(self, data, period=20, column='Close'):
        """Return the EMA of ``data[column]`` (``span=period``, ``adjust=False``).

        Read-only: ``data`` is not copied or modified.
        """
        return data[column].ewm(span=period, adjust=False).mean()

    # Bollinger Bands
    def Bollingerband(self, data, period=20, column='Close', num_std=2):
        """Return a copy of ``data`` with Bollinger Band columns added.

        Adds ``<column>_SMA<period>`` (middle band), ``<column>_UB`` and
        ``<column>_LB``. The bands sit ``num_std`` rolling standard
        deviations of ``data[column]`` above/below the middle band, using the
        same ``period``-row window (population std, ``ddof=0``).

        The band width is computed per row from the trailing window only.
        Previously it was a single constant derived from the standard
        deviation of the moving average over the whole series, which both
        ignored local volatility and leaked future data into every row.
        """
        data = data.copy()

        middle = self.SMA_(data, column=column, period=period)
        offset = num_std * data[column].rolling(period).std(ddof=0)

        data[column+f'_SMA{period}'] = middle
        data[column+'_UB'] = middle + offset
        data[column+'_LB'] = middle - offset

        return data

    #MACD
    def MACD(self, data, period_long=26, period_short=12, period_signal=9, column='Close'):
        """Return a copy of ``data`` with MACD columns added.

        Adds ``<column>_short`` and ``<column>_long`` (EMAs),
        ``<column>_MACD`` (short minus long) and ``<column>_SignalLine``
        (EMA of the MACD column over ``period_signal``).
        """
        data = data.copy()

        data[column+'_short'] = self.EMA_(data, period=period_short, column=column)
        data[column+'_long'] = self.EMA_(data, period=period_long, column=column)
        data[column+'_MACD'] = data[column+'_short'] - data[column+'_long']

        #signal
        data[column+'_SignalLine'] = self.EMA_(data, period=period_signal, column=column+'_MACD')

        return data

    #Momentum
    def Momentum(self, data, period=7, column='Close'):
        """Return a copy of ``data`` with lagged values and a momentum ratio.

        Adds ``<column>_<period>D`` (value ``period`` rows back),
        ``<column>_1D`` (value one row back) and ``<column>_Momentum`` =
        ``<column>_1D / <column>_<period>D - 1``.

        NOTE(review): the ratio uses the previous row rather than the current
        one, and the ``D`` suffix counts rows, not days. Both are kept because
        the column names and values are part of the exported schema -- confirm
        this is the intended definition.
        """
        data = data.copy()

        data[column+f'_{period}D'] = data[column].shift(period)
        data[column+'_1D'] = data[column].shift(1)
        data[column+'_Momentum'] = data[column+'_1D'] / data[column+f'_{period}D'] - 1

        return data

    #RSI
    def RSI(self, data, period=14, column='Close'):
        """Return a copy of ``data`` with RSI columns added.

        Adds ``<column>1diff`` (one-row difference), ``<column>_up`` (gains,
        losses set to 0), ``<column>_down`` (losses, gains set to 0) and
        ``<column>_RSI``, computed from simple rolling means of gains and
        losses over ``period`` rows.

        When the average loss in a window is zero, RS is infinite and RSI is
        100; when both averages are zero the RSI is NaN.
        """
        data = data.copy()

        delta = data[column].diff(1)
        data[column+'1diff'] = delta

        # clip keeps the leading NaN and avoids index re-alignment.
        data[column+'_up'] = delta.clip(lower=0)
        data[column+'_down'] = delta.clip(upper=0)

        avg_gain = self.SMA_(data, period=period, column=column+'_up')
        avg_loss = self.SMA_(data, period=period, column=column+'_down').abs()
        rs = avg_gain / avg_loss

        data[column+'_RSI'] = 100.0 - (100.0/(1.0+rs))

        return data

    #Get feature engineered data
    def get_data(self, train):
        """Run every indicator on ``train`` and return the enriched copy.

        Indicators are computed on the column named by ``self.stock_name``:
        SMA and EMA for periods 10/20/30, Bollinger Bands, MACD, Momentum and
        RSI (each with its default parameters). The result is also stored on
        ``self.train``. ``train`` itself is left untouched.
        """
        c = self.stock_name
        features = train

        # Each indicator method copies its input, so `train` is never mutated.
        for period in (10, 20, 30):
            features = self.SMA(features, column=c, period=period)
        for period in (10, 20, 30):
            features = self.EMA(features, column=c, period=period)

        for indicator in (self.Bollingerband, self.MACD, self.Momentum, self.RSI):
            features = indicator(features, column=c)

        self.train = features
        return self.train

In [32]:
# 'Close' is the column every indicator in get_data() is derived from.
fe = FeatureEngineering('Close')
# get_data works on a copy, so `data` itself is left unchanged.
data_fe = fe.get_data(data)
data_fe

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Close_SMA10,Close_SMA20,Close_SMA30,Close_EMA10,Close_EMA20,...,Close_long,Close_MACD,Close_SignalLine,Close_7D,Close_1D,Close_Momentum,Close1diff,Close_up,Close_down,Close_RSI
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-11-04 08:00:00,152.00,152.00,151.75,151.92,1666.0,,,,151.920000,151.920000,...,151.920000,0.000000,0.000000,,,,,,,
2021-11-04 08:01:00,151.95,151.96,151.89,151.96,1356.0,,,,151.927273,151.923810,...,151.922963,0.003191,0.000638,,151.92,,0.04,0.04,0.00,
2021-11-04 08:02:00,151.96,151.96,151.94,151.94,1092.0,,,,151.929587,151.925351,...,151.924225,0.004059,0.001322,,151.96,,-0.02,0.00,-0.02,
2021-11-04 08:04:00,151.77,151.77,151.73,151.73,1153.0,,,,151.893298,151.906747,...,151.909838,-0.012059,-0.001354,,151.94,,-0.21,0.00,-0.21,
2021-11-04 08:05:00,151.87,151.87,151.80,151.80,847.0,,,,151.876335,151.896580,...,151.901702,-0.018966,-0.004876,,151.73,,0.07,0.07,0.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-03-03 00:52:00,166.28,166.33,166.28,166.33,1020.0,166.340,166.3450,166.359333,166.323779,166.337186,...,166.336513,-0.007955,0.005171,166.40,166.28,-0.000721,0.05,0.05,0.00,51.162791
2022-03-03 00:56:00,166.33,166.33,166.30,166.30,327.0,166.325,166.3450,166.351333,166.319455,166.333645,...,166.333808,-0.009644,0.002208,166.33,166.33,0.000000,-0.03,0.00,-0.03,42.307692
2022-03-03 00:57:00,166.30,166.35,166.30,166.35,3435.0,166.323,166.3500,166.349333,166.325009,166.335203,...,166.335008,-0.006868,0.000393,166.40,166.30,-0.000601,0.05,0.05,0.00,45.121951
2022-03-03 00:58:00,166.33,166.40,166.33,166.40,1519.0,166.323,166.3515,166.348667,166.338644,166.341374,...,166.339822,-0.000627,0.000189,166.35,166.35,0.000000,0.05,0.05,0.00,47.674419


In [34]:
# Drop rows where any indicator is NaN (e.g. the leading rows shown above,
# before the rolling windows fill). Mutates `data_fe` in place.
data_fe.dropna(inplace=True)
# Persist the feature table; the path is relative to the notebook's working directory.
data_fe.to_csv('../data/aapl.csv')