In [1]:
import warnings
warnings.filterwarnings(action='ignore')
import os
import sys
sys.path.append('.')

import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
from sklearn.preprocessing import StandardScaler
import pickle
from datetime import datetime
import logging
from sklearn.linear_model import LinearRegression

import torch
from torch import nn
from torch.utils.data import Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn.functional as F

import FinanceDataReader as fdr

SEED=10
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)

In [2]:
data_folder = '../data/'

In [3]:
NASDAQ = fdr.StockListing('NASDAQ')
sp500 = fdr.StockListing('S&P500')

100%|██████████████████████████████████████| 4567/4567 [00:07<00:00, 617.52it/s]


In [4]:
sp500

Unnamed: 0,Symbol,Name,Sector,Industry
0,MMM,3M,Industrials,Industrial Conglomerates
1,AOS,A. O. Smith,Industrials,Building Products
2,ABT,Abbott,Health Care,Health Care Equipment
3,ABBV,AbbVie,Health Care,Pharmaceuticals
4,ABMD,Abiomed,Health Care,Health Care Equipment
...,...,...,...,...
500,YUM,Yum! Brands,Consumer Discretionary,Restaurants
501,ZBRA,Zebra,Information Technology,Electronic Equipment & Instruments
502,ZBH,Zimmer Biomet,Health Care,Health Care Equipment
503,ZION,Zions Bancorp,Financials,Regional Banks


In [5]:
import requests
import pandas_market_calendars as mcal

def extract_historical_csvdata(symbol, data):
    api_key = '5fb4dbfbc7da14.06701815'
    url = f'https://eodhistoricaldata.com/api/intraday/{symbol}.US?api_token={api_key}&interval=1m'
    df = pd.read_csv(url).iloc[:, 2:]
    
    if df.empty == False:
        df['Datetime'] = df['Datetime']+"+00:00"
        df = df.set_index('Datetime')

        xkrx = mcal.get_calendar('NASDAQ')
        early = xkrx.schedule(start_date='2021-11-01', end_date='2022-02-24')

        df.index = pd.to_datetime(df.index)
        df = df.resample('1min').first().loc[mcal.date_range(early, frequency='1min')].interpolate(method='linear', limit_direction='both')
        df[symbol] = df['Close'].copy()
        
        data = pd.concat([data, df[[symbol]]], axis=1)
    
    return data

In [8]:
data = pd.DataFrame()

In [14]:
# no 127(CEG), 128+214(NI), 128+214+155(WTW)
for i in tqdm(range(128+214+155, len(sp500['Symbol']))): 
    print(sp500['Symbol'][i])
    code = sp500['Symbol'][i]
    data = extract_historical_csvdata(code, data)[:-1]

  0%|          | 0/8 [00:00<?, ?it/s]

WYNN
XEL
XYL
YUM
ZBRA
ZBH
ZION
ZTS


In [15]:
data

Unnamed: 0,MMM,AOS,ABT,ABBV,ABMD,ACN,ATVI,ADM,ADBE,ADP,...,WHR,WMB,WYNN,XEL,XYL,YUM,ZBRA,ZBH,ZION,ZTS
2021-11-01 13:31:00+00:00,178.43,73.65,129.260,115.360,335.000000,359.600,78.7500,64.320,651.440,224.41,...,211.7650,28.250,89.700,64.28,131.15,125.436,532.80,143.5900,63.77,217.220
2021-11-01 13:32:00+00:00,178.58,73.99,129.245,115.460,334.600000,359.805,79.0600,64.425,651.920,224.15,...,211.7575,28.270,89.550,64.28,131.15,125.540,532.80,143.5900,63.88,217.500
2021-11-01 13:33:00+00:00,178.52,74.04,129.310,115.530,334.795833,359.530,79.3620,64.380,652.670,223.71,...,211.7500,28.300,89.205,64.25,131.83,125.470,533.53,143.9000,64.08,217.340
2021-11-01 13:34:00+00:00,178.62,74.11,129.500,115.630,334.991667,359.000,79.4202,64.340,653.120,223.80,...,211.5600,28.285,89.440,64.28,131.82,125.700,534.16,144.2581,63.93,217.400
2021-11-01 13:35:00+00:00,178.52,74.26,129.310,115.240,335.187500,358.800,79.6600,64.225,651.480,223.15,...,211.9100,28.300,89.980,64.23,131.81,125.620,534.79,144.8400,63.94,216.735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-24 20:55:00+00:00,143.77,67.79,118.850,145.100,306.345000,315.880,80.9600,75.940,463.600,200.34,...,198.0200,29.940,86.620,64.83,87.79,121.660,415.27,123.4650,67.03,191.560
2022-02-24 20:56:00+00:00,143.77,67.79,118.820,145.130,306.800000,315.925,80.9950,75.950,463.880,200.26,...,198.0000,29.945,86.690,64.84,87.79,121.650,415.23,123.5000,67.10,191.260
2022-02-24 20:57:00+00:00,143.47,67.66,118.555,145.045,306.560000,315.260,80.9900,75.830,463.585,199.87,...,197.5700,29.905,86.470,64.77,87.69,121.320,414.46,123.3000,66.91,190.880
2022-02-24 20:58:00+00:00,143.46,67.67,118.540,145.010,306.120000,315.390,80.9900,75.770,463.500,199.83,...,197.7300,29.905,86.520,64.83,87.68,121.340,414.60,123.3500,66.92,190.720


In [16]:
df = data.dropna()
df

Unnamed: 0,MMM,AOS,ABT,ABBV,ABMD,ACN,ATVI,ADM,ADBE,ADP,...,WHR,WMB,WYNN,XEL,XYL,YUM,ZBRA,ZBH,ZION,ZTS
2021-11-01 13:31:00+00:00,178.43,73.65,129.2600,115.360,335.000000,359.600,78.7500,64.320,651.440,224.41,...,211.7650,28.250,89.700,64.28,131.15,125.436,532.80,143.5900,63.77,217.220
2021-11-01 13:32:00+00:00,178.58,73.99,129.2450,115.460,334.600000,359.805,79.0600,64.425,651.920,224.15,...,211.7575,28.270,89.550,64.28,131.15,125.540,532.80,143.5900,63.88,217.500
2021-11-01 13:33:00+00:00,178.52,74.04,129.3100,115.530,334.795833,359.530,79.3620,64.380,652.670,223.71,...,211.7500,28.300,89.205,64.25,131.83,125.470,533.53,143.9000,64.08,217.340
2021-11-01 13:34:00+00:00,178.62,74.11,129.5000,115.630,334.991667,359.000,79.4202,64.340,653.120,223.80,...,211.5600,28.285,89.440,64.28,131.82,125.700,534.16,144.2581,63.93,217.400
2021-11-01 13:35:00+00:00,178.52,74.26,129.3100,115.240,335.187500,358.800,79.6600,64.225,651.480,223.15,...,211.9100,28.300,89.980,64.23,131.81,125.620,534.79,144.8400,63.94,216.735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-24 20:54:00+00:00,143.81,67.69,118.7901,145.100,306.705000,316.110,80.9900,75.920,464.480,200.42,...,198.1000,29.940,86.720,64.83,87.93,121.680,415.56,123.4900,67.08,191.555
2022-02-24 20:55:00+00:00,143.77,67.79,118.8500,145.100,306.345000,315.880,80.9600,75.940,463.600,200.34,...,198.0200,29.940,86.620,64.83,87.79,121.660,415.27,123.4650,67.03,191.560
2022-02-24 20:56:00+00:00,143.77,67.79,118.8200,145.130,306.800000,315.925,80.9950,75.950,463.880,200.26,...,198.0000,29.945,86.690,64.84,87.79,121.650,415.23,123.5000,67.10,191.260
2022-02-24 20:57:00+00:00,143.47,67.66,118.5550,145.045,306.560000,315.260,80.9900,75.830,463.585,199.87,...,197.5700,29.905,86.470,64.77,87.69,121.320,414.46,123.3000,66.91,190.880


In [17]:
df.to_csv(data_folder+'sp500_intra.csv')