In [2]:
import yfinance as yf #good for real time analysis and very accurate
from fredapi import Fred
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import datetime as dt
import matplotlib.pyplot as plt

load_dotenv(".env")

True

## Data Preparation

### Data Download

In [3]:
symbol = "GOOG"
stock = yf.Ticker(symbol)
df = yf.download(symbol,period="1mo",interval="1h")
#with open("./info.txt","w") as f:
#    f.write("\n\n".join([news["content"]["summary"] for news in stock.news]))
if not isinstance(df.columns,pd.MultiIndex):
    #normal
    df.columns = pd.Index([col.split(" ")[-1].lower() for col in df.columns])
else:
    #forex
    df.columns = pd.Index([col[0].split(" ")[-1].lower() for col in df.columns])
dataDF = df

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


In [4]:
fred_key = os.environ.get("FRED_API")
fred = Fred(api_key=fred_key)

unemployment = fred.get_series("UNRATE")

print(unemployment.tail())  # Show latest values

2024-10-01    4.1
2024-11-01    4.2
2024-12-01    4.1
2025-01-01    4.0
2025-02-01    4.1
dtype: float64


In [5]:
print(df.head())
print(df.columns)

                                close        high         low        open  \
Datetime                                                                    
2025-02-28 18:30:00+00:00  169.320007  170.289993  168.960007  169.919998   
2025-02-28 19:30:00+00:00  171.014999  171.050003  169.169998  169.320007   
2025-02-28 20:30:00+00:00  172.160004  172.500000  170.369995  171.029999   
2025-03-03 14:30:00+00:00  172.479996  175.000000  170.779999  173.710007   
2025-03-03 15:30:00+00:00  171.979996  172.789993  171.770004  172.479996   

                            volume  
Datetime                            
2025-02-28 18:30:00+00:00        0  
2025-02-28 19:30:00+00:00  1891645  
2025-02-28 20:30:00+00:00  3330257  
2025-03-03 14:30:00+00:00  5575794  
2025-03-03 15:30:00+00:00  1776438  
Index(['close', 'high', 'low', 'open', 'volume'], dtype='object')


In [6]:
def plot_lines(columns):
    fig, ax = plt.subplots(figsize=(20,6))
    minV=float("inf")
    maxV=float("-inf")
    cmap = plt.get_cmap("jet")
    # Create a Line2D object
    for i,col in enumerate(columns):
        color = cmap(i/len(columns))
        line = plt.Line2D(ydata=dataDF[col].to_numpy(), xdata=np.arange(0, dataDF.shape[0]),color=color,label=col)
        minV = min(minV,dataDF[col].min())
        maxV = max(maxV,dataDF[col].max())
        # Add the line to the axis
        ax.add_line(line)

    # Adjust limits
    ax.legend()
    ax.set_ylim(minV, maxV)
    ax.set_xlim(0, dataDF.shape[0])
    # Show plot
    plt.show()

### Moving Average calculation

In [7]:
def calc_moving_average(data,column="close",prevCandles=50):
    data[f'SMA_{prevCandles}'] = data[column].rolling(prevCandles,min_periods=1).mean()
    return data[f'SMA_{prevCandles}']
calc_moving_average(dataDF,prevCandles=50)
calc_moving_average(dataDF,prevCandles=20)

Datetime
2025-02-28 18:30:00+00:00    169.320007
2025-02-28 19:30:00+00:00    170.167503
2025-02-28 20:30:00+00:00    170.831670
2025-03-03 14:30:00+00:00    171.243752
2025-03-03 15:30:00+00:00    171.391000
                                ...    
2025-03-31 13:30:00+00:00    162.659301
2025-03-31 14:30:00+00:00    161.878301
2025-03-31 15:30:00+00:00    161.221801
2025-03-31 16:30:00+00:00    160.558801
2025-03-31 17:30:00+00:00    159.985301
Name: SMA_20, Length: 148, dtype: float64

In [8]:
def calc_exp_average(data,column="close",smoothing=10):
    data[f'EMA_{smoothing}'] = data[column].ewm(span=smoothing,adjust=True).mean()
    return data[f'EMA_{smoothing}'] 
calc_exp_average(dataDF,smoothing=12)
calc_exp_average(dataDF,smoothing=26)

Datetime
2025-02-28 18:30:00+00:00    169.320007
2025-02-28 19:30:00+00:00    170.200099
2025-02-28 20:30:00+00:00    170.904274
2025-03-03 14:30:00+00:00    171.344777
2025-03-03 15:30:00+00:00    171.492087
                                ...    
2025-03-31 13:30:00+00:00    162.400744
2025-03-31 14:30:00+00:00    161.802163
2025-03-31 15:30:00+00:00    161.345700
2025-03-31 16:30:00+00:00    160.906014
2025-03-31 17:30:00+00:00    160.522601
Name: EMA_26, Length: 148, dtype: float64

In [9]:
#plot_lines(["SMA_50","SMA_20","close"])
#plot_lines(["EMA_12","EMA_26","close"])

### rsi calculation

In [10]:
def calculate_rsi(data, period=14):
    delta = data['close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period,min_periods=1).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period,min_periods=1).mean()
    gain = gain.replace(0,1e-5)
    loss = loss.replace(0, 1e-5)
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    data[f'rsi_{period}'] = rsi
    return rsi
calculate_rsi(dataDF)
print(dataDF["rsi_14"])

Datetime
2025-02-28 18:30:00+00:00    50.000000
2025-02-28 19:30:00+00:00    99.998820
2025-02-28 20:30:00+00:00    99.998944
2025-03-03 14:30:00+00:00    99.998734
2025-03-03 15:30:00+00:00    86.338755
                               ...    
2025-03-31 13:30:00+00:00     4.645704
2025-03-31 14:30:00+00:00    12.177161
2025-03-31 15:30:00+00:00    19.572460
2025-03-31 16:30:00+00:00    19.973525
2025-03-31 17:30:00+00:00    22.053570
Name: rsi_14, Length: 148, dtype: float64


### MACD calculation

In [11]:
def calculate_macd(data, short_window=12, long_window=26, signal_window=9):
    if f'EMA_{short_window}' not in data.columns:
        data[f'EMA_{short_window}'] = calc_exp_average(data,short_window)
    if f'EMA_{long_window}' not in data.columns:
        data[f'EMA_{long_window}'] = calc_exp_average(data,long_window)
    data['MACD'] = data[f'EMA_{short_window}'] - data[f'EMA_{long_window}']
    data['Signal'] = calc_exp_average(data,"MACD",signal_window)
    data['Histogram'] = data['MACD'] - data['Signal']
    return data[['MACD', 'Signal', 'Histogram']]
calculate_macd(dataDF)

Unnamed: 0_level_0,MACD,Signal,Histogram
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-02-28 18:30:00+00:00,0.000000,0.000000,0.000000
2025-02-28 19:30:00+00:00,0.038029,0.021127,0.016902
2025-02-28 20:30:00+00:00,0.083963,0.046879,0.037083
2025-03-03 14:30:00+00:00,0.114350,0.069735,0.044615
2025-03-03 15:30:00+00:00,0.108558,0.081284,0.027274
...,...,...,...
2025-03-31 13:30:00+00:00,-3.526979,-2.497183,-1.029797
2025-03-31 14:30:00+00:00,-3.628976,-2.723541,-0.905434
2025-03-31 15:30:00+00:00,-3.562234,-2.891280,-0.670954
2025-03-31 16:30:00+00:00,-3.487696,-3.010563,-0.477133


In [12]:
#plot_lines(["MACD","Signal","Histogram"])

In [13]:
#plot_lines(["rsi_14"])

### volatility calculation

In [14]:
def calculate_volatility(data,window=14):
    data[f"vol_{window}"] = data["close"].rolling(window=window,min_periods=1).std()
    data[f"vol_{window}"]=data[f"vol_{window}"].fillna(1e-6)
    return data[f"vol_{window}"]
calculate_volatility(dataDF)

Datetime
2025-02-28 18:30:00+00:00    0.000001
2025-02-28 19:30:00+00:00    1.198540
2025-02-28 20:30:00+00:00    1.428846
2025-03-03 14:30:00+00:00    1.428395
2025-03-03 15:30:00+00:00    1.280096
                               ...   
2025-03-31 13:30:00+00:00    4.424966
2025-03-31 14:30:00+00:00    4.403503
2025-03-31 15:30:00+00:00    4.160359
2025-03-31 16:30:00+00:00    3.854376
2025-03-31 17:30:00+00:00    3.423272
Name: vol_14, Length: 148, dtype: float64

In [15]:
#plot_lines(["vol_14"])

### bollinger band calculation

In [16]:
def calculate_bollinger_bands(data,window=14):
    if f"SMA_{window}" not in data.columns:
        data[f"SMA_{window}"] = calc_moving_average(data,prevCandles=window)
    data["upr_band"] =data[f"SMA_{window}"] -2*data[f"vol_{window}"] 
    data["lwr_band"] =data[f"SMA_{window}"] +2*data[f"vol_{window}"] 
    data['band_width'] = (data['upr_band'] - data['lwr_band']) / (data[f'SMA_{window}']+1e-6)
    data['%B'] = (data['close'] - data['lwr_band']) / (data['upr_band'] - data['lwr_band']+1e-6)
    return data[["upr_band","lwr_band","band_width","%B"]]
calculate_bollinger_bands(dataDF)

Unnamed: 0_level_0,upr_band,lwr_band,band_width,%B
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-02-28 18:30:00+00:00,169.320005,169.320009,-2.362391e-08,0.666667
2025-02-28 19:30:00+00:00,167.770423,172.564584,-2.817319e-02,0.323223
2025-02-28 20:30:00+00:00,167.973977,173.689363,-3.345624e-02,0.267586
2025-03-03 14:30:00+00:00,168.386961,174.100542,-3.336519e-02,0.283631
2025-03-03 15:30:00+00:00,168.830809,173.951192,-2.987545e-02,0.384971
...,...,...,...,...
2025-03-31 13:30:00+00:00,151.512783,169.212648,-1.103739e-01,0.907502
2025-03-31 14:30:00+00:00,150.735710,168.349721,-1.104031e-01,0.796509
2025-03-31 15:30:00+00:00,150.518426,167.159862,-1.047691e-01,0.692240
2025-03-31 16:30:00+00:00,150.450035,165.867540,-9.748118e-02,0.678290


In [17]:
#plot_lines(["lwr_band","upr_band"])

In [18]:
##plot_lines(["band_width","%B"])

### VWAP calculation

In [19]:
def calculate_VWAP(data):
    data['VWAP'] = (data['close'] * data['volume']).cumsum() / (data['volume'].cumsum()+1e-6)
    return data["VWAP"]
calculate_VWAP(dataDF)

Datetime
2025-02-28 18:30:00+00:00      0.000000
2025-02-28 19:30:00+00:00    171.014999
2025-02-28 20:30:00+00:00    171.745223
2025-03-03 14:30:00+00:00    172.124651
2025-03-03 15:30:00+00:00    172.104214
                                ...    
2025-03-31 13:30:00+00:00    167.020852
2025-03-31 14:30:00+00:00    166.911927
2025-03-31 15:30:00+00:00    166.817644
2025-03-31 16:30:00+00:00    166.757718
2025-03-31 17:30:00+00:00    166.700496
Name: VWAP, Length: 148, dtype: float64

In [20]:
#plot_lines(["VWAP","close"])

In [21]:
def calculate_VPR(data):
    data['VPR'] = data['volume'] / (data['high'] - data['low']+1e-6)
    return data["VPR"]
calculate_VPR(dataDF)
#plot_lines(["VPR"])

Datetime
2025-02-28 18:30:00+00:00    0.000000e+00
2025-02-28 19:30:00+00:00    1.006191e+06
2025-02-28 20:30:00+00:00    1.563497e+06
2025-03-03 14:30:00+00:00    1.321278e+06
2025-03-03 15:30:00+00:00    1.741623e+06
                                 ...     
2025-03-31 13:30:00+00:00    2.271987e+06
2025-03-31 14:30:00+00:00    1.986459e+06
2025-03-31 15:30:00+00:00    1.755080e+06
2025-03-31 16:30:00+00:00    1.483137e+06
2025-03-31 17:30:00+00:00    1.731031e+06
Name: VPR, Length: 148, dtype: float64

In [22]:
print(dataDF.columns)

Index(['close', 'high', 'low', 'open', 'volume', 'SMA_50', 'SMA_20', 'EMA_12',
       'EMA_26', 'rsi_14', 'MACD', 'EMA_9', 'Signal', 'Histogram', 'vol_14',
       'SMA_14', 'upr_band', 'lwr_band', 'band_width', '%B', 'VWAP', 'VPR'],
      dtype='object')


In [23]:
lag_features = ["close","high","low","open","volume"]
for feat in lag_features:
    dataDF[feat+"_lag"] = dataDF[feat].shift(1)

In [24]:
dataDF["prediction"] = dataDF["close"].shift(-1)
dataDF.dropna(inplace=True) #we don't need to do that but the loss of data is neglegible (don't do it for transformer mnodel becuase we don't even use those features)
dataDF["symbol"] =2

In [25]:
columns = list(dataDF.columns)
columns.remove("prediction")
X = np.array(dataDF[columns])
Y = np.array(dataDF["prediction"]).reshape(-1,1)

In [26]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
mmscaler = MinMaxScaler((-1,1))
Y =mmscaler.fit_transform(Y)

In [27]:
np.save("data1.npy",X)
np.save("labels1.npy",Y)

In [28]:
listing = pd.read_csv("listing_status.csv")
symbols = list(listing["symbol"].values)

In [29]:
lag_features = ["close","high","low","open","volume"]
data_all= None
labels_all = None
interval = "5m"
for symbol in symbols:
    try:
        stock = yf.Ticker(symbol) 
        df = stock.history(period="max")
    except:
        continue
    if df.empty or df.shape[0]<100:
        continue
    #with open("./info.txt","w") as f:
    #    f.write("\n\n".join([news["content"]["summary"] for news in stock.news]))
    if not isinstance(df.columns,pd.MultiIndex):
        #normal
        df.columns = pd.Index([col.split(" ")[-1].lower() for col in df.columns])
    else:
        #forex
        df.columns = pd.Index([col[0].split(" ")[-1].lower() for col in df.columns])
    dataDF = df
    columns=lag_features
    calc_moving_average(dataDF,prevCandles=50)
    calc_moving_average(dataDF,prevCandles=20)

    calc_exp_average(dataDF,smoothing=12)
    calc_exp_average(dataDF,smoothing=26)

    calculate_rsi(dataDF)

    calculate_macd(dataDF)

    calculate_volatility(dataDF)

    calculate_bollinger_bands(dataDF)

    calculate_VWAP(dataDF)

    calculate_VPR(dataDF)
    
    pred_features = ["open","high","low","close","volume"]
    for feat in lag_features:
        dataDF[feat+"_lag"] = dataDF[feat].shift(1)
    for feat in pred_features:
        dataDF[feat+"_pred"] = dataDF[feat].shift(-1)
    #dataDF["prediction"] = dataDF["close"].shift(-1)
    dataDF["symbol"] = symbols.index(symbol)
    #dataDF["timestamp"] = np.arange(0,dataDF.shape[0])
    dataDF.dropna(inplace=True) #we don't need to do that for XGB but the loss of data is neglegible
    if dataDF.empty or dataDF.shape[0]<100:
        continue
    columns = list(dataDF.columns)
    
    for feat in pred_features:
        columns.remove(feat+"_pred")
    #columns.remove("prediction")
    if "gains" in columns:
        columns.remove("gains")
    X = np.array(dataDF[columns])
    Y = np.array(dataDF[[feat+"_pred" for feat in pred_features]])
    scaler = StandardScaler()
    X[:,:-1] = scaler.fit_transform(X[:,:-1])
    #Y =scaler.fit_transform(Y)
    if data_all is None:
        data_all=X
        labels_all=Y
        continue
    data_all = np.concat([data_all,X])
    labels_all = np.concat([labels_all,Y])
    print(symbol)
np.save(f"datasc.npy",data_all)
np.save(f"labelssc.npy",labels_all)
data_all

AA
AAA
AAAU
AACG
AACT


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/AACT-U?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=AACT-U&crumb=.zwuFTwEhtt
$AACT-WS: possibly delisted; no timezone found


AADR
AAL
AAM


$AAM-U: possibly delisted; no timezone found
$AAM-WS: possibly delisted; no timezone found


AAME
AAMI
AAOI
AAON
AAP
AAPB
AAPD


$AAPGV: possibly delisted; no price data found  (1d 1926-04-25 -> 2025-03-31) (Yahoo error = "No data found, symbol may be delisted")


AAPL
AAPR
AAPU
AAPX
AAPY
AAT
AAVM
AAXJ
AB
ABAT
ABBV
ABCB
ABCL
ABCS
ABEO
ABEQ
ABEV
ABFL
ABG
ABHY
ABL
ABLD
ABLG
ABLLL


ABLLW: Period 'max' is invalid, must be of the format 1d, 5d, etc.
ABLVW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ABLV
ABM
ABNB
ABNY
ABOS


ABPWW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ABOT
ABR


$ABR-P-D: possibly delisted; no timezone found
$ABR-P-E: possibly delisted; no timezone found
$ABR-P-F: possibly delisted; no timezone found


ABSI


$ABST: possibly delisted; no timezone found


ABT
ABTS
ABUS
ABVC
ABVE


ABVEW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ABVX
AC
ACA
ACAD
ACB
ACCD
ACCO
ACCS
ACDC
ACEL
ACES
ACET
ACGL
ACGLN
ACGLO
ACGR
ACHC
ACHL
ACHR


$ACHR-WS: possibly delisted; no timezone found


ACHV
ACI
ACIC
ACIO
ACIU
ACIW
ACLC
ACLS
ACLX
ACM
ACMR
ACN
ACNB
ACNT


ACONW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ACON
ACP


$ACP-P-A: possibly delisted; no timezone found


ACR


$ACR-P-C: possibly delisted; no timezone found
$ACR-P-D: possibly delisted; no timezone found


ACRE
ACRS
ACRV
ACSI


$ACST: possibly delisted; no timezone found


ACT
ACTG
ACTU
ACTV
ACU
ACV
ACVA
ACVF
ACWI
ACWV
ACWX
ACXP
ADAG


$ADAL: possibly delisted; no timezone found
$ADALU: possibly delisted; no timezone found
$ADALW: possibly delisted; no timezone found


ADAP
ADBE
ADC


$ADC-P-A: possibly delisted; no timezone found


ADCT
ADD
ADEA
ADFI
ADGM
ADI
ADIL
ADIV
ADM
ADMA
ADME
ADN


ADNWW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ADNT


$ADOCR: possibly delisted; no timezone found


ADP
ADPT
ADPV
ADSE


ADSEW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ADSK
ADT
ADTN
ADTX
ADUS
ADV
ADVE
ADVM
ADX
ADXN
AEAE


AEAEW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AEAEU
AEE
AEF
AEFC
AEG


$AEHA: possibly delisted; no timezone found


AEHL
AEHR
AEI
AEIS
AEM
AEMD
AENT


AENTW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AEO
AEON
AEP


$AEPPZ: possibly delisted; no timezone found


AER
AERT


AERTW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AES
AESI
AESR
AETH


AEVAW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AEVA
AEYE
AFB
AFBI
AFCG
AFG
AFGB
AFGC
AFGD
AFGE
AFIF


AFJKR: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AFJK
AFJKU
AFK
AFL
AFLG
AFMC
AFMD
AFRI


AFRIW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AFRM
AFSM


$AFT: possibly delisted; no timezone found


AFYA
AG
AGAE
AGCO
AGD
AGEN


$AGFS: possibly delisted; no timezone found


AGFY
AGG
AGGH
AGGS
AGGY
AGI
AGIH
AGIO
AGIX
AGL
AGM
AGM-A


$AGM-P-D: possibly delisted; no timezone found
$AGM-P-E: possibly delisted; no timezone found
$AGM-P-F: possibly delisted; no timezone found
$AGM-P-G: possibly delisted; no timezone found


AGMH
AGMI
AGNC
AGNCL
AGNCM
AGNCN
AGNCO
AGNCP
AGNG
AGO
AGOV
AGOX
AGQ
AGRH
AGRI
AGRO
AGS


$AGTC: possibly delisted; no timezone found


AGX
AGYS
AGZ
AGZD
AHCO
AHG
AHH


$AHH-P-A: possibly delisted; no timezone found


AHI


$AHL-P-D: possibly delisted; no timezone found
$AHL-P-E: possibly delisted; no timezone found


AHLT
AHR


$AHRNU: possibly delisted; no timezone found


AHT


$AHT-P-D: possibly delisted; no timezone found
$AHT-P-F: possibly delisted; no timezone found
$AHT-P-G: possibly delisted; no timezone found
$AHT-P-H: possibly delisted; no timezone found
$AHT-P-I: possibly delisted; no timezone found


AHYB
AI
AIA
AIBD
AIBU
AIEQ
AIEV


$AIF: possibly delisted; no timezone found


AIFD


AIFER: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AIFF
AIFU
AIG
AIHS
AILE
AIM
AIMAU


AIMAW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AIMBU
AIMD


AIMDW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AIN
AIO
AIOT
AIP
AIPI
AIQ
AIR
AIRE
AIRG
AIRI
AIRJ


AIRJW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AIRL
AIRR
AIRS
AIRT
AIRTP
AISP


AISPW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AIT
AITR


AITRR: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AITRU
AIV
AIVC
AIVI
AIVL
AIXI
AIZ
AIZN
AJAN
AJG
AJUL


$AJXA: possibly delisted; no timezone found


AKA
AKAM
AKAN
AKBA
AKO-A
AKO-B
AKR
AKRO
AKTX


$AKUS: possibly delisted; no timezone found


AKYA
AL
ALAB
ALAI
ALAR
ALB
ALBT
ALC
ALCO
ALCY


ALCYW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ALCYU


ALDFW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ALDFU
ALDX
ALE
ALEC
ALEX
ALF


ALFUW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ALFUU
ALG
ALGM
ALGN
ALGS
ALGT
ALHC
ALIT
ALK
ALKS


$ALKSV: possibly delisted; no timezone found


ALKT
ALL


$ALL-P-B: possibly delisted; no timezone found
$ALL-P-H: possibly delisted; no timezone found
$ALL-P-I: possibly delisted; no timezone found
$ALL-P-J: possibly delisted; no timezone found


ALLE


$ALLG-WS: possibly delisted; no timezone found


ALLK
ALLO
ALLR
ALLT
ALLY
ALMS
ALNT
ALNY


$ALOR: possibly delisted; no timezone found
$ALORU: possibly delisted; no timezone found
$ALORW: possibly delisted; no timezone found
ALPS: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ALOT
ALRM
ALRS
ALSN
ALT
ALTG


$ALTG-P-A: possibly delisted; no timezone found


ALTI


$ALTIW: possibly delisted; no timezone found


ALTL
ALTO
ALTR
ALTS
ALTY
ALUR


$ALUR-WS: possibly delisted; no timezone found


ALV


ALVOW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ALVO
ALX
ALXO
ALYAF
ALZN
AM
AMAL


$AMAO: possibly delisted; no timezone found
$AMAOW: possibly delisted; no timezone found


AMAT
AMAX
AMBA
AMBC
AMBI


$AMBI-WS: possibly delisted; no timezone found


AMBO
AMBP


$AMBP-WS: possibly delisted; no timezone found


AMBR
AMC
AMCR
AMCX
AMD
AMDL
AMDS
AMDY
AME
AMED


$AMEH: possibly delisted; no timezone found


AMG
AMGN
AMH


$AMH-P-G: possibly delisted; no timezone found
$AMH-P-H: possibly delisted; no timezone found


AMID
AMIX
AMJB
AMKR
AMLP
AMLX
AMN
AMOD


AMODW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AMOM
AMP
AMPG
AMPGW
AMPH
AMPL
AMPS
AMPX


$AMPX-WS: possibly delisted; no timezone found


AMPY
AMR
AMRC
AMRK
AMRN


$AMRS: possibly delisted; no timezone found


AMRX
AMS
AMSC
AMSF
AMST
AMT
AMTB
AMTD
AMTM


$AMTM-W: possibly delisted; no timezone found


AMTX
AMUB


$AMV: possibly delisted; no timezone found


AMWD
AMWL
AMX
AMZA
AMZD
AMZE
AMZN
AMZP
AMZU
AMZY
AMZZ
AN
ANAB
ANDE
ANEB
ANET
ANEW
ANF


$ANG-P-B: possibly delisted; no timezone found
ANGHW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ANGH
ANGI
ANGL
ANGO
ANIK
ANIP
ANIX
ANL


ANNAW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ANNA
ANNX
ANRO
ANSC
ANSCU


ANSCW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


ANSS
ANTE
ANTX
ANVS
ANY
AOA
AOCT
AOD
AOHY
AOK
AOM
AOMN
AOMR
AON
AONC


AONCW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


AOR
AORT
AOS
AOSL
AOTG
AOUT
AP


$AP-WS: possibly delisted; no timezone found


APA
APAM
APCB


APCXW: Period 'max' is invalid, must be of the format 1d, 5d, etc.


APCX
APD
APDN
APEI
APG


$APGB: possibly delisted; no timezone found
$APGB-U: possibly delisted; no timezone found
$APGB-WS: possibly delisted; no timezone found


APGE
APH
API
APIE
APLD
APLE
APLM
APLMW
APLS
APLT
APLY


KeyboardInterrupt: 

In [30]:
np.save(f"datasc.npy",data_all)
np.save(f"labelssc.npy",labels_all)
data_all

array([[-5.64806807e-01, -5.72514884e-01, -6.00927468e-01, ...,
        -5.28420457e-01,  2.48398658e+01,  0.00000000e+00],
       [-5.87729415e-01, -5.58547182e-01, -5.97362606e-01, ...,
        -5.64573706e-01,  5.00187424e+00,  0.00000000e+00],
       [-5.70978264e-01, -5.63784979e-01, -5.98253714e-01, ...,
        -5.87500533e-01,  1.37230288e+00,  0.00000000e+00],
       ...,
       [-6.57370208e-01, -6.61984930e-01, -6.52335390e-01, ...,
        -6.57921061e-01, -4.42968520e-02,  5.84000000e+02],
       [-6.57645722e-01, -6.61984930e-01, -6.52147840e-01, ...,
        -6.58349282e-01, -6.03623672e-02,  5.84000000e+02],
       [-6.57737559e-01, -6.61984930e-01, -6.52351019e-01, ...,
        -6.58624568e-01, -5.94510333e-02,  5.84000000e+02]],
      shape=(1623253, 30))