In [232]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.metrics import classification_report, precision_score, recall_score, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, TimeSeriesSplit, cross_val_score, cross_val_predict, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression, Lasso, SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.feature_selection import SelectKBest, SelectFromModel
import ta
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.feature_selection import f_classif, chi2, SelectPercentile, SelectFpr, SelectFdr, SelectFwe, GenericUnivariateSelect, RFE
from sklearn.decomposition import PCA
from ta.momentum import RSIIndicator, StochasticOscillator, WilliamsRIndicator, UltimateOscillator, StochRSIIndicator
from ta.trend import CCIIndicator, ADXIndicator, MACD
from ta.volatility import BollingerBands
import tensorflow
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

### 特徵工程：添加相關技術指標

In [233]:
def indicators(df):
    # SMA
    df['SMA_5'] = ta.trend.sma_indicator(df['Adj Close'], window=5)
    df['SMA_20'] = ta.trend.sma_indicator(df['Adj Close'], window=20)
    df['SMA_60'] = ta.trend.sma_indicator(df['Adj Close'], window=60)

    # EMA
    df['EMA_5'] = ta.trend.ema_indicator(df['Adj Close'], window=5)
    df['EMA_20'] = ta.trend.ema_indicator(df['Adj Close'], window=20)
    df['EMA_60'] = ta.trend.ema_indicator(df['Adj Close'], window=60)

    # MACD
    df['MACD'] = ta.trend.macd(df['Adj Close'])
    df['MACD_Signal'] = ta.trend.macd_signal(df['Adj Close'])
    df['MACD_Diff'] = ta.trend.macd_diff(df['Adj Close'])

    # RSI
    df['RSI'] = ta.momentum.rsi(df['Adj Close'], window=14)

    # Stochastic Oscillator
    df['Stochastic_%K'] = ta.momentum.stoch(df['Adj Close'], df['Adj Close'], df['Adj Close'], window=14, smooth_window=3)
    df['Stochastic_%D'] = ta.momentum.stoch_signal(df['Adj Close'], df['Adj Close'], df['Adj Close'], window=14, smooth_window=3)

    # Bollinger Bands
    bollinger = ta.volatility.BollingerBands(close=df['Adj Close'], window=20, window_dev=2)
    df['Bollinger_High'] = bollinger.bollinger_hband()
    df['Bollinger_Low'] = bollinger.bollinger_lband()

    # ATR
    df['ATR'] = ta.volatility.average_true_range(high=df['Adj Close'], low=df['Adj Close'], close=df['Adj Close'], window=14)

    # StochRSI
    df['StochRSI'] = ta.momentum.stochrsi(df['Adj Close'], window=14, smooth1=3, smooth2=3)

    # CCI
    df['CCI'] = ta.trend.cci(df['Adj Close'], df['Adj Close'], df['Adj Close'], window=20)

    # ADX
    df['ADX'] = ta.trend.adx(df['Adj Close'], df['Adj Close'], df['Adj Close'], window=14)

    # AO
    df['AO'] = ta.momentum.awesome_oscillator(df['Adj Close'], df['Adj Close'])

    # Momentum
    df['Momentum'] = ta.momentum.roc(df['Adj Close'], window=10)  # 使用 Rate of Change 作為動量指標

    # Bull Bear Power
    df['Bull_Power'] = df['Adj Close'] - df['EMA_20']
    df['Bear_Power'] = df['Adj Close'] - df['EMA_20']

    # Target
    chg_pct = df['Adj Close'].pct_change()
    df['Target'] = pd.cut(chg_pct, bins=[-np.inf, chg_pct.quantile(0.5), np.inf], labels=[0, 1])


def indicators_signals(df):
    def cross_signal(line_1, line_2):
        signal = np.where((line_1.shift(1) <= line_2.shift(1)) & (line_1 > line_2), 1,
                  np.where((line_1.shift(1) >= line_2.shift(1)) & (line_1 < line_2), -1, 0))
        return signal

    def get_over_signal(line, upper_lim, lower_lim, extra):
        signal = np.where(line > upper_lim + extra, -2,
                  np.where(line > upper_lim, -1,
                   np.where(line < lower_lim - extra, 2,
                    np.where(line < lower_lim, 1, 0))))
        return signal

    df['Signal_SMA'] = cross_signal(df['Adj Close'], df['SMA_20'])
    df['Signal_EMA'] = cross_signal(df['Adj Close'], df['EMA_20'])
    df['Signal_MACD'] = cross_signal(df['MACD'], df['MACD_Signal'])
    df['Signal_Stochastic'] = cross_signal(df['Stochastic_%K'], df['Stochastic_%D'])

    df['Signal_Bollinger'] = get_over_signal(df['Adj Close'], df['Bollinger_High'], df['Bollinger_Low'], extra=0)
    df['Signal_RSI'] = get_over_signal(df['RSI'], 70, 30, extra=10)
    df['Signal_StochRSI'] = get_over_signal(df['StochRSI'], 0.8, 0.2, extra=0.1)
    df['Signal_CCI'] = get_over_signal(df['CCI'], 100, -100, extra=0)


def integrate_signals(signals):
    X = df[signals].iloc[:-1]
    y = df['Target'].iloc[1:]

    X_scaled = StandardScaler().fit_transform(X)

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_scaled, y)

    df['integrative_signal'] = 0
    for i in range(len(signals)):
        df['integrative_signal'] += df[signals[i]] * model.feature_importances_[i]


def diff_feat(df):
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    for col in numeric_columns:
        df[f'diff_{col}'] = df[col].diff()

In [234]:
df = yf.download("AAPL", period='10y')

indicators(df)
indicators_signals(df)

signals = ['Signal_SMA',
           'Signal_EMA',
           'Signal_MACD',
           'Signal_Stochastic',
           'Signal_Bollinger',
           'Signal_RSI',
           'Signal_StochRSI',
           'Signal_CCI']

integrate_signals(signals)

df.dropna(inplace=True)
df.drop_duplicates(inplace=True)

[*********************100%%**********************]  1 of 1 completed


### 特徵工程 | 特徵篩選方法

In [235]:
def split_X_y(df, window):
    # scale X
    X_df_scaled = pd.DataFrame(StandardScaler().fit_transform(df.copy()), index=df.index, columns=df.columns)

    # split X
    X = []
    for i in range(len(X_df_scaled)-window):
        X.append(X_df_scaled.iloc[i:i+window])
        
    # split y
    y = []
    for i in range(len(df)-window):
        y.append(df['Target'].iloc[i+window])

    return X, y

In [236]:
def get_feature_scores(X, y):
    feature_names = X.columns

    selector = SelectKBest(k=10)
    selector.fit(X, y)

    features_score = {}
    for i in range(len(selector.scores_)):
        features_score[feature_names[i]] = selector.scores_[i]

    return features_score


def get_X_best_by_models(X, y, n_features, *models) -> pd.DataFrame:
    X_scaled = StandardScaler().fit_transform(X)
    selected_features = []

    for model in models:
        selector = SelectFromModel(model, max_features=n_features).fit(X_scaled, y)
        selected_features.append(selector.get_support())

    combined_support = [all(features) for features in zip(*selected_features)]
    X_best = X.loc[:, combined_support]
    
    return X_best


def get_X_best_by_chi2(X, y, n_features):
    X_scaled = pd.DataFrame()
    
    for i in range(len(X.iloc[0])):
        X_scaled[X.columns[i]] = (X.iloc[:,i] - min(X.iloc[:,i])) / (max(X.iloc[:,i]) - min(X.iloc[:,i]))
        
    X_best = SelectKBest(chi2, k=n_features).fit_transform(X_scaled, y)
    
    return X_best


def get_X_best_by_RFE(X, y, n_features, model):
    X_scaled = StandardScaler().fit_transform(X)
    X_best = RFE(estimator=model, n_features_to_select=n_features).fit_transform(X_scaled, y)

    return X_best

In [237]:
X, y = split_X_y(df, 30)

# split to train test
train_size = round(len(X) * 0.7)
val_size = round(len(X) * 0.2)

X_train, X_val, X_test = X[: train_size], X[train_size : train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[: train_size], y[train_size : train_size+val_size], y[train_size+val_size:]


# turn to nparray
X_train, X_val, X_test, y_train, y_val, y_test = np.array(X_train), np.array(X_val), np.array(X_test), np.array(y_train), np.array(y_val), np.array(y_test)

### Deep model

In [260]:
# create model
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=X_train.shape[1:]))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(256, return_sequences=False))
model.add(Dense(64))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# 训练模型，添加验证数据和早停回调
model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_val, y_val), callbacks=[early_stopping])

Epoch 1/50


  super().__init__(**kwargs)


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 36ms/step - accuracy: 0.4661 - loss: 0.7082 - val_accuracy: 0.4722 - val_loss: 0.7013
Epoch 2/50
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - accuracy: 0.5149 - loss: 0.6946 - val_accuracy: 0.4804 - val_loss: 0.6994
Epoch 3/50
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.5026 - loss: 0.6952 - val_accuracy: 0.4742 - val_loss: 0.6986
Epoch 4/50
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.5197 - loss: 0.6936 - val_accuracy: 0.5010 - val_loss: 0.6952
Epoch 5/50
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.5239 - loss: 0.6937 - val_accuracy: 0.4701 - val_loss: 0.7024
Epoch 6/50
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - accuracy: 0.5141 - loss: 0.6933 - val_accuracy: 0.4990 - val_loss: 0.6967
Epoch 7/50
[1m107/107[0m [32m━

<keras.src.callbacks.history.History at 0x21c12632060>

In [261]:
y_pred = model.predict(X_test)
y_pred

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 58ms/step


array([[0.48152503],
       [0.48084208],
       [0.48012623],
       [0.4791479 ],
       [0.4791236 ],
       [0.47984847],
       [0.48171622],
       [0.48194277],
       [0.4818938 ],
       [0.48095003],
       [0.4808054 ],
       [0.48173484],
       [0.48380145],
       [0.48750243],
       [0.49221325],
       [0.49655953],
       [0.4997634 ],
       [0.50183034],
       [0.5037124 ],
       [0.5056441 ],
       [0.50702965],
       [0.5080364 ],
       [0.5088564 ],
       [0.50988805],
       [0.5119778 ],
       [0.5148541 ],
       [0.5168309 ],
       [0.51718843],
       [0.5167243 ],
       [0.5173885 ],
       [0.5196705 ],
       [0.52301896],
       [0.52744704],
       [0.53176916],
       [0.53511536],
       [0.5378668 ],
       [0.53982425],
       [0.54163265],
       [0.54293215],
       [0.54364055],
       [0.54354286],
       [0.54319376],
       [0.54309404],
       [0.5422536 ],
       [0.5408406 ],
       [0.53912807],
       [0.537289  ],
       [0.536

In [264]:
y_pred = model.predict(X_test)
y_pred_copy = y_pred.copy()
for i in range(len(y_pred)):
    if y_pred_copy[i] > 0.5:
        y_pred_copy[i] = 1
    else:
        y_pred_copy[i] = 0

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [265]:
classification_report(y_test, y_pred_copy)

'              precision    recall  f1-score   support\n\n           0       0.53      0.32      0.40       119\n           1       0.53      0.73      0.61       124\n\n    accuracy                           0.53       243\n   macro avg       0.53      0.52      0.50       243\nweighted avg       0.53      0.53      0.51       243\n'

In [251]:
y_test

array([0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0,
       1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1], dtype=int64)

In [144]:
# scaler operation
X = 2
y = 3
X * y

# vector operation
X = np.array([1,2,3,4,5,6,7,8,9,10])
y = np.array([11,12,13,14,15,16,17,18,19,20])
s = 20
X * s
X + s # this's not define in math
X + y
X * y

# multidimensional vector operation
X = np.array([[10],[20],[30],[40],[50]])
M = np.array([[.73,.2]])
d = 3
M@X

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 5 is different from 2)

In [154]:
e = np.random.binomial(1, 0.8, size=5)
e

array([77, 82, 80, 84, 83])

In [195]:
def forward_example():
    X = np.random.rand(10, 2)
    hidden = [X.copy()]
    
    # Hidden Layer_1
    w = np.random.rand(2, 32)
    b = np.random.rand(32) * 10
    X = X @ w + b
    X = np.maximum(X, 0)
    
    hidden.append(X)
    X.shape # (10, 32)
    
    # # Hidden Layer_2
    w1 = np.random.rand(32, 64)
    b1 = np.random.rand(64) * 10
    X = X @ w1 + b1
    X = np.maximum(X, 0)
    
    hidden.append(X)
    X.shape # (10, 64)
    
    # Output Layer
    w2 = np.random.rand(64, 1)
    b2 = np.random.rand(1)
    X = X @ w2 + b2
    X = np.maximum(X, 0) 
    
    hidden.append(X)
    X.shape # (10, 1)

    return hidden, X

In [196]:
forward_example()

([array([[0.27675664, 0.70824088],
         [0.51404935, 0.83016196],
         [0.37230938, 0.59982877],
         [0.19858599, 0.34836767],
         [0.78584395, 0.18868442],
         [0.88002403, 0.9641547 ],
         [0.32535482, 0.84536914],
         [0.83820274, 0.88114091],
         [0.67261908, 0.9450551 ],
         [0.71349911, 0.12421998]]),
  array([[ 4.595563  ,  2.63378906,  7.47357117,  2.01918273,  1.55762269,
           3.02074613,  3.25141835,  7.51415376,  1.70035681,  0.8716078 ,
           2.25839683,  2.44194186,  8.18477316,  3.71959427,  6.31905017,
          10.4119626 ,  1.61376359,  8.39134826,  9.22339511,  5.24892293,
           8.47299202,  7.87353338,  7.99771306,  5.8619031 ,  8.91367316,
           2.88039026,  3.715981  ,  4.66065655,  3.98018236,  0.51736907,
           6.32937018,  0.92160933],
         [ 4.84612207,  2.88123101,  7.53445049,  2.09064389,  1.62323365,
           3.1396767 ,  3.35150154,  7.80165122,  1.89598973,  1.04894376,
           

In [272]:
def forward_example(X, layer):
    hidden = [X]
    for i in range(len(layer)):
        X = X @ layer[i][0] + layer[i][1]
        
        if i != len(layer)-1:
            X = np.maximum(X, 0)
            
        hidden.append(X)
        
    return X, hidden
    
error = y_true - y_pred
loss = sum((error) ** 2) / len(y_ture)

def backward_example(error):
    # C -> a
    # a -> z
    # z -> w, b
    
    # calculate loss 
    for i in range(len(layer)-1, -1, -1):

        a = 2error
        
        if i != len(layer)-1:
            z = # 解掉RuLU，一個神秘的公式
            
        w = hidden[i-1]
        C '     

SyntaxError: invalid syntax (1928247164.py, line 21)

In [217]:
X = np.array([np.random.rand(10) * 30, np.random.rand(10) * 50]) # (2, 10)
y = np.array(np.random.binomial(1, 0.5, size=(2, 10)))

layer = [[np.random.rand(10, 32), np.random.rand(32) * 10], # (10, 32)
         [np.random.rand(32, 64), np.random.rand(64) * 10], # (10, 64)
         [np.random.rand(64, 1), np.random.rand(1) * 10]] # (10, 1)

output, hidden = forward_example(X, layer)

(array([[44308.35108051],
        [82265.17588531]]),
 [array([[18.18284576, 26.37324629, 19.07937764, 17.88511866, 16.72019913,
          10.35033647,  7.79210421,  1.47868067, 18.8875305 , 19.98212476],
         [ 7.77043737, 41.93989679, 29.39181769, 42.0361618 , 29.85694238,
          12.81898089, 23.90686023, 27.95970505, 45.45781512, 36.45025159]]),
  array([[ 96.01946386,  97.48063121,  75.27142496,  64.59735247,
           88.26255888,  55.13994325,  62.86412437,  69.53983531,
           86.39602884, 114.70237806,  60.80560147, 111.9011127 ,
          112.06619035,  79.81486044,  69.24201902,  93.13049363,
           71.44386341,  63.12327333,  81.96493729,  66.94880128,
           72.03436226,  68.45400241,  70.86088724,  91.99360619,
           73.36221404,  79.99019239,  78.63077765, 104.19972601,
           71.52201441,  74.69242149,  66.36041365,  81.82381449],
         [174.10449241, 187.38299134, 145.57652085, 124.20472107,
          144.75516651,  98.12323946, 104.67181

In [271]:
y = np.array(np.random.binomial(1, 0.5, size=(2, 10)))
y

array([[1, 1, 1, 0, 1, 1, 0, 0, 0, 1],
       [1, 0, 1, 1, 1, 0, 1, 0, 1, 1]])

In [228]:
    grad = grad @ self.__layers[i][0].T


5
4
4
3
3
2
2
1
1
0
0
