In [3]:
!pip install yfinance

import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import Adam
from sklearn.metrics import f1_score

def yfinance_scrap(start, end, ticker):
    return yf.download(ticker, start=start, end=end, interval="1h")

def time_count(x, index_dict_base, index_dict):
    times = ['09:30', '10:30', '11:30', '12:30', '13:30', '14:30', '15:30']
    return times[index_dict_base[x] - index_dict[x]]

def create_sequences(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data.iloc[i:(i + time_steps)].values)
        y.append(data.iloc[i + time_steps].values)
    return np.array(X), np.array(y)

ticker = 'AAPL'
PATH = '/content'

df_yfinance = yfinance_scrap(start='2022-01-01', end='2022-02-01', ticker=ticker)
df_yfinance = df_yfinance[df_yfinance.index != '2018-12-31']
index_dict_base = df_yfinance.index.astype(str).value_counts().to_dict()
index_dict = index_dict_base.copy()

index_iter = pd.Series(df_yfinance.index.astype(str))
df_yfinance['time'] = index_iter.apply(lambda x: time_count(x, index_dict_base, index_dict)).tolist()

df_yfinance.to_csv(f'{ticker}.csv')

sentiment_data = pd.DataFrame({
    'Date': df_yfinance.index,
    'Sentiment_Score': np.random.rand(len(df_yfinance))
})

merged_data = pd.concat([pd.DataFrame(df_yfinance[['Open', 'High', 'Low', 'Close', 'Volume']].values, index=df_yfinance.index,
                                       columns=['Open', 'High', 'Low', 'Close', 'Volume']),
                        pd.DataFrame(sentiment_data['Sentiment_Score'].values, index=sentiment_data['Date'],
                                     columns=['Sentiment_Score'])], axis=1)

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(merged_data)

time_steps = 10

X, y = create_sequences(pd.DataFrame(scaled_data, index=merged_data.index),
                        time_steps)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential()
model.add(LSTM(units=50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(units=6, activation='linear'))
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mape'])

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=2)

predictions = model.predict(X_test)

threshold = 0.5
binary_predictions = (predictions > threshold).astype(float)
y_test_binary = (y_test > threshold).astype(float)

f1 = f1_score(y_test_binary.flatten(), binary_predictions.flatten())
print(f'F1-Score: {f1}')

print("Shape of predictions:", predictions.shape)

predictions_actual = scaler.inverse_transform(predictions.reshape(-1, 6))
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 6))


[*********************100%%**********************]  1 of 1 completed
Epoch 1/50
4/4 - 2s - loss: 0.2254 - mape: 752071.7500 - val_loss: 0.1796 - val_mape: 929605.0625 - 2s/epoch - 483ms/step
Epoch 2/50
4/4 - 0s - loss: 0.1929 - mape: 930527.5625 - val_loss: 0.1478 - val_mape: 817691.0000 - 126ms/epoch - 31ms/step
Epoch 3/50
4/4 - 0s - loss: 0.1569 - mape: 1393758.7500 - val_loss: 0.1131 - val_mape: 712750.5625 - 89ms/epoch - 22ms/step
Epoch 4/50
4/4 - 0s - loss: 0.1168 - mape: 1834599.5000 - val_loss: 0.0809 - val_mape: 564287.6250 - 109ms/epoch - 27ms/step
Epoch 5/50
4/4 - 0s - loss: 0.0820 - mape: 2811155.0000 - val_loss: 0.0637 - val_mape: 263609.3438 - 108ms/epoch - 27ms/step
Epoch 6/50
4/4 - 0s - loss: 0.0623 - mape: 3498099.7500 - val_loss: 0.0513 - val_mape: 279618.7812 - 94ms/epoch - 24ms/step
Epoch 7/50
4/4 - 0s - loss: 0.0453 - mape: 3650670.2500 - val_loss: 0.0346 - val_mape: 819931.8750 - 100ms/epoch - 25ms/step
Epoch 8/50
4/4 - 0s - loss: 0.0318 - mape: 3443063.5000 - val_

In [7]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional
from keras.optimizers import Adam
from sklearn.metrics import f1_score

def yfinance_scrap(start, end, ticker):
    return yf.download(ticker, start=start, end=end, interval="1h")

def compute_rsi(data, window=14):
    delta = data.diff()
    up = delta.clip(lower=0)
    down = -1 * delta.clip(upper=0)
    gain = up.rolling(window).mean()
    loss = down.rolling(window).mean()
    RS = gain / loss
    return 100 - (100 / (1 + RS))

ticker = 'AAPL'
df_yfinance = yfinance_scrap(start='2022-01-01', end='2022-02-01', ticker=ticker)
df_yfinance['RSI'] = compute_rsi(df_yfinance['Close'])
df_yfinance = df_yfinance.dropna()

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df_yfinance[['Open', 'High', 'Low', 'Close', 'Volume', 'RSI']])

def create_sequences(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:(i + time_steps), :])
        y.append(data[i + time_steps, :])
    return np.array(X), np.array(y)

time_steps = 10

X, y = create_sequences(scaled_data, time_steps)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential()
model.add(Bidirectional(LSTM(units=50, activation='relu'), input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(units=scaled_data.shape[1], activation='linear'))
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mape'])

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=2)

predictions = model.predict(X_test)

threshold = 0.5
binary_predictions = (predictions > threshold).astype(float)
y_test_binary = (y_test > threshold).astype(float)

f1 = f1_score(y_test_binary.flatten(), binary_predictions.flatten(), average='macro')
print(f'F1-Score: {f1}')

print("Shape of predictions:", predictions.shape)

predictions_actual = scaler.inverse_transform(predictions)
y_test_actual = scaler.inverse_transform(y_test)

np.savetxt(f"{ticker}_predictions.csv", predictions_actual, delimiter=",")
np.savetxt(f"{ticker}_actuals.csv", y_test_actual, delimiter=",")


[*********************100%%**********************]  1 of 1 completed
Epoch 1/50
3/3 - 4s - loss: 0.2804 - mape: 336561.7500 - val_loss: 0.2295 - val_mape: 84.9018 - 4s/epoch - 1s/step
Epoch 2/50
3/3 - 0s - loss: 0.2217 - mape: 722871.6250 - val_loss: 0.1766 - val_mape: 77.2478 - 102ms/epoch - 34ms/step
Epoch 3/50
3/3 - 0s - loss: 0.1677 - mape: 1105445.6250 - val_loss: 0.1297 - val_mape: 72.2297 - 99ms/epoch - 33ms/step
Epoch 4/50
3/3 - 0s - loss: 0.1200 - mape: 1723087.0000 - val_loss: 0.0887 - val_mape: 68.7054 - 98ms/epoch - 33ms/step
Epoch 5/50
3/3 - 0s - loss: 0.0811 - mape: 2597908.0000 - val_loss: 0.0572 - val_mape: 71.0543 - 108ms/epoch - 36ms/step
Epoch 6/50
3/3 - 0s - loss: 0.0504 - mape: 3160710.5000 - val_loss: 0.0426 - val_mape: 78.9707 - 102ms/epoch - 34ms/step
Epoch 7/50
3/3 - 0s - loss: 0.0396 - mape: 3832481.0000 - val_loss: 0.0439 - val_mape: 85.5680 - 120ms/epoch - 40ms/step
Epoch 8/50
3/3 - 0s - loss: 0.0410 - mape: 4226937.5000 - val_loss: 0.0438 - val_mape: 83.44



F1-Score: 0.9305019305019305
Shape of predictions: (24, 6)
