In [None]:
# Install yfinance library
!pip install yfinance

import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import Adam
from sklearn.metrics import f1_score

# Function to scrape historical stock data using yfinance
def yfinance_scrap(start, end, ticker, interval="1h"):
    """Download historical price bars for ``ticker`` through yfinance.

    Args:
        start: Start of the requested range, forwarded to ``yf.download``.
        end: End of the requested range, forwarded to ``yf.download``.
        ticker: Symbol to download, forwarded to ``yf.download``.
        interval: Bar size forwarded to ``yf.download``. Defaults to
            ``"1h"``, the value that was previously hard-coded, so existing
            callers are unaffected.

    Returns:
        Whatever ``yf.download`` returns for the given arguments.
    """
    return yf.download(ticker, start=start, end=end, interval=interval)

# Function to create 'time' column based on index values
def time_count(x, index_dict_base, index_dict):
    """Return the next hourly slot label for key ``x`` and consume one slot.

    The slot is selected by ``index_dict_base[x] - index_dict[x]``: a
    difference of 0 yields '09:30', 1 yields '10:30', and so on up to 6,
    which yields '15:30'. When a label is found, ``index_dict[x]`` is
    decremented in place so the next call for the same key moves on to the
    following slot. For any other difference nothing is modified and
    ``None`` is returned.
    """
    slot_labels = {
        0: '09:30',
        1: '10:30',
        2: '11:30',
        3: '12:30',
        4: '13:30',
        5: '14:30',
        6: '15:30',
    }
    consumed = index_dict_base[x] - index_dict[x]
    label = slot_labels.get(consumed)
    if label is not None:
        # Only a recognised slot advances the per-key counter.
        index_dict[x] -= 1
    return label

# Function to create sequences for time series prediction
def create_sequences(data, time_steps):
    """Build sliding windows and their next-row targets from ``data``.

    ``data`` is accessed through ``.iloc`` and ``.values``. For every start
    position ``s`` in ``range(len(data) - time_steps)`` the window is rows
    ``s .. s + time_steps - 1`` and the target is row ``s + time_steps``.

    Returns:
        A pair ``(X, y)`` of NumPy arrays built from the list of windows and
        the list of targets respectively.
    """
    window_count = len(data) - time_steps
    windows = [
        data.iloc[start:start + time_steps].values
        for start in range(window_count)
    ]
    targets = [
        data.iloc[start + time_steps].values
        for start in range(window_count)
    ]
    return np.array(windows), np.array(targets)

# Default values
ticker = 'AAPL'
PATH = '/content'

# Download data from Yahoo Finance
df_yfinance = yfinance_scrap(start='2022-01-01', end='2022-02-01', ticker=ticker)
if df_yfinance.empty:
    # Fail here with a clear message instead of deep inside the scaler/model.
    raise ValueError(f'No price data was returned for {ticker}; check the ticker and date range.')

# .copy() makes the filtered frame independent, so adding the 'time' column
# below does not write into a slice of the downloaded frame.
# NOTE(review): the 2018-12-31 filter predates the 2022 date range used above
# and looks like a leftover; kept as-is, confirm whether it is still needed.
df_yfinance = df_yfinance[df_yfinance.index != '2018-12-31'].copy()

# Count bars per calendar day. Hourly bars carry a full timestamp, so counting
# the raw index values gives every key a count of 1 and time_count would label
# every row '09:30'. Keying on the date lets each day's bars be numbered
# '09:30', '10:30', ... in order.
index_iter = pd.Series(df_yfinance.index.strftime('%Y-%m-%d'))
index_dict_base = index_iter.value_counts().to_dict()
index_dict = dict(index_dict_base)  # working copy that time_count decrements

# Apply time_count function to create the 'time' column
df_yfinance['time'] = index_iter.apply(lambda x: time_count(x, index_dict_base, index_dict)).tolist()

# Save the DataFrame to a CSV file
df_yfinance.to_csv(f'{ticker}.csv')

# Sentiment analysis data (dummy data, replace it with actual sentiment analysis results)
sentiment_data = pd.DataFrame({
    'Date': df_yfinance.index,
    'Sentiment_Score': np.random.rand(len(df_yfinance))
})

# Merge the two datasets
merged_data = pd.concat([pd.DataFrame(df_yfinance[['Open', 'High', 'Low', 'Close', 'Volume']].values, index=df_yfinance.index,
                                       columns=['Open', 'High', 'Low', 'Close', 'Volume']),
                        pd.DataFrame(sentiment_data['Sentiment_Score'].values, index=sentiment_data['Date'],
                                     columns=['Sentiment_Score'])], axis=1)

# Set time steps
time_steps = 10  # Adjust as needed
test_fraction = 0.2  # Share of the most recent sequences held out for testing

# Work out the chronological train/test boundary before scaling, so the scaler
# can be fitted on training rows only.
n_sequences = len(merged_data) - time_steps
n_test = int(np.ceil(n_sequences * test_fraction))
n_train = n_sequences - n_test
if n_train < 1 or n_test < 1:
    raise ValueError(
        f'Not enough rows ({len(merged_data)}) to build train and test sequences '
        f'with time_steps={time_steps}.'
    )

# Normalize the data. The last training sequence has its target at row
# n_train + time_steps - 1, so rows up to that point are the training rows.
# Fitting on those alone keeps test-period minima/maxima out of the scaling.
scaler = MinMaxScaler()
scaler.fit(merged_data.iloc[:n_train + time_steps])
scaled_data = scaler.transform(merged_data)

# Create sequences for training
X, y = create_sequences(pd.DataFrame(scaled_data, index=merged_data.index),
                        time_steps)

# Split the data into training and testing sets in time order. Consecutive
# windows overlap, so a shuffled split would put test rows inside training
# windows; a chronological split keeps the held-out period unseen.
X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y[:n_train], y[n_train:]

# Build the neural network model
n_features = X_train.shape[2]  # One output per input column
model = Sequential()
model.add(LSTM(units=50, activation='relu', input_shape=(X_train.shape[1], n_features)))
model.add(Dense(units=n_features, activation='linear'))
# MAE is reported instead of MAPE: min-max scaled targets include exact zeros,
# and a percentage error divides by the target, so MAPE blows up on them.
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=2)

# Make predictions
predictions = model.predict(X_test)

# Convert predictions to binary classes based on a threshold (adjust as needed).
# The threshold is applied to the scaled values, not to prices.
threshold = 0.5
binary_predictions = (predictions > threshold).astype(float)
y_test_binary = (y_test > threshold).astype(float)

# Calculate F1-score
f1 = f1_score(y_test_binary.flatten(), binary_predictions.flatten())
print(f'F1-Score: {f1}')

# Print the shape of predictions
print("Shape of predictions:", predictions.shape)

# Map predictions and targets back to the original units
predictions_actual = scaler.inverse_transform(predictions.reshape(-1, n_features))
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, n_features))


[*********************100%%**********************]  1 of 1 completed
Epoch 1/50
4/4 - 2s - loss: 0.2494 - mape: 384622.2188 - val_loss: 0.2020 - val_mape: 146704.7656 - 2s/epoch - 444ms/step
Epoch 2/50
4/4 - 0s - loss: 0.2070 - mape: 512260.5000 - val_loss: 0.1622 - val_mape: 312952.1250 - 57ms/epoch - 14ms/step
Epoch 3/50
4/4 - 0s - loss: 0.1639 - mape: 630809.7500 - val_loss: 0.1214 - val_mape: 522373.6562 - 66ms/epoch - 17ms/step
Epoch 4/50
4/4 - 0s - loss: 0.1200 - mape: 1034598.4375 - val_loss: 0.0808 - val_mape: 771178.0000 - 77ms/epoch - 19ms/step
Epoch 5/50
4/4 - 0s - loss: 0.0787 - mape: 1409376.8750 - val_loss: 0.0475 - val_mape: 1059411.7500 - 58ms/epoch - 15ms/step
Epoch 6/50
4/4 - 0s - loss: 0.0479 - mape: 2240994.7500 - val_loss: 0.0356 - val_mape: 1189683.1250 - 63ms/epoch - 16ms/step
Epoch 7/50
4/4 - 0s - loss: 0.0398 - mape: 2529331.7500 - val_loss: 0.0295 - val_mape: 1082051.5000 - 56ms/epoch - 14ms/step
Epoch 8/50
4/4 - 0s - loss: 0.0345 - mape: 2636963.7500 - val_lo