In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Conv1D, Dense, Dropout, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from transformers import TFBertModel
from transformers import BertTokenizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from collections import deque
import random

# Load and preprocess data
from transformers import BertTokenizer

def load_and_preprocess_data(max_length=128, news_path='RedditNews.csv',
                             price_path='upload_DJIA_table.csv'):
    """Load the news and price CSVs and convert them to model-ready arrays.

    The two files are inner-joined on their shared ``Date`` column, the six
    price columns are min-max scaled, and the ``News`` text is tokenized
    with the ``bert-base-uncased`` tokenizer.

    Args:
        max_length: Number of tokens every headline is padded/truncated to.
        news_path: CSV containing at least ``Date`` and ``News`` columns.
        price_path: CSV containing ``Date`` plus the columns ``Open``,
            ``High``, ``Low``, ``Close``, ``Volume`` and ``Adj Close``.

    Returns:
        A tuple ``(scaled_prices, input_ids, attention_mask, scaler)``:
        ``scaled_prices`` has one row per merged record and six columns
        (index 3 is ``Close``); ``input_ids`` and ``attention_mask`` have
        one row per merged record and ``max_length`` columns; ``scaler`` is
        the fitted ``MinMaxScaler`` so predictions can be inverse-transformed.
    """
    news_df = pd.read_csv(news_path)
    price_df = pd.read_csv(price_path)

    # Inner join: only dates present in both files survive.
    # NOTE(review): rows keep the order of the news file; downstream windowing
    # assumes chronological order -- confirm the CSV is sorted that way.
    merged_df = pd.merge(news_df, price_df, on='Date')

    # Column order matters: callers index column 3 as the close price.
    price_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
    scaler = MinMaxScaler()
    scaled_prices = scaler.fit_transform(merged_df[price_columns].values)

    # Empty CSV cells are read as NaN floats, which the tokenizer rejects;
    # coerce every entry to a string so a single missing headline cannot
    # abort the whole run.
    news_texts = merged_df['News'].fillna('').astype(str).tolist()

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    tokenized_news = tokenizer(
        news_texts,
        padding='max_length',
        truncation=True,
        max_length=max_length,
        return_tensors='tf',
    )

    # Convert to NumPy arrays so they can be sliced and split like the prices.
    input_ids = tokenized_news['input_ids'].numpy()
    attention_mask = tokenized_news['attention_mask'].numpy()

    return scaled_prices, input_ids, attention_mask, scaler

# Define model architecture
def create_model(seq_length, num_features, max_length=128):
    """Build and compile the combined news + price regression model.

    The model takes three inputs, in this order: ``input_ids`` and
    ``attention_mask`` (both ``(max_length,)`` int32 token arrays) and
    ``price_input`` (``(seq_length, num_features)``). BERT's per-token output
    is mean-pooled over tokens, the price window is mean-pooled over time
    steps, and the two vectors are concatenated. The result is fed to an
    LSTM as a length-1 sequence, followed by a single linear output unit.

    Args:
        seq_length: Number of time steps in each price window.
        num_features: Number of price features per time step.
        max_length: Token length of the news inputs; must equal the
            ``max_length`` the text was tokenized with.

    Returns:
        A ``tf.keras`` model compiled with the Adam optimizer and MSE loss.
    """
    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name="input_ids")
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name="attention_mask")
    price_input = tf.keras.layers.Input(shape=(seq_length, num_features), name='price_input')

    bert_model = TFBertModel.from_pretrained('bert-base-uncased')
    bert_layer = BertLayer(bert_model)
    bert_output = bert_layer({'input_ids': input_ids, 'attention_mask': attention_mask})
    # Collapse the token axis so the news becomes one vector per sample.
    bert_output = tf.keras.layers.GlobalAveragePooling1D()(bert_output)

    # Collapse the time axis of the price window so it can be concatenated
    # with the pooled BERT vector.
    price_output = tf.keras.layers.GlobalAveragePooling1D()(price_input)

    # Combine price and news features
    combined_input = tf.keras.layers.concatenate([price_output, bert_output])

    # The LSTM expects a sequence axis; both branches were pooled, so the
    # sequence it sees has exactly one step.
    combined_input_expanded = tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=1))(combined_input)

    lstm_out = tf.keras.layers.LSTM(50, return_sequences=False)(combined_input_expanded)
    output = tf.keras.layers.Dense(1, activation='linear')(lstm_out)

    model = tf.keras.models.Model(inputs=[input_ids, attention_mask, price_input], outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

def train_model(model, X_price, input_ids, attention_mask, y, epochs=1, batch_size=32):
    """Compile ``model`` with Adam/MSE and fit it on the given training data.

    Args:
        model: Object exposing Keras-style ``compile`` and ``fit`` methods.
        X_price: Price windows, passed as the third model input.
        input_ids: Token ids, passed as the first model input.
        attention_mask: Attention masks, passed as the second model input.
        y: Regression targets.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        Whatever ``model.fit`` returns (a ``History`` object for Keras
        models), so callers can inspect the training/validation loss.
    """
    model.compile(optimizer='adam', loss='mse')
    # Input order is [input_ids, attention_mask, price]; 20% of the samples
    # are held out by Keras for validation.
    return model.fit(
        [input_ids, attention_mask, X_price],
        y,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
    )
# Reinforcement learning component
class TradingEnvironment:
    """Minimal step-through trading environment over a table of price rows.

    ``data`` is an indexable sequence of rows in which column 3 is the close
    price. The state is the row at ``current_step - 1``. Actions are
    ``0`` = buy, ``1`` = sell, anything else = hold.
    """

    def __init__(self, data, sequence_length):
        self.data = data
        self.sequence_length = sequence_length
        self.current_step = sequence_length

    def reset(self):
        """Rewind to the first playable step and return its state row."""
        self.current_step = self.sequence_length
        return self.data[self.current_step - 1]  # Return only the current state

    def step(self, action):
        """Apply ``action`` to the current state and advance one row.

        Returns:
            ``(next_state, reward, done)`` where ``reward`` is the outcome of
            ``action`` over the move from the current row to ``next_state``
            and ``done`` is True once ``next_state`` is the last row.

        Raises:
            IndexError: If called after the episode has finished.
        """
        if self.current_step >= len(self.data):
            raise IndexError("episode is finished; call reset() before stepping again")

        # The reward must be computed before advancing: it compares the row
        # the agent acted on with the row it lands on. Computing it after
        # the increment would reward the following transition and read past
        # the end of the data on the final step.
        reward = self.calculate_reward(action)

        self.current_step += 1
        done = self.current_step >= len(self.data)
        next_state = self.data[self.current_step - 1]  # Get the next state

        return next_state, reward, done

    def calculate_reward(self, action):
        """Return the reward for ``action`` taken at the current state.

        The reward is the close-price change between the current state row
        and the row after it, signed according to the action. Returns 0 when
        there is no following row to compare against.
        """
        if self.current_step >= len(self.data):
            return 0

        current_price = self.data[self.current_step - 1][3]  # Close price
        next_price = self.data[self.current_step][3]  # Next close price

        if action == 0:  # Buy
            return next_price - current_price
        if action == 1:  # Sell
            return current_price - next_price
        return 0  # Hold

class BertLayer(tf.keras.layers.Layer):
    """Keras layer that wraps a BERT model so it can be used in a functional graph.

    The wrapped model is stored on ``self.bert`` and is invoked with the
    ``input_ids`` and ``attention_mask`` entries of the input dictionary.
    """

    def __init__(self, bert_model):
        super().__init__()
        self.bert = bert_model

    def call(self, inputs):
        """Run the wrapped model and return the first element of its output.

        Args:
            inputs: Mapping with the keys ``'input_ids'`` and
                ``'attention_mask'``.
        """
        bert_outputs = self.bert(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
        )
        # Element 0 of the output is what the caller pools over the token
        # axis (for TFBertModel this is the per-token hidden states).
        return bert_outputs[0]
# DQN Agent
class DQNAgent:
    """Epsilon-greedy deep Q-learning agent with an experience-replay buffer.

    Transitions are stored in a bounded deque (the oldest are dropped once
    2000 are held). Actions are chosen at random with probability
    ``epsilon`` and greedily from the Q-network otherwise; ``epsilon`` is
    multiplied by ``epsilon_decay`` after every replay until it reaches
    ``epsilon_min``.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0   # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (two 24-unit ReLU layers, linear head)."""
        model = tf.keras.Sequential([
            Dense(24, input_shape=(self.state_size,), activation='relu'),
            Dense(24, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Return an action index for ``state`` using the epsilon-greedy rule.

        ``state`` must be shaped as a batch of one, i.e. ``(1, state_size)``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # verbose=0: this runs once per environment step, and the default
        # progress bar would flood the output.
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Train the Q-network on a random mini-batch of stored transitions.

        Does nothing (and does not decay ``epsilon``) while the buffer holds
        fewer than ``batch_size`` transitions. Otherwise the whole batch is
        evaluated with two ``predict`` calls and trained with a single
        ``fit`` call, instead of three Keras calls per transition; this
        means one gradient update per replay rather than one per sample.
        """
        if batch_size <= 0 or len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        # Stored states are (1, state_size); stacking yields (batch, state_size).
        states = np.vstack([transition[0] for transition in minibatch])
        actions = np.array([transition[1] for transition in minibatch], dtype=int)
        rewards = np.array([transition[2] for transition in minibatch], dtype=np.float32)
        next_states = np.vstack([transition[3] for transition in minibatch])
        dones = np.array([transition[4] for transition in minibatch], dtype=bool)

        q_targets = self.model.predict(states, verbose=0)
        next_q = self.model.predict(next_states, verbose=0)

        # Terminal transitions have no future value; all others bootstrap
        # from the best Q-value of the next state.
        bootstrapped = rewards + self.gamma * np.amax(next_q, axis=1)
        # Only the Q-value of the action actually taken is moved; the other
        # outputs keep their current predictions and contribute no error.
        q_targets[np.arange(batch_size), actions] = np.where(dones, rewards, bootstrapped)

        self.model.fit(states, q_targets, batch_size=batch_size, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Main execution
if __name__ == "__main__":
    # Load and preprocess data
    price_data, input_ids, attention_mask, scaler = load_and_preprocess_data()

    # Create sequences for LSTM: window i holds rows [i, i + sequence_length)
    # and its target is the close price (column 3) of the row right after it.
    sequence_length = 10  # You can adjust this value
    X_price_seq = np.array([
        price_data[start:start + sequence_length]
        for start in range(len(price_data) - sequence_length)
    ])
    y = price_data[sequence_length:, 3]

    # Drop the first `sequence_length` news rows so each window is paired
    # with the news of its target row and all arrays have the same length.
    input_ids = input_ids[sequence_length:]
    attention_mask = attention_mask[sequence_length:]

    # Split data
    # NOTE(review): train_test_split shuffles by default, so train and test
    # windows overlap in time -- confirm this is acceptable for evaluation.
    (X_price_train, X_price_test,
     input_ids_train, input_ids_test,
     attention_mask_train, attention_mask_test,
     y_train, y_test) = train_test_split(
        X_price_seq, input_ids, attention_mask, y, test_size=0.2, random_state=42
    )

    print("X_price_train shape:", X_price_train.shape)
    print("input_ids_train shape:", input_ids_train.shape)
    print("attention_mask_train shape:", attention_mask_train.shape)
    print("y_train shape:", y_train.shape)

    # Create and train model
    model = create_model(seq_length=sequence_length, num_features=X_price_train.shape[2])
    train_model(model, X_price_train, input_ids_train, attention_mask_train, y_train)

    # The model expects its inputs as [input_ids, attention_mask, price].
    train_inputs = [input_ids_train, attention_mask_train, X_price_train]
    test_inputs = [input_ids_test, attention_mask_test, X_price_test]

    # Create RL environment
    env = TradingEnvironment(price_data, sequence_length)

    # RL Parameters
    state_size = price_data.shape[1]  # number of features
    action_size = 3  # buy, sell, hold
    batch_size = 32
    n_episodes = 1000

    # Create DQN agent
    agent = DQNAgent(state_size, action_size)

    # The agent's state is a single price row, so each training window is
    # represented to it by its most recent row.
    rl_train_states = X_price_train[:, -1, :]

    # Training loop
    for e in range(n_episodes):
        state = env.reset()
        state = np.reshape(state, [1, state_size])

        for step_idx in range(len(price_data) - sequence_length):
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])

            agent.remember(state, action, reward, next_state, done)
            state = next_state

            if done:
                print(f"Episode: {e}/{n_episodes}, Score: {step_idx}")
                break

            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

        # After each episode, nudge the main model with the agent's value
        # estimates. The Q-network returns one value per action, so it is
        # reduced to the best action's value to match the (n, 1) shape of
        # the price predictions.
        # NOTE(review): averaging a Q-value with a scaled price is a
        # placeholder heuristic kept from the original design; it is only
        # made shape-consistent here and should be revisited.
        q_values = agent.model.predict(rl_train_states, verbose=0)
        rl_predictions = q_values.max(axis=1, keepdims=True)

        # Combine RL predictions with the main model
        combined_predictions = (model.predict(train_inputs, verbose=0) + rl_predictions) / 2

        # Fine-tune the main model
        model.fit(train_inputs, combined_predictions, epochs=1, batch_size=32, verbose=0)

    print("Reinforcement learning training complete.")

    # Final evaluation
    test_predictions = model.predict(test_inputs)
    # Flatten (n, 1) -> (n,): subtracting y_test directly would broadcast to
    # an (n, n) matrix and report a meaningless error.
    mse = np.mean((test_predictions.ravel() - y_test) ** 2)
    print(f"Final Mean Squared Error: {mse}")

X_price_train shape: (39766, 10, 6)
input_ids_train shape: (39766, 128)
attention_mask_train shape: (39766, 128)
y_train shape: (39766,)


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.

995/995 ━━━━━━━━━━━━━━━━━━━━ 0s 2s/step - loss: 0.0127