#RL Modified Code
=

In [1]:
#Sentiment Score
import requests
import datetime
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np

In [37]:
# Finnhub API key (replace with your own if needed)
API_KEY = 'cv1bm91r01qhkk81os0gcv1bm91r01qhkk81os10'

def fetch_finnhub_news(symbol="AVGO", days=30, target_date=None):
    """
    Fetch company news for the given symbol from Finnhub over the past `days` days relative to `target_date`.

    Parameters:
      - symbol: Stock ticker symbol (default "AVGO" for Broadcom)
      - days: Number of days in the past to fetch news (default is 30)
      - target_date: The reference date (as a datetime.date object or ISO format string).
                     If None, uses today's date.

    Returns:
      A list of news articles (each is a dictionary with keys like 'headline', 'datetime', etc.)
    """
    # Allow target_date to be a string; if so, convert it to a date object.
    if target_date is None:
        target_date = datetime.date.today()
    elif isinstance(target_date, str):
        try:
            # fromisoformat supports "YYYY-MM-DD" (and extended formats)
            target_date = datetime.datetime.fromisoformat(target_date).date()
        except Exception as e:
            print(f"Error parsing target_date string: {e}. Using today's date instead.")
            target_date = datetime.date.today()

    start_date = (target_date - datetime.timedelta(days=days)).strftime("%Y-%m-%d")
    end_date = target_date.strftime("%Y-%m-%d")

    url = f"https://finnhub.io/api/v1/company-news?symbol={symbol}&from={start_date}&to={end_date}&token={API_KEY}"
    response = requests.get(url)
    if response.status_code == 200:
        news_data = response.json()
        return news_data
    else:
        print(f"Error fetching news: {response.status_code} - {response.text}")
        return []

In [3]:
# -----------------------------
# FinBERT Sentiment Analysis
# -----------------------------

# Load FinBERT model and tokenizer from HuggingFace
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
finbert_model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [4]:
def get_sentiment_score(text):
    """
    Uses FinBERT to compute a sentiment score for the given text.
    Score = (Positive probability - Negative probability), range ~[-1, 1].
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = finbert_model(**inputs)
    scores = torch.softmax(outputs.logits, dim=1)
    # Assuming label order: 0 = negative, 1 = neutral, 2 = positive
    pos_prob = scores[0][2].item()
    neg_prob = scores[0][0].item()
    sentiment = pos_prob - neg_prob
    return sentiment

In [25]:
def compute_sentiment_for_date(target_date_str, symbol="AVGO", decay_rate=0.1):
    """
    Callable parent function that:
      - Accepts a target date string ("YYYY-MM-DD")
      - Fetches news articles for the past 30 days relative to that date
      - Computes FinBERT sentiment scores for each article
      - Applies exponential decay weighting (more recent news gets higher weight)
      - Returns the aggregated weighted sentiment score as an integer.

    Parameters:
      - target_date_str: Date for analysis in "YYYY-MM-DD" format.
      - symbol: Stock ticker symbol (default "AVGO")
      - decay_rate: Exponential decay factor (default 0.1)

    Returns:
      - Aggregated weighted sentiment score (integer)
    """
    # Convert target_date_str to a datetime.date object
    target_date = datetime.datetime.strptime(target_date_str, "%Y-%m-%d").date()

    # Fetch news articles for the past 30 days relative to target_date
    news_articles = fetch_finnhub_news(symbol=symbol, days=30, target_date=target_date)

    if not news_articles:
        print("No news articles found.")
        return 0  # Return neutral sentiment if no news found

    # Use target_date (set at midnight) as the reference for weighting
    reference_datetime = datetime.datetime.combine(target_date, datetime.datetime.min.time())

    weighted_scores = []
    total_weight = 0.0

    for article in news_articles:
        text = article.get('headline', '')
        if not text:
            continue

        sentiment = get_sentiment_score(text)
        article_date = datetime.datetime.fromtimestamp(article['datetime'])
        age_days = (reference_datetime - article_date).days
        weight = np.exp(-decay_rate * age_days)

        weighted_scores.append(sentiment * weight)
        total_weight += weight

    if total_weight > 0:
        aggregated_sentiment = sum(weighted_scores) / total_weight
    else:
        aggregated_sentiment = 0.0

    # Return the aggregated sentiment score as an integer
    return aggregated_sentiment

In [47]:
def train_dqn(
    env,
    agent,
    n_episodes=50,
    max_steps_per_episode=200,
    render=False
):
    rewards_history = []
    for episode in range(n_episodes):
        state = env.reset()
        episode_reward = 0.0

        for step in range(max_steps_per_episode):
            if render:
                env.render()

            action = agent.act(state)
            next_state, reward, done = env.step(action)

            agent.remember(state, action, reward, next_state, float(done))
            agent.replay()

            state = next_state
            episode_reward += reward

            if done:
                break

        rewards_history.append(episode_reward)
        print(f"Episode {episode+1}/{n_episodes}, Reward: {episode_reward:.2f}, Epsilon: {agent.epsilon:.3f}")

    return rewards_history


Sentiment score

In [32]:
def get_sentiment_score(text):
    """
    Uses FinBERT to compute a sentiment score for the given text.
    Score = (Positive probability - Negative probability), range ~[-1, 1].
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = finbert_model(**inputs)
    scores = torch.softmax(outputs.logits, dim=1)
    # Assuming label order: 0 = negative, 1 = neutral, 2 = positive
    pos_prob = scores[0][2].item()
    neg_prob = scores[0][0].item()
    sentiment = pos_prob - neg_prob
    return sentiment


In [43]:
# -----------------------------
# FinBERT Sentiment Analysis
# -----------------------------

# Load FinBERT model and tokenizer from HuggingFace
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
finbert_model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [44]:
def compute_sentiment_for_date(target_date_str, symbol="AVGO", decay_rate=0.1):
    """
    Callable parent function that:
      - Accepts a target date string ("YYYY-MM-DD")
      - Fetches news articles for the past 30 days relative to that date
      - Computes FinBERT sentiment scores for each article
      - Applies exponential decay weighting (more recent news gets higher weight)
      - Returns the aggregated weighted sentiment score as an integer.

    Parameters:
      - target_date_str: Date for analysis in "YYYY-MM-DD" format.
      - symbol: Stock ticker symbol (default "AVGO")
      - decay_rate: Exponential decay factor (default 0.1)

    Returns:
      - Aggregated weighted sentiment score (integer)
    """
    # Convert target_date_str to a datetime.date object
    target_date = datetime.datetime.strptime(target_date_str, "%Y-%m-%d").date()

    # Fetch news articles for the past 30 days relative to target_date
    news_articles = fetch_finnhub_news(symbol=symbol, days=30, target_date=target_date)

    if not news_articles:
        print("No news articles found.")
        return 0  # Return neutral sentiment if no news found

    # Use target_date (set at midnight) as the reference for weighting
    reference_datetime = datetime.datetime.combine(target_date, datetime.datetime.min.time())

    weighted_scores = []
    total_weight = 0.0

    for article in news_articles:
        text = article.get('headline', '')
        if not text:
            continue

        sentiment = get_sentiment_score(text)
        article_date = datetime.datetime.fromtimestamp(article['datetime'])
        age_days = (reference_datetime - article_date).days
        weight = np.exp(-decay_rate * age_days)

        weighted_scores.append(sentiment * weight)
        total_weight += weight

    if total_weight > 0:
        aggregated_sentiment = sum(weighted_scores) / total_weight
    else:
        aggregated_sentiment = 0.0

    # Return the aggregated sentiment score as an integer
    return aggregated_sentiment


In [48]:
from tqdm import tqdm

In [49]:
batch_size = 5
results = []

# Initialize the tqdm progress bar
tqdm.pandas(desc="Processing Batches")

for start in range(0, len(df_up2), batch_size):
    end = start + batch_size
    batch = df_up2.iloc[start:end].copy()

    # Apply the sentiment function with progress bar
    batch['sentiment_score'] = batch['Date'].progress_apply(compute_sentiment_for_date)

    results.append(batch)

# Combine all batches into a single DataFrame
df_combined = pd.concat(results, ignore_index=True)

Processing Batches: 100%|██████████| 5/5 [01:45<00:00, 21.20s/it]
Processing Batches: 100%|██████████| 5/5 [01:21<00:00, 16.36s/it]
Processing Batches: 100%|██████████| 5/5 [01:33<00:00, 18.62s/it]
Processing Batches: 100%|██████████| 4/4 [01:17<00:00, 19.31s/it]


In [50]:
df_combined

Unnamed: 0,Date,Prediction,Close,Volume,sentiment_score
0,2025-02-03,202.994052,217.73,24507274,0.257384
1,2025-02-04,202.348254,222.43,23768517,0.287317
2,2025-02-05,201.735105,232.0,29250582,0.315834
3,2025-02-06,201.167285,231.36,21169529,0.339451
4,2025-02-07,200.657562,224.87,22117468,0.366326
5,2025-02-10,200.30112,235.04,23901643,0.374501
6,2025-02-11,200.119439,235.04,18476200,0.398041
7,2025-02-12,200.021911,236.35,16397336,0.435163
8,2025-02-13,200.019767,235.8,20962031,0.420092
9,2025-02-14,200.125522,233.04,16988777,0.44018


In [53]:
df_combined = df_combined[['Prediction', 'Close','Volume','sentiment_score']]
df_combined

Unnamed: 0,Prediction,Close,Volume,sentiment_score
0,202.994052,217.73,24507274,0.257384
1,202.348254,222.43,23768517,0.287317
2,201.735105,232.0,29250582,0.315834
3,201.167285,231.36,21169529,0.339451
4,200.657562,224.87,22117468,0.366326
5,200.30112,235.04,23901643,0.374501
6,200.119439,235.04,18476200,0.398041
7,200.021911,236.35,16397336,0.435163
8,200.019767,235.8,20962031,0.420092
9,200.125522,233.04,16988777,0.44018


In [51]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from collections import deque


2) Custom Environment with Weighted States
We create a class StockTradingEnv that:

Assumes data of shape
(
𝑁
,
4
)
(N,4) with columns:
[ close_price, lstm_price, sentiment_score, volume ]
[ close_price, lstm_price, sentiment_score, volume ]
Three possible actions:
0
=
Sell (short)
,

1
=
Hold
,

2
=
Buy (long)
0=Sell (short),1=Hold,2=Buy (long)
Reward each step:
reward
=
(
price
𝑡
+
1
−
price
𝑡
)
×
position
𝑡

−

transaction_cost if changed position
reward=(price
t+1
​
 −price
t
​
 )×position
t
​
 −transaction_cost if changed position
A weighting vector for features. For example:
feature_weights
=
[
0.7
,
0.7
,
1.5
,
1.5
]
feature_weights=[0.7,0.7,1.5,1.5]
meaning we down‐weight close_price and lstm_price (0.7 each), while up‐weighting sentiment_score and volume (1.5 each).
We’ll apply these weights in _get_observation() so that the neural network sees a transformed version of the state.

In [54]:
class StockTradingEnv:
    """
    A simplified environment for RL with data columns:
      [close_price, lstm_price, sentiment_score, volume].
    Weighted features are fed into the agent’s state.

    Actions: 0=Sell/Short, 1=Hold, 2=Buy/Long
    Reward: (price_{t+1} - price_t) * position - transaction_cost_if_changed
    """
    def __init__(self, data,
                 feature_weights=None,
                 transaction_cost=1.0):
        """
        data: np.array of shape [N, 4]
        feature_weights: list/array of length 4
                         e.g. [0.7, 0.7, 1.5, 1.5]
                         for weighting [close_price, lstm_price, sentiment, volume].
        transaction_cost: penalty for changing position (overtrading).
        """
        self.data = data
        self.n_steps = len(data)
        self.transaction_cost = transaction_cost

        # If no feature_weights provided, default to 1.0 for each feature
        if feature_weights is None:
            feature_weights = [0.7, 0.7, 1.5, 1.5]
        self.feature_weights = np.array(feature_weights, dtype=np.float32)

        self.current_step = 0
        self.position = 0        # -1=short, 0=flat, +1=long
        self.prev_position = 0
        self.total_reward = 0.0

    def reset(self):
        """
        Resets environment to the start (day 0).
        Returns the first weighted state.
        """
        self.current_step = 0
        self.position = 0
        self.prev_position = 0
        self.total_reward = 0.0
        return self._get_observation()

    def step(self, action):
        """
        action: int in {0,1,2} -> short, hold, long
        Returns: (next_state, reward, done)
        """
        # Convert action to position
        if action == 0:
            self.position = -1
        elif action == 1:
            self.position = 0
        elif action == 2:
            self.position = 1

        # Get current day's close price
        current_price = self.data[self.current_step, 0]

        # Next day index
        next_step = self.current_step + 1
        done = False

        if next_step >= self.n_steps:
            # No future price available, end the episode
            next_price = current_price
            done = True
        else:
            next_price = self.data[next_step, 0]

        # Reward: daily PnL
        reward = (next_price - current_price) * self.position

        # Subtract transaction cost if the position changed
        if self.position != self.prev_position:
            reward -= self.transaction_cost

        self.prev_position = self.position
        self.total_reward += reward

        self.current_step = next_step
        next_state = self._get_observation()

        return next_state, reward, done

    def _get_observation(self):
        """
        Returns the weighted state for the current day.
        The raw features are: [close_price, lstm_price, sentiment, volume].
        We multiply each by its corresponding weight.
        """
        if self.current_step >= self.n_steps:
            # Out of data range; return zeros
            return np.zeros(4, dtype=np.float32)

        raw_features = self.data[self.current_step]  # shape (4,)
        weighted = raw_features * self.feature_weights  # element-wise multiplication
        return weighted.astype(np.float32)


*italicized text*Key Points
We store feature_weights as an array. For instance:
python
Copy
Edit
feature_weights = [0.7, 0.7, 1.5, 1.5]
means we’re down‐weighting close_price and lstm_price (multiplying by 0.7 each) and up‐weighting sentiment_score and volume (1.5 each).
The rest of the environment logic (position, reward, done, etc.) is similar to previous examples.

3) Replay Memory
A standard replay buffer storing (state, action, reward, next_state, done):

In [55]:
class ReplayMemory:
    def __init__(self, capacity=1000):
        self.capacity = capacity
        self.memory = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        batch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        return (np.array(states),
                np.array(actions),
                np.array(rewards, dtype=np.float32),
                np.array(next_states),
                np.array(dones, dtype=np.float32))

    def __len__(self):
        return len(self.memory)


4) DQN Model
We’ll build a simple feedforward network with 2 hidden layers. It outputs Q‐values for 3 actions (Sell, Hold, Buy).

In [56]:
class DQN(nn.Module):
    def __init__(self, state_size, action_size, hidden_size=64):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, action_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

state_size = 4 (after weighting, but the dimension is still 4).
action_size = 3.
hidden_size = 64 is arbitrary; you can experiment.

5) DQN Training Function
We do epsilon‐greedy exploration, store transitions in replay memory, sample and train the network with a standard Bellman update.

In [57]:
def train_dqn(env,
              num_episodes=50,
              batch_size=16,
              gamma=0.99,
              lr=1e-3,
              epsilon_start=1.0,
              epsilon_end=0.01,
              epsilon_decay=0.95,
              target_update=5):
    """
    env: StockTradingEnv instance with 4 features
    num_episodes: how many times we loop through the data
    batch_size: replay sample size
    gamma: discount factor
    lr: learning rate
    epsilon_*: for epsilon-greedy
    target_update: how often to update the target_net
    """

    state_size = 4
    action_size = 3

    policy_net = DQN(state_size, action_size)
    target_net = DQN(state_size, action_size)
    target_net.load_state_dict(policy_net.state_dict())
    target_net.eval()

    optimizer = optim.Adam(policy_net.parameters(), lr=lr)
    memory = ReplayMemory(capacity=2000)

    epsilon = epsilon_start

    for episode in range(num_episodes):
        state = env.reset()
        done = False

        while not done:
            # Epsilon-greedy policy
            if random.random() < epsilon:
                action = random.randint(0, action_size - 1)
            else:
                state_t = torch.FloatTensor(state).unsqueeze(0)
                q_values = policy_net(state_t)  # shape [1, 3]
                action = torch.argmax(q_values, dim=1).item()

            next_state, reward, done = env.step(action)

            # Store in replay
            memory.push(state, action, reward, next_state, done)

            state = next_state

            # If enough samples, do a training step
            if len(memory) > batch_size:
                (states_b, actions_b, rewards_b,
                 next_states_b, dones_b) = memory.sample(batch_size)

                states_t = torch.FloatTensor(states_b)
                actions_t = torch.LongTensor(actions_b)
                rewards_t = torch.FloatTensor(rewards_b)
                next_states_t = torch.FloatTensor(next_states_b)
                dones_t = torch.FloatTensor(dones_b)

                # Current Q
                q_values_current = policy_net(states_t)
                # Gather the Q-value of the chosen action
                q_values_current = q_values_current.gather(1, actions_t.unsqueeze(1)).squeeze(1)

                # Next Q from target net
                with torch.no_grad():
                    q_values_next = target_net(next_states_t).max(1)[0]

                q_targets = rewards_t + gamma * q_values_next * (1 - dones_t)

                loss = F.mse_loss(q_values_current, q_targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # Decay epsilon after each episode
        epsilon = max(epsilon_end, epsilon_decay * epsilon)

        # Update target network
        if episode % target_update == 0:
            target_net.load_state_dict(policy_net.state_dict())

        print(f"Episode {episode+1}/{num_episodes} - "
              f"TotalReward: {env.total_reward:.2f}, Eps: {epsilon:.2f}")

    return policy_net, target_net

6) Putting It All Together + Next‐Day Prediction
Below is a sample “main” section. We’ll simulate some random data with 4 columns, instantiate the environment with weighting, train the DQN, and then use the trained policy_net to predict an action for a new day’s data.

In [60]:
df_combined

Unnamed: 0,Prediction,Close,Volume,sentiment_score
0,202.994052,217.73,24507274,0.257384
1,202.348254,222.43,23768517,0.287317
2,201.735105,232.0,29250582,0.315834
3,201.167285,231.36,21169529,0.339451
4,200.657562,224.87,22117468,0.366326
5,200.30112,235.04,23901643,0.374501
6,200.119439,235.04,18476200,0.398041
7,200.021911,236.35,16397336,0.435163
8,200.019767,235.8,20962031,0.420092
9,200.125522,233.04,16988777,0.44018


In [63]:
if __name__ == "__main__":
    import pandas as pd

    # ---------------------------------------
    # STEP 1: Load or have your full dataframe (20 rows)
    # Example structure: columns = [close_price, lstm_price, sentiment_score, volume]
    # df_combined = pd.read_csv("some_file.csv")  # or however you get the data
    # For demonstration, let's create a small example DataFrame:
    df_combined = df_combined
    df_combined['Volume'] = df_combined['Volume'].str.replace(',', '').astype(float)


    print(df_combined)

    # ---------------------------------------
    # STEP 2: Convert to NumPy and do train/test split
    #   Train: first 16 rows
    #   Test: last 4 rows
    # ---------------------------------------
    data_all = df_combined[['Prediction', 'Close', 'Volume', 'sentiment_score']].values


    train_data = data_all[:15]  # shape (16, 4)
    train_data = df_combined[['Prediction','Close','Volume','sentiment_score']].values.astype(np.float32)
    test_data = data_all[15:]   # shape (4, 4)

    print("\nTrain data shape:", train_data.shape)
    print("Test data shape:", test_data.shape)

    # ---------------------------------------
    # STEP 3: Create environment with train_data
    # ---------------------------------------
    # We'll give more weight to sentiment & volume, less to close_price & lstm_price
    feature_weights = [0.7, 0.7, 1.5, 1.5]
    env = StockTradingEnv(data=train_data,
                          feature_weights=feature_weights,
                          transaction_cost=2.0)

    # ---------------------------------------
    # STEP 4: Train DQN on the 16-point environment
    # ---------------------------------------
    policy_net, target_net = train_dqn(
        env,
        num_episodes=30,
        batch_size=8,    # smaller batch since only 16 data points
        gamma=0.95,
        lr=1e-3,
        epsilon_start=1.0,
        epsilon_end=0.01,
        epsilon_decay=0.90,
        target_update=5
    )

    # ---------------------------------------
    # STEP 5: Test (Inference) on the 4 unseen points
    #         We'll do day-by-day predictions
    # ---------------------------------------
    print("\n=== Testing on new 4 data points ===")
    action_map = {0: "Sell/Short", 1: "Hold", 2: "Buy/Long"}

    # For each test row, treat it as "next day" features
    for i in range(len(test_data)):
        next_day_features = test_data[i]  # shape (4,)
        # Weighted
        weighted_features = next_day_features * np.array(feature_weights, dtype=np.float32)

        state_t = torch.FloatTensor(weighted_features).unsqueeze(0)
        q_values = policy_net(state_t)  # shape [1, 3]
        action_idx = torch.argmax(q_values, dim=1).item()

        print(f"\nTest Point #{i+1}: Raw features = {next_day_features}")
        print(f"Weighted features = {weighted_features}")
        print(f"Q-values = {q_values.detach().numpy()}")
        print(f"Chosen Action => {action_map[action_idx]}")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_combined['Volume'] = df_combined['Volume'].str.replace(',', '').astype(float)


    Prediction   Close      Volume  sentiment_score
0   202.994052  217.73  24507274.0         0.257384
1   202.348254  222.43  23768517.0         0.287317
2   201.735105  232.00  29250582.0         0.315834
3   201.167285  231.36  21169529.0         0.339451
4   200.657562  224.87  22117468.0         0.366326
5   200.301120  235.04  23901643.0         0.374501
6   200.119439  235.04  18476200.0         0.398041
7   200.021911  236.35  16397336.0         0.435163
8   200.019767  235.80  20962031.0         0.420092
9   200.125522  233.04  16988777.0         0.440180
10  200.353183  228.53  26080547.0         0.422643
11  200.719018  228.73  16375487.0         0.410543
12  201.348053  226.74  16103996.0         0.414964
13  201.971813  218.66  24883556.0         0.433570
14  202.768887  207.93  33050425.0         0.473532
15  203.566415  202.54  33100814.0         0.457013
16  204.441781  212.94  21368302.0         0.452661
17  205.582047  197.80  27572887.0         0.429969
18  206.8887