# Neural Networks



COLAB LINK: https://colab.research.google.com/drive/19sDAyPWmMXs0ks-TrHS3KakK4H1Ca0Mv?usp=sharing

We apply a neural network to our Twitter financial news sentiment project.  
Each tweet or headline is represented by a FinancialBERT embedding.  
The neural network takes these embeddings as input and predicts a three-class sentiment label: Neutral, Bullish, or Bearish.


In [None]:
import pandas as pd
import numpy as np

from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import (
    silhouette_score,
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix,
)
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
import plotly.express as px

import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
import pandas as pd

# Read the two embedding CSV files with pandas' Python parsing engine and
# report how many rows each one contains.
# NOTE(review): `train_ok` / `valid_ok` are not referenced by the later cells
# visible in this notebook (those re-read "training_embed.csv") -- confirm
# whether the balanced file was meant to be used for training.
_read_options = {"engine": "python"}

train_ok = pd.read_csv("balanced_training_embed.csv", **_read_options)
print("Training rows:", train_ok.shape[0])

valid_ok = pd.read_csv("valid_embed.csv", **_read_options)
print("Validation rows:", valid_ok.shape[0])


Training rows: 9543
Validation rows: 2388


In [None]:
# Load the data frames the rest of the notebook works from.
# `testing_twitter` is read from the validation file ("valid_embed.csv").
training_twitter = pd.read_csv("training_embed.csv", engine="python")
testing_twitter = pd.read_csv("valid_embed.csv", engine="python")

# Report (rows, columns) for each split.
for _caption, _frame in (
    ("Training shape:", training_twitter),
    ("Validation shape:", testing_twitter),
):
    print(_caption, _frame.shape)

Training shape: (9543, 10)
Validation shape: (2388, 10)


# PCA - Dimensionality Reduction

## Data and feature representation

We use the training and validation splits provided for the Twitter Financial News Sentiment dataset.

- clean_text contains the cleaned tweet or headline.  
- label is the sentiment label for each row.  
- financialBERT_embedding is a string representation of the FinancialBERT embedding for that text.

We will convert the embedding strings into numeric vectors and then reduce the dimensionality with PCA before feeding them to the neural network.



Create training dataframe to apply PCA:

In [None]:
def _parse_embedding(raw):
    """Convert a bracketed, whitespace-separated string into a 1-D float array."""
    return np.array(raw.strip("[]").split(), dtype=float)


# Text and label columns are carried over unchanged from the loaded frame.
training_df = pd.DataFrame(
    {
        "clean_text": training_twitter["clean_text"],
        "label": training_twitter["label"],
    }
)

# Each row stores its embedding as text; parse it into a numeric vector.
training_df["embeddings"] = training_twitter["financialBERT_embedding"].apply(
    _parse_embedding
)

# Stack the per-row vectors into one (rows, embedding_dim) matrix.
training_embeddings = np.vstack(training_df["embeddings"].tolist())
print("Training embeddings shape:", training_embeddings.shape)


Training embeddings shape: (9543, 768)


Create the validation dataframe (named `testing_df` in the code) to apply PCA:

In [None]:
def _embedding_from_text(raw):
    """Parse one stored embedding (values separated by spaces inside brackets)."""
    values = raw.strip("[]").split()
    return np.array(values, dtype=float)


# Start from a copy of the text and label columns of the loaded frame.
testing_df = testing_twitter.loc[:, ["clean_text", "label"]].copy()

# Replace the string form of every embedding with a numeric vector.
embedding_text = testing_twitter["financialBERT_embedding"]
testing_df["embeddings"] = embedding_text.apply(_embedding_from_text)

# Combine all row vectors into a single 2-D array.
testing_embeddings = np.vstack(testing_df["embeddings"].tolist())
print("Validation embeddings shape:", testing_embeddings.shape)


Validation embeddings shape: (2388, 768)


In [None]:
# Reduce dimensionality of embeddings with PCA.
# The projection is fitted on the training embeddings only and then applied
# unchanged to the validation embeddings, so no validation data influences it.
# `random_state` is fixed so that solvers with a random component (e.g. the
# randomized SVD solver) produce the same components on every run.
pca = PCA(n_components=100, random_state=42)
training_reduced_embeddings = pca.fit_transform(training_embeddings)
testing_reduced_embeddings = pca.transform(testing_embeddings)

print("Reduced training PCA embeddings shape:", training_reduced_embeddings.shape)
print("Reduced validation PCA embeddings shape:", testing_reduced_embeddings.shape)

Reduced training PCA embeddings shape: (9543, 100)
Reduced validation PCA embeddings shape: (2388, 100)


# Building Neural Network


## PCA on FinancialBERT embeddings

FinancialBERT embeddings are high dimensional.  
To make training more efficient, we apply PCA and keep 100 principal components.

- training_reduced_embeddings is the result of fitting PCA on the training embeddings and transforming them.  
- We then use the same PCA transformation on the validation embeddings.  

This is meant to keep most of the variance in the data (verify with `pca.explained_variance_ratio_.sum()`) while reducing the number of input features for the neural network from 768 to 100.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Map the labels to consecutive integer class ids for PyTorch. The encoder is
# fitted on the training labels and reused as-is for the validation labels.
label_encoder = LabelEncoder()
training_df["label_encoded"] = label_encoder.fit_transform(training_df["label"])
testing_df["label_encoded"] = label_encoder.transform(testing_df["label"])

print("Classes:", list(label_encoder.classes_))

# Inputs: PCA-reduced embeddings as float32 tensors.
X_train_tensor = torch.tensor(training_reduced_embeddings, dtype=torch.float32)
X_val_tensor = torch.tensor(testing_reduced_embeddings, dtype=torch.float32)

# Targets: encoded class ids as int64 (torch.long) tensors.
y_train_tensor = torch.tensor(
    training_df["label_encoded"].to_numpy(), dtype=torch.long
)
y_val_tensor = torch.tensor(
    testing_df["label_encoded"].to_numpy(), dtype=torch.long
)

# Last expression of the cell: display the four tensor shapes.
(
    X_train_tensor.shape,
    y_train_tensor.shape,
    X_val_tensor.shape,
    y_val_tensor.shape,
)

Classes: [np.int64(0), np.int64(1), np.int64(2)]


(torch.Size([9543, 100]),
 torch.Size([9543]),
 torch.Size([2388, 100]),
 torch.Size([2388]))

## Neural network architecture

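We use a feedforward neural network for multi-class classification: two hidden layers with 128 and 64 units (ReLU activations), followed by a 3-unit output layer that produces one logit per class. We train the model with cross-entropy loss and PyTorch's SGD optimizer, using full-batch updates.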

In [None]:
class SimpleNN(nn.Module):
    """Feedforward classifier: Linear -> ReLU blocks followed by a logit layer.

    Args:
        input_dim: Number of input features per example (default 100).
        hidden_dims: Width of each hidden layer, in order (default (128, 64)).
        num_classes: Number of output logits (default 3).

    With the default arguments the network is
    Linear(100, 128) -> ReLU -> Linear(128, 64) -> ReLU -> Linear(64, 3).
    """

    def __init__(self, input_dim: int = 100, hidden_dims: tuple = (128, 64),
                 num_classes: int = 3):
        super().__init__()
        layers = []
        in_features = input_dim
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        # Final layer emits raw logits; no softmax is applied here.
        layers.append(nn.Linear(in_features, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by the layer stack for input ``x``."""
        return self.net(x)


# Seed PyTorch's RNG *before* the model is constructed: the Linear layers draw
# their initial weights at construction time, so seeding afterwards does not
# make the initialisation (and therefore the results) reproducible.
torch.manual_seed(42)

model = SimpleNN()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)



In [None]:
# Seed PyTorch's global random number generator.
torch.manual_seed(42)

epochs = 100
loss_history = []  # training loss recorded after every update

model.train()
for step in range(1, epochs + 1):
    # Clear gradients left over from the previous update.
    optimizer.zero_grad()

    # Forward pass over the entire training tensor (full-batch update),
    # then backpropagate and apply one optimizer step.
    batch_loss = loss_fn(model(X_train_tensor), y_train_tensor)
    batch_loss.backward()
    optimizer.step()

    current_loss = batch_loss.item()
    loss_history.append(current_loss)

    # Report progress on every 10th epoch.
    if step % 10 == 0:
        print(f"Epoch [{step}/{epochs}], Loss: {current_loss:.4f}")

Epoch [10/100], Loss: 1.0162
Epoch [20/100], Loss: 0.9846
Epoch [30/100], Loss: 0.9581
Epoch [40/100], Loss: 0.9357
Epoch [50/100], Loss: 0.9169
Epoch [60/100], Loss: 0.9013
Epoch [70/100], Loss: 0.8882
Epoch [80/100], Loss: 0.8772
Epoch [90/100], Loss: 0.8679
Epoch [100/100], Loss: 0.8599


## Training procedure and hyperparameters

Training details:

- Optimizer: stochastic gradient descent  
- Learning rate: 0.01  
- Loss function: cross entropy loss  
- Epochs: 100  
- Batch size: full batch (we use all training examples in each update)

To select the learning rate, we tried several values in a small range  
(0.001, 0.01, 0.1) and monitored validation accuracy.  
A learning rate of 0.01 converged reliably and gave better validation performance  
than 0.001, while 0.1 was unstable, so we kept 0.01 for this check-in.

We also chose 100 epochs because the loss curve flattened out by that point and  
additional training did not significantly improve validation accuracy.

COLAB LINK: https://colab.research.google.com/drive/19sDAyPWmMXs0ks-TrHS3KakK4H1Ca0Mv?usp=sharing

# Calculating Performance Metrics on Validation Dataset

In [None]:
# Evaluate the trained model on the validation tensors.
model.eval()
with torch.no_grad():  # inference only; no gradients needed
    val_logits = model(X_val_tensor)
    # Predicted class = index of the largest logit in each row.
    val_preds = torch.argmax(val_logits, dim=1)

# Convert tensors to numpy arrays for sklearn metrics.
# `.cpu()` is a no-op for CPU tensors and keeps this working if the tensors
# are ever moved to another device.
y_val_true = y_val_tensor.cpu().numpy()
y_val_pred = val_preds.cpu().numpy()

# Encoded labels are 0..n_classes-1. Passing them explicitly keeps the report
# and confusion matrix at a fixed size/order even if a class is absent from
# the true or predicted labels.
class_ids = np.arange(len(label_encoder.classes_))

# zero_division=0 yields the same scores as the default but without emitting
# UndefinedMetricWarning when a class is never predicted.
val_accuracy = accuracy_score(y_val_true, y_val_pred)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_val_true,
    y_val_pred,
    labels=class_ids,
    average="weighted",
    zero_division=0,
)

print("Validation accuracy:", val_accuracy)
print("Validation precision (weighted):", precision)
print("Validation recall (weighted):", recall)
print("Validation F1 (weighted):", f1)

print("\nClassification report (validation):")
target_names = [str(c) for c in label_encoder.classes_]
print(
    classification_report(
        y_val_true,
        y_val_pred,
        labels=class_ids,
        target_names=target_names,
        zero_division=0,
    )
)

print("\nConfusion matrix (validation):")
print(confusion_matrix(y_val_true, y_val_pred, labels=class_ids))


Validation accuracy: 0.6557788944723618
Validation precision (weighted): 0.43004595843539306
Validation recall (weighted): 0.6557788944723618
Validation F1 (weighted): 0.5194485324955582

Classification report (validation):
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       347
           1       0.00      0.00      0.00       475
           2       0.66      1.00      0.79      1566

    accuracy                           0.66      2388
   macro avg       0.22      0.33      0.26      2388
weighted avg       0.43      0.66      0.52      2388


Confusion matrix (validation):
[[   0    0  347]
 [   0    0  475]
 [   0    0 1566]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


COLAB LINK: https://colab.research.google.com/drive/19sDAyPWmMXs0ks-TrHS3KakK4H1Ca0Mv?usp=sharing