In [39]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Define the neural network
class SentimentClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(SentimentClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, text):
        embedded = self.embedding(text)
        output, (hidden, cell) = self.rnn(embedded)
        return self.fc(hidden[-1])

# Generate synthetic training data
happy_sentences = ["I'm feeling great!", "This is so much fun!", "I am so happy right now",
                              "I'm overjoyed with happiness!",
                              "Every day is a new chance to smile.",
                              "The sun is shining, and life is beautiful.",
                              "I can't stop laughing with my friends.",
                              "Achieving my goals brings me immense joy.",
                              "Happiness radiates from within.",
                              "Family gatherings fill my heart with happiness.",
                              "Grateful for the little things that make me happy.",
                              "Spending time in nature always lifts my spirits.",
                              "Surrounded by loved ones, I'm truly happy.",]
sad_sentences = ["I'm feeling down and there is some sadness in me.", "This is really sad.", 
                            "Feeling down and unable to shake it off.",
                            "The world seems grey on gloomy days.",
                            "Heartache lingers after saying goodbye.",
                            "Sometimes, sadness creeps in unexpectedly.",
                            "Tears fall like raindrops from cloudy eyes.",
                            "Loneliness echoes in an empty room.",
                            "Missing someone who's no longer here.",
                            "Grief is a heavy burden to carry.",
                            "Dark clouds cast shadows over my thoughts.",
                            "Hurtful words leave scars on the heart.",
                            "I feel terrible.",]

all_sentences = happy_sentences + sad_sentences
all_labels = torch.tensor([0] * len(happy_sentences) + [1] * len(sad_sentences))

# Create a vocabulary and convert sentences to word indices
vocab = set(word for sentence in all_sentences for word in sentence.split())
word_to_idx = {word: idx for idx, word in enumerate(vocab)}

# Convert sentences to sequences of word indices
max_sequence_length = max(len(sentence.split()) for sentence in all_sentences)
train_data = []
for sentence in all_sentences:
    word_indices = [word_to_idx[word] for word in sentence.split()]
    # Pad the sequence to the maximum length
    padded_indices = word_indices + [0] * (max_sequence_length - len(word_indices))
    train_data.append(torch.tensor(padded_indices))

# Convert labels to tensors
train_labels = all_labels

# Convert the list of tensors to a stacked tensor
stacked_train_data = torch.stack(train_data)

# Set hyperparameters
vocab_size = len(vocab)
embedding_dim = 100
hidden_dim = 128
output_dim = 2  # Happy and sad classes

# Create the model
model = SentimentClassifier(vocab_size, embedding_dim, hidden_dim, output_dim)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Create TensorDataset and DataLoader
train_dataset = TensorDataset(stacked_train_data, train_labels)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)




In [40]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    for batch_data, batch_labels in train_loader:
        optimizer.zero_grad()
        
        # Since your data is padded, mask out the padding indices
        mask = (batch_data != 0)  # 0 is assumed to be the padding index
        
        predictions = model(batch_data)
        
        # Apply the mask to predictions and labels
        masked_predictions = torch.masked_select(predictions, mask.unsqueeze(-1))
        masked_labels = torch.masked_select(batch_labels.view(-1, 1), mask)  # Remove the unsqueeze
        
        loss = criterion(masked_predictions.view(-1, output_dim), masked_labels.view(-1))  # Adjust the shapes
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/10], Loss: 0.6800
Epoch [2/10], Loss: 0.6958
Epoch [3/10], Loss: 0.4924
Epoch [4/10], Loss: 0.6913
Epoch [5/10], Loss: 0.7437
Epoch [6/10], Loss: 0.5997
Epoch [7/10], Loss: 0.4016
Epoch [8/10], Loss: 0.7345
Epoch [9/10], Loss: 0.5769
Epoch [10/10], Loss: 0.2698


In [41]:
vocab = set(word for sentence in all_sentences for word in sentence.split())
vocab.add('<UNK>')  # Add the unknown token
word_to_idx = {word: idx for idx, word in enumerate(vocab)}


In [42]:
new_sentences = ["This is a happy sentence.", "I feel sad about this.", "I am gay"]

# Preprocess new sentences
new_data = []
for sentence in new_sentences:
    word_indices = [word_to_idx.get(word, word_to_idx['<UNK>']) for word in sentence.split()]
    padded_indices = word_indices + [0] * (max_sequence_length - len(word_indices))
    new_data.append(torch.tensor(padded_indices))

# Convert the list of tensors to a stacked tensor
stacked_new_data = torch.stack(new_data)

# Make predictions
with torch.no_grad():
    predictions = model(stacked_new_data)

# Interpret predictions
predicted_labels = predictions.argmax(dim=1)

# Map label indices back to sentiment labels
sentiments = ["happy" if label == 0 else "sad" for label in predicted_labels]

# Print results
for sentence, sentiment in zip(new_sentences, sentiments):
    print(f"Sentence: '{sentence}' | Predicted Sentiment: {sentiment}")

Sentence: 'This is a happy sentence.' | Predicted Sentiment: happy
Sentence: 'I feel sad about this.' | Predicted Sentiment: sad
Sentence: 'I am gay' | Predicted Sentiment: sad
