<a href="https://colab.research.google.com/github/akessela/deep-learning-code-examples/blob/main/rnn/RNN_intro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
from torch.utils.data import Dataset, DataLoader

# Define the dataset
class TweetDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return torch.tensor(self.texts[idx], dtype=torch.long), torch.tensor(self.labels[idx], dtype=torch.long)

# Sample tweets and labels
tweets = [
    "I love the new movie I watched yesterday! It was fantastic.",
    "Just had a terrible experience at the restaurant. Never going back.",
    "Feeling really happy today! The weather is perfect and everything is going well.",
    "I'm so frustrated with my work right now. Nothing seems to be going right.",
    "Had a wonderful day out with friends. Great food and great company!",
    "I'm disappointed with the new update. It made things worse.",
    "Today is such a beautiful day. I can't wait to go for a walk in the park!",
    "I'm really sad about the news I received today. Things are tough right now.",
    "Just finished a great book. Highly recommend it to anyone who loves thrillers!",
    "Had a stressful day at work, but I'm hopeful things will improve soon."
]
labels = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]  # 1 = Positive, 0 = Negative

# Encode labels
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Vectorize tweets
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(tweets).toarray()

# Prepare DataLoader
dataset = TweetDataset(X, labels)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)


In [14]:
class SimpleRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNN, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        return self.softmax(out)

# Hyperparameters
input_size = X.shape[1]  # Size of input features
hidden_size = 128
output_size = 2  # Positive or Negative
num_epochs = 5
learning_rate = 0.001

# Model, Loss and Optimizer
model = SimpleRNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training Loop
for epoch in range(num_epochs):
    for texts, labels in dataloader:
        texts = texts.float()  # Convert to float type
        outputs = model(texts.unsqueeze(1))  # Add sequence dimension
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/5], Loss: 0.7059
Epoch [2/5], Loss: 0.6463
Epoch [3/5], Loss: 0.6170
Epoch [4/5], Loss: 0.5759
Epoch [5/5], Loss: 0.4952


In [15]:
def predict_sentiment(model, text, vectorizer, device='cpu'):
    model.eval()
    with torch.no_grad():
        # Vectorize the input text
        text_vector = vectorizer.transform([text]).toarray()
        text_tensor = torch.tensor(text_vector, dtype=torch.float32).unsqueeze(1).to(device)
        # Predict
        output = model(text_tensor)
        _, predicted = torch.max(output, 1)
        return 'Positive' if predicted.item() == 1 else 'Negative'

# Example usage
test_text = "I had an amazing day at the park!"
sentiment = predict_sentiment(model, test_text, vectorizer)
print(f'The sentiment of the text is: {sentiment}')


The sentiment of the text is: Positive
