<a href="https://colab.research.google.com/github/agrigoridou/Recurrent-Neural-Networks/blob/main/%CE%92_Recurrent_Neural_Networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Εγκατάσταση Απαιτούμενων Βιβλιοθηκών

In [None]:
!pip install torchtext sklearn pandas tqdm

# 1. Εκτέλεση Αρχικού Κώδικα

Κώδικας για Προετοιμασία Δεδομένων:

In [None]:
import torch
from torchtext import data
from torchtext import datasets

In [None]:
# Field definitions for the dataset columns.
# LABEL holds the class label as a torch.long value.
LABEL = data.LabelField(dtype=torch.long)
# TEXT is sequential, batch-first, fixed to 25 tokens per example, and
# yields a (tokens, lengths) pair because include_lengths is set.
TEXT = data.Field(
    sequential=True,
    fix_length=25,
    batch_first=True,
    include_lengths=True,
)

In [None]:
# Load the AG News dataset as (train, test) splits, processed through the TEXT and LABEL fields.
# NOTE(review): this relies on the legacy torchtext `splits(fields...)` dataset interface —
# confirm that the installed torchtext exposes it for AG_NEWS.
train_data, test_data = datasets.AG_NEWS.splits(TEXT, LABEL)


In [None]:
# Build the label and text vocabularies from the training data.
# The text vocabulary is limited via max_size=20000 and min_freq=10.
LABEL.build_vocab(train_data)
TEXT.build_vocab(
    train_data,
    min_freq=10,
    max_size=20000,
)

In [None]:
# Split the original training set into a training set and a validation set (split_ratio=0.95)
train_data, valid_data = train_data.split(split_ratio=0.95)

In [None]:
# Build the batch iterators for the train / validation / test datasets
from torchtext.data import BucketIterator

train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    # examples are keyed by the length of their text, and each batch is
    # kept sorted by that key
    sort_key=lambda example: len(example.text),
    sort_within_batch=True,
    # use the GPU when one is available, otherwise the CPU
    device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'),
    batch_size=1024,
)

# Κώδικας για το RNN Μοντέλο:

In [None]:
import torch.nn as nn
import torch.optim as optim
import time

class RNN(nn.Module):
    """Unidirectional vanilla-RNN text classifier.

    Token ids are embedded, fed to ``nn.RNN`` as a packed sequence, and the
    final hidden state of the last recurrent layer is projected to logits.

    Args:
        input_dim: Number of rows of the embedding table (vocabulary size).
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the recurrent hidden state.
        output_dim: Number of output logits.
        n_layers: Number of stacked recurrent layers.
        dropout: Dropout probability applied to the embeddings, to the final
            hidden state and, when ``n_layers > 1``, between recurrent layers.
    """

    def __init__(self, input_dim: int, embedding_dim: int, hidden_dim: int,
                 output_dim: int, n_layers: int, dropout: float):
        super().__init__()

        # Word embeddings
        self.embedding = nn.Embedding(input_dim, embedding_dim)

        # Recurrent layer. nn.RNN only applies dropout *between* stacked
        # layers and emits a warning when given a non-zero value with a single
        # layer, so the inter-layer dropout is disabled in that case.
        self.rnn = nn.RNN(
            embedding_dim,
            hidden_dim,
            num_layers=n_layers,
            dropout=dropout if n_layers > 1 else 0.0,
            bidirectional=False,
        )

        # Output projection from the hidden state to the logits
        self.fc = nn.Linear(hidden_dim, output_dim)

        # Dropout regularisation
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        """Return the logits for a batch.

        Args:
            text: Batch-first tensor of token ids, i.e. (batch, seq_len).
            text_lengths: Per-example sequence lengths; moved to the CPU
                before packing. The batch does not need to be sorted.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        embedded = self.dropout(self.embedding(text))
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, text_lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        # Only the final hidden state is needed, so the packed per-step
        # outputs are discarded rather than being padded back out.
        _, hidden = self.rnn(packed_embedded)
        # hidden[-1] is the final state of the last layer
        hidden = self.dropout(hidden[-1])
        return self.fc(hidden)


# Κώδικας Εκπαίδευσης του Μοντέλου:

In [None]:
def train_model(model, iterator, optimizer, criterion):
    """Run one training epoch over ``iterator``.

    Args:
        model: Module called as ``model(text, text_lengths)`` and returning
            one row of class logits per example.
        iterator: Sized iterable of batches exposing ``batch.text`` (a
            ``(tokens, lengths)`` pair) and ``batch.label``.
        optimizer: Optimizer stepping the model parameters.
        criterion: Loss called as ``criterion(predictions, labels)``.

    Returns:
        ``(mean_loss, mean_accuracy)``, each averaged over the batches.
    """
    model.train()
    epoch_loss = 0.0
    epoch_acc = 0.0

    for batch in iterator:
        text, text_lengths = batch.text
        labels = batch.label

        optimizer.zero_grad()
        # Logits are kept as (batch, n_classes); no squeeze, since the class
        # dimension must survive for the loss and the arg-max below.
        predictions = model(text, text_lengths)
        loss = criterion(predictions, labels)
        # Accuracy is computed inline (no external helper is required):
        # fraction of examples whose highest logit matches the label.
        acc = (predictions.argmax(dim=1) == labels).float().mean()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def evaluate_model(model, iterator, criterion):
    """Evaluate ``model`` over ``iterator`` without updating any weights.

    Args:
        model: Module called as ``model(text, text_lengths)`` and returning
            one row of class logits per example.
        iterator: Sized iterable of batches exposing ``batch.text`` (a
            ``(tokens, lengths)`` pair) and ``batch.label``.
        criterion: Loss called as ``criterion(predictions, labels)``.

    Returns:
        ``(mean_loss, mean_accuracy)``, each averaged over the batches.
    """
    model.eval()
    epoch_loss = 0.0
    epoch_acc = 0.0

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for batch in iterator:
            text, text_lengths = batch.text
            labels = batch.label

            # Logits are kept as (batch, n_classes); no squeeze, since the
            # class dimension must survive for the loss and the arg-max.
            predictions = model(text, text_lengths)
            loss = criterion(predictions, labels)
            # Accuracy is computed inline (no external helper is required):
            # fraction of examples whose highest logit matches the label.
            acc = (predictions.argmax(dim=1) == labels).float().mean()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

# Training hyper-parameters
EPOCHS = 15
LEARNING_RATE = 1e-3
BATCH_SIZE = 1024  # NOTE: the BucketIterator call hard-codes batch_size=1024 and does not read this constant
EMBEDDING_DIM = 100
HIDDEN_DIM = 64
OUTPUT_DIM = len(LABEL.vocab)
N_LAYERS = 1
DROPOUT = 0.5

# Same device selection as the one used for the iterators, so that the model
# parameters live on the same device as the batches they receive.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the model and move it to that device
model = RNN(input_dim=len(TEXT.vocab), embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM,
            output_dim=OUTPUT_DIM, n_layers=N_LAYERS, dropout=DROPOUT).to(device)

# Optimizer and loss (created after the move so they see the final parameters)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

# Train the model: one training pass and one validation pass per epoch
for epoch in range(EPOCHS):
    start_time = time.time()

    train_loss, train_acc = train_model(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate_model(model, valid_iterator, criterion)

    end_time = time.time()
    # Elapsed wall-clock time as whole minutes plus leftover whole seconds
    # (computed inline; no separate timing helper is needed).
    epoch_mins, epoch_secs = divmod(int(end_time - start_time), 60)

    print(f'Epoch {epoch+1} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tVal. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')


# 2. Τροποποίηση για Bidirectional RNN

In [None]:
class BiRNN(nn.Module):
    """Bidirectional vanilla-RNN text classifier.

    Token ids are embedded, fed to a bidirectional ``nn.RNN`` as a packed
    sequence, and the final forward and backward hidden states of the last
    layer are concatenated and projected to logits.

    Args:
        input_dim: Number of rows of the embedding table (vocabulary size).
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the hidden state of each direction.
        output_dim: Number of output logits.
        n_layers: Number of stacked recurrent layers.
        dropout: Dropout probability applied to the embeddings, to the final
            hidden state and, when ``n_layers > 1``, between recurrent layers.
    """

    def __init__(self, input_dim: int, embedding_dim: int, hidden_dim: int,
                 output_dim: int, n_layers: int, dropout: float):
        super().__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim)

        # Bidirectional RNN. nn.RNN only applies dropout *between* stacked
        # layers and warns about a non-zero value with a single layer, so the
        # inter-layer dropout is disabled in that case.
        self.rnn = nn.RNN(
            embedding_dim,
            hidden_dim,
            num_layers=n_layers,
            dropout=dropout if n_layers > 1 else 0.0,
            bidirectional=True,
        )

        # The classifier input is twice hidden_dim: one state per direction
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        """Return the logits for a batch.

        Args:
            text: Batch-first tensor of token ids, i.e. (batch, seq_len).
            text_lengths: Per-example sequence lengths; moved to the CPU
                before packing. The batch does not need to be sorted.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        embedded = self.dropout(self.embedding(text))
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, text_lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        # Only the final hidden states are needed; per-step outputs are dropped
        _, hidden = self.rnn(packed_embedded)
        # Concatenate the last layer's forward (hidden[-2]) and backward
        # (hidden[-1]) final states along the feature axis so the result has
        # hidden_dim * 2 features, matching self.fc. Summing them would yield
        # only hidden_dim features and fail in the linear layer.
        hidden = self.dropout(torch.cat((hidden[-2], hidden[-1]), dim=1))
        return self.fc(hidden)


# 3. Διπλό Στρώμα Bidirectional RNN

# 4. Χρήση LSTM Αντί για RNN

In [None]:
class LSTMModel(nn.Module):
    """Unidirectional LSTM text classifier.

    Token ids are embedded, fed to ``nn.LSTM`` as a packed sequence, and the
    final hidden state of the last layer is projected to logits.

    Args:
        input_dim: Number of rows of the embedding table (vocabulary size).
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of output logits.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied to the embeddings, to the final
            hidden state and, when ``n_layers > 1``, between LSTM layers.
    """

    def __init__(self, input_dim: int, embedding_dim: int, hidden_dim: int,
                 output_dim: int, n_layers: int, dropout: float):
        super().__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim)

        # LSTM instead of a plain RNN. nn.LSTM only applies dropout *between*
        # stacked layers and warns about a non-zero value with a single
        # layer, so the inter-layer dropout is disabled in that case.
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            num_layers=n_layers,
            dropout=dropout if n_layers > 1 else 0.0,
            bidirectional=False,
        )

        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        """Return the logits for a batch.

        Args:
            text: Batch-first tensor of token ids, i.e. (batch, seq_len).
            text_lengths: Per-example sequence lengths; moved to the CPU
                before packing. The batch does not need to be sorted.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        embedded = self.dropout(self.embedding(text))
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, text_lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        # Only the final hidden state is used; the per-step outputs and the
        # cell state are discarded.
        _, (hidden, _) = self.lstm(packed_embedded)
        # hidden[-1] is the final hidden state of the last layer
        hidden = self.dropout(hidden[-1])
        return self.fc(hidden)


# 5. Συμπλήρωμα Αποτελεσμάτων