In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn

In [2]:
# mengatur perangkat untuk digunakan
# jika GPU tersedia, gunakan GPU, jika tidak, gunakan CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Mengubah gambar menjadi tensor
transform = transforms.ToTensor()

**Note:** Berfungsi untuk mengubah data gambar menjadi bentuk sequnce.

In [None]:
# menentukan hyperparameter
input_size = 28           
sequence_length = 28      
hidden_size = 130         
num_layers = 2            
num_classes = 26          # jumlah huruf (A-Z)
learning_rate = 0.001     
batch_size = 200          
num_epochs = 10           

In [5]:
# Download EMNIST 'letters' subset
train_dataset = torchvision.datasets.EMNIST(root='./data',
                                            split='letters',
                                            train=True,
                                            download=True,
                                            transform=transforms.ToTensor())

test_dataset = torchvision.datasets.EMNIST(root='./data',
                                           split='letters',
                                           train=False,
                                           download=True,
                                           transform=transforms.ToTensor())

In [6]:
# proses pengelompokan data
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

# LSTM

In [7]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        
        # Layer LSTM dengan batch_first=True berarti input berbentuk (batch, seq, feature)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        
        # Layer fully connected untuk mengubah output LSTM menjadi prediksi kelas
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Proses LSTM, mengabaikan hidden state (_, _)
        out, _ = self.lstm(x)
        
        # Ambil output dari sequence terakhir dan klasifikasikan
        out = self.fc(out[:, -1, :])
        return out

**Note:**

** Inisialisasi model LSTM untuk klasifikasi huruf.**
        
        Args:
            input_size: ukuran input per timestep (28 piksel per baris)
            hidden_size: jumlah unit hidden dalam LSTM
            num_layers: jumlah layer LSTM yang ditumpuk
            num_classes: jumlah kelas output (26 huruf A-Z)


**Forward pass model.**
        
        Args:
            x: input tensor dengan bentuk (batch_size, sequence_length, input_size)
        
        Returns:
            tensor output dengan bentuk (batch_size, num_class

In [8]:
model = LSTMModel(input_size, hidden_size, num_layers, num_classes).to(device)

In [9]:
# mengukur seberapa jauh hasil prediksi model dari label yang seharusnya
criterion = nn.CrossEntropyLoss()

# menagtur dan melakukanperubahan pada bobot di dalam model berdasarkan loss pada langkah sebelumnya
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 

In [10]:
# Proses pelatihan model
total_step = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # mengubah bentuk data menjadi (batch_size, sequence_length, input_size)
        images = images.squeeze(1).to(device)  
        labels = labels.to(device)
        labels = labels - 1  # menyesuaikan label ke rentang dari 0-25
        
        # Forward pass - lakukan prediksi
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward dan optimisasi - update bobot model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}')
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Step [100/624], Loss: 1.5526
Epoch [1/10], Step [200/624], Loss: 0.9493
Epoch [1/10], Step [300/624], Loss: 0.5976
Epoch [1/10], Step [400/624], Loss: 0.6800
Epoch [1/10], Step [500/624], Loss: 0.6289
Epoch [1/10], Step [600/624], Loss: 0.5845
Epoch [1/10], Loss: 0.4652
Epoch [2/10], Step [100/624], Loss: 0.4356
Epoch [2/10], Step [200/624], Loss: 0.3397
Epoch [2/10], Step [300/624], Loss: 0.3643
Epoch [2/10], Step [400/624], Loss: 0.3284
Epoch [2/10], Step [500/624], Loss: 0.4011
Epoch [2/10], Step [600/624], Loss: 0.2830
Epoch [2/10], Loss: 0.2921
Epoch [3/10], Step [100/624], Loss: 0.2963
Epoch [3/10], Step [200/624], Loss: 0.2827
Epoch [3/10], Step [300/624], Loss: 0.4354
Epoch [3/10], Step [400/624], Loss: 0.2626
Epoch [3/10], Step [500/624], Loss: 0.3485
Epoch [3/10], Step [600/624], Loss: 0.3106
Epoch [3/10], Loss: 0.2037
Epoch [4/10], Step [100/624], Loss: 0.2527
Epoch [4/10], Step [200/624], Loss: 0.3568
Epoch [4/10], Step [300/624], Loss: 0.2303
Epoch [4/10], St

**Note:** 

Fugsi code "images.squeeze(1).to(device)" yaitu mengubah data kedalam bentuk sequence, dimana LSTM membutuhkan inputan dalam bentuk (batch_size, sequence_length, input_size). penjelasan detailnya yaitu awal mula bentuk inputan yaitu 

[batch_size, 1, 28, 28] dirubah menjadi [batch_size, 28, 28].

In [11]:
# Set model to evaluation mode
model.eval()

# Initialize variables for accuracy calculation
n_correct = 0
n_samples = 0

# No need to track gradients during testing
with torch.no_grad():
    for images, labels in test_loader:
        # Reshape and move data to device
        images = images.squeeze(1).to(device)
        labels = labels.to(device)
        labels = labels - 1  # Adjust labels to 0-25 range
        
        # Forward pass
        outputs = model(images)
        
        # Get predictions
        _, predicted = torch.max(outputs.data, 1)
        
        # Update counts
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

# Calculate accuracy
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the model on the test images: {acc:.2f}%')

Accuracy of the model on the test images: 92.84%


# RNN

In [24]:
input_size_rnn = 28           
sequence_length_rnn = 28      
hidden_size_rnn = 130         
num_layers_rnn = 2            
num_classes_rnn = 26          # jumlah huruf (A-Z)
learning_rate_rnn = 0.001     
batch_size_rnn = 200          
num_epochs_rnn= 5   

In [25]:
class RNN(nn.Module):
    def __init__(self, input_size_rnn, hidden_size_rnn, num_layers_rnn, _rnn):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size_rnn
        self.num_layers = num_layers_rnn
        
        self.rnn = nn.RNN(input_size_rnn, hidden_size_rnn, num_layers_rnn, batch_first=True)
        self.fc = nn.Linear(hidden_size_rnn, num_classes_rnn)

    def forward(self, x):
        # Menetapkan kondisi awal hidden state
        # x: (n, 28, 28), h0: (2, n, 128)
        # dimana n adalah ukuran batch, 28x28 adalah ukuran gambar, dan 128 adalah hidden size
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        # Melakukan propagasi maju pada RNN
        # out: (n, 28, 128)
        # output berukuran batch_size x sequence_length x hidden_size
        out, _ = self.rnn(x, h0)
        
        # Mengambil hidden state dari langkah terakhir
        # out: (n, 128)
        # mengambil hasil terakhir dari sequence
        out = out[:, -1, :]
         
        # out: (n, 10)
        # melakukan klasifikasi menggunakan fully connected layer
        out = self.fc(out)
        return out


In [26]:
model_rnn = RNN(input_size_rnn, hidden_size_rnn, num_layers_rnn, num_classes_rnn).to(device)

In [27]:
# # mengukur seberapa jauh hasil prediksi model dari label yang seharusnya
# criterion = nn.CrossEntropyLoss()

# menagtur dan melakukanperubahan pada bobot di dalam model berdasarkan loss pada langkah sebelumnya
optimizer_rnn = torch.optim.Adam(model_rnn.parameters(), lr=learning_rate)  

In [28]:
# Melatih model
n_total_steps_rnn = len(train_loader)
for epoch_rnn in range(num_epochs_rnn):
    for i, (images_rnn, labels_rnn) in enumerate(train_loader):  
        # Mengubah bentuk gambar dari [N, 1, 28, 28] menjadi [N, 28, 28]
        # agar sesuai dengan input RNN
        images_rnn = images_rnn.reshape(-1, sequence_length_rnn, input_size_rnn).to(device)
        labels_rnn = labels_rnn.to(device)
        labels_rnn = labels_rnn - 1 
        
        # Melakukan prediksi dengan model
        outputs_rnn = model_rnn(images_rnn)
        loss_rnn = criterion(outputs_rnn, labels_rnn)
        
        # Memperbarui bobot model
        optimizer_rnn.zero_grad()
        loss_rnn.backward()
        optimizer_rnn.step()
        
        # Menampilkan hasil pelatihan setiap 100 langkah
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch_rnn+1}/{num_epochs_rnn}], Step [{i+1}/{n_total_steps_rnn}], Loss: {loss_rnn.item():.4f}')

Epoch [1/5], Step [100/624], Loss: 2.0267
Epoch [1/5], Step [200/624], Loss: 1.7708
Epoch [1/5], Step [300/624], Loss: 1.6549
Epoch [1/5], Step [400/624], Loss: 1.3486
Epoch [1/5], Step [500/624], Loss: 1.1391
Epoch [1/5], Step [600/624], Loss: 1.0421
Epoch [2/5], Step [100/624], Loss: 0.9473
Epoch [2/5], Step [200/624], Loss: 0.8995
Epoch [2/5], Step [300/624], Loss: 0.8069
Epoch [2/5], Step [400/624], Loss: 0.8134
Epoch [2/5], Step [500/624], Loss: 0.7090
Epoch [2/5], Step [600/624], Loss: 0.7328
Epoch [3/5], Step [100/624], Loss: 0.6717
Epoch [3/5], Step [200/624], Loss: 0.6390
Epoch [3/5], Step [300/624], Loss: 0.7511
Epoch [3/5], Step [400/624], Loss: 0.5626
Epoch [3/5], Step [500/624], Loss: 0.5801
Epoch [3/5], Step [600/624], Loss: 0.5822
Epoch [4/5], Step [100/624], Loss: 0.6321
Epoch [4/5], Step [200/624], Loss: 0.4542
Epoch [4/5], Step [300/624], Loss: 0.5587
Epoch [4/5], Step [400/624], Loss: 0.5644
Epoch [4/5], Step [500/624], Loss: 0.6599
Epoch [4/5], Step [600/624], Loss:

In [29]:
# Test model
# Dalam fase testing,  tidak perlu menghitung gradien (untuk menghemat memori)
with torch.no_grad():

    # menghitung jumlah prediksi yang benar dan menghitung total sampel yang diuji
    n_correct_rnn = 0  
    n_samples_rnn = 0  

    for images_rnn, labels_rnn in test_loader:
        # Mengubah bentuk gambar agar sesuai dengan input RNN
        images_rnn = images_rnn.reshape(-1, sequence_length_rnn, input_size_rnn).to(device)
        labels_rnn = labels_rnn.to(device)
        labels_rnn = labels_rnn - 1 
        
        # Melakukan prediksi dengan model
        outputs_rnn = model_rnn(images_rnn)
        
        # Mengambil nilai prediksi tertinggi sebagai hasil prediksi
        _, predicted_rnn = torch.max(outputs_rnn.data, 1)
        
        # Menghitung total sampel dan prediksi yang benar
        n_samples_rnn += labels_rnn.size(0)
        n_correct_rnn += (predicted_rnn == labels_rnn).sum().item()

    # Menghitung akurasi dalam persen
    acc = 100.0 * n_correct_rnn / n_samples_rnn
    print(f'Akurasi model pada 10000 gambar test: {acc} %')


Akurasi model pada 10000 gambar test: 84.98076923076923 %


# GRU

In [30]:
class GRUU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(GRUU, self).__init__()
        
        # Layer GRU dengan batch_first=True berarti input berbentuk (batch, seq, feature)
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        
        # Layer fully connected untuk mengubah output GRU menjadi prediksi kelas
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Proses GRU, mengabaikan hidden state (_, _)
        out, _ = self.gru(x)
        
        # Ambil output dari sequence terakhir dan klasifikasikan
        out = self.fc(out[:, -1, :])
        return out

In [32]:
model_gru = LSTMModel(input_size, hidden_size, num_layers, num_classes).to(device)

In [33]:
# menagtur dan melakukanperubahan pada bobot di dalam model berdasarkan loss pada langkah sebelumnya
optimizer_gru = torch.optim.Adam(model_gru.parameters(), lr=learning_rate) 

In [34]:
# Proses pelatihan model
total_step_gru = len(train_loader)

for epoch_gru in range(num_epochs):
    for i, (images_gru, labels_gru) in enumerate(train_loader):
        # mengubah bentuk data menjadi (batch_size, sequence_length, input_size)
        images_gru = images_gru.squeeze(1).to(device)  
        labels_gru = labels_gru.to(device)
        labels_gru = labels_gru - 1  # menyesuaikan label ke rentang dari 0-25
        
        # Forward pass - lakukan prediksi
        outputs_gru = model_gru(images_gru)
        loss_gru = criterion(outputs_gru, labels_gru)
        
        # Backward dan optimisasi - update bobot model
        optimizer_gru.zero_grad()
        loss_gru.backward()
        optimizer_gru.step()
        
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch_gru+1}/{num_epochs}], Step [{i+1}/{total_step_gru}], Loss: {loss_gru.item():.4f}')
    
    print(f'Epoch [{epoch_gru+1}/{num_epochs}], Loss: {loss_gru.item():.4f}')


Epoch [1/10], Step [100/624], Loss: 1.7655
Epoch [1/10], Step [200/624], Loss: 1.1046
Epoch [1/10], Step [300/624], Loss: 0.7458
Epoch [1/10], Step [400/624], Loss: 0.7427
Epoch [1/10], Step [500/624], Loss: 0.5491
Epoch [1/10], Step [600/624], Loss: 0.4781
Epoch [1/10], Loss: 0.3915
Epoch [2/10], Step [100/624], Loss: 0.5722
Epoch [2/10], Step [200/624], Loss: 0.3852
Epoch [2/10], Step [300/624], Loss: 0.4179
Epoch [2/10], Step [400/624], Loss: 0.3614
Epoch [2/10], Step [500/624], Loss: 0.2472
Epoch [2/10], Step [600/624], Loss: 0.4895
Epoch [2/10], Loss: 0.3234
Epoch [3/10], Step [100/624], Loss: 0.2880
Epoch [3/10], Step [200/624], Loss: 0.3080
Epoch [3/10], Step [300/624], Loss: 0.2358
Epoch [3/10], Step [400/624], Loss: 0.2892
Epoch [3/10], Step [500/624], Loss: 0.2910
Epoch [3/10], Step [600/624], Loss: 0.2136
Epoch [3/10], Loss: 0.2367
Epoch [4/10], Step [100/624], Loss: 0.3249
Epoch [4/10], Step [200/624], Loss: 0.2566
Epoch [4/10], Step [300/624], Loss: 0.1935
Epoch [4/10], St

In [35]:
# Test the model
with torch.no_grad():
    n_correct_gru = 0
    n_samples_gru = 0
    for images_gru, labels_gru in test_loader:
        images_gru = images_gru.squeeze(1).to(device)
        labels_gru = labels_gru.to(device)
        labels_gru = labels_gru - 1
        
        outputs_gru = model_gru(images_gru)
        _, predicted_gru = torch.max(outputs_gru.data, 1)
        
        n_samples_gru += labels_gru.size(0)
        n_correct_gru += (predicted_gru == labels_gru).sum().item()

    acc_gru = 100.0 * n_correct_gru / n_samples_gru
    print(f'Accuracy of the GRU model: {acc_gru:.2f}%')

Accuracy of the GRU model: 93.08%


# Kesimpulan

- Data yang digunakan pada kasus ini yaitu EMNIST yaitu data huruf. untuk sumber data sendiri saya menggunakan data dari dataset EMNIST yang telah tersedia pada torchvision.

- Dampak yang terjadi akibat pergantian dataset yaitu terdapat pada hyperparameter, dimana setiap nilai hyperparameter diatur kembali untuk menyesuaikan dengan dataset yang ada, baik itu dari jumlah dataset, jenis dataset atauapun hal lainnya. selain itu dampah terhadap peforma adalah pada model RNN akurasi yang didapatkan yaitu 84% dengan epoch 5, sedangkan untuk LSTM mendapatkan akurasi 92.84% dengan epoch 10 dengan nilai epoch yang turun secara signifikan dan tidak mengalami kenaikan yang signifikan kembali, dan model GRU mendapatkan akurasi sebesar 93% dnegan total epoch yaitu sama seperti LSTM 10.

- tidak terdapat kendala dalam modifikasi kode, karena tidak banyak terdapat perubahan dalam kode sebelumnya. perubahan hanya terjadi pada saat pelatihan dan testing model. dimana terdapat tambahan code yaitu "labels = labels - 1" hal ini berfungsi untuk menyesuaikan range data dimana mulai dari 0-25

