In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchsummary import summary
import matplotlib.pyplot as plt
from torchvision.datasets import CIFAR10
from torchvision import models



In [3]:

# Set random seed for reproducibility
torch.manual_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Define transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Load CIFAR-10 dataset
dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)

# Define the data split
train_len = int(0.007 * len(dataset))
val_len = int(0.001 * len(dataset))
test_len = len(dataset) - train_len - val_len

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_len, val_len, test_len])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:12<00:00, 14169004.58it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data




In [4]:
# Define the GRU model
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUModel, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.gru(x)
        out = self.fc(out[:, -1, :])
        return out

In [5]:
# Reshape the input data for the GRU model
sequence_length = 224 * 224  # Flatten the image into a sequence
input_size = 3  # Assuming 3 channels for CIFAR-10 images
hidden_size = 64
output_size = 10  # Number of classes in CIFAR-10
gru_model = GRUModel(input_size, hidden_size, output_size)

# Print model summary
summary(gru_model, (sequence_length, input_size))



----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
               GRU-1  [[-1, 50176, 64], [-1, 2, 64]]               0
            Linear-2                   [-1, 10]             650
Total params: 650
Trainable params: 650
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 3136.00
Params size (MB): 0.00
Estimated Total Size (MB): 3136.58
----------------------------------------------------------------


In [None]:
# Training loop for 2 epochs
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(gru_model.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):
    gru_model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        # Reshape the inputs to match the GRU model's expectations
        inputs = inputs.view(-1, sequence_length, input_size)

        optimizer.zero_grad()
        outputs = gru_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch + 1}/2, Training Loss: {running_loss / len(train_loader)}")




Epoch 1/2, Training Loss: 2.310733925212513




In [None]:

    # Validation
    gru_model.eval()
    val_loss = sum(criterion(gru_model(inputs.view(-1, sequence_length, input_size)), labels).item() for inputs, labels in val_loader) / len(val_loader)
    print(f"Epoch {epoch + 1}/2, Validation Loss: {val_loss}")

print("Training complete.")

In [10]:

# Define your LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out



In [11]:
# Create an instance of your LSTM model
input_size = 10
hidden_size = 20
num_layers = 2
output_size = 5

lstm_model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# Print model summary
summary(lstm_model, (sequence_length, input_size))


AttributeError: ignored

In [None]:
# Training loop for 2 epochs
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # Train for 2 epochs as in your example
    lstm_model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        # Reshape the inputs to match the LSTM model's expectations
        inputs = inputs.view(-1, sequence_length, input_size)

        optimizer.zero_grad()
        outputs = lstm_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch + 1}/2, Training Loss: {running_loss / len(train_loader)}")


In [None]:
 # Validation
    lstm_model.eval()
    val_loss = sum(criterion(lstm_model(inputs.view(-1, sequence_length, input_size)), labels).item() for inputs, labels in val_loader) / len(val_loader)
    print(f"Epoch {epoch + 1}/2, Validation Loss: {val_loss}")

print("Training complete.")