In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split

class MLP(nn.Module):
    """Fully connected classifier that flattens its input and returns raw logits.

    The network is a stack of ``Linear -> ReLU`` pairs, one per entry in
    ``hidden_sizes``, followed by a final ``Linear`` projection to
    ``output_size``. No softmax is applied at the end: ``nn.CrossEntropyLoss``
    expects unnormalized logits and applies log-softmax internally, so
    normalizing here as well would apply softmax twice.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_sizes: Sequence of hidden-layer widths. Any length is accepted,
            including an empty sequence (a single linear layer).
        output_size: Number of output classes.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        super(MLP, self).__init__()
        widths = [input_size, *hidden_sizes, output_size]
        layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
        # The last Linear is the output layer and must emit logits, so drop
        # the ReLU that the loop appended after it.
        layers.pop()
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten every dimension except the batch one and return class logits."""
        x = x.view(x.size(0), -1)
        return self.network(x)

# ---- MLP hyperparameters for MNIST (28x28 grayscale digits, 10 classes) ----
input_size = 28 * 28
hidden_sizes = [128, 64]
output_size = 10

# ---- Data: hold out 10% of the MNIST training set for validation ----
dataset = MNIST(root='.', train=True, transform=transforms.ToTensor(), download=True)
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, lengths=[train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# ---- Model, loss and optimizer ----
model = MLP(input_size, hidden_sizes, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# ---- Train for a fixed number of epochs, validating after each one ----
num_epochs = 50

for epoch in range(num_epochs):
    # Training pass: accumulate the per-batch loss to report an epoch mean.
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        # No explicit reshape is needed here: MLP.forward flattens each batch.
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Validation pass: same loss, but with gradients disabled.
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            val_loss += criterion(outputs, labels).item()

    print(f'Epoch {epoch+1}, Training Loss: {running_loss/len(train_loader)}, Validation Loss: {val_loss/len(val_loader)}')

Epoch 1, Training Loss: 0.2975421574654199, Validation Loss: 0.15366305049231394
Epoch 2, Training Loss: 0.12488181145737054, Validation Loss: 0.12079703886596922
Epoch 3, Training Loss: 0.08527204241593682, Validation Loss: 0.11255587978991977
Epoch 4, Training Loss: 0.06345646022809327, Validation Loss: 0.09809952446487435
Epoch 5, Training Loss: 0.0501909196491871, Validation Loss: 0.1089514776633339
Epoch 6, Training Loss: 0.04110282563563, Validation Loss: 0.10506006036272045
Epoch 7, Training Loss: 0.033241383601354875, Validation Loss: 0.0973052381242714
Epoch 8, Training Loss: 0.028662993890535823, Validation Loss: 0.10053190004080224
Epoch 9, Training Loss: 0.024098262809313976, Validation Loss: 0.11193243389417593
Epoch 10, Training Loss: 0.021398113057965976, Validation Loss: 0.10931122344720975
Epoch 11, Training Loss: 0.01819633591454814, Validation Loss: 0.12424562143873931
Epoch 12, Training Loss: 0.01836483513767758, Validation Loss: 0.11629678114746261
Epoch 13, Traini

In [46]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split

class LeNet(nn.Module):
    """LeNet-style CNN: two conv/average-pool stages followed by three linear layers.

    The first linear layer takes 256 features, which matches the 16 x 4 x 4
    feature map produced from a 1 x 28 x 28 input. The forward pass returns
    the output of the last linear layer with no final activation.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 5x5 convolutions, each followed by 2x2 average pooling.
        self.cnn_model = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2),
        )
        # Classifier head: 256 -> 120 -> 84 -> 10.
        self.fc_model = nn.Sequential(
            nn.Linear(in_features=256, out_features=120),
            nn.ReLU(),
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU(),
            nn.Linear(in_features=84, out_features=10),
        )

    def forward(self, x):
        """Run the conv stack, flatten per sample, then apply the linear head."""
        feature_maps = self.cnn_model(x)
        flat_features = feature_maps.view(feature_maps.size(0), -1)
        return self.fc_model(flat_features)

# ---- Data: hold out 10% of the MNIST training set for validation ----
dataset = MNIST(root='.', train=True, transform=transforms.ToTensor(), download=True)
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, lengths=[train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# ---- Model, loss and optimizer ----
model = LeNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# ---- Train for a fixed number of epochs, validating after each one ----
num_epochs = 50

for epoch in range(num_epochs):
    # Training pass: accumulate the per-batch loss to report an epoch mean.
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Validation pass: same loss, but with gradients disabled.
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            val_loss += criterion(outputs, labels).item()

    print(f'Epoch {epoch+1}, Training Loss: {running_loss/len(train_loader)}, Validation Loss: {val_loss/len(val_loader)}')

Epoch 1, Training Loss: 0.3304752808089602, Validation Loss: 0.16701227325470525
Epoch 2, Training Loss: 0.1030746514094338, Validation Loss: 0.08519981270457835
Epoch 3, Training Loss: 0.06965394085483494, Validation Loss: 0.07792720769004441
Epoch 4, Training Loss: 0.05478341951713179, Validation Loss: 0.07124579050070408
Epoch 5, Training Loss: 0.04624792418690049, Validation Loss: 0.05473621449671052
Epoch 6, Training Loss: 0.037443981970772734, Validation Loss: 0.052599341150353404
Epoch 7, Training Loss: 0.030259294712445872, Validation Loss: 0.05736102719187174
Epoch 8, Training Loss: 0.026605457575488753, Validation Loss: 0.06313925595276622
Epoch 9, Training Loss: 0.023263377666541917, Validation Loss: 0.06297933396318184
Epoch 10, Training Loss: 0.02230519692158928, Validation Loss: 0.059148587802886315
Epoch 11, Training Loss: 0.01803589461899593, Validation Loss: 0.0574143065557806
Epoch 12, Training Loss: 0.015772132393258705, Validation Loss: 0.07723691984483594
Epoch 13,

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split

class FlexNet(nn.Module):
    """Convolutional classifier that outputs per-class log-probabilities.

    Two convolutional stages, each ending in 2x2 max-pooling and batch
    normalization, feed a three-layer fully connected head with dropout.
    The final layer is ``LogSoftmax``, so the output is meant to be paired
    with ``nn.NLLLoss``.

    Args:
        input_channels: Number of channels in the input images.
        image_size: Spatial size of the input images, either one int for
            square images or a ``(height, width)`` pair. The default of 28
            reproduces the original fixed ``128 * 7 * 7`` head.
        num_classes: Number of output classes.

    Raises:
        ValueError: If the image is too small to survive the two 2x2 pools.
    """

    def __init__(self, input_channels=1, image_size=28, num_classes=10):
        super(FlexNet, self).__init__()
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        # Every convolution below preserves the spatial size (3x3 kernels with
        # matching padding, including the dilated one). Only the two 2x2
        # max-pools shrink it, each flooring the size to half.
        pooled_height = (height // 2) // 2
        pooled_width = (width // 2) // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"image_size {image_size!r} is too small for two 2x2 max-pool layers"
            )

        self.conv_layers = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 128, kernel_size=3, padding=2, dilation=2),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(128)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * pooled_height * pooled_width, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        """Apply the conv stack, then the flatten + linear head."""
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

def train_model(dataset_class, input_channels, num_epochs=50, batch_size=32, lr=0.001):
    """Train a FlexNet on a dataset and return the trained model.

    The dataset's training split is divided 90/10 into training and
    validation subsets. After every epoch the mean per-batch training and
    validation losses are printed.

    Args:
        dataset_class: Dataset constructor called as
            ``dataset_class(root='.', train=True, download=True, transform=...)``.
        input_channels: Number of image channels, forwarded to ``FlexNet``.
        num_epochs: Number of passes over the training subset.
        batch_size: Batch size used by both data loaders.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The trained ``FlexNet``, left in eval mode by the last validation pass.
        Previously the model was discarded when the function returned.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    dataset = dataset_class(root='.', train=True, download=True, transform=transform)
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    model = FlexNet(input_channels=input_channels)
    # FlexNet ends in LogSoftmax, so NLLLoss is the matching criterion.
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        total_train_loss = 0
        total_val_loss = 0

        # Training pass (dropout active, batch-norm statistics updated).
        model.train()
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()

        # Validation pass with gradients disabled.
        model.eval()
        with torch.no_grad():
            for images, labels in val_loader:
                outputs = model(images)
                loss = criterion(outputs, labels)
                total_val_loss += loss.item()

        print(f'Epoch {epoch+1}: Training Loss: {total_train_loss / len(train_loader)}, Validation Loss: {total_val_loss / len(val_loader)}')

    return model


# Keep a handle on the trained network so it can be evaluated or saved later.
trained_model = train_model(MNIST, 1)

Epoch 1: Training Loss: 0.14511992530353632, Validation Loss: 0.04739575870836872
Epoch 2: Training Loss: 0.07340626115999935, Validation Loss: 0.06710829718534651
Epoch 3: Training Loss: 0.058841791262323306, Validation Loss: 0.042739348355191754
Epoch 4: Training Loss: 0.051676422281884106, Validation Loss: 0.04153339621272641
Epoch 5: Training Loss: 0.041697825035294206, Validation Loss: 0.04126557059193555
