In [21]:
import torch.nn
from datetime import datetime
import torchvision.models as models
import torch.nn.functional as F



In [23]:
import torch
from torchvision import datasets, transforms

def load_original_data_pytorch(path):
    # Define a transform to normalize the data
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.5,), (0.5,))])
    
    # Download and load the training data
    trainset = datasets.MNIST(path, download=True, train=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True)

    # Download and load the test data
    testset = datasets.MNIST(path, download=True, train=False, transform=transform)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=256, shuffle=True)

    return train_loader, test_loader
path = 'D:\\Casper\\OTHER\\Data\\MNIST_data'
train_loader, test_loader = load_original_data_pytorch(path)
loaders = {
    'train': train_loader,
    'test': test_loader
}


In [10]:
def pprint(output = '\n', show_time = False): # print and fprint at the same time
    filename = "hw2-1.txt"
    print(output)
    with open(filename, 'a') as f:
        if show_time:
            f.write(datetime.now().strftime("[%Y-%m-%d %H:%M:%S] "))

        f.write(str(output))
        f.write('\n')
pprint("build function", True)

build function


In [11]:
def count_parameters(model):
    total_num = 0

    for parameter in model.parameters():
        if parameter.requires_grad:
            total_num += parameter.numel() 
    return total_num


In [27]:
import time
import torchvision.models as models
from torch import nn, optim
from tqdm import tqdm
def train(model, model_name):
    pprint(f"test {model_name}", True)
    model_parameters_amount = count_parameters(model)
    pprint(f"model total parameters: {model_parameters_amount:,}")

    model = model.cuda()
    criterion = nn.CrossEntropyLoss()
    lr= 0.005
    optimizer = optim.Adam(model.parameters(), lr=lr)
    pprint(f"learning rate={lr}")
    iteration = 0
    epochs = 20
    start = time.time()
    phases = ['train', 'test']
    for epoch in range(epochs):
        for phase in phases:
            running_loss = 0.0
            correct_predictions = 0
            correct_top3_predictions = 0
            total_samples = 0
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode
            for images, labels in tqdm(loaders[phase]): # Iterate over data.
                images, labels = images.cuda(), labels.cuda()
                outputs = model(images)
                loss = criterion(outputs, labels)

                with torch.set_grad_enabled(phase == 'train'):
                    if phase == 'train': # backward + optimize only if in training phase
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item()

                # Convert outputs to predicted class by selecting the class with the highest score
                _, predicted = torch.max(outputs, 1)
                # Accumulate the number of correct predictions
                correct_predictions += (predicted == labels).sum().item()
                
                _, top3_preds = outputs.topk(3, 1, True, True)
                correct_top3_predictions += sum([labels[i] in top3_preds[i] for i in range(labels.size(0))])

                total_samples += labels.size(0)
                iteration += 1
                # if iteration % 20 == 0:
                #     print(iteration)
            avg_loss = running_loss / total_samples
            top1_accuracy = correct_predictions / total_samples * 100
            top3_accuracy = correct_top3_predictions / total_samples * 100
            pprint(f"Epoch [{epoch+1}/{epochs}], phase: {phase}, samples: {total_samples}, Loss: {avg_loss:.4f}, Top-1 Accuracy: {top1_accuracy:.2f}%, Top-3 Accuracy: {top3_accuracy:.2f}%")
    end = time.time()
    duration = end - start
    pprint(f"Elapsed time: {duration} seconds")
    model_scripted = torch.jit.script(model) # Export to TorchScript
    model_scripted.save(f'{model_name}.pt') # Save
    pprint(f"weight saved as: {model_name}.pt")   


In [25]:
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(784, 128)  # First layer: 784 input features, 128 output features
        self.fc2 = nn.Linear(128, 64)   # Second layer: 128 input features, 64 output features
        self.fc3 = nn.Linear(64, 10)    # Final layer: 64 input features, 10 output features (digits 0-9)

    def forward(self, x):
        x = x.view(-1, 784)  # Flatten the input tensor
        x = F.relu(self.fc1(x))  # Apply ReLU non-linearity after first layer
        x = F.relu(self.fc2(x))  # Apply ReLU non-linearity after second layer
        x = self.fc3(x)  # No non-linearity after final layer
        return F.log_softmax(x, dim=1)  # Apply log-softmax to output for classification


In [None]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 64)  # The image size is reduced to 7x7 after pooling layers
        self.fc2 = nn.Linear(64, 10)  # 10 output classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # Convolution -> ReLU -> Pooling
        x = self.pool(F.relu(self.conv2(x)))  # Convolution -> ReLU -> Pooling
        x = torch.flatten(x, 1)  # Flatten
        x = F.relu(self.fc1(x))  # Dense layer -> ReLU
        x = self.fc2(x)  # Output layer
        return F.log_softmax(x, dim=1)  # Log Softmax activation for the output


In [28]:
model_list = [
    SimpleNN(),
    SimpleCNN(),
]

model_name = [
    "SimpleNN",
    "SimpleCNN",
]
for ii in range(len(model_name)):
    train(model_list[ii], model_name[ii])

test SimpleNN
model total parameters: 109,386
learning rate=0.005


100%|██████████| 235/235 [00:12<00:00, 18.31it/s]


Epoch [1/20], phase: train, samples: 60000, Loss: 0.0017, Top-1 Accuracy: 86.47%, Top-3 Accuracy: 96.00%


100%|██████████| 40/40 [00:02<00:00, 19.57it/s]


Epoch [1/20], phase: test, samples: 10000, Loss: 0.0009, Top-1 Accuracy: 93.02%, Top-3 Accuracy: 99.05%


100%|██████████| 235/235 [00:12<00:00, 18.45it/s]


Epoch [2/20], phase: train, samples: 60000, Loss: 0.0007, Top-1 Accuracy: 94.18%, Top-3 Accuracy: 99.11%


100%|██████████| 40/40 [00:01<00:00, 21.86it/s]


Epoch [2/20], phase: test, samples: 10000, Loss: 0.0006, Top-1 Accuracy: 95.15%, Top-3 Accuracy: 99.45%


100%|██████████| 235/235 [00:12<00:00, 19.09it/s]


Epoch [3/20], phase: train, samples: 60000, Loss: 0.0006, Top-1 Accuracy: 95.53%, Top-3 Accuracy: 99.45%


100%|██████████| 40/40 [00:01<00:00, 20.42it/s]


Epoch [3/20], phase: test, samples: 10000, Loss: 0.0005, Top-1 Accuracy: 96.24%, Top-3 Accuracy: 99.53%


100%|██████████| 235/235 [00:12<00:00, 18.25it/s]


Epoch [4/20], phase: train, samples: 60000, Loss: 0.0005, Top-1 Accuracy: 96.42%, Top-3 Accuracy: 99.63%


100%|██████████| 40/40 [00:01<00:00, 20.69it/s]


Epoch [4/20], phase: test, samples: 10000, Loss: 0.0006, Top-1 Accuracy: 95.94%, Top-3 Accuracy: 99.44%


100%|██████████| 235/235 [00:12<00:00, 18.10it/s]


Epoch [5/20], phase: train, samples: 60000, Loss: 0.0004, Top-1 Accuracy: 96.60%, Top-3 Accuracy: 99.62%


100%|██████████| 40/40 [00:02<00:00, 19.94it/s]


Epoch [5/20], phase: test, samples: 10000, Loss: 0.0006, Top-1 Accuracy: 95.55%, Top-3 Accuracy: 99.31%


100%|██████████| 235/235 [00:13<00:00, 17.93it/s]


Epoch [6/20], phase: train, samples: 60000, Loss: 0.0004, Top-1 Accuracy: 97.05%, Top-3 Accuracy: 99.72%


100%|██████████| 40/40 [00:01<00:00, 21.86it/s]


Epoch [6/20], phase: test, samples: 10000, Loss: 0.0004, Top-1 Accuracy: 96.72%, Top-3 Accuracy: 99.58%


 70%|███████   | 165/235 [00:09<00:04, 17.37it/s]


KeyboardInterrupt: 