<a href="https://colab.research.google.com/github/anandt555/BigData-Practice/blob/main/updated_mixup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Mixup with MNIST**

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np

# Define the mixup function for MNIST
def mixup_data(x, y, alpha=0.1):
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size)
    mixed_x = lam * x + (1 - lam) * x[index, :]

    # Convert labels to one-hot encoding
    y_onehot = torch.zeros(y.size(0), 10)
    y_onehot.scatter_(1, y.view(-1, 1), 1)
    y[index] = y[index].long()

    mixed_y = lam * y_onehot + (1 - lam) * y_onehot[index, :]

    return mixed_x, mixed_y

# Create LeNet model for MNIST
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        x = x.view(-1, 4*4*50)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Download and prepare MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Initialize the model, loss function, and optimizer for MNIST
model_mixup = LeNet()
model_no_mixup = LeNet()

criterion = nn.CrossEntropyLoss()
optimizer_mixup = optim.SGD(model_mixup.parameters(), lr=0.005, momentum=0.9)
optimizer_no_mixup = optim.SGD(model_no_mixup.parameters(), lr=0.001, momentum=0.9)

# Training and testing loop without mixup
print("Training without mixup:")
for epoch in range(10):
    model_no_mixup.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer_no_mixup.zero_grad()
        outputs = model_no_mixup(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_no_mixup.step()

        running_loss += loss.item()

        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()


    print('epoch %d - training accuracy: %2.4f' %
    (epoch+1, correct / total))
        # if i % 200 == 199:  # Print every 200 batches
        #     print(f"[Epoch {epoch + 1}, Batch {i + 1}] Loss: {running_loss / 200:.3f} | Accuracy: {100 * correct / total:.2f}%")
        #     running_loss = 0.0

print("Finished Training without mixup")

# Testing loop without mixup
model_no_mixup.eval()
correct = 0
total = 0
with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            outputs = model_no_mixup(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

print(f"Accuracy on test set without mixup: {100 * correct / total:.2f}%")

# Training and testing loop with mixup
print("\nTraining with mixup:")
for epoch in range(5):
    model_mixup.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data

        # Apply mixup
        inputs, labels = mixup_data(inputs, labels)

        optimizer_mixup.zero_grad()
        outputs = model_mixup(inputs)
        loss = criterion(outputs, torch.argmax(labels, dim=1).long())  # Ensure labels are long type
        loss.backward()
        optimizer_mixup.step()

        running_loss += loss.item()

        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(torch.argmax(labels, dim=1).long()).sum().item()  # Ensure labels are long type
    print('epoch %d - training accuracy: %2.4f' %
    (epoch+1, correct / total))
        # if i % 200 == 199:  # Print every 200 batches
        #     print(f"[Epoch {epoch + 1}, Batch {i + 1}] Loss: {running_loss / 200:.3f} | Accuracy: {100 * correct / total:.2f}%")
        #     running_loss = 0.0

print("Finished Training with mixup")

# Testing loop with mixup
model_mixup.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        outputs = model_mixup(inputs)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

print(f"Accuracy on test set with mixup: {100 * correct / total:.2f}%")

Training without mixup:
epoch 1 - training accuracy: 0.7998
epoch 2 - training accuracy: 0.9517
epoch 3 - training accuracy: 0.9683
epoch 4 - training accuracy: 0.9751
epoch 5 - training accuracy: 0.9786
epoch 6 - training accuracy: 0.9817
epoch 7 - training accuracy: 0.9833
epoch 8 - training accuracy: 0.9854
epoch 9 - training accuracy: 0.9865
epoch 10 - training accuracy: 0.9871
Finished Training without mixup
Accuracy on test set without mixup: 98.72%

Training with mixup:
epoch 1 - training accuracy: 0.8869
epoch 2 - training accuracy: 0.9655
epoch 3 - training accuracy: 0.9737
epoch 4 - training accuracy: 0.9764
epoch 5 - training accuracy: 0.9828
Finished Training with mixup
Accuracy on test set with mixup: 98.97%


**Mixup with CIFAR10**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np

# Define the mixup function for CIFAR-10
def mixup_data(x, y, alpha=0.1):
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size)
    mixed_x = lam * x + (1 - lam) * x[index, :]

    # Convert labels to one-hot encoding
    y_onehot = torch.zeros(y.size(0), 10)
    y_onehot.scatter_(1, y.view(-1, 1), 1)
    y[index] = y[index].long()

    mixed_y = lam * y_onehot + (1 - lam) * y_onehot[index, :]

    return mixed_x, mixed_y

# Create Adjusted LeNet model for CIFAR-10
class AdjustedLeNetLike(nn.Module):
    def __init__(self):
        super(AdjustedLeNetLike, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, stride=1, padding=2)  # Adjust input channels
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(8 * 8 * 64, 512)  # Adjust the input size for fully connected layer
        self.fc2 = nn.Linear(512, 10)  # Adjust output classes

    def forward(self, x):
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        x = x.view(-1, 8 * 8 * 64)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Download and prepare CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Initialize the model, loss function, and optimizer for CIFAR-10
model_mixup = AdjustedLeNetLike()
model_no_mixup = AdjustedLeNetLike()

criterion = nn.CrossEntropyLoss()
optimizer_mixup = optim.SGD(model_mixup.parameters(), lr=0.005, momentum=0.9)
optimizer_no_mixup = optim.SGD(model_no_mixup.parameters(), lr=0.001, momentum=0.9)

# Training and testing loop without mixup
print("Training without mixup:")
for epoch in range(10):
    model_no_mixup.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer_no_mixup.zero_grad()
        outputs = model_no_mixup(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_no_mixup.step()

        running_loss += loss.item()

        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    print('epoch %d - training accuracy: %2.4f' %
    (epoch+1, correct / total))

print("Finished Training without mixup")

# Testing loop without mixup
model_no_mixup.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        outputs = model_no_mixup(inputs)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

print(f"Accuracy on test set without mixup: {100 * correct / total:.2f}%")

# Training and testing loop with mixup
print("\nTraining with mixup:")
for epoch in range(5):
    model_mixup.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data

        # Apply mixup
        inputs, labels = mixup_data(inputs, labels)

        optimizer_mixup.zero_grad()
        outputs = model_mixup(inputs)
        loss = criterion(outputs, torch.argmax(labels, dim=1).long())  # Ensure labels are long type
        loss.backward()
        optimizer_mixup.step()

        running_loss += loss.item()

        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(torch.argmax(labels, dim=1).long()).sum().item()  # Ensure labels are long type

    print('epoch %d - training accuracy: %2.4f' %
    (epoch+1, correct / total))

print("Finished Training with mixup")

# Testing loop with mixup
model_mixup.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        outputs = model_mixup(inputs)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

print(f"Accuracy on test set with mixup: {100 * correct / total:.2f}%")

Files already downloaded and verified
Files already downloaded and verified
Training without mixup:
epoch 1 - training accuracy: 0.2749
epoch 2 - training accuracy: 0.4263
epoch 3 - training accuracy: 0.4940
epoch 4 - training accuracy: 0.5340
epoch 5 - training accuracy: 0.5680
epoch 6 - training accuracy: 0.5982
epoch 7 - training accuracy: 0.6248
epoch 8 - training accuracy: 0.6478
epoch 9 - training accuracy: 0.6680
epoch 10 - training accuracy: 0.6868
Finished Training without mixup
Accuracy on test set without mixup: 66.13%

Training with mixup:
epoch 1 - training accuracy: 0.3906
epoch 2 - training accuracy: 0.5504
epoch 3 - training accuracy: 0.6287
epoch 4 - training accuracy: 0.6863
epoch 5 - training accuracy: 0.7274
Finished Training with mixup
Accuracy on test set with mixup: 71.29%
