<a href="https://colab.research.google.com/github/arzoo118/Robo-Speaker/blob/main/IMAGE_CLASSIFICATION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Preprocessing shared by both MNIST splits: convert each image to a tensor,
# then normalise the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Both splits are cached under ./data and downloaded on first use.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 52566465.56it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 1808913.39it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 13414380.92it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4866035.45it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): none of the cells visible in this notebook read from or write
# to the mount — confirm it is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Fully connected
import torch.nn as nn
import torch.nn.functional as F

class FullyConnectedNet(nn.Module):
    """Three-layer perceptron: 784 -> 512 -> 256 -> 10.

    ReLU follows the first two linear layers; the last layer returns raw
    class scores (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys, so they must stay stable
        # for checkpoints written with save_model().
        self.fc1 = nn.Linear(in_features=28 * 28, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return a ``(rows, 10)`` tensor of class scores.

        The input is reshaped into rows of 784 values, so its total element
        count must be a multiple of 784 (e.g. a batch of 1x28x28 images).
        """
        flat = x.view(-1, 28 * 28)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Module-level instance that the training cell operates on.
fc_net = FullyConnectedNet()


# Hybrid

class HybridNet(nn.Module):
    """Two convolution stages followed by a two-layer dense head.

    Each 3x3 convolution (padding 1) keeps the spatial size and is followed by
    ReLU and a 2x2 max-pool. The dense head expects 64 channels of 7x7, which
    is what a 28x28 input produces after the two pools.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return raw class scores of shape ``(rows, 10)`` for image batch ``x``."""
        features = F.max_pool2d(F.relu(self.conv1(x)), 2)
        features = F.max_pool2d(F.relu(self.conv2(features)), 2)
        # Flatten into rows of 64*7*7 values for the dense head.
        flat = features.view(-1, 64 * 7 * 7)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


# Module-level instance that the training cell operates on.
hybrid_net = HybridNet()

# CNN
class FullyConvolutionalNet(nn.Module):
    """Classifier built only from convolutions and pooling (no linear layers).

    Three conv + ReLU + 2x2 max-pool stages widen the channels 1 -> 32 -> 64
    -> 128. A final convolution emits 10 score maps, which are averaged over
    their whole spatial extent to give one score per class.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=10, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, 10)`` for image batch ``x``."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        features = self.pool(F.relu(self.conv3(features)))
        score_maps = self.conv4(features)
        # Global average pool: the kernel covers the full remaining H x W.
        scores = F.avg_pool2d(score_maps, score_maps.size()[2:])
        return scores.view(-1, 10)


# Module-level instance that the training cell operates on.
conv_net = FullyConvolutionalNet()

In [None]:
#training
def train_model(model, train_loader, test_loader, num_epochs=2):
    """Train ``model`` with Adam and cross-entropy, evaluating after each epoch.

    Args:
        model: ``nn.Module`` mapping an image batch to per-class scores.
        train_loader: iterable of ``(images, labels)`` batches used for the
            optimisation steps.
        test_loader: iterable of ``(images, labels)`` batches used for the
            per-epoch accuracy check.
        num_epochs: number of full passes over ``train_loader``.

    Returns:
        A list with one accuracy percentage per epoch, measured on
        ``test_loader``. An entry is ``nan`` when ``test_loader`` yielded no
        samples. The mean loss and accuracy are also printed every epoch.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Run every batch on the device the model already lives on, so a model
    # that was moved to the GPU does not fail against CPU batches.
    device = next(model.parameters()).device

    accuracy_history = []
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Counting batches in the loop avoids needing len() on the loader and
        # avoids a ZeroDivisionError when it is empty.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

        # Evaluation on the held-out loader
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total if total else float('nan')
        print(f"Accuracy: {accuracy:.2f}%")
        accuracy_history.append(accuracy)

    return accuracy_history


# Fully connected network (default number of epochs)
train_model(model=fc_net, train_loader=train_loader, test_loader=test_loader)

# Hybrid conv + dense network
train_model(model=hybrid_net, train_loader=train_loader, test_loader=test_loader)

# Fully convolutional network
train_model(model=conv_net, train_loader=train_loader, test_loader=test_loader)


def save_model(model, path):
    """Serialise ``model``'s ``state_dict`` (not the module itself) to ``path``."""
    weights = model.state_dict()
    torch.save(weights, path)

def load_model(model, path):
    """Load the ``state_dict`` stored at ``path`` into ``model`` and return it.

    Args:
        model: module whose parameter names and shapes match the checkpoint.
        path: file previously written with ``torch.save(model.state_dict(), ...)``.

    Returns:
        The same ``model`` object, updated in place.
    """
    # Map tensors to CPU while reading, so a checkpoint saved from a GPU run
    # still loads on a CPU-only runtime. load_state_dict then copies the
    # values into the model's own parameters on whatever device they are on.
    # NOTE: torch.load unpickles the file — only load checkpoints you trust.
    state_dict = torch.load(path, map_location='cpu')
    model.load_state_dict(state_dict)
    return model

# Round-trip the trained weights through disk: write the checkpoint, then
# rebind fc_net to a fresh network populated from that file.
save_model(model=fc_net, path='fc_net.pth')
fc_net = load_model(model=FullyConnectedNet(), path='fc_net.pth')


# Fine-tuning function
def fine_tune_model(model, train_loader, test_loader, num_epochs=5):
    """Continue training ``model`` by delegating to ``train_model``.

    Only the default epoch count (5) differs from calling ``train_model``
    directly; nothing is returned.
    """
    train_model(
        model,
        train_loader,
        test_loader,
        num_epochs=num_epochs,
    )

# Further train the reloaded fully connected network.
fine_tune_model(model=fc_net, train_loader=train_loader, test_loader=test_loader)


Epoch [1/2], Loss: 0.2956
Accuracy: 94.89%
Epoch [2/2], Loss: 0.1330
Accuracy: 96.22%
Epoch [1/2], Loss: 0.1492
Accuracy: 98.51%
Epoch [2/2], Loss: 0.0436
Accuracy: 98.82%
Epoch [1/2], Loss: 0.2608
Accuracy: 97.54%
Epoch [2/2], Loss: 0.0616
Accuracy: 98.82%
Epoch [1/5], Loss: 0.1038
Accuracy: 96.92%
Epoch [2/5], Loss: 0.0834
Accuracy: 96.58%
Epoch [3/5], Loss: 0.0701
Accuracy: 97.52%
Epoch [4/5], Loss: 0.0589
Accuracy: 97.50%
Epoch [5/5], Loss: 0.0519
Accuracy: 97.32%


In [None]:
from sklearn.model_selection import KFold
import numpy as np

def k_fold_cross_val(model_class, dataset, k=5, num_epochs=2):
    """Run k-fold cross-validation and report the accuracy of each fold.

    For every fold a fresh ``model_class()`` is trained with ``train_model``
    on the remaining folds and then scored on the held-out fold.

    Args:
        model_class: zero-argument callable returning a new ``nn.Module``.
        dataset: indexable dataset with ``len()``; wrapped in ``Subset`` per fold.
        k: number of folds passed to ``KFold``.
        num_epochs: epochs of training per fold.

    Returns:
        List of held-out accuracy percentages, one per fold. The list and its
        mean are also printed.
    """
    kf = KFold(n_splits=k)
    results = []

    # Split over an explicit index array instead of handing the dataset
    # object to scikit-learn; the yielded positions are the same.
    sample_indices = np.arange(len(dataset))
    for train_index, test_index in kf.split(sample_indices):
        train_subset = torch.utils.data.Subset(dataset, train_index)
        test_subset = torch.utils.data.Subset(dataset, test_index)

        train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
        test_loader = DataLoader(test_subset, batch_size=64, shuffle=False)

        # A new model per fold so no weights leak between folds.
        model = model_class()
        train_model(model, train_loader, test_loader, num_epochs)

        # Final held-out accuracy for this fold.
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                outputs = model(images)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        results.append(accuracy)

    print(f"K-Fold Cross-Validation Results: {results}")
    print(f"Mean Accuracy: {np.mean(results):.2f}%")
    return results

# Assign the result so the returned list is not echoed as the cell's output.
fc_fold_accuracies = k_fold_cross_val(FullyConnectedNet, train_dataset)


Epoch [1/2], Loss: 0.3296
Accuracy: 94.09%
Epoch [2/2], Loss: 0.1502
Accuracy: 95.66%
Epoch [1/2], Loss: 0.3284
Accuracy: 92.70%
Epoch [2/2], Loss: 0.1543
Accuracy: 95.80%
Epoch [1/2], Loss: 0.3289
Accuracy: 94.88%
Epoch [2/2], Loss: 0.1455
Accuracy: 94.69%
Epoch [1/2], Loss: 0.3179
Accuracy: 93.79%
Epoch [2/2], Loss: 0.1462
Accuracy: 94.70%
Epoch [1/2], Loss: 0.3385
Accuracy: 94.62%
Epoch [2/2], Loss: 0.1518
Accuracy: 95.98%
K-Fold Cross-Validation Results: [95.65833333333333, 95.8, 94.69166666666666, 94.7, 95.98333333333333]
Mean Accuracy: 95.37%


In [None]:
# Kaggle CLI setup: create the config directory, copy the API token into it,
# then download the dogs-vs-cats archive into the working directory.
# NOTE(review): the recorded output shows `cp` failing because kaggle.json was
# not in the working directory — upload it before running this cell. The
# download still succeeded in that run, presumably via credentials supplied
# some other way; confirm.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!kaggle datasets download -d salader/dogs-vs-cats

cp: cannot stat 'kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/salader/dogs-vs-cats
License(s): unknown
Downloading dogs-vs-cats.zip to /content
 98% 1.05G/1.06G [00:09<00:00, 174MB/s]
100% 1.06G/1.06G [00:09<00:00, 114MB/s]


In [None]:
import zipfile
# Extract the downloaded archive into /content. The context manager closes the
# file handle even if extraction raises part-way through.
with zipfile.ZipFile('/content/dogs-vs-cats.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
!ls /content/dogs_vs_cats
!ls /content/dogs_vs_cats


test  train
test  train


In [None]:
#Dog-Cat Classifier using pretrained MNIST Model.
from torchvision.datasets import ImageFolder
from torchvision.models import resnet18
import torch.optim as optim

batch_size = 64
learning_rate = 0.001
num_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing for the dog-cat images. Grayscale runs first, on the loaded
# image, so the tensor is single-channel before the single-channel
# normalisation is applied; the result is 1x28x28, matching the MNIST inputs
# the convolution layers were trained on.
dog_cat_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# ImageFolder uses the immediate subfolders of `root` as class labels. The
# dataset root only contains `test` and `train`, so pointing at it would label
# images by split rather than by animal. Use the training split instead.
# NOTE(review): assumes /content/dogs_vs_cats/train holds one subfolder per
# class — verify with `!ls /content/dogs_vs_cats/train`.
dog_cat_dataset = ImageFolder(root='/content/dogs_vs_cats/train', transform=dog_cat_transform)
dog_cat_loader = DataLoader(dog_cat_dataset, batch_size=batch_size, shuffle=True)

# Load pretrained model: copy the MNIST-trained weights from conv_net into a
# separate instance. A bare FullyConvolutionalNet() would be randomly
# initialised, and copying keeps conv_net itself untouched by fine-tuning.
model = FullyConvolutionalNet()
model.load_state_dict(conv_net.state_dict())
pretrained_model = model
pretrained_model.to(device)

# Modify the pretrained model for fine-tuning
class DogCatClassifier(nn.Module):
    """Two-class classifier built on the first two conv stages of another net.

    ``conv1``, ``conv2`` and ``pool`` are taken from ``pretrained_model`` by
    reference (the same module objects, not copies), so training this
    classifier also updates those layers on ``pretrained_model``. A new dense
    head (64*7*7 -> 512 -> 2) is added on top.
    """

    def __init__(self, pretrained_model):
        super().__init__()
        # Reused feature extractor
        self.conv1 = pretrained_model.conv1
        self.conv2 = pretrained_model.conv2
        self.pool = pretrained_model.pool
        # Freshly initialised classification head
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Return raw two-class scores of shape ``(rows, 2)`` for batch ``x``."""
        features = self.pool(torch.relu(self.conv1(x)))
        features = self.pool(torch.relu(self.conv2(features)))
        # Flatten into rows of 64*7*7 values for the dense head.
        flat = features.view(-1, 64 * 7 * 7)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)

dog_cat_model = DogCatClassifier(pretrained_model).to(device)
optimizer_dog_cat = optim.Adam(dog_cat_model.parameters(), lr=learning_rate)
criterion_dog_cat = nn.CrossEntropyLoss()

# Fine-tuning on dog-cat dataset
# Reuse the epoch count configured at the top of this cell instead of
# repeating the literal.
num_epochs_dog_cat = num_epochs
for epoch in range(num_epochs_dog_cat):
    dog_cat_model.train()
    running_loss = 0.0
    num_batches = 0
    for data, target in dog_cat_loader:
        data, target = data.to(device), target.to(device)

        optimizer_dog_cat.zero_grad()
        outputs = dog_cat_model(data)
        loss = criterion_dog_cat(outputs, target)
        loss.backward()
        optimizer_dog_cat.step()

        running_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch. The loss of the last mini-batch
    # alone is too noisy to show a trend, and would be undefined if the loader
    # were empty.
    epoch_loss = running_loss / max(num_batches, 1)
    print(f'Dog-Cat Fine-tuning Epoch [{epoch+1}/{num_epochs_dog_cat}], Loss: {epoch_loss:.4f}')


Dog-Cat Fine-tuning Epoch [1/10], Loss: 0.6497
Dog-Cat Fine-tuning Epoch [2/10], Loss: 0.4646
Dog-Cat Fine-tuning Epoch [3/10], Loss: 0.5289
Dog-Cat Fine-tuning Epoch [4/10], Loss: 0.5985
Dog-Cat Fine-tuning Epoch [5/10], Loss: 0.6590
Dog-Cat Fine-tuning Epoch [6/10], Loss: 0.3340
Dog-Cat Fine-tuning Epoch [7/10], Loss: 0.3903
Dog-Cat Fine-tuning Epoch [8/10], Loss: 0.6081
Dog-Cat Fine-tuning Epoch [9/10], Loss: 0.3521
Dog-Cat Fine-tuning Epoch [10/10], Loss: 0.5160
