<a href="https://colab.research.google.com/github/helya02/HomeWorks/blob/main/06_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import zipfile
import os

zip_path = '/content/drive/MyDrive/NN06/geometry_dataset.zip'
target_dir = os.path.dirname(zip_path)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(target_dir)

In [None]:
!unzip /content/geometry_dataset.zip

In [8]:
import os
import glob
import pickle
import numpy as np
from PIL import Image
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

#params
DATASET_DIR = "/content/output"
TRAIN_IMAGES_PER_CLASS = 8000
TEST_IMAGES_PER_CLASS = 2000
NUM_CLASSES = 9
BATCH_SIZE = 64
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
MODEL_SAVE_PATH = "/content/0602-22401349-HashemiAghdam.pt"

torch.backends.cudnn.benchmark = True


def load_image(image_path):
    image = Image.open(image_path).convert("L")
    transform = transforms.ToTensor()
    return transform(image)

class GeometryDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert("L")
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]
        return image, label


def prepare_data():

    class_files = {}
    file_pattern = os.path.join(DATASET_DIR, "*.png")
    for file in glob.glob(file_pattern):
        basename = os.path.basename(file)
        for label in ["Circle", "Hexagon", "Heptagon", "Nonagon", "Octagon", "Pentagon", "Square", "Star", "Triangle"]:
            if basename.startswith(label):
                class_files.setdefault(label, []).append(file)
                break

    train_files, test_files = [], []
    train_labels, test_labels = [], []


    for label in sorted(class_files.keys()):
        files = sorted(class_files[label])
        if len(files) < (TRAIN_IMAGES_PER_CLASS + TEST_IMAGES_PER_CLASS):
            print(f"Warning: Not enough images for class {label}. Found {len(files)} images.")
            continue
        train_list = files[:TRAIN_IMAGES_PER_CLASS]
        test_list = files[TRAIN_IMAGES_PER_CLASS:TRAIN_IMAGES_PER_CLASS + TEST_IMAGES_PER_CLASS]
        train_files.extend(train_list)
        test_files.extend(test_list)
        train_labels.extend([label] * len(train_list))
        test_labels.extend([label] * len(test_list))


    with open("/content/training.file", "wb") as f:
        pickle.dump((train_files, train_labels), f)
    with open("/content/testing.file", "wb") as f:
        pickle.dump((test_files, test_labels), f)

    return (train_files, train_labels), (test_files, test_labels)



In [10]:

# class SimpleCNN(nn.Module):
#     def __init__(self):
#         super(SimpleCNN, self).__init__()
#         # Input channel = 1 for grayscale images; change to 3 for RGB images.
#         self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
#         self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
#         self.pool = nn.MaxPool2d(2, 2)
#         # Assuming two poolings: from 200x200 to 50x50 feature maps.
#         self.fc1 = nn.Linear(64 * 50 * 50, 256)
#         self.fc2 = nn.Linear(256, NUM_CLASSES)

#     def forward(self, x):
#         x = F.relu(self.conv1(x))
#         x = self.pool(x)  # Reduces size from 200 to 100
#         x = F.relu(self.conv2(x))
#         x = self.pool(x)  # Reduces size from 100 to 50
#         x = x.view(x.size(0), -1)  # Flatten
#         x = F.relu(self.fc1(x))
#         x = self.fc2(x)
#         return x

# class ImprovedCNN(nn.Module):
#     def __init__(self):
#         super(ImprovedCNN, self).__init__()
#         self.features = nn.Sequential(
#             nn.Conv2d(1, 32, kernel_size=3, padding=1),
#             nn.BatchNorm2d(32),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Conv2d(32, 64, kernel_size=3, padding=1),
#             nn.BatchNorm2d(64),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Conv2d(64, 128, kernel_size=3, padding=1),
#             nn.BatchNorm2d(128),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Dropout(0.3)
#         )

#         self.classifier = nn.Sequential(
#             nn.Linear(128 * 25 * 25, 512),
#             nn.ReLU(),
#             nn.Dropout(0.5),
#             nn.Linear(512, NUM_CLASSES)
#         )

#     def forward(self, x):
#         x = self.features(x)
#         x = x.view(x.size(0), -1)
#         x = self.classifier(x)
#         return x

class EfficientCNN(nn.Module):
    def __init__(self):
        super(EfficientCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 200 -> 100

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 100 -> 50

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 50 -> 25

            nn.Dropout(0.3),
            nn.AdaptiveAvgPool2d((1, 1))  # Output: 128 x 1 x 1
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),  # from (128, 1, 1) to (128)
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, device):
    train_losses = []
    test_losses = []
    train_accuracies = []
    test_accuracies = []


    scaler = torch.cuda.amp.GradScaler()
    label_to_index = {"Circle":0, "Hexagon":1, "Heptagon":2, "Nonagon":3,
                      "Octagon":4, "Pentagon":5, "Square":6, "Star":7, "Triangle":8}

    for epoch in range(1, num_epochs+1):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        for images, labels in train_loader:
            images = images.to(device)
            targets = torch.tensor([label_to_index[label] for label in labels]).to(device)

            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                outputs = model(images)
                loss = criterion(outputs, targets)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total_train += targets.size(0)
            correct_train += (predicted == targets).sum().item()

        epoch_loss_train = running_loss / total_train
        epoch_acc_train = correct_train / total_train
        train_losses.append(epoch_loss_train)
        train_accuracies.append(epoch_acc_train)

        model.eval()
        running_loss_test = 0.0
        correct_test = 0
        total_test = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device)
                targets = torch.tensor([label_to_index[label] for label in labels]).to(device)
                with torch.cuda.amp.autocast():
                    outputs = model(images)
                    loss = criterion(outputs, targets)
                running_loss_test += loss.item() * images.size(0)
                _, predicted = torch.max(outputs, 1)
                total_test += targets.size(0)
                correct_test += (predicted == targets).sum().item()

        epoch_loss_test = running_loss_test / total_test
        epoch_acc_test = correct_test / total_test
        test_losses.append(epoch_loss_test)
        test_accuracies.append(epoch_acc_test)

        print(f"Epoch {epoch}/{num_epochs}  Train Loss: {epoch_loss_train:.4f}  Train Acc: {epoch_acc_train:.4f}  Test Loss: {epoch_loss_test:.4f}  Test Acc: {epoch_acc_test:.4f}")

    plt.figure()
    plt.plot(range(1, num_epochs+1), train_losses, label="Train Loss")
    plt.plot(range(1, num_epochs+1), test_losses, label="Test Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.title("Epoch vs Loss")
    plt.savefig("loss_plot.png")
    plt.close()

    plt.figure()
    plt.plot(range(1, num_epochs+1), train_accuracies, label="Train Accuracy")
    plt.plot(range(1, num_epochs+1), test_accuracies, label="Test Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.title("Epoch vs Accuracy")
    plt.savefig("accuracy_plot.png")
    plt.close()

    return model, train_losses, test_losses, train_accuracies, test_accuracies

In [None]:
#Main
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    with open("/content/training.file", "rb") as f:
      train_files, train_labels = pickle.load(f)

    with open("/content/testing.file", "rb") as f:
      test_files, test_labels = pickle.load(f)

    # Data transforms
    train_transform = transforms.Compose([
        transforms.RandomRotation(10),
        transforms.RandomResizedCrop(200, scale=(0.9, 1.0)),
        transforms.ToTensor()
    ])
    test_transform = transforms.ToTensor()

    # Datasets and loaders
    train_dataset = GeometryDataset(train_files, train_labels, transform=train_transform)
    test_dataset = GeometryDataset(test_files, test_labels, transform=test_transform)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)


    model = EfficientCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    model, train_losses, test_losses, train_accuracies, test_accuracies = train_model(model, train_loader, test_loader, criterion, optimizer, NUM_EPOCHS, device)

    torch.save(model.state_dict(), MODEL_SAVE_PATH)
    print(f"Model saved to {MODEL_SAVE_PATH}")

    metrics = {
        "train_losses": train_losses,
        "test_losses": test_losses,
        "train_accuracies": train_accuracies,
        "test_accuracies": test_accuracies
    }
    with open("training_metrics.pkl", "wb") as f:
        pickle.dump(metrics, f)

Using device: cuda


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


In [None]:
# @title
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
import torchvision.transforms as transforms

# -------------------------------
# Define the same CNN model structure for inference
# -------------------------------
NUM_CLASSES = 9
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 50 * 50, 256)
        self.fc2 = nn.Linear(256, NUM_CLASSES)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# -------------------------------
# Label mapping: index to class label
# -------------------------------
index_to_label = {0:"Circle", 1:"Hexagon", 2:"Heptagon", 3:"Nonagon",
                  4:"Octagon", 5:"Pentagon", 6:"Square", 7:"Star", 8:"Triangle"}

# -------------------------------
# Load and preprocess an image
# -------------------------------
def load_and_preprocess(image_path):
    image = Image.open(image_path).convert("L")
    transform = transforms.Compose([
        transforms.Resize((200, 200)),  # Resize image to 200x200 pixels
        transforms.ToTensor(),
    ])
    image_tensor = transform(image)
    # Add batch dimension
    image_tensor = image_tensor.unsqueeze(0)
    return image_tensor

# -------------------------------
# Main inference function
# -------------------------------
if __name__ == "__main__":
    MODEL_SAVE_PATH = "/content/0602-IDNumber-LastName.pt"  # Make sure the path matches the saved model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize model and load state
    model = SimpleCNN().to(device)
    model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
    model.eval()

    # Process all images in the current directory (modify the pattern as needed)
    image_files = [f for f in os.listdir(".") if f.endswith((".png", ".jpg", ".jpeg"))]
    results = {}

    for image_file in image_files:
        image_tensor = load_and_preprocess(image_file)
        image_tensor = image_tensor.to(device)
        with torch.no_grad():
            outputs = model(image_tensor)
            _, predicted = torch.max(outputs, 1)
            predicted_label = index_to_label[int(predicted)]
        results[image_file] = predicted_label

    # Print inference results as required
    for img, label in results.items():
        print(f"{img}: {label}")

Circle_ea2f9192-2a97-11ea-8123-8363a7ec19e6.png: Circle
Square_eb0e858c-2a97-11ea-8123-8363a7ec19e6.png: Triangle
Triangle_b3dd0a70-2a97-11ea-8123-8363a7ec19e6.png: Triangle
Hexagon_c93b3c84-2a97-11ea-8123-8363a7ec19e6.png: Hexagon
Circle_e429c7a4-2a97-11ea-8123-8363a7ec19e6.png: Octagon
Heptagon_c4df22fe-2a97-11ea-8123-8363a7ec19e6.png: Heptagon
Hexagon_bdc7a662-2a97-11ea-8123-8363a7ec19e6.png: Hexagon
Hexagon_b1b6f594-2a97-11ea-8123-8363a7ec19e6.png: Hexagon
Triangle_e9840cdc-2a97-11ea-8123-8363a7ec19e6.png: Triangle
Pentagon_a98e9318-2a97-11ea-8123-8363a7ec19e6.png: Pentagon
Octagon_b714bb2a-2a97-11ea-8123-8363a7ec19e6.png: Octagon
Nonagon_d9b6ce2a-2a97-11ea-8123-8363a7ec19e6.png: Octagon
Triangle_cbfc5714-2a97-11ea-8123-8363a7ec19e6.png: Square
Square_c0db9e80-2a97-11ea-8123-8363a7ec19e6.png: Triangle


In [None]:
#Inferences

In [None]:
import os
import torch
import torch.nn as nn
from PIL import Image
import torchvision.transforms as transforms

NUM_CLASSES = 9

class ImprovedCNN(nn.Module):
    def __init__(self):
        super(ImprovedCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Dropout(0.3)
        )
        self.classifier = nn.Sequential(
            nn.Linear(128 * 25 * 25, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, NUM_CLASSES)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


index_to_label = {
    0: "Circle", 1: "Hexagon", 2: "Heptagon", 3: "Nonagon",
    4: "Octagon", 5: "Pentagon", 6: "Square", 7: "Star", 8: "Triangle"
}


def load_and_preprocess(image_path):
    image = Image.open(image_path).convert("L")
    transform = transforms.Compose([
        transforms.Resize((200, 200)),
        transforms.ToTensor()
    ])
    image_tensor = transform(image).unsqueeze(0)
    return image_tensor


if __name__ == "__main__":
    MODEL_PATH = "/content/0602-22401349-HashemiAghdam.pt"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = ImprovedCNN().to(device)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.eval()

    image_files = [f for f in os.listdir(".") if f.lower().endswith((".png", ".jpg", ".jpeg"))]
    results = {}

    for image_file in sorted(image_files):
        image_tensor = load_and_preprocess(image_file).to(device)
        with torch.no_grad():
            output = model(image_tensor)
            predicted_idx = torch.argmax(output, dim=1).item()
            predicted_label = index_to_label[predicted_idx]
            results[image_file] = predicted_label


    for img_name, label in results.items():
        print(f"{img_name}: {label}")