<a href="https://colab.research.google.com/github/agniji/VisionSystems/blob/main/VisionSystem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dataset Preparation

In [None]:
/dataset
    /class_1
        img1.jpg
        img2.jpg
    /class_2
        img1.jpg
        img2.jpg


## Load & Transform Dataset

In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Preprocessing pipeline applied to every image: resize to a fixed 224x224,
# convert to a tensor, then normalise each channel with the mean/std values
# conventionally used for ImageNet-trained torchvision models.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

# Build the dataset from the one-subfolder-per-class layout under `data_dir`
# and wrap it in a shuffling loader that yields mini-batches of 16 images.
data_dir = "dataset"
train_dataset = datasets.ImageFolder(root=data_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# The class list comes from the dataset; its length sizes the model's
# output layer in the training cell.
class_names = train_dataset.classes
num_classes = len(class_names)
print("Classes:", class_names)


## Define & Train a Custom Model

In [None]:
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim

# Build a ResNet-18 with randomly initialised weights (pretrained=False), so
# the network is trained from scratch on this dataset.
# Try resnet50 or inception later on
model = models.resnet18(pretrained=False)
# Replace the classification head so it emits one logit per dataset class.
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
# Be explicit about training mode so batch-norm layers update their statistics
# even if this cell is re-run after the model was switched to eval mode.
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    samples_seen = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight each batch's mean loss by its size so a smaller final batch
        # does not skew the epoch average.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        samples_seen += batch_count

    # Report the mean loss over the whole epoch rather than only the last
    # mini-batch; max(..., 1) avoids a division by zero on an empty loader.
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

# Save the trained model
torch.save(model.state_dict(), "custom_model.pth")
print("Model training complete!")


## Real-Time Inference with Your Custom Model

In [None]:
from PIL import Image
import cv2
import numpy as np

# Load trained model. map_location remaps the stored tensors onto the current
# device, so a checkpoint written on a GPU still loads on a CPU-only machine.
model.load_state_dict(torch.load("custom_model.pth", map_location=device))
# Switch to evaluation mode so batch-norm layers use their running statistics.
model.eval()

# Function to classify a new image
def classify_image(image):
    """Return the predicted class name for a single PIL image.

    Args:
        image: A PIL image. Images that are not already in RGB mode are
            converted first so the three-channel normalisation in
            ``transform`` applies.

    Returns:
        The entry of ``train_dataset.classes`` with the highest model score.
    """
    # torchvision's default ImageFolder loader converts images to RGB; do the
    # same here so grayscale/RGBA inputs do not break the 3-channel Normalize.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Add a leading batch dimension: the model expects a batch of images.
    batch = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(batch)
        # Take the argmax across the class dimension of the single-item batch.
        predicted_class = logits.argmax(dim=1).item()

    return train_dataset.classes[predicted_class]

# Capture and classify live video frames from the default camera (index 0).
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (camera missing or stream ended).
            break

        # Convert OpenCV BGR image to PIL Image
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        label = classify_image(image)

        # Display result
        cv2.putText(frame, f"Class: {label}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv2.imshow("Live Classification", frame)

        # Quit when the user presses 'q'; the mask keeps only the low byte
        # of the key code returned by waitKey.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if classification or
    # display raised, so the device is not left locked.
    cap.release()
    cv2.destroyAllWindows()
