In [2]:
pip install opencv-python opencv-python-headless

Note: you may need to restart the kernel to use updated packages.


In [3]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import cv2



import os
import random
import shutil
import numpy as np
from collections import defaultdict
from PIL import Image, ImageEnhance, ImageOps, ImageFilter, ImageOps
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures


# ------------------- Torch --------------------------------#
import torch
from torchvision.transforms.functional import to_pil_image
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Disabled webcam smoke test. The code below is wrapped in a string literal, so
# none of it is executed; as the last expression of the cell the string is only
# echoed back as the cell output.
''' test show webcam only 

# Initialize webcam
cap = cv2.VideoCapture(0)

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    
    if not ret:
        break

    # Display the resulting frame
    cv2.imshow('Webcam', frame)

    # Break the loop on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()

'''

" test show webcam only \n\n# Initialize webcam\ncap = cv2.VideoCapture(0)\n\nwhile True:\n    # Capture frame-by-frame\n    ret, frame = cap.read()\n    \n    if not ret:\n        break\n\n    # Display the resulting frame\n    cv2.imshow('Webcam', frame)\n\n    # Break the loop on 'q' key press\n    if cv2.waitKey(1) & 0xFF == ord('q'):\n        break\n\n# When everything is done, release the capture\ncap.release()\ncv2.destroyAllWindows()\n\n"

In [7]:
# Maps each classifier output index (0-7) to its class label.
# The labels are numeric identifiers stored as strings.
class_names = dict(enumerate([
    '130228',
    '47892',
    '48537',
    '48681',
    '593040',
    '59549',
    '83653',
    '914922',
]))

In [14]:

# Build a ResNet50 and ask torchvision for its pre-trained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` - confirm against the installed version before changing it.
resnet = models.resnet50(pretrained=True)

# Swap the final fully connected layer for one sized to the number of classes
# in `class_names`.
# NOTE(review): `resnet.fc` is reassigned to nn.Identity() further down, so the
# lasting effect of these lines is defining `num_classes` and `num_features`.
num_classes = len(class_names)
num_features = resnet.fc.in_features
resnet.fc = nn.Linear(num_features, num_classes)

# Freeze every parameter currently registered on `resnet` (this includes the fc
# layer assigned just above) so that none of them require gradients.
for param in resnet.parameters():
    param.requires_grad = False

class SimpleCNN(nn.Module):
    """Classifier formed by a feature-extracting backbone plus one linear head.

    Args:
        backbone: module applied to the input first; its output is passed
            straight to the linear head, so it must produce 2048 features
            per sample.
        num_classes: number of output scores produced by the linear head.
    """

    def __init__(self, backbone, num_classes):
        super().__init__()
        # Attribute names (and their order) define the state_dict keys, so they
        # must stay `backbone` followed by `fc`.
        self.backbone = backbone
        # The head's input width is fixed at 2048 to match ResNet50.
        self.fc = nn.Linear(in_features=2048, out_features=num_classes)

    def forward(self, x):
        """Return the head's raw scores for the backbone features of `x`."""
        return self.fc(self.backbone(x))

# Replace ResNet50's final fully connected layer with a pass-through, so the
# network now returns the features that previously fed that layer.
resnet.fc = nn.Identity()

# Wrap the headless ResNet50 in SimpleCNN, which adds its own linear head.
# NOTE(review): `base_model` is not referenced again in the visible part of
# this notebook - confirm whether it is still needed.
base_model = SimpleCNN(resnet, num_classes)

def load_model(model_path, map_location="cpu"):
    """Load a trained classifier from disk and return it in evaluation mode.

    The file may hold either a state_dict (any ``dict`` instance) or a whole
    pickled model object. A state_dict is loaded into a fresh
    ``SimpleCNN(resnet, num_classes)``; anything else is returned as-is.

    Args:
        model_path: path to the file written by ``torch.save``.
        map_location: device onto which stored tensors are remapped while
            loading. Defaults to ``"cpu"`` so that a checkpoint saved on a GPU
            still loads on a CPU-only machine and the resulting model lives on
            the same device as the CPU tensors this notebook feeds it.

    Returns:
        The loaded model with ``eval()`` already applied.
    """
    # SECURITY: torch.load relies on pickle and can execute arbitrary code while
    # deserializing. Only load checkpoint files from a trusted source.
    checkpoint = torch.load(model_path, map_location=map_location)

    if isinstance(checkpoint, dict):
        # A plain state_dict: rebuild the architecture, then load the weights.
        model = SimpleCNN(resnet, num_classes)
        model.load_state_dict(checkpoint)
    else:
        # Assume the whole model object was saved.
        model = checkpoint

    model.eval()  # Inference behaviour for layers such as dropout / batch norm
    return model

# Load the trained classifier from the checkpoint on disk; load_model() returns
# it already switched to evaluation mode.
model = load_model('./models/trained_model5.pth')

# Built once: the pipeline is identical for every crop, so it does not need to
# be reconstructed for each sliding window of each frame.
_PREPROCESS = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),  # Resize to match the input size expected by the model
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def preprocess_image(frame, bgr=True):
    """Turn one image crop into a normalized, batched input tensor.

    Args:
        frame: image to preprocess; in this notebook a crop of a frame read
            from ``cv2.VideoCapture``.
        bgr: when True (default) a 3-channel NumPy array is treated as an
            OpenCV BGR image and converted to RGB first. ``ToPILImage`` reads
            a 3-channel array as RGB and the mean/std below are listed in RGB
            order, so without the conversion red and blue are swapped.
            NOTE(review): this assumes the model was trained on RGB images -
            confirm against the training pipeline; pass ``bgr=False`` to keep
            the previous behaviour.

    Returns:
        The tensor produced by the pipeline with a leading batch dimension of
        size 1 added.
    """
    # Only reorder channels for 3-channel arrays; other inputs accepted by
    # ToPILImage (e.g. single-channel arrays, tensors) pass through unchanged.
    if bgr and isinstance(frame, np.ndarray) and frame.ndim == 3 and frame.shape[2] == 3:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    img_tensor = _PREPROCESS(frame)
    return img_tensor.unsqueeze(0)  # Add batch dimension

def sliding_window(image, step_size, window_size):
    """Yield every full-size window of `image` on a regular grid.

    Args:
        image: array indexed as ``image[row, col, ...]``.
        step_size: positive stride, in pixels, between neighbouring windows
            along both axes.
        window_size: ``(width, height)`` of each window.

    Yields:
        ``(x, y, window)`` tuples in row-major order, where ``(x, y)`` is the
        window's top-left corner and ``window`` is the slice
        ``image[y:y + height, x:x + width]``.

    Raises:
        ValueError: if ``step_size`` is not positive.
    """
    if step_size <= 0:
        raise ValueError("step_size must be a positive integer")

    win_w, win_h = window_size[0], window_size[1]
    # The "+ 1" makes the last valid top-left corner inclusive. Without it the
    # final row/column of windows is dropped whenever it lands exactly on the
    # image edge, and an image exactly as large as the window yields nothing.
    last_y = image.shape[0] - win_h + 1
    last_x = image.shape[1] - win_w + 1

    for y in range(0, last_y, step_size):
        for x in range(0, last_x, step_size):
            yield (x, y, image[y:y + win_h, x:x + win_w])

# Open the default webcam and configure the sliding-window scan.
cap = cv2.VideoCapture(0)  # Use 0 for the default camera
window_size = (224, 224)  # (width, height) of each crop passed to the classifier
step_size = 112  # Half the window size for overlapping windows

# try/finally guarantees that the camera and the preview window are released
# even when the loop ends through a kernel interrupt or an inference error;
# otherwise the device stays locked until the kernel is restarted.
try:
    while True:
        ret, frame = cap.read()

        if not ret:
            # No frame could be read (camera unavailable or stream ended).
            break

        # Track the single most confident window in this frame.
        best_box = None
        highest_prob = 0

        for (x, y, window) in sliding_window(frame, step_size=step_size, window_size=window_size):
            # Skip partial windows that do not have the full expected size.
            if window.shape[0] != window_size[1] or window.shape[1] != window_size[0]:
                continue

            img_tensor = preprocess_image(window)

            with torch.no_grad():
                output = model(img_tensor)
                probabilities = torch.nn.functional.softmax(output, dim=1)
                max_prob, predicted = torch.max(probabilities, 1)

            predicted_class_idx = predicted.item()
            predicted_prob = max_prob.item() * 100  # Express as a percentage

            # Keep this window if it is the most confident prediction so far.
            if predicted_class_idx in class_names and predicted_prob > highest_prob:
                highest_prob = predicted_prob
                best_box = (x, y, x + window_size[0], y + window_size[1], predicted_class_idx, predicted_prob)

        if best_box:
            x1, y1, x2, y2, class_idx, prob = best_box
            class_name = class_names[class_idx]
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Windows in the top row have y1 == 0, which would put the label
            # baseline above the frame; clamp it so the text stays visible.
            label_y = max(y1 - 10, 15)
            cv2.putText(frame, f'{class_name} ({prob:.2f}%)', (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)

        cv2.imshow('Webcam', frame)

        # Stop when the user presses 'q' in the preview window.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


KeyboardInterrupt: 