In [4]:
!pip install torch torchvision
!pip install opencv-python
!pip install matplotlib




In [5]:
import torch
import torchvision.transforms as transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import numpy as np

# Load the Faster R-CNN model pre-trained on COCO dataset.
# The `weights` argument replaces the deprecated `pretrained=True` flag and matches
# the weights-based loading used for the ResNet model later in this notebook.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
model.eval()  # evaluation mode: the model returns detections instead of losses

# Transform function for the input image (PIL image -> tensor)
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Function to detect and count objects
def detect_and_count_objects(img_path, threshold=0.5):
    """Detect objects in an image, print and plot them, and return per-class counts.

    Args:
        img_path: Path of the image file to analyse.
        threshold: Detections whose score is not strictly greater than this
            value are discarded. Defaults to 0.5.

    Returns:
        dict mapping each detected label id (int) to the number of kept
        detections with that label (int).

    Relies on the module-level ``model`` and ``transform``.
    """
    # Load and preprocess the image
    image = Image.open(img_path).convert("RGB")
    image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

    # Perform detection
    with torch.no_grad():
        outputs = model(image_tensor)

    # Build the score mask once and reuse it for boxes and labels
    detections = outputs[0]
    keep = detections['scores'] > threshold
    boxes = detections['boxes'][keep].cpu().numpy()
    labels = detections['labels'][keep].cpu().numpy()

    # Plain Python ints keep the printed/returned dict free of NumPy scalar types
    counts = {int(label): int(np.sum(labels == label)) for label in np.unique(labels)}

    # Print results
    print(f"Detected {len(boxes)} objects.")
    print("Object counts:", counts)

    # Draw on the exact pixels the model saw instead of re-reading the file with
    # cv2.imread: imread applies EXIF orientation by default while PIL's
    # Image.open does not, so the boxes could land on a differently oriented
    # image (and imread returns None when it cannot decode the file).
    img_rgb = np.array(image)
    box_color = (0, 0, 255)  # blue in RGB order, same colour as previously displayed
    for box, label in zip(boxes, labels):
        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(img_rgb, (x1, y1), (x2, y2), box_color, 2)
        cv2.putText(img_rgb, str(label), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2)

    plt.imshow(img_rgb)
    plt.axis('off')
    plt.show()

    return counts

# Run the detector on a sample image (replace the path with your own image)
test_file_path = 'object_detection_test.jpg'
object_counts = detect_and_count_objects(test_file_path)

# Report the per-class totals, one line per label id
print("Final Object Counts:")
for label in object_counts:
    count = object_counts[label]
    print(f"Class {label}: {count} objects detected")


Detected 7 objects.
Object counts: {50: 1, 55: 3, 67: 1, 84: 1, 86: 1}
Final Object Counts:
Class 50: 1 objects detected
Class 55: 3 objects detected
Class 67: 1 objects detected
Class 84: 1 objects detected
Class 86: 1 objects detected


In [13]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
import torchvision.transforms as T
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2
from torchvision.ops import nms

# Load the Faster R-CNN model pre-trained on COCO dataset.
# The `weights` argument replaces the deprecated `pretrained=True` flag and matches
# the weights-based loading used for the ResNet model later in this notebook.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
model.eval()  # Set the model to evaluation mode

# Define transformation to convert image to tensor
transform = T.Compose([T.ToTensor()])

def load_image(image_path):
    """Open an image file and return it with its tensor form.

    The image is converted to RGB so grayscale, palette or RGBA files yield a
    3-channel tensor, as the other detection cell in this notebook already does.

    Args:
        image_path: Path of the image file to open.

    Returns:
        Tuple ``(image, image_tensor)``: the RGB PIL image and the result of
        applying the module-level ``transform`` to it.
    """
    image = Image.open(image_path).convert("RGB")
    image_tensor = transform(image)  # Apply transformation (convert to tensor)
    return image, image_tensor

# Example image (replace this with your image path)
image_path = "object_detection_test.jpg"
image, image_tensor = load_image(image_path)

# Run the image through the model
with torch.no_grad():
    # Send image tensor as a batch (even if it's a single image)
    predictions = model([image_tensor])

# Extract predictions (bounding boxes, labels, and scores)
predicted_boxes = predictions[0]['boxes'].cpu().numpy()
predicted_labels = predictions[0]['labels'].cpu().numpy()
predicted_scores = predictions[0]['scores'].cpu().numpy()

# Filter predictions by a higher confidence threshold
confidence_threshold = 0.7  # Adjust threshold based on your dataset
confident = predicted_scores >= confidence_threshold  # mask computed once, reused below
high_confidence_boxes = predicted_boxes[confident]
high_confidence_labels = predicted_labels[confident]
high_confidence_scores = predicted_scores[confident]

# Apply Non-Maximum Suppression (NMS)
keep = nms(torch.tensor(high_confidence_boxes), torch.tensor(high_confidence_scores), iou_threshold=0.5)

# Index the NumPy arrays with a NumPy index array, not the tensor itself: NumPy
# treats a one-element integer tensor as a scalar index, which would drop the
# leading axis when exactly one box survives NMS.
keep_indices = keep.numpy()

# Only keep the boxes, labels, and scores after NMS
nms_boxes = high_confidence_boxes[keep_indices]
nms_labels = high_confidence_labels[keep_indices]
nms_scores = high_confidence_scores[keep_indices]

# Count the number of objects detected after NMS
num_objects_detected = len(nms_boxes)
print(f"Number of objects detected: {num_objects_detected}")

# Define a function to draw bounding boxes on the image
def draw_boxes(image, boxes):
    """Return a new PIL image with a rectangle drawn for every box.

    Args:
        image: Source image; it is copied, so the caller's object is not drawn on.
        boxes: Iterable of 4-value boxes ``(x1, y1, x2, y2)``; each value is
            truncated to an int before drawing.

    Returns:
        PIL image built from the annotated pixel array.
    """
    canvas = np.array(image.copy())
    outline_color = (255, 0, 0)  # red when the array is in RGB channel order
    outline_width = 2
    for box in boxes:
        left, top, right, bottom = (int(coord) for coord in box)
        cv2.rectangle(canvas, (left, top), (right, bottom), outline_color, outline_width)
    return Image.fromarray(canvas)

# Annotate the original image with the boxes that survived NMS
image_with_boxes = draw_boxes(image=image, boxes=nms_boxes)

# Show the annotated image without axis ticks or frame
plt.imshow(image_with_boxes)
plt.axis("off")
plt.show()


Number of objects detected: 3


In [17]:
# no splitting

import os
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
from sklearn import svm
from sklearn.preprocessing import LabelEncoder

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pre-trained ResNet-50, moved to the chosen device and put in evaluation mode
resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
resnet = resnet.to(device)
resnet.eval()

# Feature extractor: every child module of the ResNet except the last one
# (the classification layer), chained in their original order
model = nn.Sequential(*list(resnet.children())[:-1])

# Preprocessing applied to every image before feature extraction
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Function to extract features from an image
def extract_features(image_path):
    """Return a flat NumPy feature vector for the image at ``image_path``.

    The image is opened as RGB, passed through the module-level ``transform``,
    given a batch dimension, moved to ``device`` and fed to ``model`` with
    gradient tracking disabled. The model output is flattened to one dimension.
    """
    rgb_image = Image.open(image_path).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0)  # add the batch dimension
    batch = batch.to(device)
    with torch.no_grad():
        feature_map = model(batch)
    feature_vector = feature_map.cpu().numpy().flatten()
    return feature_vector

# Paths and dataset directories
data_dir = "fmcg_products"  # Replace with your dataset folder path
# os.listdir returns entries in arbitrary order; sorting keeps the sample order
# the same from run to run.
items = sorted(os.listdir(data_dir))

# Lists to hold features and labels
X = []
y = []

# Iterate over the dataset and extract features for all items
for item_name in items:
    item_dir = os.path.join(data_dir, item_name)

    # Each class is a sub-directory of data_dir. Skip stray files and hidden
    # entries at this level; otherwise os.listdir(item_dir) fails on a file.
    if item_name.startswith('.') or not os.path.isdir(item_dir):
        continue

    for image_name in sorted(os.listdir(item_dir)):
        image_path = os.path.join(item_dir, image_name)

        # Skip directories or hidden files
        if os.path.isdir(image_path) or image_name.startswith('.'):
            continue

        # Extract features for each image; the folder name is the label
        features = extract_features(image_path)
        X.append(features)
        y.append(item_name)

# Fail early with a clear message instead of an opaque error from the classifier
if not X:
    raise ValueError(f"No images found under {data_dir!r}")

# Encode labels as integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train an SVM classifier on the entire dataset
clf = svm.SVC(kernel='linear')
clf.fit(X, y_encoded)

# Function to classify a new image
def classify_image(image_path):
    """Predict the label of the image at ``image_path``.

    Features come from ``extract_features``; the module-level ``clf`` predicts
    an encoded class, which ``label_encoder`` maps back to the original label.

    Returns:
        The decoded label for the single input image.
    """
    feature_vector = extract_features(image_path)
    encoded_prediction = clf.predict([feature_vector])  # one-sample batch
    decoded_labels = label_encoder.inverse_transform(encoded_prediction)
    return decoded_labels[0]


