In [1]:
!pip install torch torchvision



In [2]:
!pip install opencv-python



In [3]:
!pip install ultralytics



In [1]:
import torch
from torchvision import transforms
from PIL import Image, ExifTags
import cv2
import numpy as np

In [None]:
def load_model(model_path, model_class=None, device='cpu'):
    """Load a pickled PyTorch checkpoint and return its network ready for inference.

    :param model_path: Path to a file written with ``torch.save``. It may hold
        either a dict with the network under the ``'model'`` key or the network
        object itself.
    :param model_class: Unused; kept so existing callers that pass it keep working.
    :param device: Device the checkpoint tensors are mapped to while loading.
        Defaults to ``'cpu'`` so that checkpoints saved on a GPU load on any
        machine and match the CPU tensors produced by ``prepare_image``.
    :return: The network cast to float32 and switched to evaluation mode.
    """
    # SECURITY: this unpickles arbitrary Python objects. Only load checkpoints
    # from a trusted source.
    # NOTE(review): unpickling presumably needs the package that defines the
    # model class (e.g. ultralytics) to be importable -- confirm.
    try:
        # Newer PyTorch defaults to weights_only=True, which rejects pickled
        # nn.Module objects; a full-model checkpoint needs it switched off.
        checkpoint = torch.load(model_path, map_location=device, weights_only=False)
    except TypeError:
        # Older PyTorch releases do not know the weights_only keyword.
        checkpoint = torch.load(model_path, map_location=device)

    # Accept both a checkpoint dict and a directly pickled module.
    model = checkpoint['model'] if isinstance(checkpoint, dict) else checkpoint
    model = model.float()
    model.eval()  # Disable dropout / batch-norm updates for inference
    return model

def prepare_image(image_path, input_size, training_mode=False):
    """Load an image file and turn it into a batched float tensor for the model.

    Steps: apply the EXIF orientation, resize to ``input_size`` with a box
    filter, convert to RGB, convert to a tensor with ``transforms.ToTensor``
    and add a leading batch dimension.

    :param image_path: Path of the image file to open with Pillow.
    :param input_size: ``(width, height)`` passed to ``Image.resize``.
    :param training_mode: When true, a horizontal flip is appended to the
        transforms. The flip uses ``p=1.0``, so it is always applied, not
        applied at random.
    :return: Tensor with a batch dimension of 1 in front of the image tensor.
    """
    # EXIF tag id of "Orientation".
    orientation_tag = 0x0112
    # Transpose operation that brings each EXIF orientation upright. Value 1
    # (or a missing tag) means the image is already upright.
    exif_transposes = {
        2: Image.FLIP_LEFT_RIGHT,
        3: Image.ROTATE_180,
        4: Image.FLIP_TOP_BOTTOM,
        5: Image.TRANSPOSE,
        6: Image.ROTATE_270,
        7: Image.TRANSVERSE,
        8: Image.ROTATE_90,
    }

    with Image.open(image_path) as img:
        # Public getexif() works for every format; it returns an empty mapping
        # when the file carries no EXIF data, so no None check is needed.
        orientation = img.getexif().get(orientation_tag, 1)
        if orientation in exif_transposes:
            img = img.transpose(exif_transposes[orientation])

        # Resize the image
        img = img.resize(input_size, Image.BOX)

        # Convert image to RGB (in case it's not already in that format)
        img = img.convert('RGB')

        transform_list = [transforms.ToTensor()]
        if training_mode:
            # p=1.0: the flip is deterministic whenever training_mode is set.
            transform_list.append(transforms.RandomHorizontalFlip(p=1.0))
        transform = transforms.Compose(transform_list)

        image = transform(img)

        # Add batch dimension
        image = image.unsqueeze(0)
    return image

def run_inference(model, image):
    """Call ``model`` on ``image`` with gradient tracking switched off.

    :param model: Callable network, invoked as ``model(image)``.
    :param image: Input handed to the model unchanged.
    :return: Whatever the model call returns.
    """
    grad_disabled = torch.no_grad()
    with grad_disabled:
        return model(image)


def process_output(output):
    """Pick the single highest-confidence prediction from the raw model output.

    Assumes the prediction tensor is laid out as ``[1, C, N]`` (for example
    ``[1, 5, 8400]``): one image, ``C`` attribute rows and ``N`` candidate
    boxes, with rows 0-3 holding center x, center y, width, height and row 4
    holding the confidence score.
    NOTE(review): for a model with several classes row 4 would only be the
    first class score -- confirm the model is single-class.

    :param output: Either the prediction tensor itself or a tuple/list whose
        first element is that tensor.
    :return: List of five Python floats ``[cx, cy, w, h, confidence]`` for the
        candidate with the largest confidence (the first one on ties).
    :raises ValueError: If the tensor contains no candidate boxes.
    """
    predictions = output[0] if isinstance(output, (tuple, list)) else output
    print(predictions.shape)

    first_image = predictions[0]  # [C, N] for the only image in the batch
    confidences = first_image[4]
    if confidences.numel() == 0:
        raise ValueError("model output contains no candidate boxes")

    # argmax works for any number of candidates and avoids a Python-level
    # loop with one tensor read per candidate.
    best_index = int(torch.argmax(confidences))
    return first_image[:5, best_index].tolist()



def draw_boxes(image, box, input_size=(640, 640)):
    """Draw one bounding box and its confidence label on a resized image.

    The box is expected in the coordinate space of the resized image, so the
    image is first resized to ``input_size``.

    :param image: Image array accepted by ``cv2.resize``.
    :param box: Sequence starting with ``cx, cy, w, h, conf`` (box center,
        box size and confidence score); extra trailing items are ignored.
    :param input_size: ``(width, height)`` the image is resized to before
        drawing. Defaults to the 640x640 size used elsewhere in this file.
    :return: The resized image with the rectangle and label drawn on it.
    """
    resized_image = cv2.resize(image, input_size)

    cx, cy, w, h, conf = box[:5]

    # Convert from [center x, center y, width, height] to [x1, y1, x2, y2]
    x1 = int(cx - w / 2)
    y1 = int(cy - h / 2)
    x2 = int(cx + w / 2)
    y2 = int(cy + h / 2)

    color = (0, 255, 0)  # Green color in BGR
    thickness = 2
    cv2.rectangle(resized_image, (x1, y1), (x2, y2), color, thickness)

    label = f'Conf: {conf:.2f}'
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.9
    (_, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)

    # putText anchors the text at its bottom-left corner. Prefer the spot just
    # above the box; when that would push the text over the top edge, place it
    # just below the box's top line instead so it stays visible.
    label_x = max(x1, 0)
    label_y = y1 - 10
    if label_y - text_height < 0:
        label_y = max(y1, 0) + text_height + 10
    cv2.putText(resized_image, label, (label_x, label_y), font, font_scale, color, thickness)

    return resized_image

def crop_to_box(image, box, input_size=(640, 640)):
    """Resize an image to the model input size and crop it to a bounding box.

    The box is expected in the coordinate space of the resized image.

    :param image: Image array accepted by ``cv2.resize``.
    :param box: Sequence whose first four items are ``cx, cy, w, h`` (box
        center and box size); further items such as a confidence are ignored.
    :param input_size: ``(width, height)`` the image is resized to before
        cropping. Defaults to the 640x640 size used elsewhere in this file.
    :return: The cropped region of the resized image. The crop is empty when
        the box lies completely outside the image or has no area.
    """
    width, height = input_size
    resized_image = cv2.resize(image, (width, height))

    cx, cy, w, h = box[:4]

    def _clamp(value, upper):
        # Keep every edge inside [0, upper]. A negative end index would
        # otherwise be read by the slice as "count from the end" and silently
        # return the wrong region.
        return min(max(int(value), 0), upper)

    # Convert from [center x, center y, width, height] to [x1, y1, x2, y2]
    x1 = _clamp(cx - w / 2, width)
    y1 = _clamp(cy - h / 2, height)
    x2 = _clamp(cx + w / 2, width)
    y2 = _clamp(cy + h / 2, height)

    return resized_image[y1:y2, x1:x2]

def resize_image(image, scale_percent):
    """
    Scale an image uniformly to a percentage of its current size.
    :param image: Source image array; ``shape[0]`` is height, ``shape[1]`` is width.
    :param scale_percent: Target size in percent of the original (50 halves each side).
    :return: The image resized with area interpolation.
    """
    # cv2.resize expects (width, height), i.e. shape[1] before shape[0].
    target_size = tuple(
        int(side * scale_percent / 100)
        for side in (image.shape[1], image.shape[0])
    )
    return cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)




    
def main(model_path='best_3.pt', image_path='./images/alan.jpg'):
    """Run the detector on one image and show the crop of its best detection.

    Loads the model, prepares the image at 640x640, runs inference, prints the
    top prediction, then crops the image to that prediction and shows it in an
    OpenCV window until a key is pressed.

    :param model_path: Path to the model checkpoint file.
    :param image_path: Path to the image to run detection on.
    :raises OSError: If OpenCV cannot read ``image_path``.
    :raises ValueError: If the predicted box yields an empty crop.
    """
    # Load the model, adjust the class as necessary
    model = load_model(model_path, model_class=None)

    # Prepare the image; the size must match what the model expects
    prepared_image = prepare_image(image_path, input_size=(640, 640))

    output = run_inference(model, prepared_image)

    top_prediction = process_output(output)
    print(top_prediction)

    # cv2.imread reports failure by returning None instead of raising.
    original_image = cv2.imread(image_path)
    if original_image is None:
        raise OSError(f"OpenCV could not read image: {image_path}")

    result_image = crop_to_box(original_image, top_prediction)
    if result_image.size == 0:
        # cv2.imshow cannot display an empty image.
        raise ValueError(f"Predicted box gives an empty crop: {top_prediction}")

    try:
        cv2.imshow('Detected Object with Highest Confidence', result_image)
        cv2.waitKey(0)
    finally:
        # Close the window even if displaying was interrupted.
        cv2.destroyAllWindows()

# Run the demo when this code is executed directly (as a script or as a
# notebook cell, where __name__ is "__main__") but not when it is imported.
if __name__ == "__main__":
    main()

torch.Size([1, 5, 8400])
[321.9969787597656, 251.54086303710938, 135.28253173828125, 143.34117126464844, 0.9363659024238586]
