# Lecture 2

# Image Thresholding

In [1]:
# Image Thresholding using OpenCV

import cv2
import numpy as np

img = cv2.imread('Media/Image Before Thresholding.png')

# Convert to Grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Simple Thresholding
_, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_OTSU)
# Threshold value means if pixel value is greater than 150, it will be assigned 255, else 0
# Another Thresholding techniques are cv2.THRESH_BINARY_INV, cv2.THRESH_TRUNC, cv2.THRESH_TOZERO, cv2.THRESH_TOZERO_INV 

# Adaptive Thresholding: used for images with varying brightness. It divides the image into smaller blocks and then applies thresholding 
adaptiveThreshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2) 

cv2.imshow('Image Before Thresholding', img)
cv2.imshow('Image After Thresholding', threshold)
cv2.imshow('Image After Adaptive Thresholding', adaptiveThreshold)

cv2.waitKey(0)
cv2.destroyAllWindows()

# Edge Detection

In [None]:
# Edge detection using Canny Edge Detection
import cv2
import numpy as np

img = cv2.imread('Media/Image Before Thresholding.png')

# Convert to Grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Canny Edge Detection
# The first argument is the input image. The second and third arguments are our minVal and maxVal.
canny = cv2.Canny(gray, 100, 200)

cv2.imshow('Image Before Thresholding', img)
cv2.imshow('Image After Canny Edge Detection', canny)

cv2.waitKey(0)
cv2.destroyAllWindows()

# Image Blurring

In [None]:
# Image Blurring using OpenCV
import cv2
import numpy as np

img = cv2.imread('ImagePath.png')

# Blurring
Blurred = cv2.blur(img, (7, 7))
# Another Blurring techniques are cv2.GaussianBlur, cv2.medianBlur, cv2.bilateralFilter

cv2.imshow('Image Before Blurring', img)
cv2.imshow('Image After Blurring', Blurred)

cv2.waitKey(0)
cv2.destroyAllWindows()


# Contours

In [6]:
import cv2

img = cv2.imread('Media/Flower.png')

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (3, 3), 0) # Blurring

edged = cv2.Canny(blurred, 170, 255) # Edge Detection

# cv2.imshow("Original image", img)
# cv2.imshow("Edged image", edged)

contours, _ = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # The two objects returned are contours and hierarchy. We will only use contours.

cv2.drawContours(img, contours, -1, (0, 255, 0), 2)
# drawContours parameters: image, contours, contourIdx, color, thickness
# contourIdx = -1 means all contours are drawn

cv2.imshow("Edged image", edged)
cv2.imshow("contours", img)
cv2.waitKey(0)

cv2.imwrite("contours.png", img)

cv2.destroyAllWindows()

# Mediapipe

**Let's Start exploring MediaPipe library**

**MediaPipe offers open-source cross-platform, customizable ML solutions for live and streaming media.**

# Image Classification with MediaPipe

In [None]:
# Start with install the library
# You need Python version < 3.11
%pip install mediapipe 

**Classify a single Image**

**The image classifier was trained using 'ImageNet' Dataset to recognize 1,000 classes**

For more information about the available classifiers check [MediaPipe Image Classification Task guide](https://developers.google.com/mediapipe/solutions/vision/image_classifier/index#efficientnet-lite0_model_recommended)


In [None]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2

BaseOptions = mp.tasks.BaseOptions
ImageClassifier = mp.tasks.vision.ImageClassifier
ImageClassifierOptions = mp.tasks.vision.ImageClassifierOptions
VisionRunningMode = mp.tasks.vision.RunningMode

options = ImageClassifierOptions(
    base_options=BaseOptions(model_asset_path='Models/efficientnet_lite0.tflite'), # Path to the model file
    max_results=5, # Maximum number of results to return
    running_mode=VisionRunningMode.IMAGE, # Running mode 'Image' or 'Video' or 'Stream'
    category_allowlist = ['sports car', 'pizza', 'pomegranate', 'tiger cat'] # List of categories to allow
    )

In [None]:
img = cv2.imread("Media/Ferrari.jpg") # Load the image
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img) # Create a MediaPipe Image object from the OpenCV image

with ImageClassifier.create_from_options(options) as classifier: # Create the ImageClassifier instance
    classification_result = classifier.classify(mp_image) # Classify the image

In [None]:
# classification_result contains the classification results.
print(f"Category: {classification_result.classifications[0].categories[0].category_name}") # Get the category of the highest classification result
print(f"Score: {classification_result.classifications[0].categories[0].score}") # Get the score of the highest classification result

Category: sports car
Score: 0.83984375


# Face Detection with MediaPipe

**On a single Image**

In [None]:
import mediapipe as mp

BaseOptions = mp.tasks.BaseOptions
FaceDetector = mp.tasks.vision.FaceDetector
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Initialize a face detector instance:
options = FaceDetectorOptions(
    base_options=BaseOptions(model_asset_path='Models/blaze_face_short_range.tflite'),
    running_mode=VisionRunningMode.IMAGE,
    min_detection_confidence=0.5,
    min_suppression_threshold=0.3)
# Create instance of the face detector and load an image
with FaceDetector.create_from_options(options) as detector:
    img = cv2.imread("Media/Person.jpg")
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img)
    face_detector_result = detector.detect(mp_image)

In [None]:
face_detector_result

DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=333, origin_y=129, width=161, height=161), categories=[Category(index=0, score=0.8582574129104614, display_name=None, category_name=None)], keypoints=[NormalizedKeypoint(x=0.458046555519104, y=0.3762951195240021, label='', score=0.0), NormalizedKeypoint(x=0.5414471626281738, y=0.37260696291923523, label='', score=0.0), NormalizedKeypoint(x=0.5043702125549316, y=0.4522835314273834, label='', score=0.0), NormalizedKeypoint(x=0.504838764667511, y=0.5268869400024414, label='', score=0.0), NormalizedKeypoint(x=0.4082053303718567, y=0.4142933785915375, label='', score=0.0), NormalizedKeypoint(x=0.5878703594207764, y=0.400863379240036, label='', score=0.0)])])

In [None]:
# Get the bounding box of the first face:
x = face_detector_result.detections[0].bounding_box.origin_x
y = face_detector_result.detections[0].bounding_box.origin_y
height = face_detector_result.detections[0].bounding_box.height
width = face_detector_result.detections[0].bounding_box.width

In [None]:
# Draw the bounding box on the image:
cv2.rectangle(img, (int(x), int(y)), (int(x + width), int(y + height)), (0, 255, 0), 2) # The first coordinate is the top-left corner of the bounding box, 
                                                                                        # and the second coordinate is the bottom-right corner of the bounding box
cv2.imshow("Face Detection", img)
cv2.waitKey(0)

cv2.destroyAllWindows()

**Face Detection on a video**

In [None]:
import mediapipe as mp

BaseOptions = mp.tasks.BaseOptions
FaceDetector = mp.tasks.vision.FaceDetector
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Create a face detector instance with the video mode:
options = FaceDetectorOptions(
    base_options=BaseOptions(model_asset_path='Models/blaze_face_short_range.tflite'),
    running_mode=VisionRunningMode.IMAGE,
    min_detection_confidence=0.5,
    min_suppression_threshold=0.3)
with FaceDetector.create_from_options(options) as detector:
    vid = cv2.VideoCapture("Media/video_1.mp4") # Load the video

    fps = vid.get(cv2.CAP_PROP_FPS) # Get the frame rate of the video

    while True:
        success, frame = vid.read() # Read a frame from the video
        if not success:
            break

        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame) # Create a MediaPipe Image object from the OpenCV image

        face_detector_result = detector.detect(mp_image) 

        if face_detector_result.detections: # If faces are detected
            for detection in face_detector_result.detections: # Loop through the detected faces 
                # Get the coordinates of the face:
                x = detection.bounding_box.origin_x
                y = detection.bounding_box.origin_y
                height = detection.bounding_box.height
                width = detection.bounding_box.width
                cv2.rectangle(frame, (int(x), int(y)), (int(x + width), int(y + height)), (0, 255, 0), 2)
        cv2.imshow("Face Detection", frame)
        if cv2.waitKey(int(1000/fps)) & 0xFF == ord('q'):
            break

    vid.release()
    cv2.destroyAllWindows()

# Text Extracting

In [None]:
# Install pytesseract via this link: 
# https://github.com/UB-Mannheim/tesseract/wiki
# Add the path to the tesseract executable to the system environment variable PATH

In [None]:
%pip install tesseract 
%pip install pytesseract 

In [29]:
# Text Extraction with pytesseract 
import pytesseract
import cv2
pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files/Tesseract-OCR/tesseract.exe' # Path to the tesseract executable

img = cv2.imread("Media/Book Paragraph.jpg")

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

text = pytesseract.image_to_string(gray) 

print(text)

cv2.imshow("Adaptive Threshold", gray)
cv2.waitKey(0)

cv2.destroyAllWindows()



