<a href="https://colab.research.google.com/github/bhattacharya5/Computer_Vision/blob/main/Major_ComputerVision_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install dlib
!pip install opencv-contrib-python
!pip install imutils



In [2]:
import os
import requests
import tarfile

# URL of the file to download
url = "http://dlib.net/files/data/ibug_300W_large_face_landmark_dataset.tar.gz"

# Function to download and extract the file
def download_and_extract(url, target_folder):
    # Create the target folder if it doesn't exist
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)

    # Download the file
    file_name = url.split('/')[-1]
    file_path = os.path.join(target_folder, file_name)
    with open(file_path, "wb") as f:
        response = requests.get(url)
        f.write(response.content)

    # Extract the contents
    with tarfile.open(file_path, "r:gz") as tar:
        tar.extractall(target_folder)

    print("File downloaded and extracted successfully!")

# Specify the target folder
target_folder = "./ibug_dataset"

# Call the function to download and extract the file
download_and_extract(url, target_folder)

File downloaded and extracted successfully!


In [None]:
import os

def tree(directory):
    print(directory)
    print("|")
    for root, dirs, files in os.walk(directory):
        #for file in files:
        #    print("|--", file)
        for dir in dirs:
            tree(os.path.join(root, dir))

# Specify the directory you want to view
directory = "."  # Current directory

# Call the function to display the tree
tree(directory)

.
|
./.config
|
./.config/configurations
|
./.config/logs
|
./.config/logs/2024.04.16
|
./.config/logs/2024.04.16
|
./ibug_dataset
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/helen
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/helen/testset
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/helen/trainset
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/lfpw
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/lfpw/testset
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/lfpw/trainset
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/afw
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/ibug
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/helen/testset
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/helen/trainset
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/lfpw/testset
|
./ibug_dataset/ibug_300W_large_face_landmark_dataset/lfpw/trainset
|
./ibug_dataset/ibug_

## Building an “eyes only” shape predictor dataset

In [12]:
import re

# Define the input and output paths directly here
input_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/labels_ibug_300W.xml"
output_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/output1.xml"

def parse_xml(input_path, output_path):
  # in the iBUG 300-W dataset, each (x, y)-coordinate maps to a specific
  # facial feature (i.e., eye, mouth, nose, etc.) -- in order to train a
  # dlib shape predictor on *just* the eyes, we must first define the
  # integer indexes that belong to the eyes
  LANDMARKS = set(list(range(36, 48)))

  # to easily parse out the eye locations from the XML file we can
  # utilize regular expressions to determine if there is a 'part'
  # element on any given line
  PART = re.compile("part name='[0-9]+'")

  # load the contents of the original XML file and open the output file
  # for writing
  print("[INFO] parsing data split XML file...")
  with open(input_path, "r") as file:
      rows = file.readlines()

  with open(output_path, "w") as output:
      # loop over the rows of the data split file
      for row in rows:
          # check to see if the current line has the (x, y)-coordinates for
          # the facial landmarks we are interested in
          parts = re.findall(PART, row)

          # if there is no information related to the (x, y)-coordinates of
          # the facial landmarks, we can write the current line out to disk
          # with no further modifications
          if len(parts) == 0:
              output.write(row)
          # otherwise, there is annotation information that we must process
          else:
              # parse out the name of the attribute from the row
              attr = "name='"
              i = row.find(attr)
              j = row.find("'", i + len(attr) + 1)
              name = int(row[i + len(attr):j])

              # if the facial landmark name exists within the range of our
              # indexes, write it to our output file
              if name in LANDMARKS:
                  output.write(row)

  print("Processing completed. Output saved to:", output_path)


parse_xml (input_path, output_path)

[INFO] parsing data split XML file...
Processing completed. Output saved to: /content/ibug_dataset/ibug_300W_large_face_landmark_dataset/output1.xml


## Creating our training and testing splits

In [13]:
# Paths to the input and output files
train_input_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/labels_ibug_300W_train.xml"
train_output_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/labels_ibug_300W_train_eyes.xml"
test_input_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/labels_ibug_300W_test.xml"
test_output_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/labels_ibug_300W_test_eyes.xml"

# Parsing train data
parse_xml(train_input_path, train_output_path)

# Parsing test data
parse_xml(test_input_path, test_output_path)

[INFO] parsing data split XML file...
Processing completed. Output saved to: /content/ibug_dataset/ibug_300W_large_face_landmark_dataset/labels_ibug_300W_train_eyes.xml
[INFO] parsing data split XML file...
Processing completed. Output saved to: /content/ibug_dataset/ibug_300W_large_face_landmark_dataset/labels_ibug_300W_test_eyes.xml


## Implementing our custom dlib shape predictor training script

In [14]:
import multiprocessing
import dlib

def train_shape_predictor(training_xml, model_path):
    # grab the default options for dlib's shape predictor
    print("[INFO] setting shape predictor options...")
    options = dlib.shape_predictor_training_options()

    # Hyperparameters to tune:
    # tree_depth: Depth of each regression tree
    options.tree_depth = 4

    # nu: Regularization parameter
    options.nu = 0.1

    # cascade_depth: Number of cascades used to train the shape predictor
    options.cascade_depth = 15

    # feature_pool_size: Number of pixels used to generate features for the random trees at each cascade
    options.feature_pool_size = 400

    # num_test_splits: Number of test splits
    options.num_test_splits = 50

    # oversampling_amount: Controls the amount of "jitter" (data augmentation) when training the shape predictor
    options.oversampling_amount = 5

    # oversampling_translation_jitter: Amount of translation jitter to apply
    options.oversampling_translation_jitter = 0.1

    # be_verbose: Whether to print out status messages during training
    options.be_verbose = True

    # num_threads: Number of threads/CPU cores to be used when training
    options.num_threads = multiprocessing.cpu_count()

    # Log the training options
    print("[INFO] Shape predictor options:")
    print(options)

    # Train the shape predictor
    print("[INFO] Training shape predictor...")
    dlib.train_shape_predictor(training_xml, model_path, options)
    print("[INFO] Training completed.")

# Paths to training data and model output
training_xml = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/labels_ibug_300W_train_eyes.xml"
model_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/custom_shape_predictor.dat"

# Train the shape predictor
train_shape_predictor(training_xml, model_path)


[INFO] setting shape predictor options...
[INFO] Shape predictor options:
shape_predictor_training_options(be_verbose=1, cascade_depth=15, tree_depth=4, num_trees_per_cascade_level=500, nu=0.1, oversampling_amount=5, oversampling_translation_jitter=0.1, feature_pool_size=400, lambda_param=0.1, num_test_splits=50, feature_pool_region_padding=0, random_seed=, num_threads=2, landmark_relative_padding_mode=1)
[INFO] Training shape predictor...
[INFO] Training completed.


In [None]:
'''
import multiprocessing
import dlib
import torch
import xml.etree.ElementTree as ET

# Check if GPU is available
use_gpu = torch.cuda.is_available()

if use_gpu:
    dlib.DLIB_USE_CUDA = True  # Set DLIB_USE_CUDA to enable GPU usage
    print("[INFO] GPU is available. Training on GPU...")
else:
    print("[INFO] GPU is not available. Training on CPU...")


def read_training_data(training_xml):
    # Parse the XML file to extract image file paths, bounding boxes, and landmarks
    tree = ET.parse(training_xml)
    root = tree.getroot()

    data = []
    for image in root.findall('.//image'):
        file_path = image.attrib['file']
        box_attrib = image.find('box').attrib
        box = (int(box_attrib['top']), int(box_attrib['left']), int(box_attrib['width']), int(box_attrib['height']))
        landmarks = [(int(part.attrib['x']), int(part.attrib['y'])) for part in image.findall('.//part')]
        data.append((file_path, box, landmarks))

    return data

def train_shape_predictor(training_data, model_path, batch_size):
    # Grab the default options for dlib's shape predictor
    print("[INFO] Setting shape predictor options...")
    options = dlib.shape_predictor_training_options()

    # Hyperparameters to tune:
    # tree_depth: Depth of each regression tree
    options.tree_depth = 4

    # nu: Regularization parameter
    options.nu = 0.1

    # cascade_depth: Number of cascades used to train the shape predictor
    options.cascade_depth = 15

    # feature_pool_size: Number of pixels used to generate features for the random trees at each cascade
    options.feature_pool_size = 400

    # num_test_splits: Number of test splits
    options.num_test_splits = 50

    # oversampling_amount: Controls the amount of "jitter" (data augmentation) when training the shape predictor
    options.oversampling_amount = 5

    # oversampling_translation_jitter: Amount of translation jitter to apply
    options.oversampling_translation_jitter = 0.1

    # be_verbose: Whether to print out status messages during training
    options.be_verbose = True

    # num_threads: Number of threads/CPU cores to be used when training
    if use_gpu:
        options.num_threads = 1  # GPU training only supports 1 thread
    else:
        options.num_threads = multiprocessing.cpu_count()

    # Log the training options
    print("[INFO] Shape predictor options:")
    print(options)
    print("batch_size - ", batch_size, "len(training_data) - ", len(training_data))

    # Split training data into batches
    for i in range(0, len(training_data), batch_size):
        batch = training_data[i:i + batch_size]

        images = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        landmarks = [x[2] for x in batch]

        # Create object detections for each image in the batch
        object_detections = []
        for box, landmark in zip(boxes, landmarks):
            print('landmark - ', landmark)
            rect = dlib.rectangle(
                left=box[1],
                top=box[0],
                right=box[1] + box[3],
                bottom=box[0] + box[2]
            )
            for part in landmark:
              print('dlib.points ', [(int(part[0]), int(part[1])) for part in landmark])
              dlib.points.append((int(part[0]), int(part[1])) for part in landmark)
            points = dlib.points
            object_detections.append(dlib.full_object_detection(rect, points))

        # Train the shape predictor on the current batch
        print(f"[INFO] Training batch {i // batch_size + 1}...")
        dlib.train_shape_predictor(images, object_detections, model_path, options)
        print(f"[INFO] Training batch {i // batch_size + 1} completed.")

    print("[INFO] Training completed.")


# Path to training data XML file and model output
training_xml = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/labels_ibug_300W_train_eyes.xml"
model_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/custom_shape_predictor.dat"

# Read training data from XML file
training_data = read_training_data(training_xml)

# Define batch size
batch_size = 100  # Adjust this value based on your memory constraints

# Train the shape predictor
train_shape_predictor(training_data, model_path, batch_size)
'''

## Implementing our shape predictor evaluation

In [19]:
import dlib

# Set the paths directly
predictor_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/custom_shape_predictor.dat"
xml_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/labels_ibug_300W_test_eyes.xml"

# Compute the error over the supplied data split
print("[INFO] evaluating shape predictor...")
error = dlib.test_shape_predictor(xml_path, predictor_path)
print("[INFO] error: {}".format(error))


[INFO] evaluating shape predictor...
[INFO] error: 8.496729333248092


### shape predictor inference

In [17]:
import cv2
import dlib
from imutils.video import VideoStream
from imutils import face_utils
import imutils
import time

# Set the path to the shape predictor
shape_predictor_path = "/content/ibug_dataset/ibug_300W_large_face_landmark_dataset/custom_shape_predictor.dat"

# Load the shape predictor
print("[INFO] loading facial landmark predictor...")
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(shape_predictor_path)

# Initialize the video stream and allow the camera sensor to warm up
print("[INFO] camera sensor warming up...")
vs = VideoStream(src=0).start()
time.sleep(2.0)

# Loop over the frames from the video stream
while True:
    # Grab the frame from the video stream, resize it to have a
    # maximum width of 400 pixels, and convert it to grayscale
    frame = vs.read()
    frame = imutils.resize(frame, width=400)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces in the grayscale frame
    rects = detector(gray, 0)

    # Loop over the face detections
    for rect in rects:
        # Convert the dlib rectangle into an OpenCV bounding box and
        # draw a bounding box surrounding the face
        (x, y, w, h) = face_utils.rect_to_bb(rect)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Use our custom dlib shape predictor to predict the location
        # of our landmark coordinates, then convert the prediction to
        # an easily parsable NumPy array
        shape = predictor(gray, rect)
        shape = face_utils.shape_to_np(shape)

        # Loop over the (x, y)-coordinates from our dlib shape
        # predictor model and draw them on the image
        for (sX, sY) in shape:
            cv2.circle(frame, (sX, sY), 1, (0, 0, 255), -1)

    # Show the frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF
    # If the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# Do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()


[INFO] loading facial landmark predictor...
[INFO] camera sensor warming up...


AttributeError: 'NoneType' object has no attribute 'shape'