In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.applications import VGG16


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and echo the full path of every file found
for current_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for name in file_names:
        full_path = os.path.join(current_dir, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:

pip install ultralytics


**IMPORTS**

In [None]:
# System libraries
import os
import random
import yaml
from PIL import Image
from tqdm import tqdm




# Data analytics and visualisations
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms as T
from torchinfo import summary

#CV2
import os
import numpy as np
import cv2
from tensorflow import keras
from tensorflow.keras import layers

# Data exploration

In [None]:
# Root of the WIDER FACE dataset under the Kaggle input directory
root = '/kaggle/input/wider-face-a-face-detection-benchmark/'


def _first_existing_dir(*candidates):
    """Return the first candidate that is an existing directory, else the first candidate."""
    for candidate in candidates:
        if os.path.isdir(candidate):
            return candidate
    return candidates[0]


# Other cells in this notebook spell the top-level folders "WIDER_train" / "WIDER_val",
# whereas this cell spelled them "wider_train" / "wider_val". Paths are case-sensitive,
# so pick whichever spelling is actually present and fall back to the original one.
train_img_folder = _first_existing_dir(
    f"{root}wider_train/WIDER_train/images/",
    f"{root}WIDER_train/WIDER_train/images/",
)
val_img_folder = _first_existing_dir(
    f"{root}wider_val/WIDER_val/images/",
    f"{root}WIDER_val/WIDER_val/images/",
)
annotations_folder = f"{root}wider_face_split/wider_face_split/"

In [None]:
import os



# Collect every entry of the training image folder that is itself a directory
subfolders = []
for entry in os.listdir(train_img_folder):
    entry_path = os.path.join(train_img_folder, entry)
    if os.path.isdir(entry_path):
        subfolders.append(entry)

# Echo one subfolder name per line
for folder in subfolders:
    print(folder)


In [None]:
import os
import cv2
import matplotlib.pyplot as plt

# Path to the folder containing the images.
# NOTE(review): this path spells the dataset folder "WIDER_train/WIDER_train/" while
# `train_img_folder` uses "wider_train/WIDER_train/"; paths are case-sensitive, so
# confirm which spelling matches the mounted dataset.
image_folder = '/kaggle/input/wider-face-a-face-detection-benchmark/WIDER_train/WIDER_train/images/42--Car_Racing'

# Get the image file names in the folder; sorted because os.listdir returns entries
# in arbitrary order and the sample below should be the same on every run.
image_files = sorted(
    f for f in os.listdir(image_folder) if os.path.isfile(os.path.join(image_folder, f))
)

# Display basic statistics of the dataset
print("Total number of images:", len(image_files))

# Plot a sample of images, never asking for more than the folder contains
num_samples = min(5, len(image_files))

if num_samples > 0:
    # squeeze=False keeps `axes` two-dimensional even when only one image is shown
    fig, axes = plt.subplots(1, num_samples, figsize=(15, 5), squeeze=False)

    for ax, file_name in zip(axes[0], image_files):
        image_path = os.path.join(image_folder, file_name)
        img = cv2.imread(image_path)
        ax.axis('off')

        if img is None:
            # cv2.imread returns None for files it cannot decode instead of raising
            ax.set_title("unreadable")
            continue

        # OpenCV decodes to BGR; matplotlib expects RGB
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    plt.tight_layout()
    plt.show()


In [None]:
# Open one fixed training image with PIL and render it inline
img = Image.open(train_img_folder + "0--Parade/0_Parade_Parade_0_4.jpg")
plt.imshow(img)

In [None]:
# Target size handed to cv2.resize when previewing a resized sample image
input_size = (224, 224)

In [None]:
# NOTE(review): this path spells the dataset folder "WIDER_val/WIDER_val/" while
# `val_img_folder` uses "wider_val/WIDER_val/"; confirm which spelling is mounted.
image = cv2.imread("/kaggle/input/wider-face-a-face-detection-benchmark/WIDER_val/WIDER_val/images/0--Parade/0_Parade_Parade_0_120.jpg")
if image is None:
    # cv2.imread reports a missing or undecodable file by returning None, not by raising
    raise FileNotFoundError("Could not read the sample validation image")
# OpenCV decodes to BGR while matplotlib expects RGB; without this the colours are swapped
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, input_size)
image = image / 255.0  # Normalize pixel values
plt.imshow(image)

In [None]:
from PIL import Image

# Re-open the fixed training sample and report the size PIL gives for it
img = Image.open(os.path.join(train_img_folder, "0--Parade", "0_Parade_Parade_0_4.jpg"))
image_size = img.size

print("Image Size:", image_size)

In [None]:
# Show which files the annotations folder contains
os.listdir(annotations_folder)


In [None]:
# Ground-truth bounding-box files for the training and validation splits
train_bbx_path = os.path.join(annotations_folder, "wider_face_train_bbx_gt.txt")
val_bbx_path = os.path.join(annotations_folder, "wider_face_val_bbx_gt.txt")

**Loading the bounding-box annotations**

In [None]:
def load_bbx(bbx_path):
    """Parse a bounding-box ground-truth text file into a dictionary.

    The file is read as a sequence of records, each laid out as::

        <image file name>
        <number of boxes N>
        <N lines whose first four whitespace-separated integers are x y w h>

    A record that declares 0 boxes may still be followed by one all-numeric
    placeholder row; that row is skipped so it is not mistaken for the next
    image file name. Blank lines (including a trailing one) are ignored.

    Args:
        bbx_path: Path of the annotation text file.

    Returns:
        dict mapping each image file name to a list of
        ``{'x': int, 'y': int, 'w': int, 'h': int}`` dicts, in file order.

    Raises:
        ValueError: If a box count or a coordinate is not an integer.
        IndexError: If the file ends before a record is complete, or a box
            line has fewer than four fields.
    """
    with open(bbx_path, mode='r') as file:
        # Strip once and drop blank lines so they can never be read as a file name
        lines = [stripped for stripped in (raw.strip() for raw in file) if stripped]

    annotations = {}
    i = 0
    while i < len(lines):
        file_name = lines[i]
        num_boxes = int(lines[i + 1])
        i += 2

        # Zero-box records can carry a dummy coordinate row; consume it if present
        if num_boxes == 0 and i < len(lines):
            if all(token.lstrip('-').isdigit() for token in lines[i].split()):
                i += 1

        boxes = []
        for _ in range(num_boxes):
            box_info = lines[i].split()
            boxes.append({
                'x': int(box_info[0]),
                'y': int(box_info[1]),
                'w': int(box_info[2]),
                'h': int(box_info[3]),
            })
            i += 1
        annotations[file_name] = boxes

    return annotations

In [None]:
# Parse both ground-truth files into {file name: [boxes]} dictionaries
train_annotations = load_bbx(train_bbx_path)
val_annotations = load_bbx(val_bbx_path)

# Image file names of each split, in the order they appear in the annotation dicts
train_keys = list(train_annotations)
val_keys = list(val_annotations)

**Convert the annotations into a DataFrame**

In [None]:
def annotation_to_df(annotation, img_shape):
    """Convert one image's pixel-space boxes into a normalised label table.

    Args:
        annotation: Iterable of dicts with keys ``'x'``, ``'y'``, ``'w'``, ``'h'``
            where (x, y) is the box corner with the smallest coordinates and
            (w, h) its size, all in pixels.
        img_shape: Array shape of the image, either ``(height, width)`` or
            ``(height, width, channels)``. Only the first two entries are used,
            so grayscale (2-D) images are accepted as well.

    Returns:
        pandas.DataFrame with integer column labels 0-4 and one row per box:
        column 0 is the class id (always 0), columns 1 and 2 are the box centre
        divided by image width / height, columns 3 and 4 are the box width /
        height divided by image width / height.
    """
    # Slice instead of unpacking three values so 2-D shapes do not raise
    h, w = img_shape[:2]

    cs, xs, ys, ws, hs = [], [], [], [], []
    for box in annotation:
        cs.append(0)
        xs.append((box["x"] + box["w"] / 2.0) / w)
        ys.append((box["y"] + box["h"] / 2.0) / h)
        ws.append(box["w"] / w)
        hs.append(box["h"] / h)

    return pd.DataFrame({0: cs, 1: xs, 2: ys, 3: ws, 4: hs})

**Bounding box plotting**

In [None]:
def plot_boxes(img, df):
    """Display an image with one red rectangle outline per row of ``df``.

    Args:
        img: Image array whose ``shape`` starts with ``(height, width)``; a
            trailing channel axis is optional.
        df: DataFrame with integer column labels where columns 1 and 2 hold the
            box centre and columns 3 and 4 the box width and height, all as
            fractions of the image width / height.
    """
    # Only height and width are needed; slicing also accepts 2-D (grayscale) arrays
    h, w = img.shape[:2]

    fig, ax = plt.subplots()
    for _, row in df.iterrows():
        x_center, y_center, box_w, box_h = row[1], row[2], row[3], row[4]
        # Rectangle is anchored at its corner, so step back half a box from the centre
        patch = Rectangle(
            ((x_center - box_w / 2.0) * w, (y_center - box_h / 2.0) * h),
            box_w * w,
            box_h * h,
            edgecolor='red',
            fill=False,
        )
        ax.add_patch(patch)

    # Draw on the axes created above rather than on pyplot's implicit current axes
    ax.imshow(img)

In [None]:
# Sample plot: draw the annotated boxes of one randomly chosen training image
key = train_keys[random.randrange(len(train_keys))]
img = np.array(Image.open(train_img_folder + key))
df = annotation_to_df(train_annotations[key], img.shape)
plot_boxes(img, df)

# Transforming the dataset

In [None]:
def add_dataset(keys, annotations, img_folder, root, split):
    """Write one dataset split as numbered images plus matching label files.

    For the i-th key the image ``{img_folder}{key}`` is saved as
    ``{root}images/{split}/im{i}.jpg`` and its boxes are written, via
    ``annotation_to_df``, to ``{root}labels/{split}/im{i}.txt`` as
    tab-separated rows without header or index.

    Args:
        keys: Iterable of image paths relative to ``img_folder``.
        annotations: Mapping from each key to its list of box dicts.
        img_folder: Source image folder; concatenated directly with each key.
        root: Output root folder; concatenated directly with ``images/`` and
            ``labels/``, so it is expected to end with a path separator.
        split: Name of the split sub-folder, e.g. ``"train"``.
    """
    images_dir = f"{root}images/{split}"
    labels_dir = f"{root}labels/{split}"
    # Create each folder independently: previously the labels folder was skipped
    # whenever the images folder already existed (e.g. after an interrupted run).
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for i, key in enumerate(keys):
        # The context manager releases the source file handle after every image
        with Image.open(f"{img_folder}{key}") as source:
            # Force three channels so grayscale / palette inputs are handled too
            rgb = source.convert("RGB")
        rgb.save(f"{images_dir}/im{i}.jpg")

        width, height = rgb.size
        df = annotation_to_df(annotations[key], (height, width, 3))
        df.to_csv(f"{labels_dir}/im{i}.txt", header=False, index=False, sep='\t')

In [None]:
# Number of annotated image names in the training and validation splits
len(train_keys), len(val_keys)


In [None]:
# Export the training split as numbered images and label files
add_dataset(
    keys=train_keys,
    annotations=train_annotations,
    img_folder=train_img_folder,
    root="/kaggle/working/datasets/faceset/",
    split="train",
)


In [None]:
# Export the validation split as numbered images and label files
add_dataset(
    keys=val_keys,
    annotations=val_annotations,
    img_folder=val_img_folder,
    root="/kaggle/working/datasets/faceset/",
    split="val",
)


In [None]:
# Dataset description consumed by the training script through its --data argument
dict_file = {"path": "/kaggle/working/datasets/faceset",
             "train": "images/train",
             "val": "images/val",
             "names": ["face"]}
# The class count was hard-coded to 0 although one class is listed; derive it from
# the names so the two entries can never disagree.
dict_file["nc"] = len(dict_file["names"])

with open("/kaggle/working/dataset.yaml", 'w') as file:
    yaml.dump(dict_file, file)

********************************************************************************************************


In [None]:
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
%pip install -r "requirements.txt

# Training

In [None]:
# Run the training script from the current directory with image size 640, 12 epochs
# and batch size 32, using the generated dataset.yaml and the yolov5s.pt weights.
!python train.py --img 640 --epochs 12 --batch-size 32 --data /kaggle/working/dataset.yaml --weights yolov5s.pt


In [None]:
# Load the trained weights through the local checkout's hub entry point.
# The weights path is relative, so it is resolved against the current working directory.
custom_yolov5 = torch.hub.load(
    '/kaggle/working/yolov5',
    'custom',
    source='local',
    force_reload=True,
    path='runs/train/exp/weights/best.pt',
)

In [None]:
# Changing settings to prevent finding the faces multiple times.
# NOTE(review): presumably `conf` is the minimum detection confidence and `iou` the
# overlap threshold used when merging duplicate boxes — confirm against the model docs.
custom_yolov5.conf = 0.5
custom_yolov5.iou = 0.3

In [None]:
# Run the model on the training image at index 5 of `train_keys` and display the result
output = custom_yolov5(f"{train_img_folder}{train_keys[5]}")
output.show()

In [None]:
# Run the model on an image from a separate input dataset and display the result
output = custom_yolov5("/kaggle/input/face123/image.jpg")
output.show()

In [None]:
# Run the model on a second image from another input dataset and display the result
output = custom_yolov5("/kaggle/input/aaaaa-and/12121.jpg")
output.show()

********************************************************************************************************

********************************************************************************************************