In [1]:
import torch
import torchvision
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import cv2
import numpy as np
import IPython.display as display
import torchvision.transforms as T
from ultralytics import YOLO
from torchvision import transforms
import torch.nn as nn

In [2]:
class CNN_Model(nn.Module):
    """Small CNN that maps an image batch to one scalar per image.

    Architecture, as built below:
      * three stages of 3x3 convolution -> 2x2 max-pool -> leaky ReLU -> batch norm,
        widening the channels 3 -> 64 -> 128 -> 256 and halving the spatial size
        at every stage;
      * 2D dropout (p=0.15) on the final feature map;
      * a fully connected head 256*16*16 -> 128 -> 64 -> 32 -> 1, where every
        hidden layer is linear -> leaky ReLU -> batch norm -> dropout (p=0.3).

    The size of ``fc1`` fixes the expected input at 3 x 128 x 128
    (128 halved three times gives the 16 x 16 map that is flattened).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor (spatial size is preserved by padding=1;
        # the halving is done by `self.pool` in `forward`).
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)     # 3 x 128 x 128  -> 64 x 128 x 128
        self.conv_bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)   # 64 x 64 x 64   -> 128 x 64 x 64
        self.conv_bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)  # 128 x 32 x 32  -> 256 x 32 x 32
        self.conv_bn3 = nn.BatchNorm2d(256)

        # Shared, parameter-free layers.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv_dropout = nn.Dropout2d(0.15)
        self.flatten = nn.Flatten()

        # Fully connected regression head.
        self.fc1 = nn.Linear(256 * 16 * 16, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.fc4 = nn.Linear(32, 1)
        self.fc_dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Run the network on a batch ``x`` and return a ``(batch, 1)`` tensor."""
        activate = nn.functional.leaky_relu

        conv_stages = (
            (self.conv1, self.conv_bn1),
            (self.conv2, self.conv_bn2),
            (self.conv3, self.conv_bn3),
        )
        for conv, norm in conv_stages:
            # Order matters for the trained weights: pool, then activation, then batch norm.
            x = norm(activate(self.pool(conv(x))))

        x = self.conv_dropout(x)
        x = self.flatten(x)

        hidden_layers = (
            (self.fc1, self.bn1),
            (self.fc2, self.bn2),
            (self.fc3, self.bn3),
        )
        for linear, norm in hidden_layers:
            x = self.fc_dropout(norm(activate(linear(x))))

        return self.fc4(x)

In [3]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
network = CNN_Model().to(device)
# `map_location` remaps the checkpoint's tensors onto `device`; without it a
# checkpoint that was saved from a GPU cannot be loaded on a CPU-only machine.
network.load_state_dict(torch.load("best_model.pth", map_location=device))
# Evaluation mode: dropout is disabled and batch norm uses its running statistics.
network.eval()

CNN_Model(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv_bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv_bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv_bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_dropout): Dropout2d(p=0.15, inplace=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=65536, out_features=128, bias=True)
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_runnin

In [4]:
# Preprocessing applied to each face crop before it is passed to `network`:
# resize to the 128 x 128 input that CNN_Model's `fc1` layer is sized for,
# then convert the PIL image to a tensor.
transform_for_age = T.Compose(
    [
        T.Resize((128, 128)),
        T.ToTensor(),
    ]
)

In [5]:
# Load the YOLO detector from its weights file (a face model, judging by the
# file name) and place it on the device selected earlier in the notebook.
model = YOLO('yolov8n-face.pt').to(device)
# Switch the detector to evaluation mode; as the last expression of the cell
# this also displays the model summary.
model.eval()

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_s

In [6]:
def get_human_boxes(prediction, treshold = 0.6):
    """Return the confident class-0 bounding boxes from a detector result.

    Args:
        prediction: Indexable detector output; only ``prediction[0].boxes`` is
            read, and it must expose ``cpu().numpy()`` yielding an object with
            ``cls``, ``conf`` and ``xyxy`` arrays.
        treshold: Minimum confidence (exclusive) a detection needs to be kept.

    Returns:
        A list with one integer array ``[x1, y1, x2, y2]`` per detection whose
        class id is 0 and whose confidence is strictly above ``treshold``,
        in the detector's original order.
    """
    detections = prediction[0].boxes.cpu().numpy()

    class_ids = np.array(detections.cls, dtype = int)
    scores = np.array(detections.conf)
    corners = np.array(detections.xyxy, dtype = int)  # coordinates truncated to whole pixels

    return [
        box
        for box, score, class_id in zip(corners, scores, class_ids)
        if score > treshold and class_id == 0
    ]

In [13]:
def _expand_box(box, padding, frame_width, frame_height):
    """Grow an (x1, y1, x2, y2) box by `padding` pixels per side, clipped to the frame."""
    x1 = max(int(box[0]) - padding, 0)
    y1 = max(int(box[1]) - padding, 0)
    x2 = min(int(box[2]) + padding, frame_width)
    y2 = min(int(box[3]) + padding, frame_height)
    return x1, y1, x2, y2


def show_camera_stream(camera_index=0, padding=40):
    """Show the webcam feed with a box and a predicted age drawn on each detection.

    Every frame is passed to `model`; each box returned by `get_human_boxes`
    is padded, cropped from the frame, preprocessed with `transform_for_age`
    and fed to `network`, whose output (truncated to an int) is drawn next to
    the box. The loop ends when no frame can be read or when 'q' is pressed
    in the 'Webcam' window.

    Args:
        camera_index: Device index passed to ``cv2.VideoCapture``.
        padding: Number of pixels added on every side of a detected box
            before cropping, limited to the frame borders.

    Returns:
        None. Prints a message and returns immediately if the camera cannot
        be opened.
    """
    stream = cv2.VideoCapture(camera_index)
    if not stream.isOpened():
        print('Camera not found :(')
        stream.release()
        # Return instead of exit(): exiting would take the notebook kernel down.
        return

    transform = T.ToTensor()
    try:
        while True:
            ret, BGR_frame = stream.read()
            # Check the read status before touching the frame; a failed read
            # does not deliver a usable image.
            if not ret:
                print('No more stream')
                break

            frame = cv2.cvtColor(BGR_frame, cv2.COLOR_BGR2RGB)
            # Clip against the real frame size rather than assuming 640 x 480.
            frame_height, frame_width = frame.shape[:2]

            frame_tensor = transform(frame).unsqueeze(0).to(device)
            prediction = model(frame_tensor)

            for box in get_human_boxes(prediction):
                x1, y1, x2, y2 = _expand_box(box, padding, frame_width, frame_height)
                if x2 <= x1 or y2 <= y1:
                    # Degenerate box: nothing to crop.
                    continue

                # Crop from the untouched RGB frame so the rectangles and labels
                # drawn on BGR_frame never leak into the age network's input.
                face_img = Image.fromarray(np.ascontiguousarray(frame[y1:y2, x1:x2]))
                face_tensor = transform_for_age(face_img).unsqueeze(0).to(device)
                # Inference only, so skip building the autograd graph.
                with torch.no_grad():
                    result = int(network(face_tensor).item())

                cv2.rectangle(BGR_frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                cv2.putText(BGR_frame, f"Age: {result}", (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)

            cv2.imshow('Webcam', BGR_frame)
            # waitKey also lets the window refresh, so call it after imshow.
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even if a frame raised.
        stream.release()
        cv2.destroyAllWindows()

In [14]:
# Start the live webcam demo; press 'q' in the 'Webcam' window to stop it.
show_camera_stream()


0: 480x640 1 face, 9.1ms
Speed: 0.0ms preprocess, 9.1ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 8.1ms
Speed: 0.0ms preprocess, 8.1ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 12.4ms
Speed: 0.0ms preprocess, 12.4ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 10.4ms
Speed: 0.0ms preprocess, 10.4ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 12.0ms
Speed: 0.0ms preprocess, 12.0ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


QObject::moveToThread: Current thread (0x3b3ddf00) is not the object's thread (0x3b53e5b0).
Cannot move to target thread (0x3b3ddf00)

QObject::moveToThread: Current thread (0x3b3ddf00) is not the object's thread (0x3b53e5b0).
Cannot move to target thread (0x3b3ddf00)

QObject::moveToThread: Current thread (0x3b3ddf00) is not the object's thread (0x3b53e5b0).
Cannot move to target thread (0x3b3ddf00)

QObject::moveToThread: Current thread (0x3b3ddf00) is not the object's thread (0x3b53e5b0).
Cannot move to target thread (0x3b3ddf00)

QObject::moveToThread: Current thread (0x3b3ddf00) is not the object's thread (0x3b53e5b0).
Cannot move to target thread (0x3b3ddf00)

QObject::moveToThread: Current thread (0x3b3ddf00) is not the object's thread (0x3b53e5b0).
Cannot move to target thread (0x3b3ddf00)

QObject::moveToThread: Current thread (0x3b3ddf00) is not the object's thread (0x3b53e5b0).
Cannot move to target thread (0x3b3ddf00)

QObject::moveToThread: Current thread (0x3b3ddf00) is n


0: 480x640 1 face, 8.0ms
Speed: 0.0ms preprocess, 8.0ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 12.1ms
Speed: 0.0ms preprocess, 12.1ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 8.4ms
Speed: 0.0ms preprocess, 8.4ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 13.5ms
Speed: 0.0ms preprocess, 13.5ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 11.5ms
Speed: 0.0ms preprocess, 11.5ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 20.2ms
Speed: 0.0ms preprocess, 20.2ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 21.0ms
Speed: 0.0ms preprocess, 21.0ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 25.3ms
Speed: 0.0ms preprocess, 25.3ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 