# **1. 設定**

# 1.1 連線到 Google Drive

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


# 1.2 安裝YOLO v7 所需檔案及library

In [2]:
%%bash
cd /content/gdrive/MyDrive
git clone https://github.com/WongKinYiu/yolov7.git
cd yolov7
wget https://raw.githubusercontent.com/WongKinYiu/yolov7/u5/requirements.txt
pip install -r requirements.txt

Collecting numpy<1.24.0,>=1.18.5 (from -r requirements.txt (line 5))
  Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 17.1/17.1 MB 44.1 MB/s eta 0:00:00
Collecting thop (from -r requirements.txt (line 36))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting jedi>=0.16 (from ipython->-r requirements.txt (line 34))
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.6/1.6 MB 59.4 MB/s eta 0:00:00
Installing collected packages: numpy, jedi, thop
  Attempting uninstall: numpy
    Found existing installation: numpy 1.25.2
    Uninstalling numpy-1.25.2:
      Successfully uninstalled numpy-1.25.2
Successfully installed jedi-0.19.1 numpy-1.23.5 thop-0.1.1.post2209072238


fatal: destination path 'yolov7' already exists and is not an empty directory.
--2024-02-27 12:57:11--  https://raw.githubusercontent.com/WongKinYiu/yolov7/u5/requirements.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1032 (1.0K) [text/plain]
Saving to: ‘requirements.txt.49’

     0K .                                                     100% 55.0M=0s

2024-02-27 12:57:11 (55.0 MB/s) - ‘requirements.txt.49’ saved [1032/1032]

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
chex 0.1.85 requires numpy>=1.24.1, but you have numpy 1.23.5 which is incompatible.


In [3]:
import os
import sys
sys.path.append('/content/gdrive/MyDrive/yolov7')

In [4]:
cd /content/gdrive/MyDrive/yolov7

/content/gdrive/MyDrive/yolov7


In [5]:
if not os.path.isdir("/content/gdrive/MyDrive/yolov7/weights"):
  os.makedirs("/content/gdrive/MyDrive/yolov7/weights")

# 1.3 import所需library和定義補邊的函式

In [6]:
import argparse
import time
from pathlib import Path
import cv2
import torch
import numpy as np
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
    scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized, TracedModel


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    shape = img.shape[:2]
    #確保輸入的new_shape類型為int
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # 計算長寬放大比例
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:
        r = min(r, 1.0)

    # 將原本圖片長寬乘上比例 並計算要求的長寬與乘上比例後的原圖長寬差多少 補上餘數後就是等比例縮小
    ratio = r, r
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    # 算出要補多少
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
    if auto:  # 算出最少需要補多少的dw, dh
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)   # np.mod取餘數
    elif scaleFill:
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]
    dw /= 2  # 分成兩邊
    dh /= 2

    if shape[::-1] != new_unpad:  # 調整大小
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # 補邊
    return img, ratio, (dw, dh)



# **1.4 重要參數**

In [7]:
opt  = {
    "weights": "weights/best_50_detect.pt",
    "yaml"   : "data/data.yaml",
    "img-size": 640,
    "conf-thres": 0.35, # 準確度要達到設定的閥值才顯示
    "iou-thres" : 0.45, # 設定重疊顯示的閥值
    "device" : '0', # 使用GPU
    "classes" : None
}

# **2. 使用webcam辨識**

In [8]:
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from google.colab.patches import cv2_imshow
from base64 import b64decode, b64encode
import PIL
import io
import html
# 將JavaScript 物件轉換為OpenCV格式的圖片
def js_to_image(js_reply):

  # base64 的圖片
  image_bytes = b64decode(js_reply.split(',')[1])
  # 轉換為 numpy array
  jpg_as_np = np.frombuffer(image_bytes, dtype=np.uint8)
  # 將 numpy array 轉換成 OpenCV BGR
  img = cv2.imdecode(jpg_as_np, flags=1)

  return img

# 將偵測框從OpenCV 轉換為base64 byte字串以便即時覆蓋在相機上
def bbox_to_bytes(bbox_array):
  # 將 array 轉為 PIL image
  bbox_PIL = PIL.Image.fromarray(bbox_array, 'RGBA')
  iobuf = io.BytesIO()
  # PIL image 儲存成 png 以便後續處理
  bbox_PIL.save(iobuf, format='png')
  # 回傳base64 byte字串給bbox_bytes變數
  bbox_bytes = 'data:image/png;base64,{}'.format((str(b64encode(iobuf.getvalue()), 'utf-8')))

  return bbox_bytes


# 使用Javascript啟動相機
def video_stream():
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.position = 'fixed';
      div.style.top = '0%';
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>狀態:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          '點此或是影片以結束執行</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640;
      captureCanvas.height = 480;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  display(js)

def video_frame(label, bbox):
  data = eval_js('stream_frame("{}", "{}")'.format(label, bbox))
  return data

In [9]:
# 使用numpy自製音檔 並且儲存要播放的音檔至wn變數
from IPython.display import Audio
f1 = 500
f2 = 1000
rate = 9000
L = 2
times = np.linspace(0,L,rate*L)
signal = np.sin(2*np.pi*f1*times) + np.sin(2*np.pi*f2*times)

wn = Audio(data=signal, rate=rate, autoplay=True)

In [10]:
from collections import deque
from datetime import datetime

# 啟動相機
video_stream()
label_html = '偵測中...'

# 初始偵測框為空白
bbox = ''
count = 0
print('\n'*30)

# 設置時間雙端佇列
time_deque = {}
time_names = ['awake','drowsy']
for id in time_names:
  time_deque[id] = deque(maxlen = 64)

# 初始化佇列
time_deque['awake'].appendleft(datetime.now())
time_deque['drowsy'].appendleft(datetime.now())

# 初始化awake和drowsy時間
delta_awake  = 0
delta_drowsy = 0

# 不要計算梯度可以減少RAM使用量並提高計算速度
with torch.no_grad():
  weights, imgsz = opt['weights'], (480,640)
  set_logging()
  device = select_device(opt['device'])
  half = device.type != 'cpu'
  model = attempt_load(weights, map_location=device)  # 讀取權重檔
  stride = int(model.stride.max())  # 設為最大的stride 可使辨識過程加速

  # 若是使用cpu執行，模型的權重參數以及輸入數據類型會轉換為半經度浮點數格式 可以減少模型的RAM占用和計算時間(但精準度可能會下降)
  if half:
    model.half()

  # 確保在模型被放置在多GPU環境中和單GPU環境中，都能正確的獲取名稱
  names = model.module.names if hasattr(model, 'module') else model.names
  # 隨機製造顏色
  colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
  # 若不是使用cpu執行 創建了一個全零張量作為模型的輸入數據。張量的形狀是(一張 , 彩色 , rows, cloumns)
  # 並且傳送給GPU 數據類型為模型的第一個參數的數據類型
  if device.type != 'cpu':
    model(torch.zeros(1, 3, imgsz[0], imgsz[1]).to(device).type_as(next(model.parameters())))
  classes = None
  if opt['classes']:
    classes = []
    for class_name in opt['classes']:

      classes.append(names.index(class_name))

  if classes:

    classes = [i for i in range(len(names)) if i not in classes]

  while True:
    js_reply = video_frame(label_html, bbox)
    if not js_reply:
        break

    img0 = js_to_image(js_reply["img"])
    bbox_array = np.zeros([480,640,4], dtype=np.uint8)
    img = letterbox(img0, imgsz, stride=stride)[0]
    # 將圖片從 (高度, 寬度, 通道數) 的格式轉換為 PyTorch 的 (通道數, 高度, 寬度) 格式，並將顏色通道從 RGB 調整為 BGR (::-1 就是顛倒)
    img = img[:, :, ::-1].transpose(2, 0, 1)
    # 確保了一個內存連續的array，確保數據的連續性和有效性
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()
    img /= 255.0
    if img.ndimension() == 3:
      img = img.unsqueeze(0)

    # 預測
    t1 = time_synchronized()
    pred = model(img, augment= False)[0]

    # 消除重複偵測，並保留準確度較高的結果
    pred = non_max_suppression(pred, opt['conf-thres'], opt['iou-thres'], classes= classes, agnostic= False)
    t2 = time_synchronized()
    for i, det in enumerate(pred):
      s = ''
      # 將圖片的寬度和高度添加到字串s中
      s += '%gx%g ' % img.shape[2:]
      # 根據原始圖像 img0 的形狀，使用 [[1, 0, 1, 0]] 重新排列形狀中的維度 [[1, 0, 1, 0]] 的含義是將原始形狀 (height, width, channels) 中的維度重新排列為 (width, height, width, height)
      gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]
      if len(det):
        #對檢測到的目標框的坐標進行縮放，以便與原始圖像的尺寸相匹配
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

        # det[:, -1] 表示檢測到的目標框的類別索引，.unique() 函數返回不重複的類別索引
        for c in det[:, -1].unique():
          n = (det[:, -1] == c).sum()  # 計算屬於當前類別 c 的檢測到的目標框的數量
          s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "

        # 使用reversed抓取最後一個偵測框的 四個座標 準確度 類別
        for *xyxy, conf, cls in reversed(det):

          label = f'{names[int(cls)]} {conf:.2f}'
          plot_one_box(xyxy, bbox_array, label=label, color=colors[int(cls)], line_thickness=3)

    # 建立計時器及警報器
    obj_name = label[0]
    if obj_name == 'a':
      if delta_drowsy != 0:
        time_deque['awake'].clear()
        delta_drowsy =0
        time_deque['awake'].appendleft(datetime.now())

      time_deque['awake'].popleft()
      time_deque['awake'].appendleft(datetime.now())
      delta_awake = (time_deque['awake'][0]-time_deque['drowsy'][-1]).total_seconds()
    cv2.putText(bbox_array, 'awake(sec):'+str(round(delta_awake,2)), (10, 90), 5, 1, [0, 0, 255], thickness=1, lineType=cv2.LINE_AA)

    if obj_name == 'd':
      if delta_awake != 0:
        time_deque['drowsy'].clear()
        delta_awake =0
        time_deque['drowsy'].appendleft(datetime.now())

      time_deque['drowsy'].popleft()
      time_deque['drowsy'].appendleft(datetime.now())
      delta_drowsy = (time_deque['drowsy'][0]-time_deque['awake'][-1]).total_seconds()

      if delta_drowsy >=2:
          display(wn)
          cv2.line(bbox_array, (170, 400), (470, 400), [255, 0, 0], 140)
          cv2.putText(bbox_array, 'WAKE UP!!!', (190, 420), 5, 2, [225, 255, 255], thickness=2, lineType=cv2.LINE_AA)



    cv2.putText(bbox_array, 'drowsy(sec):'+str(round(delta_drowsy,2)), (10, 110), 5, 1, [0, 0, 255], thickness=1, lineType=cv2.LINE_AA)
    cv2.putText(bbox_array, 'alarm_time(sec):'+str(2), (10, 130), 5, 1, [0, 0, 255], thickness=1, lineType=cv2.LINE_AA)

    # 沿著第三維度找尋最大的數組 若不是0(代表有偵測框)就*255 使數組變為0-255之間
    bbox_array[:,:,3] = (bbox_array.max(axis = 2) > 0 ).astype(int) * 255
    bbox_bytes = bbox_to_bytes(bbox_array)

    bbox = bbox_bytes

    #隱藏撥放音檔的Javascript(否則webcam會一直往下)
    jss = Javascript('''

    var elementsToHide = document.querySelectorAll(".display_data");

    elementsToHide.forEach(function(element) {
        element.style.display = "none";
    });
    ''')

    #執行jss
    display(jss)

<IPython.core.display.Javascript object>
































Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


NameError: name 'label' is not defined