In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.96-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

In [2]:
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import cv2
import numpy as np
import PIL
import io
import html
import time

### **Hàm chuyển đổi từ JS qua CV**

In [3]:
# Decode the webcam frame handed over by JavaScript into an OpenCV image
def js_to_image(js_reply):
  """
  Turn a data-URL string coming from the browser into an OpenCV image.

  Params:
          js_reply: string of the form "<header>,<base64 payload>" holding the webcam frame
  Returns:
          img: OpenCV BGR image (whatever cv2.imdecode yields for the payload)
  """
  # everything after the first comma (up to the next one, if any) is the base64 payload
  encoded_payload = js_reply.split(',')[1]
  # raw JPEG bytes viewed as a flat uint8 buffer, which is what cv2.imdecode expects
  raw_buffer = np.frombuffer(b64decode(encoded_payload), dtype=np.uint8)
  # IMREAD_COLOR (== 1) asks for a 3-channel BGR result
  return cv2.imdecode(raw_buffer, cv2.IMREAD_COLOR)

# function to convert OpenCV Rectangle bounding box image into base64 byte string to be overlaid on video stream
def bbox_to_bytes(bbox_array):
  """
  Encode an RGBA overlay array as a PNG data URL.

  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream.
                      It is interpreted in 'RGBA' mode (presumably uint8, height x width x 4 -
                      TODO confirm against the caller).
  Returns:
        bytes: Base64 image byte string, prefixed with 'data:image/png;base64,'
  """
  # Import the submodule explicitly: a bare `import PIL` does not guarantee that
  # `PIL.Image` has been loaded, so the attribute lookup could fail on a fresh kernel.
  import PIL.Image

  # convert array into PIL image
  bbox_PIL = PIL.Image.fromarray(bbox_array, 'RGBA')
  iobuf = io.BytesIO()
  # format bbox into png for return
  bbox_PIL.save(iobuf, format='png')
  # base64 output is pure ASCII, so decoding the bytes gives the text form directly
  encoded_png = b64encode(iobuf.getvalue()).decode('utf-8')
  bbox_bytes = 'data:image/png;base64,{}'.format(encoded_png)

  return bbox_bytes

### **JavaScript để tạo luồng video trực tiếp của bạn một cách chính xác bằng cách sử dụng webcam làm đầu vào**

In [4]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """
  Display the JavaScript that drives the live webcam stream in the notebook output.

  The script defines these functions in the browser:
    - createDom(): builds the widget once (status label, mirrored <video> fed by
      getUserMedia, an absolutely positioned <img> overlay, and a red instruction
      line) and starts the animation-frame loop. Clicking the video, the overlay
      or the instruction line sets the `shutdown` flag.
    - onAnimationFrame(): when a capture is pending, draws the current video frame
      onto a 640x480 canvas and resolves the pending promise with a JPEG data URL
      (quality 0.8), or with "" when `shutdown` is set.
    - removeDom(): stops the first video track and removes the widget.
    - stream_frame(label, imgData): if `shutdown` is set, removes the widget and
      returns ''. Otherwise it updates the status label (when `label` is non-empty),
      places `imgData` over the video (when non-empty), waits for the next captured
      frame and returns an object with the timing fields 'create', 'show',
      'capture' (differences of Date.now()) and the frame under 'img'.

  Returns:
          None; the script is only displayed. `stream_frame` is what the Python
          side calls afterwards through eval_js.
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.style.transform = "scaleX(-1)";
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  # Showing the Javascript object is what hands the script to the notebook front end.
  display(js)

def video_frame(label, bbox):
  """
  Ask the browser for the next webcam frame via the `stream_frame` JavaScript function.

  Params:
          label: text shown in the status label of the stream widget
          bbox: data-URL string of the overlay image, or '' for no overlay
  Returns:
          data: whatever eval_js returns for `stream_frame(label, bbox)`
  """
  # Local import keeps this cell runnable without touching the notebook's import cell.
  import json

  # json.dumps yields a correctly quoted and escaped JavaScript string literal, so a
  # quote, backslash or newline inside label/bbox can no longer break or inject script.
  # str() keeps the old behaviour of accepting any value that can be formatted as text.
  js_call = 'stream_frame({}, {})'.format(json.dumps(str(label)), json.dumps(str(bbox)))
  data = eval_js(js_call)
  return data

### **YOLOv11_model**

In [5]:
from google.colab import drive

# Mount Google Drive inside the Colab runtime; files on the Drive then appear
# below this directory.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [6]:
from ultralytics import YOLO
import torch

# Use the first CUDA GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# Load the weights file stored on the mounted Drive, then move the model to that device.
model = YOLO('/content/drive/MyDrive/Colab Notebooks/best1.pt')
model = model.to(device)

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


### **Alert_System**

In [7]:
import requests
import time
import cv2
# Lấy bot_token trong colab
from google.colab import userdata
# The bot token is read from Colab's secret store, so it never appears in the notebook.
TELEGRAM_BOT_TOKEN = bot_token = userdata.get('bot_token')

# Chat that receives the alerts. The id can be looked up with
# https://api.telegram.org/bot[TOKEN]/getUpdates
CHAT_ID = "7139479935"

# Throttling state used by send_telegram: the minimum gap between two alerts
# (compared against time.time(), i.e. seconds) and the time of the last alert.
alert_interval = 2
last_sent_time = 0

def send_telegram(frame):
    """
    Send `frame` to the configured Telegram chat as a photo alert.

    At most one attempt is made every `alert_interval` seconds; calls that arrive
    earlier return immediately. Failures (JPEG encoding, network, HTTP status) are
    reported with a printed message instead of an exception, so the capture loop
    that calls this keeps running.

    Params:
            frame: image array that cv2.imencode can encode as JPEG
    Returns:
            None
    """
    global last_sent_time
    current_time = time.time()

    # Not enough time has passed since the previous attempt.
    if current_time - last_sent_time < alert_interval:
        return

    # Record the attempt up front: if Telegram is unreachable, the next try waits for
    # the interval too, instead of blocking on a timeout for every single frame.
    last_sent_time = current_time

    # Encode the frame (numpy array) as an in-memory JPEG; no file is written.
    encoded_ok, img_encoded = cv2.imencode(".jpg", frame)
    if not encoded_ok:
        print("[TELEGRAM ERROR] Không mã hóa được khung hình")
        return
    img_bytes = img_encoded.tobytes()

    # https://api.telegram.org/bot[TOKEN]/sendPhoto
    url = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendPhoto"

    try:
        response = requests.post(
            url,
            data={"chat_id": CHAT_ID, 'caption': "🚨 Cảnh báo! Phát hiện vũ khí"},
            files={"photo": ("alert.jpg", img_bytes, "image/jpeg")},
            timeout=10,  # without a timeout a stalled connection would freeze the stream
        )
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can contain the request
        # URL, and that URL embeds the bot token.
        print(f"[TELEGRAM ERROR] Gửi cảnh báo thất bại: {type(exc).__name__}")
        return

    if not response.ok:
        print(f"[TELEGRAM ERROR] Gửi cảnh báo thất bại: HTTP {response.status_code}")
        return

    print("[TELEGRAM SENT] Đã gửi cảnh báo")

### **Main**

In [10]:
# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialize bounding box to empty
bbox = ''
count = 0

# Class names (compared in lower case) that count as a weapon, and the confidence a
# weapon detection must exceed before a Telegram alert is sent.
WEAPON_CLASSES = {'knife', 'pistol', 'gun'}
ALERT_CONFIDENCE = 0.77

while True:
    js_reply = video_frame(label_html, bbox)
    # An empty reply means the stream was shut down from the browser. An empty "img"
    # means the stop click arrived while a frame was being captured. Stop in both cases.
    if not js_reply or not js_reply["img"]:
        break

    # convert JS response to OpenCV Image
    frame = js_to_image(js_reply["img"])
    if frame is None:
        # the payload could not be decoded as an image; skip it and ask for the next one
        continue
    frame = cv2.flip(frame, 1)
    # resize before feeding the frame to the model
    frame = cv2.resize(frame, dsize=(600, 400))

    results = model(frame)

    # Highest confidence among the weapon detections of this frame (0.0 = none found).
    # The alert decision must use this value, not the confidence of whichever box
    # happened to be visited last, which may belong to a different class.
    best_weapon_conf = 0.0
    for result in results:
        for box in result.boxes:
            cls = int(box.cls[0].item())
            class_name = model.names[cls]
            if class_name.lower() not in WEAPON_CLASSES:
                continue

            confidence = box.conf[0].item()
            best_weapon_conf = max(best_weapon_conf, confidence)

            # bounding-box corners, drawn as a red rectangle
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

            # confidence, as a percentage, written just above the box
            label = f"{confidence * 100:.1f}%"
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    if best_weapon_conf > ALERT_CONFIDENCE:
        send_telegram(frame)








<IPython.core.display.Javascript object>


0: 448x640 1 knife, 68.1ms
Speed: 3.8ms preprocess, 68.1ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 (no detections), 70.8ms
Speed: 4.2ms preprocess, 70.8ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 (no detections), 68.0ms
Speed: 3.2ms preprocess, 68.0ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 (no detections), 68.0ms
Speed: 2.8ms preprocess, 68.0ms inference, 1.3ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 (no detections), 67.9ms
Speed: 2.6ms preprocess, 67.9ms inference, 0.8ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 (no detections), 67.9ms
Speed: 3.0ms preprocess, 67.9ms inference, 0.9ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 (no detections), 68.0ms
Speed: 3.0ms preprocess, 68.0ms inference, 0.6ms postprocess per image at shape (1, 3, 448, 640)
