In [None]:
!pip install ultralytics deep_sort_realtime facenet-pytorch

In [None]:
import cv2
import numpy as np
import time
from datetime import datetime
import os
from PIL import Image
import torch
import torchvision.transforms as transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from facenet_pytorch import MTCNN, InceptionResnetV1

# Imports specific to Google Colab
from google.colab.patches import cv2_imshow
from google.colab import files

# Try to import the required libraries
try:
    from ultralytics import YOLO
    from deep_sort_realtime.deepsort_tracker import DeepSort
except ImportError:
    print("Error: Required libraries are not installed.")
    print("Please install them using: !pip install ultralytics deep_sort_realtime facenet-pytorch")
    exit(1)

def enhance_image(image, brightness=0, contrast=0):
    """Apply a linear brightness/contrast adjustment to an image array.

    Every value is multiplied by ``1 + contrast / 100``, shifted by
    ``brightness``, clamped to the range 0-255 and converted to unsigned
    8-bit. The arithmetic runs on the GPU when CUDA is available and on the
    CPU otherwise; the result is always returned as a NumPy array.

    Args:
        image: NumPy array of pixel values.
        brightness: Additive offset applied after the contrast gain.
        contrast: Contrast change in percent (0 leaves the gain at 1).

    Returns:
        A NumPy ``uint8`` array holding the adjusted values.
    """
    target = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    gain = 1 + contrast/100

    pixels = torch.from_numpy(image).float().to(target)
    adjusted = (pixels * gain + brightness).clamp(0, 255)
    return adjusted.byte().cpu().numpy()

def _person_detections(results, frame_shape, input_size=640):
    """Convert YOLO results into DeepSort detections for class 0 ('person').

    Box coordinates are rescaled from the ``input_size`` x ``input_size``
    network input back to the pixel grid of the original frame and returned
    as ``([left, top, width, height], confidence, 'person')`` tuples.
    """
    orig_h, orig_w = frame_shape[:2]
    detections = []
    for result in results:
        for box in result.boxes:
            if int(box.cls[0]) != 0:
                continue
            x1, y1, x2, y2 = (coord.item() for coord in box.xyxy[0])
            x1 = int(x1 * orig_w / input_size)
            y1 = int(y1 * orig_h / input_size)
            x2 = int(x2 * orig_w / input_size)
            y2 = int(y2 * orig_h / input_size)
            # Hand the tracker a plain Python float rather than a (possibly
            # GPU-resident) tensor.
            confidence = float(box.conf[0])
            detections.append(([x1, y1, x2 - x1, y2 - y1], confidence, 'person'))
    return detections


def _matches_known_face(face_embedding, known_face_embeddings, threshold):
    """Return True if the embedding's cosine similarity to any known one exceeds threshold."""
    return any(
        torch.nn.functional.cosine_similarity(face_embedding, known_emb, dim=0).item() > threshold
        for known_emb in known_face_embeddings
    )


def process_video(video_path, yolo_model, tracker, mtcnn, resnet, output_dir,
                  known_face_embeddings, start_unique_count,
                  frame_interval=60, similarity_threshold=0.7):
    """Count unique people in one video by sampling frames at a fixed interval.

    Every ``frame_interval``-th frame is enhanced, run through the YOLO model,
    and the resulting person boxes are fed to the tracker. For each confirmed
    track the person crop is passed to ``mtcnn``/``resnet``; a face whose
    embedding does not match any entry of ``known_face_embeddings`` increments
    the unique count, is appended to that list (the list is mutated in place)
    and the person crop is saved to ``output_dir``.

    Args:
        video_path: Path of the video file to open with OpenCV.
        yolo_model: Detection model; called on a 1x3x640x640 float tensor.
        tracker: Object exposing ``update_tracks(detections, frame=...)``.
        mtcnn: Face detector called on a PIL image of the person crop.
        resnet: Embedding network called on the detected face batch.
        output_dir: Directory that receives ``unique_person_<n>.jpg`` files.
        known_face_embeddings: List of embeddings seen so far.
        start_unique_count: Unique-person count to continue from.
        frame_interval: Process one frame out of this many (default 60).
        similarity_threshold: Cosine similarity above which two embeddings
            are treated as the same person (default 0.7).

    Returns:
        A tuple ``(unique_count, known_face_embeddings, frame_count,
        no_face_detected_count, no_person_detected_count, passersby_count)``.
        ``frame_count`` is the number of frames read from the video (not only
        the sampled ones) and ``passersby_count`` is the sum of the number of
        tracks returned on each sampled frame. If the video cannot be opened
        the counts are returned as zeros with ``start_unique_count`` unchanged.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be a positive integer")

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error: Unable to open video file: {video_path}")
            return start_unique_count, known_face_embeddings, 0, 0, 0, 0

        total_unique_person_count = start_unique_count
        frame_count = 0
        no_face_detected_count = 0
        no_person_detected_count = 0
        passersby_count = 0

        yolo_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((640, 640)),
            transforms.ToTensor()
        ])

        device = next(yolo_model.parameters()).device
        fps = cap.get(cv2.CAP_PROP_FPS)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Advance the counter for every frame read, before any early
            # `continue`; otherwise a sampled frame without detections would
            # leave the counter stuck and every following frame would be
            # processed as well.
            frame_index = frame_count
            frame_count += 1
            if frame_index % frame_interval != 0:
                continue

            # The reported FPS can be 0 when the container lacks the
            # information; avoid dividing by it.
            current_time_sec = frame_index / fps if fps > 0 else float("nan")
            print(f"Processing second: {current_time_sec:.2f}")

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_rgb = enhance_image(frame_rgb, brightness=10, contrast=10)
            frame_resized = yolo_transform(frame_rgb).unsqueeze(0).to(device)

            results = yolo_model(frame_resized)
            detections = _person_detections(results, frame.shape)

            if not detections:
                no_person_detected_count += 1
                continue

            # NOTE(review): frame_rgb is in RGB order; the tracker's appearance
            # embedder may expect BGR input depending on how it was
            # constructed - confirm against the tracker configuration.
            tracks = tracker.update_tracks(detections, frame=frame_rgb)
            frame_h, frame_w = frame_rgb.shape[:2]

            for track in tracks:
                if not track.is_confirmed():
                    continue

                x1, y1, x2, y2 = map(int, track.to_ltrb())
                # Track boxes may extend past the frame; clamp them so that
                # negative indices do not wrap around when slicing.
                x1, y1 = max(x1, 0), max(y1, 0)
                x2, y2 = min(x2, frame_w), min(y2, frame_h)
                if x2 <= x1 or y2 <= y1:
                    # Nothing of the track is visible, so no face can be found.
                    no_face_detected_count += 1
                    continue
                person_image = frame_rgb[y1:y2, x1:x2]

                # Best-effort: a failure on one crop must not abort the video.
                try:
                    faces = mtcnn(Image.fromarray(person_image))
                    if faces is None or faces.shape[0] == 0:
                        no_face_detected_count += 1
                        continue

                    # Inference only; skip autograd bookkeeping.
                    with torch.no_grad():
                        face_embeddings = resnet(faces.to(device)).cpu()

                    for face_embedding in face_embeddings:
                        if _matches_known_face(face_embedding, known_face_embeddings, similarity_threshold):
                            continue
                        total_unique_person_count += 1
                        known_face_embeddings.append(face_embedding)

                        # The saved image is the whole person crop.
                        face_image = Image.fromarray(person_image)
                        face_image.save(os.path.join(output_dir, f"unique_person_{total_unique_person_count}.jpg"))
                except Exception as e:
                    print(f"Error in face detection/recognition: {e}")
                    no_face_detected_count += 1

            passersby_count += len(tracks)
            print(f"Processed {frame_count} frames. Current unique people count: {total_unique_person_count}")

        return total_unique_person_count, known_face_embeddings, frame_count, no_face_detected_count, no_person_detected_count, passersby_count
    finally:
        # Release the capture even when processing fails midway.
        cap.release()

def main(video_paths):
    """Run the people-counting pipeline over several videos and write reports.

    Loads the YOLO detector, a DeepSort tracker and the MTCNN / Inception
    ResNet face models, then calls ``process_video`` for each path in order.
    The list of known face embeddings and the unique-person count are carried
    from one video to the next, so the "Unique persons" value written to each
    per-video report is the running total up to and including that video.

    A text report is written per video, followed by a combined report that is
    finally offered for download through ``files.download``. If the YOLO
    model cannot be loaded an error is printed and the function returns
    without processing anything.

    Args:
        video_paths: Sequence of video file paths to process.
    """
    def timestamp():
        # Timestamp format shared by file names and the report body.
        return datetime.now().strftime('%Y%m%d_%H%M%S')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    try:
        yolo_model = YOLO("yolov8n.pt")
        yolo_model.to(device)
    except Exception as e:
        print(f"Error loading YOLO model: {e}")
        return

    tracker = DeepSort(max_age=30)
    mtcnn = MTCNN(keep_all=True, device=device)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    output_dir = "/content/unique_face_images"
    os.makedirs(output_dir, exist_ok=True)

    # Running state shared across all videos.
    unique_total = 0
    known_face_embeddings = []
    frames_total = 0
    faceless_total = 0
    personless_total = 0
    passersby_total = 0
    started_at = time.time()

    for video_number, video_path in enumerate(video_paths, start=1):
        print(f"Processing video {video_number}/{len(video_paths)}: {video_path}")
        outcome = process_video(
            video_path, yolo_model, tracker, mtcnn, resnet, output_dir, known_face_embeddings, unique_total
        )
        unique_total, known_face_embeddings, frames, faceless, personless, passersby = outcome
        frames_total += frames
        faceless_total += faceless
        personless_total += personless
        passersby_total += passersby

        video_output_filename = f"video_{video_number}_results_{timestamp()}.txt"
        with open(video_output_filename, "w", encoding="utf-8") as report:
            report.writelines([
                f"Video path: {video_path}\n",
                f"Unique persons: {unique_total}\n",
                f"Processed frames: {frames}\n",
                f"Frames with no face detected: {faceless}\n",
                f"Frames with no person detected: {personless}\n",
                f"Passersby count: {passersby}\n",
            ])

        print(f"Results for video {video_number} saved to {video_output_filename}")

    execution_time = time.time() - started_at

    output_filename = f"combined_results_{timestamp()}.txt"
    with open(output_filename, "w", encoding="utf-8") as report:
        report.writelines([
            f"Total unique persons across all videos: {unique_total}\n",
            f"Total execution time: {execution_time:.2f} seconds\n",
            f"Total processed frames: {frames_total}\n",
            f"Total frames with no face detected: {faceless_total}\n",
            f"Total frames with no person detected: {personless_total}\n",
            f"Total passersby count: {passersby_total}\n",
            f"Date and time: {timestamp()}\n",
            f"Face images saved at: {os.path.abspath(output_dir)}\n",
        ])

    print(f"Combined results saved to {output_filename}")
    print(f"Face images saved at {os.path.abspath(output_dir)}")

    files.download(output_filename)

if __name__ == "__main__":
    # Videos to analyse, in processing order.
    # NOTE(review): the /content/drive/MyDrive prefix presumably refers to a
    # Google Drive mount inside Colab - confirm the drive is mounted first.
    video_paths = [
        "/content/drive/MyDrive/MIPPE記録/MAH00065.MP4",
        "/content/drive/MyDrive/MIPPE記録/MAH00066.MP4"
    ]
    # Run the full pipeline over all listed videos.
    main(video_paths)
