In [1]:
# 필요한 라이브러리 설치
!pip install -q imageio
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


In [2]:
# TensorFlow 및 TF-Hub 모듈 불러오기
from absl import logging
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed
logging.set_verbosity(logging.ERROR)

# UCF101 데이터셋을 처리하기 위한 몇 가지 도우미 모듈
import random
import re
import os
import tempfile
import ssl
import cv2
import numpy as np

# 애니메이션을 표시하기 위한 이미지 입출력 관련 모듈
import imageio
from IPython import display

from urllib import request  # Python 3가 필요함

In [3]:
# Utilities for fetching videos from the UCF101 dataset.
# Base URL that list_ucf_videos() scrapes and fetch_ufc_video() downloads from.
UCF_ROOT = "https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/"
# Cache of video file names; stays None until list_ucf_videos() fills it in.
_VIDEO_LIST = None
# Temporary directory created once per kernel session; fetch_ufc_video() stores downloads here.
_CACHE_DIR = tempfile.mkdtemp()
# NOTE(review): this context disables TLS certificate verification and is built
# with a private `ssl` helper. Presumably a workaround for the host's
# certificate -- confirm it is still needed before reusing this elsewhere.
unverified_context = ssl._create_unverified_context()

def list_ucf_videos():
  """Return the list of video file names in the UCF101 dataset.

  The index page at UCF_ROOT is downloaded on the first call and scanned for
  names of the form ``v_<...>.avi``. The de-duplicated, sorted result is kept
  in the module-level _VIDEO_LIST so later calls do not hit the network.

  Returns:
    A new list of file names; callers may modify it without affecting the cache.
  """
  global _VIDEO_LIST
  if not _VIDEO_LIST:
    # Close the HTTP response deterministically instead of leaving it to the GC.
    with request.urlopen(UCF_ROOT, context=unverified_context) as response:
      index = response.read().decode("utf-8")
    # Raw string: "\w" and "\." are invalid escapes in a normal string literal
    # and trigger a SyntaxWarning on recent Python versions.
    videos = re.findall(r"(v_[\w_]+\.avi)", index)
    _VIDEO_LIST = sorted(set(videos))
  return list(_VIDEO_LIST)

def fetch_ufc_video(video):
  """Fetch a video and cache it on the local file system.

  Args:
    video: File name of the video relative to UCF_ROOT,
      e.g. "v_Typing_g01_c01.avi".

  Returns:
    Path of the cached copy inside _CACHE_DIR. If the file already exists
    there, it is returned without any download.
  """
  # urljoin is documented in urllib.parse; urllib.request only happens to
  # re-export it as an implementation detail.
  from urllib.parse import urljoin

  cache_path = os.path.join(_CACHE_DIR, video)
  if not os.path.exists(cache_path):
    urlpath = urljoin(UCF_ROOT, video)
    print("Fetching %s => %s" % (urlpath, cache_path))
    # Close the HTTP response as soon as the payload has been read.
    with request.urlopen(urlpath, context=unverified_context) as response:
      data = response.read()
    # The context manager guarantees the file is flushed and closed before the
    # path is handed to a reader such as cv2.VideoCapture.
    with open(cache_path, "wb") as cache_file:
      cache_file.write(data)
  return cache_path

# CV2를 사용하여 비디오 파일 열기 위한 유틸리티 함수들
def crop_center_square(frame):
  """Crop the largest centered square out of a frame.

  Args:
    frame: Array whose first two axes are (height, width); any trailing axes
      are passed through unchanged.

  Returns:
    A slice of `frame` whose height and width both equal min(height, width).
  """
  height, width = frame.shape[0:2]
  side = min(height, width)
  # Offsets of the square's top-left corner along each axis.
  top = (height // 2) - (side // 2)
  left = (width // 2) - (side // 2)
  return frame[top:top + side, left:left + side]

def load_video(path, max_frames=0, resize=(224, 224)):
  """Decode a video file into an array of center-cropped, resized frames.

  Args:
    path: Path of the video file handed to cv2.VideoCapture.
    max_frames: Stop after this many frames. With the default of 0 the
      frame count never matches, so every frame is read.
    resize: Target size passed to cv2.resize for each cropped frame.

  Returns:
    The stacked frames divided by 255.0, with the channel axis reordered
    from index order (0, 1, 2) to (2, 1, 0).
  """
  capture = cv2.VideoCapture(path)
  collected = []
  try:
    ok, image = capture.read()
    while ok:
      square = crop_center_square(image)
      scaled = cv2.resize(square, resize)
      # Swap the first and last channels (OpenCV decodes frames as BGR).
      collected.append(scaled[:, :, [2, 1, 0]])
      if len(collected) == max_frames:
        break
      ok, image = capture.read()
  finally:
    # Always free the capture handle, even if a frame fails to process.
    capture.release()
  return np.array(collected) / 255.0

def to_gif(images):
  """Write frames to ./animation.gif and return an embeddable view of it.

  Args:
    images: Array of frames; values are multiplied by 255 and clipped to
      [0, 255] before being converted to uint8.

  Returns:
    The result of embed.embed_file for the written GIF.
  """
  gif_path = './animation.gif'
  frames_u8 = np.clip(images * 255, 0, 255).astype(np.uint8)
  imageio.mimsave(gif_path, frames_u8, fps=25)
  return embed.embed_file(gif_path)

In [9]:
# Download the Kinetics-400 action labels from the GitHub repository.
KINETICS_URL = "https://raw.githubusercontent.com/deepmind/kinetics-i3d/master/data/label_map.txt"
with request.urlopen(KINETICS_URL) as obj:
  # Iterating the response yields one raw line at a time; decode and trim each.
  labels = [raw_line.decode("utf-8").strip() for raw_line in obj]
print("Found %d labels." % len(labels))

Found 400 labels.


In [4]:
# Get the list of videos in the UCF101 dataset.
ucf_videos = list_ucf_videos()

# Group the videos by category. Dropping the first 2 and last 12 characters
# turns e.g. "v_Archery_g01_c01.avi" into "Archery".
categories = {}
for video in ucf_videos:
  category = video[2:-12]
  categories.setdefault(category, []).append(video)
print("Found %d videos in %d categories." % (len(ucf_videos), len(categories)))

# Print the first two videos of each category.
for category in categories:
  sequences = categories[category]
  summary = ", ".join(sequences[:2])
  print("%-20s %4d videos (%s, ...)" % (category, len(sequences), summary))

Found 13320 videos in 101 categories.
ApplyEyeMakeup        145 videos (v_ApplyEyeMakeup_g01_c01.avi, v_ApplyEyeMakeup_g01_c02.avi, ...)
ApplyLipstick         114 videos (v_ApplyLipstick_g01_c01.avi, v_ApplyLipstick_g01_c02.avi, ...)
Archery               145 videos (v_Archery_g01_c01.avi, v_Archery_g01_c02.avi, ...)
BabyCrawling          132 videos (v_BabyCrawling_g01_c01.avi, v_BabyCrawling_g01_c02.avi, ...)
BalanceBeam           108 videos (v_BalanceBeam_g01_c01.avi, v_BalanceBeam_g01_c02.avi, ...)
BandMarching          155 videos (v_BandMarching_g01_c01.avi, v_BandMarching_g01_c02.avi, ...)
BaseballPitch         150 videos (v_BaseballPitch_g01_c01.avi, v_BaseballPitch_g01_c02.avi, ...)
BasketballDunk        131 videos (v_BasketballDunk_g01_c01.avi, v_BasketballDunk_g01_c02.avi, ...)
Basketball            134 videos (v_Basketball_g01_c01.avi, v_Basketball_g01_c02.avi, ...)
BenchPress            160 videos (v_BenchPress_g01_c01.avi, v_BenchPress_g01_c02.avi, ...)
Biking              

In [5]:
# Fetch a sample "typing" video, decode it with load_video's defaults, and
# display the shape of the resulting frame array as the cell output.
video_path = fetch_ufc_video("v_Typing_g01_c01.avi")
sample_video = load_video(video_path)
sample_video.shape

Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_Typing_g01_c01.avi => /tmp/tmpit6euy4_/v_Typing_g01_c01.avi


(127, 224, 224, 3)

In [6]:
# Load the I3D model through TF-Hub and keep its 'default' signature, which
# predict() calls directly on a batch of frames.
i3d = hub.load("https://www.kaggle.com/models/deepmind/i3d-kinetics/frameworks/TensorFlow1/variations/400/versions/1").signatures['default']

In [7]:
# 샘플 비디오에 대한 동작 예측
def predict(sample_video):
  """Print the five most probable action labels for a video.

  Args:
    sample_video: Array of frames; it is converted to a float32 tensor and a
      leading batch axis is added before it is passed to the model.

  Relies on the module-level `i3d` callable and `labels` list.
  """
  batched = tf.constant(sample_video, dtype=tf.float32)[tf.newaxis, ...]
  outputs = i3d(batched)
  # Only one video is in the batch, so take the first row of logits.
  probabilities = tf.nn.softmax(outputs['default'][0])
  # Indices sorted from most to least probable.
  ranked = np.argsort(probabilities)[::-1]

  print("Top 5 actions:")
  for i in ranked[:5]:
    print(f"  {labels[i]:22}: {probabilities[i] * 100:5.2f}%")

In [10]:
# Predict the action for the sample "typing" video loaded earlier.
predict(sample_video)

Top 5 actions:
  using computer        : 100.00%
  drumming fingers      :  0.00%
  texting               :  0.00%
  using remote controller (not gaming):  0.00%
  jogging               :  0.00%


In [11]:
# Fetch a sample "playing piano" video from UCF101, decode it, and print the
# top predicted actions for it.
video_path2 = fetch_ufc_video("v_PlayingPiano_g01_c01.avi")
sample_video2 = load_video(video_path2)
predict(sample_video2)

Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_PlayingPiano_g01_c01.avi => /tmp/tmpit6euy4_/v_PlayingPiano_g01_c01.avi
Top 5 actions:
  playing piano         : 94.79%
  playing organ         :  1.88%
  checking tires        :  1.19%
  driving car           :  0.30%
  playing trumpet       :  0.29%


In [12]:
!pip install yt-dlp

Collecting yt-dlp
  Downloading yt_dlp-2023.12.30-py2.py3-none-any.whl (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mutagen (from yt-dlp)
  Downloading mutagen-1.47.0-py3-none-any.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.4/194.4 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pycryptodomex (from yt-dlp)
  Downloading pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
Collecting websockets>=12.0 (from yt-dlp)
  Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.2/130.2 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting b

In [13]:
# YouTube에서 피아노 연주 동영상 가져오기
!yt-dlp -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' -o test_video_piano.mp4 https://www.youtube.com/watch?v=wnD81KcTL0o

[youtube] Extracting URL: https://www.youtube.com/watch?v=wnD81KcTL0o
[youtube] wnD81KcTL0o: Downloading webpage
[youtube] wnD81KcTL0o: Downloading ios player API JSON
[youtube] wnD81KcTL0o: Downloading android player API JSON
[youtube] wnD81KcTL0o: Downloading m3u8 information
[info] wnD81KcTL0o: Downloading 1 format(s): 609+140
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 19
[download] Destination: test_video_piano.f609.mp4
[K[download] 100% of    4.69MiB in [1;37m00:00:02[0m at [0;32m1.57MiB/s[0m
[download] Destination: test_video_piano.f140.m4a
[K[download] 100% of    1.52MiB in [1;37m00:00:00[0m at [0;32m6.66MiB/s[0m
[Merger] Merging formats into "test_video_piano.mp4"
Deleting original file test_video_piano.f140.m4a (pass -k to keep)
Deleting original file test_video_piano.f609.mp4 (pass -k to keep)


In [14]:
# Classify the first 100 frames of the piano video downloaded from YouTube.
video_path3 = "test_video_piano.mp4"
# Let load_video stop after 100 frames rather than decoding the whole clip into
# a float array and slicing it afterwards; the resulting frames are the same.
sample_video3 = load_video(video_path3, max_frames=100)
# The former bare `sample_video3.shape` line was dropped: it was not the last
# expression in the cell, so it displayed nothing.
predict(sample_video3)

Top 5 actions:
  cleaning windows      :  9.20%
  playing bagpipes      :  8.79%
  playing recorder      :  6.03%
  playing trumpet       :  4.54%
  playing piano         :  3.91%


In [15]:
# YouTube에서 "타이핑" 동영상 다운로드
!yt-dlp -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' -o test_video_typing.mp4 https://www.youtube.com/watch?v=LBSwweZj4ok

[youtube] Extracting URL: https://www.youtube.com/watch?v=LBSwweZj4ok
[youtube] LBSwweZj4ok: Downloading webpage
[youtube] LBSwweZj4ok: Downloading ios player API JSON
[youtube] LBSwweZj4ok: Downloading android player API JSON
[youtube] LBSwweZj4ok: Downloading m3u8 information
[info] LBSwweZj4ok: Downloading 1 format(s): 614+140
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 4
[download] Destination: test_video_typing.f614.mp4
[K[download] 100% of    1.92MiB in [1;37m00:00:00[0m at [0;32m2.02MiB/s[0m
[download] Destination: test_video_typing.f140.m4a
[K[download] 100% of  278.48KiB in [1;37m00:00:00[0m at [0;32m2.68MiB/s[0m
[Merger] Merging formats into "test_video_typing.mp4"
Deleting original file test_video_typing.f140.m4a (pass -k to keep)
Deleting original file test_video_typing.f614.mp4 (pass -k to keep)


In [16]:
# Classify the first 100 frames of the typing video downloaded from YouTube.
video_path4 = "test_video_typing.mp4"
# Let load_video stop after 100 frames rather than decoding the whole clip into
# a float array and slicing it afterwards; the resulting frames are the same.
sample_video4 = load_video(video_path4, max_frames=100)
# The former bare `sample_video4.shape` line was dropped: it was not the last
# expression in the cell, so it displayed nothing.
predict(sample_video4)

Top 5 actions:
  using computer        : 97.99%
  applying cream        :  0.97%
  tapping pen           :  0.54%
  drumming fingers      :  0.14%
  breading or breadcrumbing:  0.09%
