<a href="https://colab.research.google.com/github/iMan0405/Portfolio_DT_vs_AI/blob/main/Load_video_data_for_study.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Kutubxonlarni yuklab olish va o'rnatish

In [1]:
!pip install -U "tensorflow>=2.10.0"

Collecting tensorflow>=2.10.0
  Downloading tensorflow-2.15.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow
  Attempting uninstall: tensorflow
    Found existing installation: tensorflow 2.15.0
    Uninstalling tensorflow-2.15.0:
      Successfully uninstalled tensorflow-2.15.0
Successfully installed tensorflow-2.15.0.post1


In [2]:
!pip install remotezip tqdm opencv-python
!pip install -q git+https://github.com/tensorflow/docs

Collecting remotezip
  Downloading remotezip-0.12.2.tar.gz (7.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: remotezip
  Building wheel for remotezip (setup.py) ... [?25l[?25hdone
  Created wheel for remotezip: filename=remotezip-0.12.2-py3-none-any.whl size=7890 sha256=bcdff0d070a79ed9d275711275e5b0bd0a12bf2a452fcbda009284eab32e0da7
  Stored in directory: /root/.cache/pip/wheels/f2/71/f9/c650cc34039d772da8b040ed447b6b5f5de9abc38d88073fa1
Successfully built remotezip
Installing collected packages: remotezip
Successfully installed remotezip-0.12.2
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


In [3]:
import tqdm
import random
import pathlib
import itertools
import collections

import os
import cv2
import numpy as np
import remotezip as rz

import tensorflow as tf

import imageio
from IPython import display
from urllib import request
from tensorflow_docs.vis import embed

In [4]:
URL = "https://storage.googleapis.com/thumos14_files/UCF101_videos.zip"

In [5]:
def list_files_from_zip_url(zip_url):
    files = []
    with rz.RemoteZip(zip_url) as zip:
        for zip_info in zip.infolist():
            files.append(zip_info.filename)
    return files

In [6]:
files = list_files_from_zip_url(URL)
files = [f for f in files if f.endswith('.avi')]
files[:10]

['UCF101/v_ApplyEyeMakeup_g01_c01.avi',
 'UCF101/v_ApplyEyeMakeup_g01_c02.avi',
 'UCF101/v_ApplyEyeMakeup_g01_c03.avi',
 'UCF101/v_ApplyEyeMakeup_g01_c04.avi',
 'UCF101/v_ApplyEyeMakeup_g01_c05.avi',
 'UCF101/v_ApplyEyeMakeup_g01_c06.avi',
 'UCF101/v_ApplyEyeMakeup_g02_c01.avi',
 'UCF101/v_ApplyEyeMakeup_g02_c02.avi',
 'UCF101/v_ApplyEyeMakeup_g02_c03.avi',
 'UCF101/v_ApplyEyeMakeup_g02_c04.avi']

In [7]:
def get_class(fname):
    return fname.split('_')[-3]

In [8]:
def get_files_per_class(files):
    files_for_class = collections.defaultdict(list)
    for fname in files:
        class_name = get_class(fname)
        files_for_class[class_name].append(fname)
    return files_for_class

In [9]:
NUM_CLASSES = 10
FILES_PER_CLASS = 50

In [10]:
files_for_class = get_files_per_class(files)
classes = list(files_for_class.keys())

In [11]:
print('Num classes:', len(classes))
print('Num videos for class[0]:', len(files_for_class[classes[0]]))

Num classes: 101
Num videos for class[0]: 145


In [12]:
def select_subset_of_classes(files_for_class, classes, files_per_class):
    files_subset = dict()
    for class_name in classes:
        class_files = files_for_class[class_name]
        files_subset[class_name] = class_files[:files_per_class]
    return files_subset

In [13]:
files_subset = select_subset_of_classes(files_for_class, classes[:NUM_CLASSES], FILES_PER_CLASS)
list(files_subset.keys())

['ApplyEyeMakeup',
 'ApplyLipstick',
 'Archery',
 'BabyCrawling',
 'BalanceBeam',
 'BandMarching',
 'BaseballPitch',
 'BasketballDunk',
 'Basketball',
 'BenchPress']

In [14]:
def download_from_zip(zip_url, to_dir, file_names):
    with rz.RemoteZip(zip_url) as zip:
        for fn in tqdm.tqdm(file_names):
            class_name = get_class(fn)
            zip.extract(fn, str(to_dir / class_name))
            unzipped_file = to_dir / class_name / fn

            fn = pathlib.Path(fn).parts[-1]
            output_file = to_dir / class_name / fn
            unzipped_file.rename(output_file)

In [15]:
def split_class_lists(files_for_class, count):
    split_files = []
    remainder = {}
    for cls in files_for_class:
        split_files.extend(files_for_class[cls][:count])
        remainder[cls] = files_for_class[cls][count:]
    return split_files, remainder

In [16]:
def download_ucf_101_subset(zip_url, num_classes, splits, download_dir):
    files = list_files_from_zip_url(zip_url)
    for f in files:
        path = os.path.normpath(f)
        tokens = path.split(os.sep)
        if len(tokens) <= 2:
            files.remove(f)
    files_for_class = get_files_per_class(files)
    classes = list(files_for_class.keys())[:num_classes]

    for cls in classes:
        random.shuffle(files_for_class[cls])
    files_for_class = {x: files_for_class[x] for x in classes}

    dirs = {}
    for split_name, split_count in splits.items():
        print(split_name, ":")
        split_dir = download_dir / split_name
        split_files, files_for_class = split_class_lists(files_for_class, split_count)
        download_from_zip(zip_url, split_dir, split_files)
        dirs[split_name] = split_dir
    return dirs

In [17]:
download_dir = pathlib.Path('./UCF101_subset/')
subset_paths = download_ucf_101_subset(URL,
                                       num_classes = NUM_CLASSES,
                                       splits = {"train":30, "val":10, "test":10},
                                       download_dir = download_dir)

train :


100%|██████████| 300/300 [00:56<00:00,  5.32it/s]


val :


100%|██████████| 100/100 [00:19<00:00,  5.08it/s]


test :


100%|██████████| 100/100 [00:18<00:00,  5.35it/s]


In [18]:
video_count_train = len(list(download_dir.glob('train/*/*.avi')))
video_count_val = len(list(download_dir.glob('val/*/*.avi')))
video_count_test = len(list(download_dir.glob('test/*/*.avi')))
video_total = video_count_train + video_count_val + video_count_test
print(f"Total videos: {video_total}")

Total videos: 500


In [19]:
!find ./UCF101_subset

./UCF101_subset
./UCF101_subset/train
./UCF101_subset/train/ApplyEyeMakeup
./UCF101_subset/train/ApplyEyeMakeup/UCF101
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g18_c03.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g18_c01.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g19_c04.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g13_c05.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g13_c01.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g02_c01.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g14_c03.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g06_c01.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g14_c01.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g04_c01.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c05.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g24_c02.avi
./UCF101_subset/train/ApplyEyeMakeup/v_ApplyEyeMakeup_g23_c04.avi
./UCF101_subset/train/A

In [20]:
def format_frames(frame, output_size):
    frame = tf.image.convert_image_dtype(frame, tf.float32)
    frame = tf.image.resize_with_pad(frame, *output_size)
    return frame

In [21]:
def frames_from_video_file(video_path, n_frames, output_size = (224, 224), frame_step = 15):
    result = []
    src = cv2.VideoCapture(str(video_path))
    video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)
    need_length = 1+ (n_frames - 1) * frame_step
    if need_length > video_length:
        start = 0
    else:
        max_start = video_length - need_length
        start = random.randint(0, max_start + 1)

    src.set(cv2.CAP_PROP_POS_FRAMES, start)
    ret, frame = src.read()
    result.append(format_frames(frame, output_size))

    for _ in range(n_frames - 1):
        for _ in range(frame_step):
            ret, frame = src.read()
        if ret:
            frame = format_frames(frame, output_size)
            result.append(frame)
        else:
            result.append(np.zeros_like(result[0]))
    src.release()
    result = np.array(result)[..., [2, 1, 0]]

    return result

### Video ma'lumotlarni vizualizatsiya qilish

In [22]:
!curl -O https://upload.wikimedia.org/wikipedia/commons/8/86/End_of_a_jam.ogv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 55.0M  100 55.0M    0     0  24.1M      0  0:00:02  0:00:02 --:--:-- 24.1M
