In [1]:
import os
import cv2
import pandas as pd
from tqdm import tqdm
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from PIL import Image,  ImageEnhance
import mediapipe as mp

In [2]:
# Root folder of the dataset; the train/test video folders live directly under it.
DATA_DIR = 'slovo'
TRAIN_DIR, TEST_DIR = (os.path.join(DATA_DIR, name) for name in ('train_100', 'test_100'))

# Annotation table for the 100-class subset; shown as the cell output.
ann_100 = pd.read_csv(os.path.join(DATA_DIR, 'annotations_100.csv'))
ann_100

Unnamed: 0,attachment_id,text,user_id,height,width,length,train,frame_count,height_width
0,5ce6ce8a-d191-47d4-97b4-3fc6f9138a73,я,185bd3a81d9d618518d10abebf0d17a8,1920,1080,91.0,True,51,1920_1080
1,c9e2dbd1-ec2f-4382-a80e-5bca8396a9b1,я,46dd04a1caa75ed3082b573cb5a3ad26,1920,1080,58.0,True,58,1920_1080
2,3f72244c-7379-4403-b7ee-e1b1b8d78d46,я,db573f94204e56e0cf3fc2ea000e5bdc,1280,720,49.0,True,49,1280_720
3,5adf08e2-4ff6-4183-ba22-446c1cd0b0f8,я,0211b488644476dd0fec656ccb9b74fc,1920,1080,50.0,True,50,1920_1080
4,13a148fc-a546-47ab-b5d0-f9d0c3cce1de,я,2d84da20c251acaeb3186642fcb04f2e,1920,1080,27.0,True,27,1920_1080
...,...,...,...,...,...,...,...,...,...
1995,74db1174-1314-43c7-bb8a-cc5cce5e2510,редко,f2dc6eab563f93d86629c1cfe479f09d,1440,1440,56.0,False,56,1440_1440
1996,5d742908-98ce-4172-bb2f-02f3595860a4,редко,798aa826b6129d33f72f62d4ba60b681,720,1280,61.0,False,61,720_1280
1997,dfe2ff4c-98fa-4ead-b861-f499f570a321,месяц,798aa826b6129d33f72f62d4ba60b681,720,1280,58.0,False,58,720_1280
1998,04856a31-8326-49cb-8e0c-0843c64a1d92,время,798aa826b6129d33f72f62d4ba60b681,720,1280,52.0,False,52,720_1280


In [4]:
# Split tables stored next to the videos.
val_data = pd.read_csv(os.path.join(DATA_DIR, 'val_data_2.csv'))
test_data = pd.read_csv(os.path.join(DATA_DIR, 'test_data_2.csv'))

# The validation rows are appended to the training rows; `result` is kept as
# an alias of the merged frame.
result = pd.concat([pd.read_csv(os.path.join(DATA_DIR, 'train_data_2.csv')), val_data])
train_data = result

# Video ids and labels as plain arrays.
video_id_train = train_data['video_file'].values
label_train = train_data['label'].values
video_id_test = test_data['video_file'].values
label_test = test_data['label'].values

In [5]:
# Build the video paths with os.path.join so the separator matches the host OS
# (a hard-coded backslash only resolves on Windows, and os.path.basename in
# the processing code cannot split such a path elsewhere).
video_path_train = [os.path.join(TRAIN_DIR, f'{f}.mp4') for f in video_id_train]
video_path_test = [os.path.join(TEST_DIR, f'{f}.mp4') for f in video_id_test]

In [8]:
# Output root; created up front if it does not exist yet.
output_directory = 'slovo/'
os.makedirs(output_directory, exist_ok=True)

# MediaPipe handles: the Holistic solution and the drawing utilities.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# One Holistic model instance shared by every call below.
# NOTE(review): created with default options and reused across all videos —
# confirm that carrying model state from one video to the next is intended.
holistic = mp_holistic.Holistic()

def adjust_landmarks(arr, center):
    """Translate flat (x, y, z) landmarks so that `center` becomes the origin.

    Args:
        arr: Flat array whose length is a multiple of 3 (x, y, z triples).
        center: The (x, y, z) point subtracted from every triple.

    Returns:
        A new flat array of the same length as `arr`; `arr` is not modified.
    """
    points = arr.reshape(-1, 3)
    # One copy of the centre per landmark row.
    offsets = np.tile(center, (points.shape[0], 1))
    return (points - offsets).reshape(-1)

def extract_keypoints(results):
    """Turn a Holistic result into centred pose / left-hand / right-hand vectors.

    Each landmark group is flattened to (x, y, z) triples: 33 points for the
    pose and 21 per hand. A group that is missing from `results` is replaced
    by zeros. Every group is then shifted so that its first landmark sits at
    the origin.

    Returns:
        Tuple (pose, left_hand, right_hand) of flat arrays with 99, 63 and 63
        values respectively.
    """
    def _flatten(landmark_list, n_points):
        # Missing detection -> all-zero placeholder of the right length.
        if landmark_list:
            return np.array([[p.x, p.y, p.z] for p in landmark_list.landmark]).flatten()
        return np.zeros(n_points * 3)

    pose = _flatten(results.pose_landmarks, 33)
    lh = _flatten(results.left_hand_landmarks, 21)
    rh = _flatten(results.right_hand_landmarks, 21)

    # The first triple of each group is used as that group's reference point.
    return (
        adjust_landmarks(pose, pose[:3]),
        adjust_landmarks(lh, lh[:3]),
        adjust_landmarks(rh, rh[:3]),
    )

def process_video(video_path, output_path, num_frames=20):
    """Sample frames evenly from a video, extract keypoints and save them as parquet.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        output_path: Destination of the parquet file.
        num_frames: Maximum number of frames to sample. It is also the minimum
            number of rows written: missing rows are zero-padded.

    The written table has the columns ``frame``, ``pose``, ``left_hand`` and
    ``right_hand``. Padding rows carry ``None`` as frame index and all-zero
    keypoint vectors. A video that cannot be read, or that reports no frames,
    yields a file made only of padding rows.
    """
    video_reader = cv2.VideoCapture(video_path)
    frames = []
    try:
        # Clamp the reported count: a negative sample count makes np.linspace raise.
        total_frames = max(int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT)), 0)

        # Evenly spaced indices of the frames to process.
        frame_indices = np.linspace(0, total_frames - 1, max(min(num_frames, total_frames), 0), dtype=int)
        wanted = set(frame_indices.tolist())
        # Guard against an empty selection; indexing [-1] on it would raise IndexError.
        last_wanted = int(frame_indices[-1]) if wanted else -1

        frame_index = 0
        while wanted and video_reader.isOpened():
            success, frame = video_reader.read()
            if not success or frame_index > last_wanted:
                break

            if frame_index in wanted:
                # MediaPipe expects RGB, OpenCV decodes to BGR.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Pad the shorter side with black borders.
                h, w, _ = frame_rgb.shape
                if h > w:
                    padding = (h - w) // 2
                    frame_rgb = cv2.copyMakeBorder(frame_rgb, 0, 0, padding, padding, cv2.BORDER_CONSTANT, value=[0, 0, 0])
                else:
                    padding = (w - h) // 2
                    frame_rgb = cv2.copyMakeBorder(frame_rgb, padding, padding, 0, 0, cv2.BORDER_CONSTANT, value=[0, 0, 0])

                # Run the Holistic model and extract the centred keypoints.
                results = holistic.process(frame_rgb)
                pose_adjusted, lh_adjusted, rh_adjusted = extract_keypoints(results)
                frames.append([frame_index, pose_adjusted, lh_adjusted, rh_adjusted])

            frame_index += 1
    finally:
        # Release the capture even if decoding or inference raised.
        video_reader.release()

    # Zero-pad so that every file has at least num_frames rows.
    while len(frames) < num_frames:
        frames.append([None, np.zeros(33*3), np.zeros(21*3), np.zeros(21*3)])

    df = pd.DataFrame(frames, columns=['frame', 'pose', 'left_hand', 'right_hand'])
    df.to_parquet(output_path)

def process_videos(video_paths, output_subdir):
    """Run `process_video` on every path and write one parquet file per video.

    The output file is named after the video's base name (extension replaced
    by ``.parquet``) inside `output_subdir`, which is created if needed. A
    progress line is printed after every 100 videos.
    """
    os.makedirs(output_subdir, exist_ok=True)
    for done, video_path in enumerate(tqdm(video_paths), start=1):
        stem = os.path.splitext(os.path.basename(video_path))[0]
        process_video(video_path, os.path.join(output_subdir, f'{stem}.parquet'))
        if done % 100 == 0:
            print(f'Processed {done} videos')

# Extract keypoints for the training and the test videos.
for _paths, _subdir in ((video_path_train, 'train_keypoints_5'), (video_path_test, 'test_keypoints_5')):
    process_videos(_paths, os.path.join(output_directory, _subdir))

  8%|▊         | 100/1200 [01:56<18:25,  1.00s/it]

Processed 100 videos


 17%|█▋        | 200/1200 [03:49<22:29,  1.35s/it]

Processed 200 videos


 25%|██▌       | 300/1200 [06:10<15:57,  1.06s/it]

Processed 300 videos


 33%|███▎      | 400/1200 [08:00<14:51,  1.11s/it]

Processed 400 videos


 42%|████▏     | 500/1200 [09:51<11:54,  1.02s/it]

Processed 500 videos


 50%|█████     | 600/1200 [12:00<16:42,  1.67s/it]

Processed 600 videos


 58%|█████▊    | 700/1200 [14:49<15:23,  1.85s/it]

Processed 700 videos


 67%|██████▋   | 800/1200 [17:29<09:09,  1.37s/it]

Processed 800 videos


 75%|███████▌  | 900/1200 [20:08<07:25,  1.48s/it]

Processed 900 videos


 83%|████████▎ | 1000/1200 [22:49<05:44,  1.72s/it]

Processed 1000 videos


 92%|█████████▏| 1100/1200 [25:43<03:20,  2.00s/it]

Processed 1100 videos


100%|██████████| 1200/1200 [28:53<00:00,  1.44s/it]


Processed 1200 videos


 33%|███▎      | 100/300 [03:19<06:12,  1.86s/it]

Processed 100 videos


 67%|██████▋   | 200/300 [06:23<02:32,  1.52s/it]

Processed 200 videos


100%|██████████| 300/300 [09:18<00:00,  1.86s/it]


Processed 300 videos


 20%|██        | 100/500 [02:49<11:14,  1.69s/it]

Processed 100 videos


 40%|████      | 200/500 [05:35<08:32,  1.71s/it]

Processed 200 videos


 60%|██████    | 300/500 [08:20<05:42,  1.71s/it]

Processed 300 videos


 80%|████████  | 400/500 [11:08<02:45,  1.66s/it]

Processed 400 videos


100%|██████████| 500/500 [13:44<00:00,  1.65s/it]

Processed 500 videos





In [None]:
# MediaPipe handles: drawing utilities and the Holistic solution.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

# Output root
output_directory = 'slovo/'

# Create the output root if it does not exist yet.
os.makedirs(output_directory, exist_ok=True)

# Holistic model shared by process_frame.
# static_image_mode=True makes every call an independent detection. This cell
# feeds the model alternating original and mirrored frames, so landmarks
# tracked from the previous image must not be reused for the next one.
holistic = mp_holistic.Holistic(static_image_mode=True)

def adjust_landmarks(arr, center):
    """Translate flat (x, y, z) landmarks so that `center` becomes the origin.

    Args:
        arr: Flat array whose length is a multiple of 3 (x, y, z triples).
        center: The (x, y, z) point subtracted from every triple.

    Returns:
        A new flat array of the same length as `arr`; `arr` is not modified.
    """
    points = arr.reshape(-1, 3)
    # One copy of the centre per landmark row.
    offsets = np.tile(center, (points.shape[0], 1))
    return (points - offsets).reshape(-1)

def extract_keypoints(results):
    """Turn a Holistic result into centred pose / left-hand / right-hand vectors.

    Each landmark group is flattened to (x, y, z) triples: 33 points for the
    pose and 21 per hand. A group that is missing from `results` is replaced
    by zeros. Every group is then shifted so that its first landmark sits at
    the origin.

    Returns:
        Tuple (pose, left_hand, right_hand) of flat arrays with 99, 63 and 63
        values respectively.
    """
    def _flatten(landmark_list, n_points):
        # Missing detection -> all-zero placeholder of the right length.
        if landmark_list:
            return np.array([[p.x, p.y, p.z] for p in landmark_list.landmark]).flatten()
        return np.zeros(n_points * 3)

    pose = _flatten(results.pose_landmarks, 33)
    lh = _flatten(results.left_hand_landmarks, 21)
    rh = _flatten(results.right_hand_landmarks, 21)

    # The first triple of each group is used as that group's reference point.
    return (
        adjust_landmarks(pose, pose[:3]),
        adjust_landmarks(lh, lh[:3]),
        adjust_landmarks(rh, rh[:3]),
    )

def flip_keypoints(keypoints):
    """Return a copy of a flat (x, y, z) keypoint array with every x replaced by 1 - x.

    The y and z entries are copied unchanged and the input is not modified.
    NOTE(review): `1 - x` mirrors coordinates only if x is normalised to
    [0, 1] — confirm that this holds for the arrays passed in.
    """
    x_slots = slice(None, None, 3)  # every third value, starting at 0, is an x
    mirrored = keypoints.copy()
    mirrored[x_slots] = 1 - keypoints[x_slots]
    return mirrored

def process_frame(frame, flip=False):
    """Run Holistic on one BGR frame and return the centred keypoints.

    Args:
        frame: BGR image (it is converted with ``cv2.COLOR_BGR2RGB``).
        flip: When true, the frame is mirrored horizontally before detection,
            so the returned keypoints describe the mirrored image.

    Returns:
        Tuple (pose, left_hand, right_hand) as produced by `extract_keypoints`.
    """
    if flip:
        frame = cv2.flip(frame, 1)  # Mirror the frame horizontally

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Pad the shorter side with black borders.
    h, w, _ = frame_rgb.shape
    if h > w:
        padding = (h - w) // 2
        frame_rgb = cv2.copyMakeBorder(frame_rgb, 0, 0, padding, padding, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    else:
        padding = (w - h) // 2
        frame_rgb = cv2.copyMakeBorder(frame_rgb, padding, padding, 0, 0, cv2.BORDER_CONSTANT, value=[0, 0, 0])

    results = holistic.process(frame_rgb)

    # Landmarks detected on the mirrored frame are already in mirrored
    # coordinates, and extract_keypoints centres them on the nose / wrists.
    # Applying `1 - x` to those centred values would undo the mirroring of the
    # pose and shift it by +1 (nose at x=1, zero "no detection" rows no longer
    # zero), and swapping the hands would make them disagree with the pose.
    # The detection is therefore returned as is for both flip settings.
    return extract_keypoints(results)

def process_video(video_path, output_path, num_frames=20):
    """Sample frames evenly, extract keypoints for each frame and its mirror, save as parquet.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        output_path: Destination of the parquet file.
        num_frames: Maximum number of frames to sample. Each sampled frame
            yields two rows (``flip`` False and True); the table is zero-padded
            to at least ``2 * num_frames`` rows.

    The written table has the columns ``frame``, ``pose``, ``left_hand``,
    ``right_hand`` and ``flip``. Padding rows carry ``None`` as frame index and
    all-zero keypoint vectors. A video that cannot be read, or that reports no
    frames, yields a file made only of padding rows.
    """
    video_reader = cv2.VideoCapture(video_path)
    frames = []
    try:
        # Clamp the reported count: a negative sample count makes np.linspace raise.
        total_frames = max(int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT)), 0)

        frame_indices = np.linspace(0, total_frames - 1, max(min(num_frames, total_frames), 0), dtype=int)
        wanted = set(frame_indices.tolist())
        # Guard against an empty selection; indexing [-1] on it would raise IndexError.
        last_wanted = int(frame_indices[-1]) if wanted else -1

        frame_index = 0
        while wanted and video_reader.isOpened():
            success, frame = video_reader.read()
            if not success or frame_index > last_wanted:
                break

            if frame_index in wanted:
                for flip in [False, True]:  # Original first, then the mirrored frame
                    pose_adjusted, lh_adjusted, rh_adjusted = process_frame(frame, flip)
                    frames.append([frame_index, pose_adjusted, lh_adjusted, rh_adjusted, flip])

            frame_index += 1
    finally:
        # Release the capture even if decoding or inference raised.
        video_reader.release()

    # Zero-pad in (original, mirrored) pairs up to 2 * num_frames rows.
    while len(frames) < num_frames * 2:
        frames.append([None, np.zeros(33*3), np.zeros(21*3), np.zeros(21*3), False])
        frames.append([None, np.zeros(33*3), np.zeros(21*3), np.zeros(21*3), True])

    df = pd.DataFrame(frames, columns=['frame', 'pose', 'left_hand', 'right_hand', 'flip'])
    df.to_parquet(output_path)

def process_videos(video_paths, output_subdir):
    """Run `process_video` on every path and write one parquet file per video.

    The output file is named after the video's base name (extension replaced
    by ``.parquet``) inside `output_subdir`, which is created if needed. A
    progress line is printed after every 100 videos.
    """
    os.makedirs(output_subdir, exist_ok=True)
    for done, video_path in enumerate(tqdm(video_paths), start=1):
        stem = os.path.splitext(os.path.basename(video_path))[0]
        process_video(video_path, os.path.join(output_subdir, f'{stem}.parquet'))
        if done % 100 == 0:
            print(f'Processed {done} videos')

# Extract keypoints for the training and the test videos.
for _paths, _subdir in ((video_path_train, 'train_keypoints_6'), (video_path_test, 'test_keypoints_6')):
    process_videos(_paths, os.path.join(output_directory, _subdir))


In [None]:
# MediaPipe handles: drawing utilities and the Holistic solution.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

# Output root
output_directory = 'slovo/'

# Create the output root if it does not exist yet.
os.makedirs(output_directory, exist_ok=True)

# Holistic model shared by process_video.
# static_image_mode=True makes every call an independent detection. This cell
# runs the model on several variants of each frame, including a mirrored one,
# so landmarks tracked from the previous image must not be reused for the next.
holistic = mp_holistic.Holistic(static_image_mode=True)

def adjust_landmarks(arr, center):
    """Translate flat (x, y, z) landmarks so that `center` becomes the origin.

    Args:
        arr: Flat array whose length is a multiple of 3 (x, y, z triples).
        center: The (x, y, z) point subtracted from every triple.

    Returns:
        A new flat array of the same length as `arr`; `arr` is not modified.
    """
    points = arr.reshape(-1, 3)
    # One copy of the centre per landmark row.
    offsets = np.tile(center, (points.shape[0], 1))
    return (points - offsets).reshape(-1)

def extract_keypoints(results):
    """Turn a Holistic result into centred pose / left-hand / right-hand vectors.

    Each landmark group is flattened to (x, y, z) triples: 33 points for the
    pose and 21 per hand. A group that is missing from `results` is replaced
    by zeros. Every group is then shifted so that its first landmark sits at
    the origin.

    Returns:
        Tuple (pose, left_hand, right_hand) of flat arrays with 99, 63 and 63
        values respectively.
    """
    def _flatten(landmark_list, n_points):
        # Missing detection -> all-zero placeholder of the right length.
        if landmark_list:
            return np.array([[p.x, p.y, p.z] for p in landmark_list.landmark]).flatten()
        return np.zeros(n_points * 3)

    pose = _flatten(results.pose_landmarks, 33)
    lh = _flatten(results.left_hand_landmarks, 21)
    rh = _flatten(results.right_hand_landmarks, 21)

    # The first triple of each group is used as that group's reference point.
    return (
        adjust_landmarks(pose, pose[:3]),
        adjust_landmarks(lh, lh[:3]),
        adjust_landmarks(rh, rh[:3]),
    )

def enhance_frame(frame):
    """Build four augmented PIL images from one frame array.

    Each variant is derived independently from the same source image:
    contrast x1.5, sharpness x2.0, brightness x1.5 and a left-right mirror.

    Returns:
        Tuple (contrast, sharpness, brightness, flipped) of PIL images.
    """
    frame_pil = Image.fromarray(frame)

    # (enhancer class, factor) pairs, applied in this order to the source image.
    recipes = (
        (ImageEnhance.Contrast, 1.5),
        (ImageEnhance.Sharpness, 2.0),
        (ImageEnhance.Brightness, 1.5),
    )
    frame_contrast, frame_sharpness, frame_brightness = (
        enhancer(frame_pil).enhance(factor) for enhancer, factor in recipes
    )

    # Horizontal mirror
    frame_flipped = frame_pil.transpose(Image.FLIP_LEFT_RIGHT)

    return frame_contrast, frame_sharpness, frame_brightness, frame_flipped

def process_video(video_path, output_path, num_frames=20):
    """Sample frames evenly, extract keypoints for five image variants, save as parquet.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        output_path: Destination of the parquet file.
        num_frames: Maximum number of frames to sample. Each sampled frame
            yields five rows (``original``, ``contrast``, ``sharpness``,
            ``brightness``, ``flipped``); the table is zero-padded to at least
            ``5 * num_frames`` rows.

    The written table has the columns ``frame``, ``variation``, ``pose``,
    ``left_hand`` and ``right_hand``. Padding rows carry ``None`` as frame
    index and all-zero keypoint vectors. A video that cannot be read, or that
    reports no frames, yields a file made only of padding rows.
    """
    variation_names = ['contrast', 'sharpness', 'brightness', 'flipped']

    video_reader = cv2.VideoCapture(video_path)
    frames = []
    try:
        # Clamp the reported count: a negative sample count makes np.linspace raise.
        total_frames = max(int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT)), 0)

        # Evenly spaced indices of the frames to process.
        frame_indices = np.linspace(0, total_frames - 1, max(min(num_frames, total_frames), 0), dtype=int)
        wanted = set(frame_indices.tolist())
        # Guard against an empty selection; indexing [-1] on it would raise IndexError.
        last_wanted = int(frame_indices[-1]) if wanted else -1

        frame_index = 0
        while wanted and video_reader.isOpened():
            success, frame = video_reader.read()
            if not success or frame_index > last_wanted:
                break

            if frame_index in wanted:
                # MediaPipe expects RGB, OpenCV decodes to BGR.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Pad the shorter side with black borders.
                h, w, _ = frame_rgb.shape
                if h > w:
                    padding = (h - w) // 2
                    frame_rgb = cv2.copyMakeBorder(frame_rgb, 0, 0, padding, padding, cv2.BORDER_CONSTANT, value=[0, 0, 0])
                else:
                    padding = (w - h) // 2
                    frame_rgb = cv2.copyMakeBorder(frame_rgb, padding, padding, 0, 0, cv2.BORDER_CONSTANT, value=[0, 0, 0])

                # Keypoints of the unmodified (padded) frame.
                results = holistic.process(frame_rgb)
                pose_adjusted, lh_adjusted, rh_adjusted = extract_keypoints(results)
                frames.append([frame_index, 'original', pose_adjusted, lh_adjusted, rh_adjusted])

                # Keypoints of each augmented variant of the same frame.
                for variation_name, enhanced_frame in zip(variation_names, enhance_frame(frame_rgb)):
                    enhanced_frame_rgb = np.array(enhanced_frame)
                    results = holistic.process(enhanced_frame_rgb)
                    pose_adjusted, lh_adjusted, rh_adjusted = extract_keypoints(results)
                    frames.append([frame_index, variation_name, pose_adjusted, lh_adjusted, rh_adjusted])

            frame_index += 1
    finally:
        # Release the capture even if decoding or inference raised.
        video_reader.release()

    # Zero-pad in groups of five variants up to 5 * num_frames rows.
    while len(frames) < num_frames * 5:
        for i in ['original', 'contrast', 'sharpness', 'brightness', 'flipped']:
            frames.append([None, i, np.zeros(33*3), np.zeros(21*3), np.zeros(21*3)])

    df = pd.DataFrame(frames, columns=['frame', 'variation', 'pose', 'left_hand', 'right_hand'])
    df.to_parquet(output_path)

def process_videos(video_paths, output_subdir):
    """Run `process_video` on every path and write one parquet file per video.

    The output file is named after the video's base name (extension replaced
    by ``.parquet``) inside `output_subdir`, which is created if needed. A
    progress line is printed after every 100 videos.
    """
    os.makedirs(output_subdir, exist_ok=True)
    for done, video_path in enumerate(tqdm(video_paths), start=1):
        stem = os.path.splitext(os.path.basename(video_path))[0]
        process_video(video_path, os.path.join(output_subdir, f'{stem}.parquet'))
        if done % 100 == 0:
            print(f'Processed {done} videos')

# Extract keypoints for the training and the test videos.
for _paths, _subdir in ((video_path_train, 'train_keypoints_7'), (video_path_test, 'test_keypoints_7')):
    process_videos(_paths, os.path.join(output_directory, _subdir))
