<a href="https://colab.research.google.com/github/XoLeRyTeR/TS/blob/main/ProjectCV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import cv2
import os
def find_face(video):
  """Return a crop of the first frontal face detected in a video.

  Frames are read in order; each is converted to grayscale and scanned with the
  frontal-face Haar cascade shipped with OpenCV. Scanning stops at the first frame
  in which at least one face is detected, and the first detection of that frame is
  cropped out of the original (colour) frame.

  Args:
    video: Video source handed to ``cv2.VideoCapture`` (a file path in this notebook).

  Returns:
    The cropped face region of the frame, or ``None`` when the video cannot be
    opened/read or no frame contains a detectable face.
  """
  face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
  cap = cv2.VideoCapture(video)
  try:
    while cap.isOpened():
      ret, frame = cap.read()
      if not ret:
        # End of stream (or read error): nothing more to scan.
        break
      gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
      faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
      if len(faces) > 0:
        (x, y, w, h) = faces[0]
        return frame[y:y+h, x:x+w]
    return None
  finally:
    # Release the capture on every exit path, including "no face found",
    # which previously left the handle open.
    cap.release()

In [20]:
from transformers import ViTModel, ViTFeatureExtractor
import torch
# The encoder and its input preprocessor are loaded from the same pretrained checkpoint.
_VIT_CHECKPOINT = 'google/vit-base-patch16-224'
model = ViTModel.from_pretrained(_VIT_CHECKPOINT)
feature_extractor = ViTFeatureExtractor.from_pretrained(_VIT_CHECKPOINT)

def img2vec(img):
  """Embed an image with the module-level ViT encoder.

  The image is preprocessed by the module-level ``feature_extractor`` and passed
  through ``model``; the hidden state at sequence position 0 is returned.

  Args:
    img: Image in a form ``feature_extractor`` accepts. The callers in this
      notebook pass OpenCV frames.

  Returns:
    NumPy array ``last_hidden_state[:, 0]`` — one row per image in the batch.

  Raises:
    ValueError: If ``img`` is ``None`` (for example when no face was found upstream).
  """
  if img is None:
    raise ValueError("img2vec received None instead of an image")

  # NOTE(review): OpenCV frames are BGR while the extractor presumably expects RGB —
  # confirm whether a cv2.cvtColor(..., cv2.COLOR_BGR2RGB) step is wanted here.
  inputs = feature_extractor(images=img, return_tensors="pt")

  # Pure inference: do not record an autograd graph for the forward pass.
  with torch.no_grad():
    outputs = model(**inputs)
  feature_vector = outputs.last_hidden_state[:, 0].detach().numpy()
  return feature_vector

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
from moviepy.editor import VideoFileClip
def extract_wav(video_path):
  """Write a video's audio track to ``audio.wav`` in the current working directory.

  The file is written with the ``pcm_s16le`` codec and overwrites any previous
  ``audio.wav``.

  Args:
    video_path: Path of the video file to read.

  Returns:
    The string ``"audio.wav"`` (relative path of the written file).

  Raises:
    ValueError: If the video has no audio track.
  """
  video = VideoFileClip(video_path)
  try:
    audio = video.audio
    if audio is None:
      raise ValueError(f"No audio track found in {video_path!r}")
    audio.write_audiofile("audio.wav", codec='pcm_s16le')
  finally:
    # Close the clip on every path so its underlying readers are not left open.
    video.close()
  return "audio.wav"

In [38]:
import librosa
import numpy as np
def wav2vec(audio_path):
  """Summarise an audio file as a vector of averaged MFCCs.

  The file is loaded with ``sr=None`` (no resampling requested), 13 MFCCs are
  computed, and the transposed coefficient matrix is averaged over axis 0, giving
  one mean value per coefficient.

  Args:
    audio_path: Path of the audio file to load.

  Returns:
    NumPy array with one averaged value per MFCC coefficient.
  """
  signal, sample_rate = librosa.load(audio_path, sr=None)
  coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
  per_coefficient_mean = np.mean(coefficients.T, axis=0)
  return np.array(per_coefficient_mean)

In [39]:
from tensorflow.keras.layers import MultiHeadAttention, Dense, LayerNormalization,Input
from tensorflow.keras.models import Sequential,Model
import numpy as np

def video2frames(video_path, num_frames=30):
    """Sample up to ``num_frames`` evenly spaced frames from a video.

    Args:
        video_path: Path of the video file to read.
        num_frames: Maximum number of frames to sample (default 30).

    Returns:
        ``np.array`` of the frames that were read, in playback order. Fewer than
        ``num_frames`` frames are returned when the video is shorter than that or a
        read fails; the array is empty when nothing could be read.

    Raises:
        ValueError: If ``num_frames`` is not positive.
    """
    if num_frames <= 0:
        raise ValueError("num_frames must be a positive integer")

    video = cv2.VideoCapture(video_path)
    frames = []
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames > 0:
            # A floor-divided step of 0 (clip shorter than num_frames) would seek to
            # frame 0 on every iteration, so clamp the step to 1 and never request
            # more frames than the clip contains.
            step = max(total_frames // num_frames, 1)
            wanted = min(num_frames, total_frames)
        else:
            # Frame count not reported: skip seeking and read the leading frames.
            step = None
            wanted = num_frames

        for i in range(wanted):
            if step is not None:
                video.set(cv2.CAP_PROP_POS_FRAMES, i * step)
            ret, frame = video.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        video.release()
    return np.array(frames)
def video2vec(frames):
    """Encode a sequence of frames with ``img2vec`` followed by a self-attention head.

    Each frame is embedded with ``img2vec``; the embeddings are stacked into a single
    batch and passed through a MultiHeadAttention -> LayerNormalization ->
    Dense(128, relu) Keras model.

    NOTE: the Keras layers are built inside this function on every call and are never
    trained, so their weights are whatever the layer initializers produce. Outputs of
    separate calls are therefore not directly comparable.

    Args:
        frames: Iterable of images accepted by ``img2vec`` (e.g. the result of
            ``video2frames``).

    Returns:
        The array returned by ``Model.predict`` for the single stacked batch; its
        last axis has size 128 (the Dense layer width).

    Raises:
        ValueError: If ``frames`` yields no images.
    """
    embeddings = [img2vec(frame)[0] for frame in frames]
    if not embeddings:
        # Without this guard an empty input reaches Keras as a zero-sized shape.
        raise ValueError("video2vec needs at least one frame")

    # Add the batch axis: (n_frames, dim) -> (1, n_frames, dim).
    features = np.expand_dims(np.array(embeddings), axis=0)

    input_tensor = Input(shape=features.shape[1:])
    attention_output = MultiHeadAttention(num_heads=8, key_dim=64)(input_tensor, input_tensor)
    normalized_output = LayerNormalization()(attention_output)
    output_tensor = Dense(128, activation='relu')(normalized_output)
    # Named so it does not shadow the module-level ViT ``model``. No compile() call:
    # the model is only used for predict(), which needs neither optimizer nor loss.
    attention_model = Model(inputs=input_tensor, outputs=output_tensor)

    return attention_model.predict(features)


In [51]:
import os
from IPython.display import display, HTML
from google.colab import output

def RecordVideo(duration_ms=6000):
  """Record a webcam clip with audio in Colab and save it under ``/content``.

  An HTML/JavaScript widget is displayed that starts recording immediately, stops
  after ``duration_ms`` milliseconds and sends the WebM recording (base64-encoded)
  to the Python callback registered as ``'save_video'``. The callback writes
  ``video_with_audio.webm`` and converts it to ``video_with_audio.mp4`` with ffmpeg.

  NOTE: this function returns right after the widget is displayed and the callback
  is registered. The MP4 file exists only after the callback has run (watch for its
  printed messages); do not use the returned path before that.

  Args:
    duration_ms: Recording length in milliseconds. The default of 6000 (6 seconds)
      is the value that was previously hard-coded; the old comments described it
      as one minute, which it was not.

  Returns:
    Path of the MP4 file the callback will produce.
  """
  # Output location: files are saved directly in the /content directory.
  video_dir = '/content'
  video_path_webm = os.path.join(video_dir, 'video_with_audio.webm')
  video_path_mp4 = os.path.join(video_dir, 'video_with_audio.mp4')

  # JavaScript and HTML for capturing the video. The __DURATION_MS__ placeholder is
  # substituted with str.replace because the JS is full of braces that an f-string
  # would require escaping.
  video_capture_html ="""
  <video id="video" width="320" height="240" autoplay muted></video>
  <br>
  <div id="status">Подготовка к записи...</div>
  <script>
    const video = document.getElementById('video');
    const status = document.getElementById('status');
    let mediaRecorder;
    let chunks = [];

    // Доступ к веб-камере с видео и аудио
    navigator.mediaDevices.getUserMedia({ video: { width: 320, height: 240 }, audio: true })
      .then(stream => {
        video.srcObject = stream;
        mediaRecorder = new MediaRecorder(stream, { mimeType: 'video/webm' });

        // Когда данные доступны, добавляем их в chunks
        mediaRecorder.ondataavailable = event => {
          if (event.data.size > 0) {
            chunks.push(event.data);
          }
        };

        // Когда запись останавливается, отправляем видео в Python
        mediaRecorder.onstop = () => {
          // Release the camera and microphone once recording is finished
          stream.getTracks().forEach(track => track.stop());
          const blob = new Blob(chunks, { type: 'video/webm' });
          chunks = [];
          const reader = new FileReader();
          reader.onload = () => {
            const base64Data = reader.result.split(',')[1];
            google.colab.kernel.invokeFunction('save_video', [base64Data], {});
            status.innerText = 'Видео с аудио сохранено.';
          };
          reader.readAsDataURL(blob);
        };

        // Начинаем запись автоматически
        mediaRecorder.start();
        status.innerText = 'Запись началась...';

        // Stop recording after the configured duration (milliseconds)
        setTimeout(() => {
          if (mediaRecorder.state === 'recording') {
            mediaRecorder.stop();
            status.innerText = 'Запись остановлена...';
          }
        }, __DURATION_MS__);
      })
      .catch(error => {
        console.error('Ошибка доступа к веб-камере:', error);
        alert('Не удалось получить доступ к веб-камере и микрофону.');
      });
    </script>
  """.replace('__DURATION_MS__', str(int(duration_ms)))

  # Show the capture widget.
  display(HTML(video_capture_html))

  # Python callback that saves the recording and converts it.
  def save_video_callback(base64_data):
    import base64
    import subprocess

    # Save the WebM file.
    with open(video_path_webm, 'wb') as f:
      f.write(base64.b64decode(base64_data))
    print(f"Видео с аудио сохранено по адресу: {video_path_webm}")

    # List the directory as a sanity check.
    print("Содержимое директории после сохранения:")
    print(os.listdir(video_dir))

    # Convert WebM to MP4 with ffmpeg. An argument list avoids shell quoting issues,
    # and -y overwrites an MP4 left by an earlier recording; without it ffmpeg asks
    # for confirmation and, with no interactive stdin, leaves the stale file in place.
    try:
      result = subprocess.run(
          ['ffmpeg', '-y', '-i', video_path_webm, '-c:v', 'libx264', video_path_mp4],
          stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
          stderr=subprocess.PIPE, text=True)
    except OSError as exc:
      print(f"Не удалось запустить ffmpeg: {exc}")
      return
    if result.returncode != 0:
      print(f"Не удалось конвертировать видео в MP4 (код возврата ffmpeg: {result.returncode})")
      print(result.stderr[-2000:])
      return
    print(video_path_mp4)

    # List the directory again after the conversion.
    print("Содержимое директории после конвертации:")
    print(os.listdir(video_dir))

  # Register the callback with Colab so the JavaScript side can invoke it.
  output.register_callback('save_video', save_video_callback)
  return video_path_mp4

In [53]:
# RecordVideo() returns the target path immediately; the file itself is written later
# by the 'save_video' callback, so wait for its printed messages before using the path.
video_path=RecordVideo()

Видео с аудио сохранено по адресу: /content/video_with_audio.webm
Содержимое директории после сохранения:
['.config', 'videofolder', '.ipynb_checkpoints', 'audio.wav', 'video_with_audio.webm', 'sample_data']
/content/video_with_audio.mp4
Содержимое директории после конвертации:
['.config', 'videofolder', '.ipynb_checkpoints', 'video_with_audio.mp4', 'audio.wav', 'video_with_audio.webm', 'sample_data']


In [54]:
# Collect the audio, face and frame-sequence embeddings of the recorded clip, then show them.
Data = {
    'wav2vec': wav2vec(extract_wav(video_path)),
    'img2vec': img2vec(find_face(video_path)),
    'video2vec': video2vec(video2frames(video_path)),
}
print(Data)

MoviePy - Writing audio in audio.wav


                                                        

MoviePy - Done.




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
{'wav2vec': array([-775.62823  ,   45.89489  ,   24.4338   ,   39.0814   ,
          4.190861 ,   29.390879 ,   -4.171281 ,   27.076014 ,
         -9.055146 ,   18.174307 ,   -5.411517 ,    7.6822214,
         -1.1253952], dtype=float32), 'img2vec': array([[-1.16864786e-01,  1.56060725e-01,  1.76259279e-01,
        -3.12051624e-01, -5.42946577e-01, -5.23812771e-01,
         1.08623171e+00,  6.83873057e-01,  8.80680323e-01,
        -1.06273246e+00, -7.36731589e-01, -1.21586549e+00,
         1.18053448e+00,  1.23886418e+00, -2.19152808e+00,
         1.55447996e+00,  1.98714629e-01, -1.13931604e-01,
        -4.44814950e-01,  1.13869298e+00,  4.63865846e-01,
        -1.44775438e+00,  7.55703986e-01,  5.15038669e-01,
        -1.59308803e+00, -1.56378496e+00,  1.31915557e+00,
        -2.08514094e+00, -1.14723539e+00, -5.78470588e-01,
        -3.54084790e-01, -6.00990951e-01,  2.49593630e-01,
        -7.95009911e-01, -9.