In [None]:
pip install moviepy

In [None]:
pip install --upgrade torch transformers av

# audio from video

In [None]:
from moviepy.editor import VideoFileClip

def extract_audio(video_file_path, audio_file_path):
    """
    Extracts audio from a video file and saves it as an MP3 file.
    
    Parameters:
    - video_file_path: The path to the video file.
    - audio_file_path: The path where the extracted audio file will be saved.
    """

    video = VideoFileClip(video_file_path)
    audio = video.audio
    audio.write_audiofile(audio_file_path)
    video.close()

video_file_path = '/home/girish/Downloads/Zenodo/VideoInterview/CFWyWfu_SpR.mp4'
audio_file_path = '/home/girish/Downloads/Zenodo/Audios/output_audio.mp3'
extract_audio(video_file_path, audio_file_path)


In [None]:
import subprocess

def extract_audio_ffmpeg(video_file_path, audio_file_path):
    """
    Extracts audio from a video file using FFmpeg and saves it as an MP3 file.
    
    Parameters:
    - video_file_path: The path to the video file.
    - audio_file_path: The path where the extracted audio file will be saved.
    """
    command = ['ffmpeg', '-i', video_file_path, '-q:a', '0', '-map', 'a', audio_file_path]
    

    subprocess.run(command, check=True)

video_file_path = '/home/girish/Downloads/Zenodo/VideoInterview/CFWyWfu_SpR.mp4'
audio_file_path = '/home/girish/Downloads/Zenodo/Audios/output_audio2.mp3'
extract_audio_ffmpeg(video_file_path, audio_file_path)


In [None]:
import os
import torchaudio
import numpy as np
import pandas as pd
from speechbrain.pretrained.interfaces import foreign_class


classifier = foreign_class(source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier")

def extract_features(path):
    signal, fs = torchaudio.load(path)
    embeddings = classifier.encode_batch(signal)
    return np.array(embeddings.mean(axis=0).squeeze())

def process_and_save_audio_features(folder_path):

    for file in os.listdir(folder_path):
        if file.endswith(".wav"):  
            file_path = os.path.join(folder_path, file)
            features = extract_features(file_path)
            
            df = pd.DataFrame(features).transpose()  

            csv_file_name = os.path.splitext(file)[0] + '_features.csv'
 
            df.to_csv(os.path.join(folder_path, csv_file_name), index=False)

folder_path = 'path/to/your/audio/folder'
process_and_save_audio_features(folder_path)


In [None]:
import os
import torchaudio
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
import torch
import pandas as pd
from torchaudio.transforms import Resample

feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-large-xlsr-53")
model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-large-xlsr-53")

input_folder = '/home/girish/Downloads/Zenodo/Audios'
output_folder = '/home/girish/Downloads/Zenodo/Audio_features'

os.makedirs(output_folder, exist_ok=True)

processed_files = [os.path.splitext(f)[0] for f in os.listdir(output_folder) if f.endswith('.csv')]

for filename in os.listdir(input_folder):
    if filename.endswith(".wav"):
        audio_file = os.path.join(input_folder, filename)
        
        if os.path.splitext(filename)[0] in processed_files:
            print(f"Features for {filename} already extracted, skipping...")
            continue

        waveform, sample_rate = torchaudio.load(audio_file)

        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        if sample_rate != 16000:
            resampler = Resample(orig_freq=sample_rate, new_freq=16000)
            waveform = resampler(waveform)
            sample_rate = 16000 

        inputs = feature_extractor(waveform.squeeze(), return_tensors="pt", padding="longest", sampling_rate=sample_rate)
        with torch.no_grad():
            features = model(**inputs).last_hidden_state

        features_np = features.squeeze().detach().numpy()

        df = pd.DataFrame(features_np)

        csv_filename = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}_features.csv")
        df.to_csv(csv_filename, index=False)

        print(f"Features saved to {csv_filename}")


# Video Features

In [None]:
pip install av

In [None]:
import av
import numpy as np
import pandas as pd
from transformers import AutoImageProcessor, VideoMAEModel

np.random.seed(0)

def read_video_pyav(container, indices):
    frames = []
    container.seek(0)
    start_index = indices[0]
    end_index = indices[-1]
    for i, frame in enumerate(container.decode(video=0)):
        if i > end_index:
            break
        if i >= start_index and i in indices:
            frames.append(frame)
    return np.stack([x.to_ndarray(format="rgb24") for x in frames])

def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
    converted_len = int(clip_len * frame_sample_rate)
    end_idx = np.random.randint(converted_len, seg_len)
    start_idx = end_idx - converted_len
    indices = np.linspace(start_idx, end_idx, num=clip_len)
    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
    return indices

file_path = "/home/girish/Downloads/Zenodo/VideoInterview/WfTccuO_NXE.mp4"
container = av.open(file_path)

# Sample frames from the video
indices = sample_frame_indices(clip_len=16, frame_sample_rate=1, seg_len=container.streams.video[0].frames)
video = read_video_pyav(container, indices)

image_processor = AutoImageProcessor.from_pretrained("MCG-NJU/videomae-base")
model = VideoMAEModel.from_pretrained("MCG-NJU/videomae-base")
inputs = image_processor(list(video), return_tensors="pt")

outputs = model(**inputs)
last_hidden_states = outputs.last_hidden_state
features_numpy = last_hidden_states.detach().cpu().numpy().squeeze(0)

features_df = pd.DataFrame(features_numpy)
features_csv_path = '/home/girish/Downloads/Zenodo/Video_features/video_features.csv'
features_df.to_csv(features_csv_path, index=False)

print(f"Features saved to {features_csv_path}")


preprocessor_config.json: 100%|████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 1.05MB/s]
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
config.json: 100%|█████████████████████████████████████████████████████████████████████| 725/725 [00:00<00:00, 2.52MB/s]
pytorch_model.bin: 100%|█████████████████████████████████████████████████████████████| 377M/377M [00:42<00:00, 8.94MB/s]
  return torch.tensor(value)


Features saved to /home/girish/Downloads/Zenodo/Video_features/video_features.csv


In [None]:

import pandas as pd
import numpy as np
import av
import torch
from transformers import AutoImageProcessor, VideoMAEModel
import os

np.random.seed(0)

def read_video_pyav(container, indices):
    frames = []
    container.seek(0)
    start_index = indices[0]
    end_index = indices[-1]
    for i, frame in enumerate(container.decode(video=0)):
        if i > end_index:
            break
        if i >= start_index and i in indices:
            frames.append(frame)
    return np.stack([x.to_ndarray(format="rgb24") for x in frames])

def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
    converted_len = int(clip_len * frame_sample_rate)
    end_idx = np.random.randint(converted_len, seg_len)
    start_idx = end_idx - converted_len
    indices = np.linspace(start_idx, end_idx, num=clip_len)
    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
    return indices

def extract_video_features(file_path, device):
    container = av.open(file_path)
    indices = sample_frame_indices(clip_len=16, frame_sample_rate=1, seg_len=container.streams.video[0].frames)
    video = read_video_pyav(container, indices)
    
    image_processor = AutoImageProcessor.from_pretrained("MCG-NJU/videomae-base")
    model = VideoMAEModel.from_pretrained("MCG-NJU/videomae-base")
    model.to(device)
    
    inputs = image_processor(list(video), return_tensors="pt").to(device)
    
    with torch.no_grad():
        outputs = model(**inputs)
    features = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
    
    return features

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
video_files = ["/home/girish/Downloads/Zenodo/VideoInterview/zzXHxHz_WTr.mp4"]  
all_features = []
for file_path in video_files:
    features = extract_video_features(file_path, device)
    all_features.append(features)

features_df = pd.DataFrame(all_features)
features_csv_path = '/home/girish/Downloads/Zenodo/Video_features/zzXHxHz_WTr.csv'
features_df.to_csv(features_csv_path, index=False)

print(f"Features saved to {features_csv_path}")



Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


Features saved to /home/girish/Downloads/Zenodo/Video_features/zzXHxHz_WTr.csv


# for whole folder video emaddings

In [None]:
import pandas as pd
import numpy as np
import av
import torch
from transformers import AutoImageProcessor, VideoMAEModel
import os
import glob
np.random.seed(0)

def read_video_pyav(container, indices):
    frames = []
    container.seek(0)
    start_index = indices[0]
    end_index = indices[-1]
    for i, frame in enumerate(container.decode(video=0)):
        if i > end_index:
            break
        if i >= start_index and i in indices:
            frames.append(frame)
    return np.stack([x.to_ndarray(format="rgb24") for x in frames])

def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
    converted_len = int(clip_len * frame_sample_rate)
    end_idx = np.random.randint(converted_len, seg_len)
    start_idx = end_idx - converted_len
    indices = np.linspace(start_idx, end_idx, num=clip_len)
    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
    return indices

def extract_video_features(file_path, device):
    container = av.open(file_path)
    indices = sample_frame_indices(clip_len=16, frame_sample_rate=1, seg_len=container.streams.video[0].frames)
    video = read_video_pyav(container, indices)
    
    image_processor = AutoImageProcessor.from_pretrained("MCG-NJU/videomae-base")
    model = VideoMAEModel.from_pretrained("MCG-NJU/videomae-base")
    model.to(device)
    
    inputs = image_processor(list(video), return_tensors="pt").to(device)
    
    with torch.no_grad():
        outputs = model(**inputs)
    features = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
    
    return file_path, features

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
video_folder = "/home/girish/Downloads/Zenodo/VideoInterview" 
video_files = glob.glob(os.path.join(video_folder, "*.mp4"))

all_features = []
file_names = []
for file_path in video_files:
    file_name, features = extract_video_features(file_path, device)
    file_names.append(os.path.basename(file_name)) 
    all_features.append(features)

features_df = pd.DataFrame(all_features)
features_df['file_name'] = file_names 
features_csv_path = '/home/girish/Downloads/Zenodo/Video_features/video_features.csv' 
features_df.to_csv(features_csv_path, index=False)

print(f"Features saved to {features_csv_path}")


In [None]:
import pandas as pd
import numpy as np
import av
import torch
from transformers import AutoImageProcessor, VideoMAEModel
import os
import glob

np.random.seed(0)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
video_folder = "/home/girish/Downloads/Zenodo/VideoInterview" 
features_csv_path = "/home/girish/Downloads/Zenodo/Video_features/video_features.csv"  


In [3]:
def read_video_all_frames(file_path):
    """
    Read all video frames and return as a list of numpy arrays.
    """
    container = av.open(file_path)
    frames = [frame.to_image() for frame in container.decode(video=0)]
    return frames

def uniform_frame_sampling(frames, num_samples=16):
    """
    Uniformly sample frames from the video.
    """
    sampled_frames = [frames[i] for i in np.linspace(0, len(frames) - 1, num_samples, dtype=int)]
    return sampled_frames

def extract_and_pool_video_features(file_path, device, num_samples=16, pooling='mean'):
    """
    Extract and pool video features from uniformly sampled frames.
    """
    frames = read_video_all_frames(file_path)
    sampled_frames = uniform_frame_sampling(frames, num_samples)
    
    image_processor = AutoImageProcessor.from_pretrained("MCG-NJU/videomae-base")
    model = VideoMAEModel.from_pretrained("MCG-NJU/videomae-base").to(device)
    
    pooled_features = []
    for frame in sampled_frames:
        inputs = image_processor(images=frame, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        features = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
        pooled_features.append(features)
    
    if pooling == 'mean':
        final_features = np.mean(pooled_features, axis=0)
    elif pooling == 'max':
        final_features = np.max(pooled_features, axis=0)
    
    return final_features


In [None]:
def process_videos(video_files, device):
    all_features = []
    file_names = []
    
    for file_path in video_files:
        try:
            features = extract_and_pool_video_features(file_path, device)
            file_names.append(os.path.basename(file_path))  
            all_features.append(features)
        except Exception as e:
            print(f"Skipping {file_path} due to error: {e}")

    return pd.DataFrame(all_features, columns=[f'Feature_{i}' for i in range(all_features[0].shape[0])]), file_names

if __name__ == "__main__":
    video_files = glob.glob(os.path.join(video_folder, "*.mp4"))
    features_df, file_names = process_videos(video_files, device)
    features_df['file_name'] = file_names 
    features_df.to_csv(features_csv_path, index=False)

    print(f"Features saved to {features_csv_path}")


deprecated pixel format used, make sure you did set range correctly
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
deprecated pixel format used, make sure you did set range correctly
 (repeated 765 more times)


Skipping /home/girish/Downloads/Zenodo/VideoInterview/CFWyWfu_SpR.mp4 due to error: Calculated padded input size per channel: (1 x 224 x 224). Kernel size: (2 x 16 x 16). Kernel size can't be greater than actual input size


# CNN ON VIDEO FEATURES

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, MaxPooling1D, Dropout
from tensorflow.keras.optimizers import Adam


In [None]:
data = pd.read_csv('/home/girish/Downloads/Zenodo/Video_features/video_features.csv')
data.drop(columns=['file_name'], inplace=True)

X = data.drop(columns=['Extraversion', 'Agreeableness', 'Conscientiousness', 'Neuroticism', 'Openness']).values
y = data[['Extraversion', 'Agreeableness', 'Conscientiousness', 'Neuroticism', 'Openness']].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

X_train_reshaped = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_reshaped = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


In [None]:
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(768, 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.5),
    Dense(5)  
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])


2024-02-16 04:21:48.424040: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-16 04:21:48.447176: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-16 04:21:48.447312: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-16 04:21:48.469557: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-16 04:21:48.469630: I external/local_xla/xla/stream_executor

In [6]:
history = model.fit(X_train_reshaped, y_train, validation_split=0.2, epochs=100, batch_size=8, verbose=2)


Epoch 1/100


2024-02-16 04:21:59.849734: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
2024-02-16 04:22:02.197302: W external/local_xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc:504] Can't find libdevice directory ${CUDA_DIR}/nvvm/libdevice. This may result in compilation or runtime failures, if the program we try to run uses routines from libdevice.
Searched for CUDA in the following directories:
  ./cuda_sdk_lib
  /usr/local/cuda-12.2
  /usr/local/cuda
  /home/girish/.local/lib/python3.10/site-packages/tensorflow/python/platform/../../../nvidia/cuda_nvcc
  /home/girish/.local/lib/python3.10/site-packages/tensorflow/python/platform/../../../../nvidia/cuda_nvcc
  .
You can choose the search directory by setting xla_gpu_cuda_data_dir in HloModule's DebugOptions.  For most apps, setting the environment variable XLA_FLAGS=--xla_gpu_cuda_data_dir=/path/to/cuda will work.
2024-02-16 04:22:04.506375: I external/local_xla/xla/service/service.cc:168] XLA

InternalError: Graph execution error:

Detected at node Adam/StatefulPartitionedCall_4 defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/home/girish/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/home/girish/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1077, in launch_instance

  File "/home/girish/.local/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/girish/.local/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/home/girish/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 529, in dispatch_queue

  File "/home/girish/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 518, in process_one

  File "/home/girish/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 424, in dispatch_shell

  File "/home/girish/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 766, in execute_request

  File "/home/girish/.local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/home/girish/.local/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/girish/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3048, in run_cell

  File "/home/girish/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3103, in _run_cell

  File "/home/girish/.local/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/home/girish/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3308, in run_cell_async

  File "/home/girish/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3490, in run_ast_nodes

  File "/home/girish/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code

  File "/tmp/ipykernel_30106/1005542190.py", line 1, in <module>

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1807, in fit

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1154, in train_step

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 544, in minimize

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1223, in apply_gradients

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 652, in apply_gradients

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1253, in _internal_apply_gradients

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1345, in _distributed_apply_gradients_fn

  File "/home/girish/.local/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1340, in apply_grad_to_update_var

libdevice not found at ./libdevice.10.bc
	 [[{{node Adam/StatefulPartitionedCall_4}}]] [Op:__inference_train_function_969]

In [None]:
test_loss, test_mae = model.evaluate(X_test_reshaped, y_test, verbose=2)

y_pred = model.predict(X_test_reshaped)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"Test MAE: {mae}")
print(f"Test RMSE: {rmse}")
