In [1]:
# pip install -U pliers

[0mCollecting pliers
  Using cached pliers-0.4.2-py3-none-any.whl (3.7 MB)
Collecting python-magic
  Using cached python_magic-0.4.27-py2.py3-none-any.whl (13 kB)
Collecting numpy>=1.13
  Downloading numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl (20.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.8/20.8 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hCollecting nltk>=3.0
  Using cached nltk-3.8.1-py3-none-any.whl (1.5 MB)
Collecting moviepy>=0.2
  Using cached moviepy-1.0.3.tar.gz (388 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting pandas>=0.24
  Downloading pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl (11.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pillow
  Downloading Pillow-10.0.0-cp311-cp311-macosx_10_10_x86_64.whl (3.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.

In [None]:
from pliers.tests.utils import get_test_data_path
from os.path import join
from pliers.filters import FrameSamplingFilter
from pliers.converters import GoogleSpeechAPIConverter
from pliers.extractors import (ClarifaiAPIImageExtractor, GoogleVisionAPIFaceExtractor,
                               ComplexTextExtractor, PredefinedDictionaryExtractor,
                               STFTAudioExtractor, VADERSentimentExtractor,
                               merge_results)

# Path to the sample clip inside the pliers test-data directory.
test_data_dir = get_test_data_path()
video = join(test_data_dir, 'video', 'obama_speech.mp4')

# ---- Image-based features -------------------------------------------
# Sample frames from the video once (FrameSamplingFilter, every=10) and
# hand the same sampled frames to both image extractors.
sampler = FrameSamplingFilter(every=10)
frames = sampler.transform(video)

obj_ext = ClarifaiAPIImageExtractor()
obj_features = obj_ext.transform(frames)

face_ext = GoogleVisionAPIFaceExtractor()
face_features = face_ext.transform(frames)

# ---- Audio features -------------------------------------------------
# Power in speech frequencies: a single (100, 300) frequency bin,
# computed directly from the video.
stft_ext = STFTAudioExtractor(freq_bins=[(100, 300)])
speech_features = stft_ext.transform(video)

# ---- Transcription --------------------------------------------------
# This step is optional--transcription would otherwise happen
# implicitly--but doing it explicitly lets us pick the Google Cloud
# Speech API instead of the package default (IBM Watson).
text_conv = GoogleSpeechAPIConverter()
text = text_conv.transform(video)

# ---- Text-based features --------------------------------------------
# All three extractors below consume the transcript produced above.
text_ext = ComplexTextExtractor()
text_features = text_ext.transform(text)

norm_variables = ['affect/V.Mean.Sum', 'subtlexusfrequency/Lg10WF']
dict_ext = PredefinedDictionaryExtractor(variables=norm_variables)
norm_features = dict_ext.transform(text)

sent_ext = VADERSentimentExtractor()
sent_features = sent_ext.transform(text)

# Gather every extractor's output in one list, in the order it was
# produced. Nested lists are fine here: merge_results flattens
# everything it is given.
features = [
    obj_features,
    face_features,
    speech_features,
    text_features,
    norm_features,
    sent_features,
]

# Request 'long' format, with the extractor name in its own column
# rather than prepended to each feature name.
df = merge_results(features, format='long', extractor_names='column')

# Show the first rows in a sensible order.
sort_columns = ['extractor', 'feature', 'onset', 'duration', 'order']
df.sort_values(by=sort_columns).head(10)