## Downloading and installing the **voice_characterizer** (`vanpy`) library

In [18]:
%%capture --no-stderr
!wget "https://drive.google.com/uc?id=1AL0t8rckwq9cQH2affK1-JoDa34J_MkX" -O "/content/vanpy-0.67-py3-none-any.whl"
!pip install "/content/vanpy-0.67-py3-none-any.whl"

## Installing prerequisites
If you are using one of the VAD preprocessing components, you need to install the **ffmpeg** utility before continuing.

In [2]:
#@title <font size="4"><i>Installing ffmpeg</i></font><br/><font size="3">Taken from <a href="https://colab.research.google.com/github/yunooooo/FFmpeg-for-Google-Drive/blob/master/FFmpeg.ipynb">here</a></font> { vertical-output: true }
# Installs ffmpeg and mediainfo through apt, showing a small progress
# animation while the commands run. Failures are reported at the end instead
# of being silently ignored.
from IPython.display import clear_output
import os, urllib.request
HOME = os.path.expanduser("~")
pathDoneCMD = f'{HOME}/doneCMD.sh'  # not used in this cell; kept in case other cells reference it

# Fetch the animation helper module once and cache it under ~/.ipython.
# NOTE(review): this downloads and then imports unpinned third-party code from
# GitHub -- consider pinning the URL to a specific commit or vendoring the file.
if not os.path.exists(f"{HOME}/.ipython/ttmg.py"):
    hCode = "https://raw.githubusercontent.com/yunooooo/gcct/master/res/ttmg.py"
    urllib.request.urlretrieve(hCode, f"{HOME}/.ipython/ttmg.py")

# The import below assumes ~/.ipython is on sys.path (presumably added by
# IPython itself -- verify if running outside a notebook kernel).
from ttmg import (
    loadingAn,
    textAn,
)

loadingAn(name="lds")
textAn("Installing Dependencies...", ty='twg')

install_commands = [
    # https:// instead of git://: GitHub no longer serves the unauthenticated
    # git protocol, so the original URL cannot be cloned.
    # NOTE(review): jikanpy looks unrelated to ffmpeg (inherited from the
    # notebook this cell was taken from) -- confirm whether it is needed at all.
    'pip install git+https://github.com/AWConant/jikanpy.git',
    'add-apt-repository -y ppa:jonathonf/ffmpeg-4',
    'apt-get update',
    # -y answers the confirmation prompt, which would otherwise abort the
    # install because no terminal is attached to answer it.
    'apt-get install -y mediainfo',
    'apt-get install -y ffmpeg',
]
# os.system returns a non-zero status when a command fails; collect those
# commands so the final message reflects what actually happened.
failed_commands = [cmd for cmd in install_commands if os.system(cmd) != 0]

clear_output()
if failed_commands:
    print('Installation finished with errors. The following commands failed:')
    for cmd in failed_commands:
        print(f'  {cmd}')
else:
    print('Installation finished.')

Installation finished.


## Downloading the audio samples and configuration file


In [21]:
%%capture --no-stderr
# get the pipline configuration file
!wget "https://drive.google.com/uc?id=16ahverthtZhgmOtRGMFB-z0LlPTy6MT9" -O "/content/pipeline.yaml"
# get the audio-samples
!wget "https://drive.google.com/uc?id=1uRChxDLqmnzxWWS3Ri9mcGTYRhOJyhNR" -O "/content/speech_examples_small.zip"
# extract the contents of the archive
!unzip -qq "/content/speech_examples_small.zip"
# delete the archive
!rm "/content/speech_examples_small.zip"

In [4]:
import os
import soundfile
from IPython.display import display, Audio

# Read one of the downloaded sample recordings: `wav` holds the audio data
# and `sr` its sample rate.
wav, sr = soundfile.read(
    "/content/speech_examples_small/stream_1nwjWQJB_20220104_16_28_02_40.wav"
)

# Build an inline audio player and render it in the cell output.
# The data is passed transposed -- presumably to convert a frames-first array
# into the channels-first layout the player expects; TODO confirm.
player = Audio(data=wav.T, rate=sr)
display(player)

## Running the voice characterization pipeline

In [22]:
import yaml
import logging
logging.basicConfig(level=logging.INFO)  # You can configure the log-level to
                                         # reach the desired verbosity
from vanpy.core.ClassificationPipline import ClassificationPipeline
from vanpy.core.FeatureExtractionPipline import FeatureExtractionPipeline
from vanpy.core.PreprocessPipline import PreprocessPipeline
from vanpy.core.CombinedPipeline import CombinedPipeline


# loading the configuration of the pipeline and its components from pipeline.yaml
# The absolute path matches the location the file was downloaded to, so this
# cell does not depend on the current working directory.
# NOTE(review): the config comes from an external download; yaml.safe_load
# would be the safer choice if the file only uses plain YAML types -- confirm
# before switching.
with open('/content/pipeline.yaml', 'r') as f:
  config = yaml.load(f, Loader=yaml.FullLoader)

# declaring the preprocessing pipeline
preprocessing_pipeline = PreprocessPipeline(
  ['file_mapper', 'wav_converter', 'ina_speech_segmenter'], config=config)
# declaring the feature extraction pipeline
# this is required for the training set generation or when one of the models
# is expecting the features as input (e.g. 'common_voices_gender')
feature_extraction_pipeline = FeatureExtractionPipeline(
  ['pyannote_embedding'], config=config)
# declaring the classification pipeline
# some of the classifiers use the features columns and others preprocessed files
speaker_clf_pipeline = ClassificationPipeline(
  ['common_voices_gender', 'common_voices_age', 'speech_brain_iemocap_emotion'],
  config=config)

# combining all of the pipelines together
# (the variable name `pipline` is kept as-is in case other cells reference it)
pipline = CombinedPipeline(
  [preprocessing_pipeline, feature_extraction_pipeline, speaker_clf_pipeline],
  config=config)
# executing the pipeline; the result is kept in `processed_payload` for the
# cells that display the classification and feature tables
processed_payload = pipline.process()

Downloading data from https://github.com/ina-foss/inaSpeechSegmenter/releases/download/models/keras_speech_music_cnn.hdf5
Downloading data from https://github.com/ina-foss/inaSpeechSegmenter/releases/download/models/keras_male_female_cnn.hdf5


Downloading:   0%|          | 0.00/96.4M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.00k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.64k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/6.01k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/159 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/363M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['quantizer.weight_proj.weight', 'project_hid.bias', 'project_q.bias', 'project_hid.weight', 'quantizer.weight_proj.bias', 'project_q.weight', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/378M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/83.0 [00:00<?, ?B/s]

## Displaying the classification data frame
The dataframe includes the column with the paths of the most recently preprocessed files and all of the classification columns

In [23]:
# Bare last expression, so the notebook renders the returned classification table.
processed_payload.get_classification_df()

Unnamed: 0,ina_speech_segmenter_processed_path,common_voices_gender_classification,common_voices_age_classification,speech_brain_iemocap_emotion
0,ina_preprocessed/stream_1nwjWQJB_20220104_16_2...,female,thirties,neu
1,ina_preprocessed/stream_LVaAGS9o_20220104_16_2...,male,twenties,hap


## Displaying the features data frame
The dataframe includes the column with the paths of the most recently preprocessed files and all of the feature columns

In [24]:
# Bare last expression, so the notebook renders the returned features table.
processed_payload.get_features_df()

Unnamed: 0,ina_speech_segmenter_processed_path,0,1,2,3,4,5,6,7,8,...,503,504,505,506,507,508,509,510,511,perf_pyannote_embedding_get_features
0,ina_preprocessed/stream_1nwjWQJB_20220104_16_2...,8.877208,-38.049767,30.893175,20.121298,-30.725372,-52.043125,-6.321362,45.486713,-27.69747,...,7.375932,-10.408524,5.848773,14.999799,26.872677,17.378815,-20.556408,49.252014,-26.353689,0.643497
1,ina_preprocessed/stream_LVaAGS9o_20220104_16_2...,-3.487365,115.851768,26.675196,45.657825,-39.521873,-4.936071,37.598557,7.883302,63.429573,...,-24.167591,-0.330149,-33.650799,28.6628,40.57518,16.881489,138.446869,40.276592,-38.88501,0.156474
