<a href="https://colab.research.google.com/github/francescopapaleo/streamlit/blob/main/AMPLab02_Essentia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install essentia-tensorflow
!pip install numpy pandas scikit-learn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting essentia-tensorflow
  Downloading essentia_tensorflow-2.1b6.dev858-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (291.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m291.4/291.4 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: essentia-tensorflow
Successfully installed essentia-tensorflow-2.1b6.dev858
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from google.colab import auth, drive

# Expose Google Drive under /content/gdrive, then authenticate the Colab user.
drive.mount('/content/gdrive')
auth.authenticate_user()

Mounted at /content/gdrive


In [None]:
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import json

import essentia.standard as es
import pandas as pd

import IPython.display as ipd
from tqdm import tqdm
import concurrent.futures

from tempfile import TemporaryDirectory
from itertools import chain
from concurrent.futures import ProcessPoolExecutor

In [None]:
# Google Drive locations used by the notebook, all derived from one project root.
# NOTE(review): the "essetia_playlist" folder name is kept exactly as written;
# the cell that reads output.json uses "essentia_playlist" -- confirm which
# folder actually exists on Drive.
_DRIVE_PROJECT_ROOT = Path('/content/gdrive/MyDrive/AMPLAB')

DATASET_PATH = _DRIVE_PROJECT_ROOT / 'MusAV' / 'audio_chunks'
OUTPUT_FILE_PATH = _DRIVE_PROJECT_ROOT / 'essetia_playlist'
MODELS_HOME = OUTPUT_FILE_PATH / 'models'

In [None]:
! mkdir models
! curl -L -o models/voice_instrumental-musicnn-mtt-2.pb "https://essentia.upf.edu/models/classifiers/voice_instrumental/voice_instrumental-musicnn-mtt-2.pb"
! curl -L -o models/discogs-effnet-bs64-1.pb "https://essentia.upf.edu/models/music-style-classification/discogs-effnet/discogs-effnet-bs64-1.pb"
! curl -L -o models/msd-musicnn-1.pb "https://essentia.upf.edu/models/autotagging/msd/msd-musicnn-1.pb"
! curl -L -o models/emomusic-musicnn-msd-2.pb "https://essentia.upf.edu/models/classification-heads/emomusic/emomusic-musicnn-msd-2.pb"
! curl -L -o models/labels.py "https://raw.githubusercontent.com/MTG/essentia-replicate-demos/main/effnet-discogs/labels.py"


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 3163k  100 3163k    0     0   555k      0  0:00:05  0:00:05 --:--:--  751k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 17.5M  100 17.5M    0     0   697k      0  0:00:25  0:00:25 --:--:--  881k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 3123k  100 3123k    0     0   552k      0  0:00:05  0:00:05 --:--:--  651k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 82460  100 82460    0     0  45332      0  0:00:01  0:00:01 --:--:-- 45307
  % Total    % Received % Xferd  Average Speed   Tim

In [None]:
# Collect the paths of every file found below a directory.

def get_all_files(path):
    """Return the path of every file under ``path``, searched recursively.

    Each entry is the walked directory joined with the file name. No filtering
    by extension is applied, so non-audio files are included as well. A
    directory that yields nothing from ``os.walk`` gives an empty list.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(path)
        for name in names
    ]

In [None]:
# Put the models directory on the import path so the downloaded labels.py can be imported.
sys.path.append('/content/models/')

# `labels` is indexed with the argmax of the style activations in AudioFeatures.style_ml.
from labels import labels

class AudioFeatures:
    """Essentia models and extractors used to describe a single audio file.

    The four TensorFlow graphs are instantiated once in ``__init__`` and then
    reused by the ``*_ml`` methods for every file that is analysed.
    """

    def __init__(self, models_dir="/content/models"):
        """Instantiate the TensorFlow predictors.

        Args:
            models_dir: Directory holding the ``.pb`` graph files. Defaults to
                ``/content/models``, the location that was previously
                hard-coded.
        """
        def graph(filename):
            # os.path.join also accepts a pathlib.Path and returns a plain str.
            return os.path.join(models_dir, filename)

        self.model_effnet = es.TensorflowPredictEffnetDiscogs(graphFilename=graph("discogs-effnet-bs64-1.pb"))
        self.model_vi = es.TensorflowPredictMusiCNN(graphFilename=graph("voice_instrumental-musicnn-mtt-2.pb"), output='model/dense/BiasAdd')
        self.model_av_emb = es.TensorflowPredictMusiCNN(graphFilename=graph("msd-musicnn-1.pb"), output='model/dense/BiasAdd')
        self.model_av = es.TensorflowPredict2D(graphFilename=graph("emomusic-musicnn-msd-2.pb"), output='model/Identity')

    def tempo_dance(self, path_to_file):
        """Return ``(bpm, danceability)`` for the file at ``path_to_file``.

        The file is loaded by this method itself at 44100 Hz, independently of
        the 16 kHz signal returned by ``audio_16``.
        """
        audio = es.MonoLoader(filename=path_to_file, sampleRate=44100)()
        # Only the first of the five RhythmExtractor2013 outputs (BPM) is used.
        bpm, _, _, _, _ = es.RhythmExtractor2013()(audio)
        # Danceability yields two outputs; the second is discarded.
        danceability, _ = es.Danceability()(audio)
        return bpm, danceability

    def audio_16(self, path_to_file):
        """Load ``path_to_file`` with MonoLoader at 16000 Hz and return the signal."""
        return es.MonoLoader(filename=path_to_file, sampleRate=16000)()

    def style_ml(self, audio_load_16):
        """Return the entry of ``labels`` with the highest mean activation.

        Activations from the Effnet-Discogs model are averaged over axis 0
        before taking the argmax.
        """
        activations = self.model_effnet(audio_load_16)
        activations_mean = np.mean(activations, axis=0)

        style_idx = np.argmax(activations_mean)
        return labels[style_idx]

    def vi_ml(self, audio_load_16):
        """Return ``"voice-like"`` or ``"instrumental-like"`` for the signal.

        The model output is averaged over axis 0, mapped through
        ``(x + 1) / 2`` and labelled "voice-like" only when every resulting
        value is at least 0.5.

        NOTE(review): ``model_vi`` reads the 'model/dense/BiasAdd' node, and the
        rescale presumes values in [-1, 1]. Neither is established by this
        file; confirm against the model's metadata that this node carries the
        class scores, since every row of the stored results is labelled
        "instrumental-like".
        """
        activations = self.model_vi(audio_load_16)
        v_i_mean = np.mean(activations, axis=0)

        scaled_vi = (v_i_mean + 1) / 2
        if np.all(scaled_vi >= 0.5):
            return "voice-like"
        return "instrumental-like"

    def av_ml(self, audio_load_16):
        """Return ``(valence, arousal)`` for the signal.

        Embeddings from ``model_av_emb`` are fed to ``model_av``; its output is
        averaged over axis 0, and index 0 is returned as valence, index 1 as
        arousal.
        """
        embeddings = self.model_av_emb(audio_load_16)
        activations = self.model_av(embeddings)
        activations_mean = np.mean(activations, axis=0)
        valence = activations_mean[0]
        arousal = activations_mean[1]
        return valence, arousal

In [None]:
# Walk the dataset folder once and store the resulting listing as JSON.
files_list = get_all_files(DATASET_PATH)

with (OUTPUT_FILE_PATH / "get_all_files.json").open("w") as index_file:
    index_file.write(json.dumps(files_list))

In [None]:
# Reload the file listing from the location where the preceding cell saved it
# (OUTPUT_FILE_PATH), rather than from /content/get_all_files.json, which no
# visible cell creates and which would be missing after a runtime restart.
with open(OUTPUT_FILE_PATH / "get_all_files.json", "r") as f:
    all_file_list = json.load(f)

['/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/audio.000/1t/1tnci80rcW5LjqQqBwdZsW.mp3', '/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/audio.000/5l/5lx1xGuMvgzCz9pn1eyjWN.mp3', '/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/audio.000/3L/3LUPz3adlCwLL1Yl2IUHqL.mp3', '/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/audio.000/3L/3L0Dg5V8V4XLugW7PXhKdk.mp3', '/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/audio.000/3V/3VqHuw0wFlIHcIPWkhIbdQ.mp3', '/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/audio.000/3V/3v9P4gIW7qO6MJ3MouOIm6.mp3', '/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/audio.000/14/141CcTeiS1Sjt6OIkrwehS.mp3', '/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/audio.000/0w/0wWt6QbecbAC2p49sc0Cs7.mp3', '/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/audio.000/1B/1BlmxzHLMnthuXDKKWHTOB.mp3', '/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chunks/audio.000/67/67xcVP82w8CAh857X0h2lX.mp3']


In [None]:
audio_features = AudioFeatures()

# Paths that could not be analysed; inspect this list after the run.
failed_files = []

# One JSON object is written per line, flushed as soon as it is produced, so
# the records computed so far stay on disk if the run is interrupted.
with open("descriptors_output.json", "w") as f:
    for file in tqdm(all_file_list):
        try:
            # Compute the audio features for the current file
            audio = audio_features.audio_16(file)
            tempo, danceability = audio_features.tempo_dance(file)
            style_label = audio_features.style_ml(audio)
            vi_label = audio_features.vi_ml(audio)
            valence, arousal = audio_features.av_ml(audio)
        except RuntimeError as err:
            # presumably Essentia reports unreadable inputs as RuntimeError -- TODO confirm.
            # Skip the file instead of aborting the whole run.
            failed_files.append(file)
            tqdm.write(f"Skipping {file}: {err}")
            continue

        audio_features_dict = {
            'file': file,
            'tempo': float(tempo),
            'danceability': float(danceability),
            'valence': float(valence),
            'arousal': float(arousal),
            'style': style_label,
            'vi_label': vi_label,
        }

        json.dump(audio_features_dict, f)
        f.write('\n')
        f.flush()

 17%|█▋        | 364/2100 [59:52<4:52:58, 10.13s/it]

In [None]:
# Load the stored descriptors table from Drive and show it as the cell output.
descriptors_path = r'/content/gdrive/MyDrive/AMPLAB/essentia_playlist/output.json'
descriptors_df = pd.read_json(descriptors_path)
descriptors_df

Unnamed: 0,file,tempo,danceability,valence,arousal,style,vi_label
0,/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chu...,125.997940,4.005003,5.870118,5.283186,Electronic---Techno,instrumental-like
1,/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chu...,99.956627,1.753968,5.004374,5.199041,Hip Hop---Trap,instrumental-like
2,/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chu...,139.767349,1.242303,5.115003,7.934643,Hip Hop---Grime,instrumental-like
3,/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chu...,95.024933,1.130136,4.823322,5.292911,Electronic---Tropical House,instrumental-like
4,/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chu...,123.793266,1.100455,3.484726,6.045059,Rock---Hard Rock,instrumental-like
...,...,...,...,...,...,...,...
2095,/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chu...,96.994759,0.979767,5.331085,5.796984,Pop---Ballad,instrumental-like
2096,/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chu...,133.099304,1.338298,6.005487,6.903127,Rock---Punk,instrumental-like
2097,/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chu...,129.032669,1.209611,6.102379,7.003244,Latin---Reggaeton,instrumental-like
2098,/content/gdrive/MyDrive/AMPLAB/MusAV/audio_chu...,116.832001,1.287699,5.974061,6.215495,Blues---Texas Blues,instrumental-like
