# CC5213 - Recuperación de Información Multimedia

 Profesor: Juan Manuel Barrios
 
 Fecha: 14 de abril de 2021

## Leyendo Audio

Decodificar con:
```
ffmpeg -i video.mp4 -ac 1 -ar 8192 -acodec pcm_s16le -f s16le audio.raw
```

Reproducir con:
```
ffplay -f s16le -acodec pcm_s16le -ar 8192 audio.raw
vlc --demux=rawaud --rawaud-channels 1 --rawaud-samplerate 8192 --rawaud-fourcc=s16l audio.raw
```

Probar con distintos valores de `sample_rate`, por ejemplo `8192`, `11025`, `22050` o `44100`.

In [None]:
import numpy
import os.path
import subprocess

def convertir_a_audio_raw(file_video, sample_rate):
    """Decode the audio of a media file into a raw PCM file using ffmpeg.

    The output is mono, signed 16-bit little-endian PCM at ``sample_rate`` Hz
    and is named ``<file_video>.<sample_rate>.raw``. If that file already
    exists it is returned as-is and ffmpeg is not run.

    Args:
        file_video: Path of the input media file.
        sample_rate: Target sampling rate in Hz.

    Returns:
        Path of the raw audio file.

    Raises:
        Exception: If ffmpeg exits with a non-zero status.
    """
    file_raw = "{}.{}.raw".format(file_video, sample_rate)
    if os.path.isfile(file_raw):
        return file_raw
    comando = ["ffmpeg", "-i", file_video, "-ac", "1", "-ar", str(sample_rate),
               "-acodec", "pcm_s16le", "-f", "s16le", file_raw]
    print("INICIANDO: {}".format(" ".join(comando)))
    code = subprocess.call(comando)
    if code != 0:
        # A failed run may leave a truncated file behind; delete it so the
        # existence check above does not return it as a valid result later.
        if os.path.isfile(file_raw):
            os.remove(file_raw)
        raise Exception("ERROR! ffmpeg terminó con código {}: {}".format(
            code, " ".join(comando)))
    return file_raw

def reproducir_audio_raw(file_raw, sample_rate):
    """Play a raw audio file with VLC and wait for VLC to exit.

    VLC is told to treat the file as one-channel ``s16l`` raw audio at
    ``sample_rate`` Hz.

    Args:
        file_raw: Path of the raw audio file.
        sample_rate: Sampling rate in Hz of the data in ``file_raw``.

    Raises:
        Exception: If VLC exits with a non-zero status.
    """
    # Raw audio has no header, so the format is described on the command line.
    opciones_formato = [
        "--demux=rawaud",
        "--rawaud-channels", "1",
        "--rawaud-samplerate", str(sample_rate),
        "--rawaud-fourcc=s16l",
    ]
    comando = ["vlc"] + opciones_formato + [file_raw]
    print("INICIANDO: {}".format(" ".join(comando)))
    if subprocess.call(comando) != 0:
        raise Exception("ERROR!")

file_video = "coffin.mp4"
sample_rate = 44100

file_raw = convertir_a_audio_raw(file_video, sample_rate)
# The file holds signed 16-bit little-endian PCM (ffmpeg "s16le"), so the byte
# order is stated explicitly instead of relying on the machine's native int16.
samples = numpy.fromfile(file_raw, dtype=numpy.dtype("<i2"))

print("samples = {}".format(len(samples)))
print("largo = {:.1f} segundos".format(len(samples)/sample_rate))
# Use the array's own reductions: the built-in min()/max() would iterate the
# array one element at a time in Python.
print("min max = {} {}".format(samples.min(), samples.max()))
print(samples)

reproducir_audio_raw(file_raw, sample_rate)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Draw the waveform on a wide figure using the Axes object directly.
figura = plt.figure(figsize=(25, 5))
ejes = figura.gca()
ejes.plot(samples)
ejes.set_xlabel('Samples')
ejes.set_ylabel('Amplitud')
# Fix the vertical axis to the full signed 16-bit range.
ejes.set_ylim(bottom=-32768, top=32767)
ejes.set_yticks(numpy.arange(-30000, 30001, step=10000))
# No padding around the data.
ejes.margins(0)
plt.show()


In [None]:
def modificar_samples(samples, sample_rate):
    """Mix the signal with a copy of itself taken half a second later.

    Output sample ``i`` is ``samples[i] + samples[i + offset]`` saturated to
    the signed 16-bit range [-32768, 32767], where
    ``offset = int(sample_rate / 2)``. The last ``offset`` samples have no
    later counterpart and are left unchanged. The input is not modified.

    Args:
        samples: 1-D numpy array of audio samples.
        sample_rate: Sampling rate in Hz, used to convert half a second into
            a number of samples.

    Returns:
        A copy of ``samples`` with the mixed values written into it.

    Raises:
        ValueError: If ``sample_rate`` yields a negative offset.
    """
    nuevos_samples = samples.copy()
    # Half a second expressed in samples.
    offset = int(sample_rate / 2)
    if offset < 0:
        raise ValueError("sample_rate debe ser >= 0")
    # Number of samples that have a partner ``offset`` positions ahead.
    cantidad = len(samples) - offset
    if cantidad <= 0:
        return nuevos_samples
    # Widen before adding so the sum cannot wrap around in 16 bits.
    amplios = numpy.asarray(samples, dtype=numpy.int64)
    suma = amplios[:cantidad] + amplios[offset:]
    # Saturate and store the result back in the original dtype.
    nuevos_samples[:cantidad] = numpy.clip(suma, -32768, 32767)
    return nuevos_samples

# Output path: the source raw file name with a ".v2" suffix appended.
nuevo_archivo_raw = file_raw + ".v2"

# Build the modified signal from the samples loaded earlier.
nuevos_samples = modificar_samples(samples, sample_rate)

# Write the array to disk, then play it back at the same rate.
print("guardando archivo {}".format(nuevo_archivo_raw))
nuevos_samples.tofile(nuevo_archivo_raw)

print("reproduciendo {}".format(nuevo_archivo_raw))
reproducir_audio_raw(nuevo_archivo_raw, sample_rate)


 # Ejemplo MFCC
 
 Instalar LibROSA con:
 
 ```
 pip install librosa
 ```

NO USAR `conda install` porque aparecerá un error.


In [None]:
import numpy
import librosa
import os.path
import subprocess

def convertir_a_wav(file_video, sample_rate):
    """Extract the audio of a media file into a mono WAV file using ffmpeg.

    The output is named ``<file_video>.<sample_rate>.wav``; its name is
    printed on every call. If the file already exists it is returned as-is
    and ffmpeg is not run.

    Args:
        file_video: Path of the input media file.
        sample_rate: Target sampling rate in Hz.

    Returns:
        Path of the WAV file.

    Raises:
        Exception: If ffmpeg exits with a non-zero status.
    """
    file_wav = "{}.{}.wav".format(file_video, sample_rate)
    print(file_wav)
    if os.path.isfile(file_wav):
        return file_wav
    comando = ["ffmpeg", "-i", file_video, "-ac", "1", "-ar", str(sample_rate), file_wav]
    print("INICIANDO: {}".format(" ".join(comando)))
    code = subprocess.call(comando)
    if code != 0:
        # A failed run may leave a truncated file behind; delete it so the
        # existence check above does not return it as a valid result later.
        if os.path.isfile(file_wav):
            os.remove(file_wav)
        raise Exception("ERROR! ffmpeg terminó con código {}: {}".format(
            code, " ".join(comando)))
    return file_wav
    
def create_audio_descriptors(file_wav, sample_rate):
    """Compute MFCC descriptors for an audio file with librosa.

    The audio is loaded at ``sample_rate`` and MFCCs are computed with a
    4096-sample window, a 4096-sample hop and 32 coefficients. Progress
    information is printed along the way.

    Args:
        file_wav: Path of the audio file to load.
        sample_rate: Sampling rate passed to ``librosa.load`` as ``sr``.

    Returns:
        The transposed MFCC matrix: one row per analysis window, one column
        per coefficient.
    """
    # Load the audio. ``sr`` is passed by keyword because recent librosa
    # releases no longer accept it positionally.
    y, sr = librosa.load(file_wav, sr=sample_rate)
    print("audio samples={} samplerate={} segundos={:.1f}".format(len(y), sr, len(y) / sr))
    # MFCC parameters
    ventana = 4096
    salto = 4096
    dimension = 32
    # Durations use the rate actually returned by the loader.
    print("ventana={} ({:.0f} ms) salto={} ({:.0f} ms)".format(ventana, ventana/sr*1000, salto, salto/sr*1000))
    # Compute the MFCCs; ``y`` is likewise passed by keyword.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=dimension, n_fft=ventana, hop_length=salto)
    # librosa returns coefficients x windows; report it, then transpose so
    # each row is one descriptor.
    print("{} descriptores de {}-d".format(mfcc.shape[1], mfcc.shape[0]))
    return mfcc.transpose()

def calcular_descriptores(file_video, sample_rate):
    """Convert a media file to WAV and compute its audio descriptors.

    Args:
        file_video: Path of the input media file.
        sample_rate: Sampling rate in Hz used for conversion and analysis.

    Returns:
        The descriptor matrix produced by ``create_audio_descriptors``; its
        shape and dtype are printed before returning.
    """
    descriptores = create_audio_descriptors(
        convertir_a_wav(file_video, sample_rate),
        sample_rate,
    )
    print("{} {}".format(descriptores.shape, descriptores.dtype))
    return descriptores

# Input media file and the sampling rate (Hz) used for the analysis.
file_video, sample_rate = "coffin.mp4", 44100

# Convert to WAV if needed and compute the descriptors.
descriptores = calcular_descriptores(file_video=file_video, sample_rate=sample_rate)


In [None]:
import IPython.display as ipd

# Play the audio in an inline mini-player.
file_wav = convertir_a_wav(file_video, sample_rate)
# ``sr`` is passed by keyword because recent librosa releases no longer accept
# it positionally.
y, sr = librosa.load(file_wav, sr=sample_rate)
# Must stay the last expression of the cell so the notebook renders the player.
ipd.Audio(y, rate=sr)