In [None]:
import whisper
import numpy as np
import sounddevice as sd
import matplotlib.pyplot as plt
from src.models.slime import SLIME
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from src.data.dataset import CommonVoice

In [None]:
# Seed NumPy's global RNG so repeated runs draw the same random numbers.
SEED = 21
np.random.seed(SEED)

# Test split of CommonVoice, loaded with use_mfcc=False.
ds_test = CommonVoice("test", use_mfcc=False)

# The (x, y) pair at this index is the sample used by every later cell.
SAMPLE_INDEX = 4
x, y = ds_test[SAMPLE_INDEX]

In [None]:
def _transcribe(f, x: np.ndarray) -> str:
    """Return the text that model ``f`` produces for the audio ``x``.

    Args:
        f: Object exposing ``transcribe(audio)`` that returns a mapping with
            a ``"text"`` entry (here, the model from ``whisper.load_model``).
        x: Audio samples; passed through ``whisper.pad_or_trim`` before
            being handed to the model.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    return f.transcribe(whisper.pad_or_trim(x))["text"]

In [None]:
# Audio settings shared by playback, the explainer and the export cell.
sample_rate = 16000
# NOTE(review): presumably milliseconds (it is later used as a pydub slice
# step) -- confirm against SLIME.
segment_length = 500

# Model being explained.
f = whisper.load_model("tiny")

# Surrogate model; LinearRegression() is the alternative that was tried here.
g = DecisionTreeRegressor(max_depth=5)

explainer = SLIME(f, g, sample_rate, segment_length)

In [None]:
# Play the original audio with sounddevice at the configured sample rate.
# Nothing is written to disk in this cell.
sd.play(x, sample_rate)

In [None]:
# Show the model's transcription next to the reference text for this sample.
whisper_text = _transcribe(f, x.numpy())
print(f"Whisper transcription: {whisper_text}")

reference_text = ds_test.vocab.decode(y.numpy())
print(f"Correct transcription: {reference_text}")

In [None]:
# Fit the explainer on the selected sample (as a NumPy array) with 250
# perturbations. Presumably this trains the surrogate `g` on perturbed copies
# of the audio -- confirm in SLIME.fit.
explainer.fit(x.numpy(), n_perturbations=250)

In [None]:
# Display the feature importances of the explainer's model `g` (the
# DecisionTreeRegressor's `feature_importances_` attribute, available once it
# has been fitted).
explainer.g.feature_importances_

In [None]:
# Score the surrogate on the data stored on the explainer (for scikit-learn
# regressors `score` returns R^2).
# NOTE(review): explainer.X / explainer.y are presumably the data the
# surrogate was fitted on, which would make this an in-sample score -- confirm.
explainer.g.score(explainer.X, explainer.y)

In [None]:
# Bar chart of explainer.coef, one bar per segment, saved for the paper.
# NOTE(review): the y-label calls this Gini importance; presumably `coef`
# holds the surrogate tree's feature importances -- confirm in SLIME.
segment_ids = np.arange(explainer.n_segments)
bar_heights = explainer.coef

fig, ax = plt.subplots()
ax.bar(segment_ids, bar_heights)
ax.grid()
ax.set_xlabel('Segmento')
ax.set_ylabel('Importancia de Gini')
fig.savefig('./paper/images/gini.png')
plt.show()

In [None]:
# Bar chart of explainer.segment_importance, one bar per segment, saved for
# the paper.
segment_ids = np.arange(explainer.n_segments)
bar_heights = explainer.segment_importance

fig, ax = plt.subplots()
ax.bar(segment_ids, bar_heights)
ax.grid()
ax.set_xlabel('Segmento')
ax.set_ylabel('Importancia')
fig.savefig('./paper/images/slime.png')
plt.show()

In [None]:
# Play the same audio again with every sample multiplied by 10 (a louder
# version of the earlier playback).
sd.play(x*10, sample_rate)

In [None]:
# Run the explainer's `explain` step on the sample.
# NOTE(review): `fit` above was given `x.numpy()` while this call passes `x`
# itself -- confirm that SLIME.explain accepts that type.
explainer.explain(x)

In [None]:
from pydub import AudioSegment

# Convert the waveform to 16-bit PCM for pydub.
# np.asarray gives a plain NumPy array whether `x` is a tensor or an array.
# Samples are clipped to [-1, 1] before scaling so that out-of-range values
# saturate instead of wrapping around when cast to int16.
x_float = np.asarray(x)
x_16bit = (np.clip(x_float, -1.0, 1.0) * (2**15 - 1)).astype(np.int16)

# Wrap the raw PCM bytes in a mono audio segment
audio_segment = AudioSegment(
    x_16bit.tobytes(),
    frame_rate=sample_rate,
    sample_width=x_16bit.dtype.itemsize,
    channels=1
)

# Export the full clip to an MP3 file
audio_segment.export("output.mp3", format="mp3")

# Split the clip into consecutive chunks. pydub slices are expressed in
# milliseconds, so a stepped slice yields pieces of `segment_length` ms each
# (the last one may be shorter).
chunks = audio_segment[::segment_length]

# Export each chunk to its own MP3 file
for i, chunk in enumerate(chunks):
    chunk.export(f"output_{i}.mp3", format="mp3")