<a href="https://colab.research.google.com/github/acbarrosramos/voice_to_action/blob/master/Fine_tune_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
from IPython.display import Audio
# Mount Google Drive in the Colab runtime; the audio folders and the dataset CSV
# used further down are addressed through paths under /content/gdrive/.
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [7]:
# Dependencies of this notebook; uncomment and run once if they are missing from the runtime.
#!pip install pydub
#!pip install SpeechRecognition

In [8]:
import io
import os
import time

import pandas as pd
import speech_recognition as sr
from pydub import AudioSegment

In [18]:
def transcribe_audio(file_path):
    """Transcribe a Brazilian-Portuguese audio file with Google speech recognition.

    WAV files are handed to the recognizer directly by path. Any other format
    (the dataset also contains MP3 files) is first converted to WAV in memory
    with pydub.

    Args:
        file_path: Path to the audio file. The format is taken from the file
            extension, compared case-insensitively.

    Returns:
        The recognized text. When the recognizer cannot understand the audio
        the string "Áudio não compreendido" is returned, and when the request
        to the recognition service fails an error message starting with
        "Erro ao solicitar reconhecimento de fala" is returned instead.
    """
    recognizer = sr.Recognizer()
    # Lower-cased so that e.g. "clip.WAV" is recognized as WAV as well.
    file_extension = os.path.splitext(file_path)[1][1:].lower()

    if file_extension == "wav":
        # Already WAV: no need to decode it with pydub first, the recognizer
        # reads the file straight from disk.
        audio_source = file_path
    else:
        # Convert to WAV in an in-memory buffer so nothing is written to disk.
        segment = AudioSegment.from_file(file_path, format=file_extension)
        audio_source = io.BytesIO()
        segment.export(audio_source, format="wav")
        audio_source.seek(0)

    # sr.AudioFile is used with both a path and a file-like object here.
    with sr.AudioFile(audio_source) as source:
        audio_record = recognizer.record(source)

    try:
        text = recognizer.recognize_google(audio_record, language='pt-BR')
    except sr.UnknownValueError:
        text = "Áudio não compreendido"
    except sr.RequestError as e:
        text = "Erro ao solicitar reconhecimento de fala; {0}".format(e)

    return text

In [19]:
# Load the already existing dataset, if there is one

def load_existing_dataset(file_path):
    """Load the previously saved dataset CSV, falling back to an empty frame.

    Args:
        file_path: Path to the dataset CSV file.

    Returns:
        The DataFrame read from ``file_path``. If the file does not exist, or
        exists but contains no data at all, an empty DataFrame with the columns
        'File Name', 'Location', 'Transcription' and 'Sentiment' is returned.
    """
    try:
        return pd.read_csv(file_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # read_csv raises EmptyDataError for a file without any columns (e.g. a
        # zero-byte file); treat it like a missing dataset instead of crashing.
        return pd.DataFrame(columns=['File Name', 'Location', 'Transcription', 'Sentiment'])

In [22]:
def create_dataset(base_path, dataset_path, sentiments=('positive', 'negative', 'neutral'), pause_seconds=1):
    """Build the transcription dataset from the per-sentiment audio folders.

    Every ``.mp3``/``.wav`` file found in ``base_path/<sentiment>`` yields one
    row. Files whose path is already listed in the 'Location' column of the CSV
    at ``dataset_path`` reuse the stored row; only new files are transcribed.

    NOTE: transcribe_audio returns its failure messages as ordinary text, so a
    failed transcription is stored like any other and reused on later runs.

    Args:
        base_path: Directory that contains one sub-folder per sentiment.
        dataset_path: Path of a previously saved dataset CSV (may not exist).
        sentiments: Names of the sub-folders to scan; each name is also used as
            the value of the 'Sentiment' column.
        pause_seconds: Seconds to sleep after each new transcription.

    Returns:
        A DataFrame with the columns 'File Name', 'Location', 'Transcription'
        and 'Sentiment' (also when no audio file was found).

    Raises:
        FileNotFoundError: If one of the sentiment folders does not exist.
    """
    columns = ['File Name', 'Location', 'Transcription', 'Sentiment']
    existing_dataset = load_existing_dataset(dataset_path)

    # Index the stored rows by location once instead of filtering the whole
    # frame for every file; setdefault keeps the first row of duplicates.
    cached_rows = {}
    for record in existing_dataset.to_dict('records'):
        cached_rows.setdefault(record['Location'], record)

    data = []
    for sentiment in sentiments:
        folder_path = os.path.join(base_path, sentiment)
        # os.listdir returns entries in arbitrary order; sort for a reproducible dataset.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.endswith((".mp3", ".wav")):
                continue

            file_path = os.path.join(folder_path, filename)
            cached = cached_rows.get(file_path)
            if cached is not None:
                data.append({column: cached[column] for column in columns})
                continue

            data.append({
                'File Name': filename,
                'Location': file_path,
                'Transcription': transcribe_audio(file_path),
                'Sentiment': sentiment
            })
            # Short pause between consecutive transcription requests.
            time.sleep(pause_seconds)

    # Passing the columns explicitly keeps the header even when no row was
    # collected, so the saved CSV stays loadable on the next run.
    return pd.DataFrame(data, columns=columns)

In [23]:
# Location of the audio folders on the mounted Drive, and of the dataset CSV
# that is both read as a cache of earlier transcriptions and overwritten below.
base_path = "/content/gdrive/My Drive/Colab Notebooks/audios"
dataset_path = "/content/gdrive/My Drive/Colab Notebooks/audios/dataset.csv"

# Build the dataset and persist it without the index column.
dataset = create_dataset(base_path=base_path, dataset_path=dataset_path)
dataset.to_csv(dataset_path, index=False)