In [1]:
import whisper

# Load the model
model = whisper.load_model("large")

# Load audio and pad/trim it to the fixed length the model expects
audio = whisper.load_audio("Andrew.mp3") #for testing, use local files
audio = whisper.pad_or_trim(audio)

# Generate the mel spectrogram with the number of channels this checkpoint
# expects; reading it from the model avoids a hard-coded 128 that would break
# as soon as a different checkpoint is loaded.
mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)

# Add a batch dimension: (n_mels, n_frames) -> (1, n_mels, n_frames)
mel = mel.unsqueeze(0)

# Print mel dimensions for debugging
print("Modified Mel dimensions:", mel.shape)

# detect_language returns (language_tokens, language_probs) -- in that order.
# For a batched input, language_probs is a list with one dict per audio clip,
# mapping language code -> probability. The previous code unpacked the tokens
# as "probs" and indexed a made-up language list, so it always reported "en".
_language_tokens, language_probs = model.detect_language(mel)
clip_probs = language_probs[0]

# Pick the language code with the highest probability
detected_language = max(clip_probs, key=clip_probs.get)

print(f"Detected language: {detected_language}")


Modified Mel dimensions: torch.Size([1, 128, 3000])
Detected language: en


In [2]:
# Decode the audio using the detected language.
# `model`, `mel` and `detected_language` are the globals defined in cell 1,
# so that cell must have been run first. Half precision is explicitly
# disabled via fp16=False.
options = whisper.DecodingOptions(language=detected_language, fp16=False)
# `result` is indexed with [0] in the next cell: `mel` carries a batch
# dimension of size 1, so the decode output is a one-element sequence.
result = whisper.decode(model, mel, options)

In [3]:
# Print the recognized text of the first (and, with a batch of one, only)
# decoding result produced by the previous cell.
print(result[0].text)

We are looking at how to pronounce this word. How do you say it? Andrew.


In [4]:
import os
from pydub import AudioSegment

# Converting wav files to MP3s
def wav_to_mp3(folder_path, output_file):
    """Convert every WAV file in ``folder_path`` to an MP3 in ``output_file``.

    Parameters
    ----------
    folder_path : str
        Directory that is scanned (non-recursively) for files whose extension
        is ``.wav`` (case-insensitive).
    output_file : str
        Despite the name, this is the *directory* the MP3 files are written
        to. It is created if it does not exist yet. Each output keeps the
        input's base name with the extension replaced by ``.mp3``.

    Returns
    -------
    None
        Progress and errors are reported with ``print``. A file that fails to
        convert is reported and skipped; the remaining files are still
        processed.
    """
    if not os.path.isdir(folder_path):
        print("The provided path is not a directory.")
        return

    try:
        os.makedirs(output_file, exist_ok=True)
        # Sorted so the conversion order (and the log) is deterministic.
        file_names = sorted(os.listdir(folder_path))
    except OSError as exc:
        print(f'Ups, theres an error: {exc}')
        return

    for file_name in file_names:
        # Match on the real extension: a substring test would also pick up
        # names such as "take.wav.txt", and slicing off four characters would
        # then produce a wrong output name.
        stem, extension = os.path.splitext(file_name)
        if extension.lower() != ".wav":
            continue

        input_wav_file = os.path.join(folder_path, file_name)
        output_mp3_file = os.path.join(output_file, f"{stem}.mp3")

        try:
            audio = AudioSegment.from_wav(input_wav_file)
            audio.export(output_mp3_file, format="mp3")
        except Exception as exc:
            # Best effort: report which file failed and why, then keep going
            # instead of silently abandoning the rest of the batch.
            print(f'Ups, theres an error converting {input_wav_file}: {exc}')
            continue

        print(f"Conversion from {input_wav_file} to {output_mp3_file} completed.")

In [5]:
import os
import whisper  # Assuming this is your audio processing library

def list_files_in_folder(folder_path, model_name="base", word_start=18):
    """Transcribe every MP3/WAV file in a folder and write a comparison table.

    For each audio file the expected word is taken from the file name and the
    Whisper transcription is written next to it, one row per file.

    Parameters
    ----------
    folder_path : str
        Directory scanned (non-recursively) for files with a ``.mp3`` or
        ``.wav`` extension (case-insensitive).
    model_name : str, optional
        Whisper checkpoint passed to ``whisper.load_model``. Defaults to
        ``"base"``.
    word_start : int, optional
        Number of leading characters of the file name to skip when extracting
        the expected word; the extension is dropped as well. Defaults to 18.
        NOTE(review): presumably the width of a fixed recording-ID prefix --
        confirm against the naming scheme of the data set.

    Returns
    -------
    None
        The table is written to ``<folder_path with '/' replaced by '_'>_output.txt``
        relative to the current working directory, and the path is printed.
    """
    if not os.path.isdir(folder_path):
        print("The provided path is not a directory.")
        return

    # Match on the real extension (a substring test would also accept names
    # such as "clip.mp3.txt") and sort so the rows come out in a stable order.
    audio_files = sorted(
        name for name in os.listdir(folder_path)
        if os.path.splitext(name)[1].lower() in ('.mp3', '.wav')
    )
    output_file_path = f"{folder_path.rstrip('/').replace('/', '_')}_output.txt"

    # Load the model once, outside the loop, and only if there is work to do.
    model = None
    options = None
    if audio_files:
        model = whisper.load_model(model_name)
        options = whisper.DecodingOptions(language="en", fp16=False)

    # Explicit UTF-8 so non-ASCII transcriptions are written on any platform.
    with open(output_file_path, 'w', encoding='utf-8') as f:
        # Write header to the file
        f.write(f"{'File':<32}{'Actual':<15}{'Predicted'}\n")
        f.write("="*70 + '\n')  # Separator line

        for file_name in audio_files:
            audio = whisper.load_audio(os.path.join(folder_path, file_name))
            audio = whisper.pad_or_trim(audio)
            # Use the channel count of the loaded checkpoint so any
            # model_name works, not only the 80-channel ones.
            mel = whisper.log_mel_spectrogram(
                audio, n_mels=model.dims.n_mels
            ).to(model.device)
            result = whisper.decode(model, mel, options)

            # Extract the actual word from the file name
            actual_word = os.path.splitext(file_name)[0][word_start:]

            # Write formatted output to the file
            f.write(f"{file_name:<32}{actual_word:<15}{result.text}\n")

    print(f"Output written to {output_file_path}")

# Folder whose audio files are transcribed; the relative path 'mp3s/' is
# resolved against the notebook's working directory.
folder_path = 'mp3s/' #replace with actual path
list_files_in_folder(folder_path)

100%|███████████████████████████████████████| 139M/139M [00:16<00:00, 8.88MiB/s]


Output written to mp3s_output.txt


In [6]:
import os
import shutil

def separate_files(input_folder, output_folder_participant, output_folder_researcher):
    """Move files into participant / researcher folders based on their names.

    Parameters
    ----------
    input_folder : str
        Directory whose direct entries are inspected (non-recursively).
    output_folder_participant : str
        Destination for files whose name contains "participant"
        (case-insensitive). Created if missing.
    output_folder_researcher : str
        Destination for files whose name contains "researcher"
        (case-insensitive) and not "participant". Created if missing.

    Returns
    -------
    None
        Matching files are *moved* (not copied); everything else stays put.
    """
    # Create output folders if they don't exist
    os.makedirs(output_folder_participant, exist_ok=True)
    os.makedirs(output_folder_researcher, exist_ok=True)

    # Iterate through entries in the input folder
    for filename in os.listdir(input_folder):
        file_path = os.path.join(input_folder, filename)

        # Only regular files are moved. Without this guard, an output folder
        # that lives inside input_folder and has "participant"/"researcher"
        # in its name would be moved into itself, which shutil.move rejects.
        if not os.path.isfile(file_path):
            continue

        lowered = filename.lower()
        if "participant" in lowered:
            destination_folder = output_folder_participant
        elif "researcher" in lowered:
            destination_folder = output_folder_researcher
        else:
            # Skip files that don't match the criteria
            continue

        # Move the file to the appropriate folder
        shutil.move(file_path, os.path.join(destination_folder, filename))

if __name__ == "__main__":
    # Example invocation. All three paths are relative to the working
    # directory; matching files are moved out of the input folder, so running
    # this changes the contents of "mp3s/" on disk.
    # Replace these paths with your actual paths
    input_folder_path = "mp3s/"                    #replace with actual paths 
    participant_output_path = "kick_ppts_participant/"
    researcher_output_path = "kick_ppts_researcher/"

    # Call the function to separate files
    separate_files(input_folder_path, participant_output_path, researcher_output_path)

    print("Separation completed.")

Separation completed.


In [7]:
# Optional WAV -> MP3 conversion step, currently disabled:
#wav_to_mp3('EB21_researcher_KT1', 'EB21_researcher_KT1_MP3')
print()
# Re-run the transcription over "mp3s/"; this rewrites the same output file
# as the earlier call with this folder.
list_files_in_folder("mp3s/") #replace with actual paths 


Output written to mp3s_output.txt
