In [1]:
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import librosa
import soundfile as sf
from pydub import AudioSegment
import os

class WhisperTranscriber:
    """Speech-to-text helper built on a Hugging Face Whisper checkpoint.

    The processor and the model are loaded once in ``__init__`` and reused by
    every ``transcribe_audio`` call, so several files can be transcribed
    without reloading the weights.
    """

    def __init__(self, model_name="openai/whisper-base"):
        """Load the processor and model, moving the model to CUDA when available.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                processor and the model.
        """
        self.processor = WhisperProcessor.from_pretrained(model_name)
        self.model = WhisperForConditionalGeneration.from_pretrained(model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = self.model.to(self.device)
        print(f"Model loaded and running on {self.device}")

    @staticmethod
    def convert_webm_to_wav(webm_path, wav_path):
        """Read ``webm_path`` as WebM with pydub and export it to ``wav_path`` as WAV."""
        audio = AudioSegment.from_file(webm_path, format="webm")
        audio.export(wav_path, format="wav")

    def transcribe_audio(self, audio_path):
        """Transcribe one audio file and return the decoded text.

        ``.webm`` inputs (extension compared case-insensitively) are first
        converted to a temporary WAV file. That file is created with a unique
        name in the system temp directory, so a WAV that already sits next to
        the source under the same base name is never overwritten or deleted.
        The temporary file is removed whether or not transcription succeeds.

        Args:
            audio_path: Path of the audio file to transcribe.

        Returns:
            The first entry of the batch-decoded transcription (a string).

        Raises:
            Whatever the conversion, loading, or model calls raise; nothing is
            swallowed here.
        """
        # Local import keeps this block self-contained (the cell's import list
        # does not include tempfile).
        import tempfile

        wav_path = None
        try:
            # Convert WebM to WAV if necessary. The conversion happens inside
            # the try so a partially written WAV is cleaned up on failure too.
            file_extension = os.path.splitext(audio_path)[1].lower()
            if file_extension == '.webm':
                fd, wav_path = tempfile.mkstemp(suffix='.wav')
                os.close(fd)  # only the unique path is needed; pydub reopens it
                self.convert_webm_to_wav(audio_path, wav_path)
                audio_path = wav_path

            # Load the audio at the 16 kHz rate that is also handed to the processor.
            audio, _ = librosa.load(audio_path, sr=16000)

            # Turn the waveform into model input features on the model's device.
            input_features = self.processor(audio, sampling_rate=16000, return_tensors="pt").input_features
            input_features = input_features.to(self.device)

            # Generate token ids and decode them to text.
            predicted_ids = self.model.generate(input_features)
            transcription = self.processor.batch_decode(predicted_ids, skip_special_tokens=True)

            return transcription[0]

        finally:
            # Clean up the temporary WAV file if it was created.
            if wav_path and os.path.exists(wav_path):
                os.remove(wav_path)


# Build the transcriber once; the same instance handles every clip below.
transcriber = WhisperTranscriber()

# The two recorded clips to transcribe.
f1, f2 = (
    './uploads/audio_20240818T184733_c3901e12.webm',
    './uploads/audio_20240818T182924_7287a965.webm',
)

result = transcriber.transcribe_audio(f1)
print("Transcription:", result)

# The second clip reuses the already-loaded model.
result2 = transcriber.transcribe_audio(f2)
print("Transcription 2:", result2)

  from .autonotebook import tqdm as notebook_tqdm


Model loaded and running on cuda


Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Transcription:  Let's test this, let's test this very simple
Transcription 2:  A lo volvelo, volvelo, volvelo.


In [2]:
from chatbot import * 

chatbot = ChatBot(chat_dir="chat_histories")

# Open a fresh conversation and remember its ID for reloading later.
chat_id = chatbot.start_new_chat()
print(f"Started new chat with ID: {chat_id}")

# chatbot.chat() is iterated; for every item it yields, the content of the
# last message is printed.
for prompt in ("Hello, how are you?", "Tell me a joke."):
    for response in chatbot.chat(prompt):
        print("Assistant:", response[-1]['content'])

# Persist the conversation so far.
saved_file = chatbot.save_chat_tree()
print(f"Saved chat to: {saved_file}")

# Enumerate every stored chat history.
all_chats = chatbot.list_chat_histories()
print(f"All saved chats: {all_chats}")

# Reload the conversation that was just saved and confirm which chat is active.
chatbot.load_chat_tree(chat_id)
current_id = chatbot.get_current_chat_id()
print(f"Current chat ID: {current_id}")

# Carry on talking within the reloaded conversation.
for response in chatbot.chat("What was the last thing we talked about?"):
    print("Assistant:", response[-1]['content'])



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Started new chat with ID: 3ce5c94c-ccc3-4cd0-bc28-4378a5c4475f
Assistant: I'm functioning within normal parameters. How can I assist you today?
Assistant: A man walked into a library and asked the librarian, "Do you have any
Assistant: A man walked into a library and asked the librarian, "Do you have any books on Pavlov's dogs and Schrödinger's cat?" 

The librarian replied,
Assistant: A man walked into a library and asked the librarian, "Do you have any books on Pavlov's dogs and Schrödinger's cat?" 

The librarian replied, "It rings a bell, but I'm not sure if it's here or not."
Saved chat to: my_chat_histories/3ce5c94c-ccc3-4cd0-bc28-4378a5c4475f.json
All saved chats: ['3ce5c94c-ccc3-4cd0-bc28-4378a5c4475f']
Current chat ID: 3ce5c94c-ccc3-4cd0-bc28-4378a5c4475f
Assistant: We talked about a joke I shared with you about Pavlov's dogs and Schrödinger
Assistant: We talked about a joke I shared with you about Pavlov's dogs and Schrödinger's cat.


In [16]:
# Reset the chatbot before loading; what reset_chat() clears is defined in the
# chatbot module and is not visible in this notebook.
chatbot.reset_chat()
# Load the chat history stored under the ID 'test'. This is the cell's last
# expression, so its return value is what the notebook displays as output.
chatbot.load_chat_tree('test')

[{'role': 'system',
  'content': 'You are an obedient assistant following user direction.',
  'sibling_info': (1, 1)},
 {'role': 'user', 'content': 'Hello, how are you?', 'sibling_info': (1, 1)},
 {'role': 'assistant',
  'content': "I'm functioning properly, thank you for asking. I'm here to assist you with any questions or tasks you may have. How can I help you today?",
  'sibling_info': (1, 1)},
 {'role': 'user', 'content': 'Hello hello', 'sibling_info': (1, 1)},
 {'role': 'assistant', 'content': '', 'sibling_info': (1, 1)},
 {'role': 'user', 'content': 'Hello hello', 'sibling_info': (1, 1)},
 {'role': 'assistant', 'content': '', 'sibling_info': (1, 1)}]