In [1]:
! pip install SpeechRecognition

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.14.1-py3-none-any.whl.metadata (31 kB)
Collecting typing-extensions (from SpeechRecognition)
  Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Collecting standard-aifc (from SpeechRecognition)
  Downloading standard_aifc-3.13.0-py3-none-any.whl.metadata (969 bytes)
Collecting audioop-lts (from SpeechRecognition)
  Downloading audioop_lts-0.2.1-cp313-abi3-macosx_11_0_arm64.whl.metadata (1.6 kB)
Collecting standard-chunk (from standard-aifc->SpeechRecognition)
  Downloading standard_chunk-3.13.0-py3-none-any.whl.metadata (860 bytes)
Downloading SpeechRecognition-3.14.1-py3-none-any.whl (32.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading audioop_lts-0.2.1-cp313-abi3-macosx_11_0_arm64.whl (26 kB)
Downloading standard_aifc-3.13.0-py3-none-any.whl (10 kB)
Using cached typing_extensions-4.12.2-p

In [6]:
import pyaudio
import wave
import numpy as np
import speech_recognition as sr
import csv

class AudioRecorder:
    """Record single-channel audio through PyAudio and save it as a WAV file.

    Capture settings are fixed in ``__init__``: ``pyaudio.paInt16`` sample
    format, 1 channel, 44100 Hz sample rate, and 1024 frames per buffer read.
    Call ``close()`` when finished to release the PyAudio instance; ``__del__``
    does the same as a fallback.
    """

    def __init__(self):
        # Audio capture parameters
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 44100
        self.CHUNK = 1024
        self.audio = pyaudio.PyAudio()

    def record(self, seconds):
        """Record audio for the given number of seconds.

        Args:
            seconds: Recording length in seconds. The number of buffer reads is
                ``int(RATE / CHUNK * seconds)``, so the captured length is
                rounded down to a whole number of chunks.

        Returns:
            list: The raw data returned by each ``stream.read`` call, in order.

        Raises:
            Whatever ``self.audio.open`` or ``stream.read`` raises; the stream
            is still stopped and closed in that case.
        """
        print("準備開始錄音...")

        # Open the input stream
        stream = self.audio.open(
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            input=True,
            frames_per_buffer=self.CHUNK
        )

        frames = []
        try:
            print("開始錄音...")

            # Read the audio data chunk by chunk
            for _ in range(int(self.RATE / self.CHUNK * seconds)):
                frames.append(stream.read(self.CHUNK))

            print("錄音結束")
        finally:
            # Always release the stream, even if a read failed part-way, so the
            # input device is not left open. close() runs even if
            # stop_stream() itself raises.
            try:
                stream.stop_stream()
            finally:
                stream.close()

        return frames

    def save_audio(self, frames, filename="output.wav"):
        """Write recorded frames to a WAV file.

        Args:
            frames: Iterable of bytes objects, as returned by ``record``.
            filename: Destination path; defaults to ``"output.wav"``.

        Must be called before ``close()``, because the sample width is queried
        from the PyAudio instance.
        """
        with wave.open(filename, 'wb') as wf:
            wf.setnchannels(self.CHANNELS)
            wf.setsampwidth(self.audio.get_sample_size(self.FORMAT))
            wf.setframerate(self.RATE)
            wf.writeframes(b''.join(frames))
        print(f"已儲存音訊檔案: {filename}")

    def close(self):
        """Terminate the PyAudio instance. Safe to call more than once."""
        # getattr guards the case where __init__ failed before `audio` was set.
        audio = getattr(self, "audio", None)
        if audio is not None:
            # Clear the reference first so a repeated call is a no-op.
            self.audio = None
            audio.terminate()

    def __del__(self):
        """Release resources when the recorder is garbage-collected."""
        self.close()

def speech_to_text(duration=5, calibration_duration=5):
    """Capture one phrase from the microphone and transcribe it (zh-TW).

    Args:
        duration: Passed to ``Recognizer.listen`` as ``timeout`` — how many
            seconds to wait for speech to begin before giving up.
        calibration_duration: Seconds of ambient noise sampled by
            ``adjust_for_ambient_noise`` before listening. Defaults to 5.

    Returns:
        str | None: The recognized text, or ``None`` when no speech started
        within the timeout, the audio could not be understood, or the
        recognition service request failed.
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            print("Please wait. Calibrating microphone...")
            # Sample the ambient noise to set the energy threshold
            recognizer.adjust_for_ambient_noise(source, duration=calibration_duration)
            print("Say something!")
            audio = recognizer.listen(source, timeout=duration)
    except sr.WaitTimeoutError:
        # listen() raises this when no phrase starts within `timeout` seconds.
        print("No speech detected before the timeout")
        return None

    try:
        # Recognize speech using Google Speech Recognition. Call the service
        # exactly once and reuse the result for both the log line and the
        # return value.
        text = recognizer.recognize_google(audio, language='zh-TW')
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
        return None
    except sr.RequestError as e:
        print("No response from Google Speech Recognition service: {0}".format(e))
        return None

    print("Google Speech Recognition thinks you said: \n" + text)
    return text

def write_to_csv(text, filename='speech_to_text.csv'):
    """Write ``text`` as a single one-column row to a CSV file.

    The file is opened in ``'w'`` mode, so any existing content is replaced.

    Args:
        text: The value to store in the row's only cell.
        filename: Destination path; defaults to ``'speech_to_text.csv'``.
    """
    # Explicit UTF-8 so non-ASCII text (e.g. Chinese transcripts) is written
    # the same way on every platform instead of using the locale's default.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerow([text])
    print('Write to csv file successfully!')

# Usage example: record 5 seconds of audio and save it using save_audio's
# default filename. Runs only when this code executes as the main module.
if __name__ == "__main__":
    recorder = AudioRecorder()
    audio_frames = recorder.record(seconds=5)
    recorder.save_audio(audio_frames)

準備開始錄音...
開始錄音...
錄音結束
已儲存音訊檔案: output.wav


In [11]:
# Transcribe one phrase from the microphone. speech_to_text() yields None when
# recognition fails, so only write the CSV when text was actually produced;
# otherwise the existing file would be overwritten with an empty row.
speech_to_text_result = speech_to_text()
if speech_to_text_result:
    write_to_csv(speech_to_text_result)

Please wait. Calibrating microphone...
Say something!
Google Speech Recognition thinks you said: 
你好現在
Write to csv file successfully!


# 使用現有wav檔案轉成文字

In [13]:
def convert_wav_to_text(wav_file_path):
    """
    Transcribe a WAV file to text with Google Speech Recognition (zh-TW).

    Args:
        wav_file_path (str): Path to the WAV file.
    Returns:
        str | None: The recognized text, or None if recognition failed or any
        other error occurred (the error is printed, not raised).
    """
    recognizer = sr.Recognizer()

    try:
        with sr.AudioFile(wav_file_path) as wav_source:
            # Load the whole audio file into memory
            recording = recognizer.record(wav_source)

            # Run recognition through Google Speech Recognition
            print("正在處理音訊...")
            transcript = recognizer.recognize_google(recording, language='zh-TW')
            print(f"辨識結果: {transcript}")
    except sr.UnknownValueError:
        print("Google Speech Recognition 無法理解音訊")
        return None
    except sr.RequestError as e:
        print(f"無法從 Google Speech Recognition service 獲取結果; {e}")
        return None
    except Exception as e:
        # Best-effort: report anything else (e.g. unreadable file) and give up.
        print(f"發生錯誤: {e}")
        return None
    else:
        return transcript

# Usage example
from pathlib import Path

# Build the path from the current user's home directory instead of hard-coding
# one machine's absolute path (which also embeds a username). Replace with the
# location of your own WAV file. Kept as a str, matching the original value.
wav_file_path = str(Path.home() / "Desktop" / "智慧點餐" / "output.wav")
result = convert_wav_to_text(wav_file_path)

if result:
    # Optionally persist the transcript to CSV
    write_to_csv(result)

正在處理音訊...
辨識結果: 你好我要一杯珍珠奶茶微糖微冰還要一個大杯青茶無糖去冰再一個中被紅茶微糖少冰在一個裝備紅茶多糖少冰在一個裝備紅茶半糖少冰
Write to csv file successfully!


In [8]:
import requests

# Endpoint queried for weather data.
# NOTE(review): this is an ngrok-free.app tunnel URL, which is presumably
# temporary — confirm it is still live, or move it to configuration.
url = "https://f7a9-125-231-116-59.ngrok-free.app"

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)
# Fail with a clear HTTPError on 4xx/5xx instead of a confusing JSON decode
# error on an error page.
response.raise_for_status()

print(response.json())

{'data': [27.0, '晴'], 'message': '后里的天氣'}
