<a href="https://colab.research.google.com/github/YukiAoki-GU/IoT_for_beginners/blob/main/teachable_machine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#TensorFlow Liteランタイムのインストール
!pip3 install tflite-runtime

In [None]:
import cv2
import numpy as np
import time
import matplotlib.pyplot as plt
from IPython.display import clear_output


def capture(camera_index=0, interval=1.0):
    """Show a live camera preview inside the notebook output area.

    Frames are read from the camera, converted from OpenCV's BGR channel
    order to RGB, and drawn with matplotlib, replacing the previously shown
    frame. The loop runs until a KeyboardInterrupt is raised (e.g. the
    kernel is interrupted).

    Args:
        camera_index: Device index passed to ``cv2.VideoCapture``.
        interval: Seconds to wait between frames, and before retrying after
            a failed frame read.

    Returns:
        None. If the camera cannot be opened, an error message is printed
        and the function returns immediately.
    """
    # Initialise the camera.
    cap = cv2.VideoCapture(camera_index)
    if not cap.isOpened():
        print("Error: Camera could not be opened.")
        cap.release()  # do not leave the capture handle dangling
        return

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame from camera.")
                # Wait before retrying; without this a persistently failing
                # camera turns the loop into a busy spin that floods the
                # cell output with error lines.
                time.sleep(interval)
                continue

            # Replace the previously displayed frame with the new one.
            clear_output(wait=True)
            fig, ax = plt.subplots()
            ax.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # BGR -> RGB
            ax.axis('off')  # hide the axes
            plt.show()
            plt.close(fig)  # make sure figures never accumulate in the loop
            time.sleep(interval)

    except KeyboardInterrupt:
        print("Stopped by user.")
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Start the live camera preview; it keeps running until the kernel is interrupted.
capture()

In [None]:
import cv2
import numpy as np
import tflite_runtime.interpreter as tflite
import time
import matplotlib.pyplot as plt
from IPython.display import clear_output

def load_labels(path):
    """Read a label file into an ``{index: label}`` dictionary.

    Each line of the file becomes one entry, keyed by its zero-based line
    number, with surrounding whitespace stripped.

    Args:
        path: Path to a text file with one label per line.

    Returns:
        dict[int, str] mapping line index to the stripped line text.
    """
    # Decode explicitly as UTF-8 so non-ASCII labels load the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return {i: line.strip() for i, line in enumerate(f)}

def capture_and_predict(model_path="model.tflite", labels_path="./labels.txt",
                        camera_index=0, interval=1.0):
    """Show a live camera preview and classify every displayed frame.

    A TensorFlow Lite model is loaded once. Each captured frame is then
    displayed in the notebook, resized to the model's input size, run through
    the interpreter, and the top-scoring label is printed together with its
    score. The loop runs until a KeyboardInterrupt is raised (e.g. the kernel
    is interrupted).

    Args:
        model_path: Path to the ``.tflite`` model file.
        labels_path: Path to the label file (one label per line).
        camera_index: Device index passed to ``cv2.VideoCapture``.
        interval: Seconds to wait between frames, and before retrying after
            a failed frame read.

    Returns:
        None. If the camera cannot be opened, an error message is printed
        and the function returns immediately.
    """
    # Load the model and the labels.
    interpreter = tflite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    labels = load_labels(labels_path)

    input_info = input_details[0]
    output_info = output_details[0]

    # Data type expected by the input tensor.
    input_dtype = input_info['dtype']

    # Take the frame size from the model instead of hard-coding 224x224.
    # Assumes a (batch, height, width, channels) input layout, which is what
    # the batch-first, channels-last array built below provides.
    _, input_height, input_width, _ = input_info['shape']

    # (scale, zero_point) of the output tensor; scale is 0 when the output is
    # not quantized.
    out_scale, out_zero_point = output_info['quantization']

    # Initialise the camera.
    cap = cv2.VideoCapture(camera_index)
    if not cap.isOpened():
        print("Error: Camera could not be opened.")
        cap.release()  # do not leave the capture handle dangling
        return

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame from camera.")
                # Wait before retrying; without this a persistently failing
                # camera turns the loop into a busy spin.
                time.sleep(interval)
                continue

            # OpenCV delivers BGR. Convert once and use the RGB frame for
            # both the display and the model, so the model is not fed
            # channel-swapped images.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Replace the previously displayed frame with the new one.
            clear_output(wait=True)
            plt.imshow(rgb_frame)
            plt.axis('off')  # hide the axes
            plt.show()

            # Pre-process the frame; cv2.resize takes (width, height).
            img = cv2.resize(rgb_frame, (int(input_width), int(input_height)))

            if input_dtype == np.uint8:
                # Quantized model: pass raw 8-bit pixels.
                img = img.astype(np.uint8)
            elif input_dtype == np.float32:
                # Float model: scale pixels to [0, 1].
                # NOTE(review): Teachable Machine's own float snippets scale
                # to [-1, 1] via (x / 127.5) - 1 -- confirm which range this
                # model was trained with.
                img = img.astype(np.float32) / 255.0

            img = np.expand_dims(img, axis=0)  # add the batch dimension

            # Run inference.
            interpreter.set_tensor(input_info['index'], img)
            interpreter.invoke()
            scores = interpreter.get_tensor(output_info['index'])[0]

            # Report the top-scoring class.
            predicted_class = int(np.argmax(scores))
            confidence = float(scores[predicted_class])
            if out_scale:
                # Quantized output: map the raw integer score back to a real
                # value so it is not reported on a 0-255 scale.
                confidence = out_scale * (confidence - out_zero_point)

            print(f"Predicted class: {labels[predicted_class]}, Confidence: {confidence:.4f}")

            time.sleep(interval)

    except KeyboardInterrupt:
        print("Stopped by user.")
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Start live image classification; it keeps running until the kernel is interrupted.
capture_and_predict()

---
# sound classification

In [None]:
! pip3 install sounddevice

In [None]:
import numpy as np
import sounddevice as sd
import tflite_runtime.interpreter as tflite
import scipy.signal
import time

# Paths to the model and its label file.
MODEL_PATH = "sound/soundclassifier_with_metadata.tflite"
LABELS_PATH = "sound/labels.txt"

# Recording sample rate (adjust to match the Teachable Machine model).
# NOTE(review): Teachable Machine audio models are usually trained on about
# one second of 44.1 kHz audio -- compare against the input shape printed
# below and confirm that 16 kHz capture is intended.
SAMPLE_RATE = 16000
DURATION = 1  # seconds

# Load the model.
interpreter = tflite.Interpreter(model_path=MODEL_PATH)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Show the input shape the model expects.
print(f"Model expects input shape: {input_details[0]['shape']}")

# Load the labels. Decode explicitly as UTF-8 so non-ASCII labels load the
# same way regardless of the platform's default encoding.
with open(LABELS_PATH, "r", encoding="utf-8") as f:
    labels = [line.strip() for line in f]

# Audio pre-processing
def preprocess_audio(audio, sample_rate, expected_length=None):
    """Turn a recorded clip into a peak-normalized float32 model input.

    The clip is resampled to exactly ``expected_length`` samples, scaled so
    that its largest absolute sample is 1, and given a leading batch
    dimension.

    Args:
        audio: 1-D array of audio samples.
        sample_rate: Sample rate of ``audio``. Currently unused: the clip is
            always stretched or squeezed to ``expected_length`` samples
            whatever its rate. Kept so existing callers keep working.
        expected_length: Number of samples the model expects. Defaults to the
            second dimension of the module-level interpreter's input shape.

    Returns:
        ``np.float32`` array of shape ``(1, expected_length)``. An all-zero
        (silent) clip yields all zeros.
    """
    if expected_length is None:
        # Input size expected by the loaded model.
        expected_length = input_details[0]['shape'][1]
    expected_length = int(expected_length)

    # Resample the clip to the model's input length.
    resampled_audio = scipy.signal.resample(audio, expected_length)

    # Zero-pad (or truncate) defensively so the buffer always has exactly
    # expected_length samples.
    padded_audio = np.zeros(expected_length)
    usable = min(len(resampled_audio), expected_length)
    padded_audio[:usable] = resampled_audio[:usable]

    # Peak-normalize, skipping the division for a silent clip: dividing by a
    # zero peak would fill the model input with NaNs.
    peak = np.max(np.abs(padded_audio))
    if peak > 0:
        padded_audio = padded_audio / peak

    # Cast last: dividing a float32 array by a NumPy float64 scalar promotes
    # to float64 under NumPy 2, which a float32 input tensor rejects.
    return np.expand_dims(padded_audio, axis=0).astype(np.float32)

# Run inference
def predict(audio_input):
    """Run the module-level interpreter on one pre-processed input.

    Args:
        audio_input: Array written to the interpreter's first input tensor.

    Returns:
        Tuple ``(best_index, scores)``: the argmax over the whole output
        tensor, and the first row of that output tensor.
    """
    input_index = input_details[0]['index']
    output_index = output_details[0]['index']

    interpreter.set_tensor(input_index, audio_input)
    interpreter.invoke()

    raw_output = interpreter.get_tensor(output_index)
    best_index = np.argmax(raw_output)
    return best_index, raw_output[0]

# Real-time classification
def classify_audio():
    """Record and classify audio clips in a loop until interrupted.

    Each iteration records ``DURATION`` seconds of mono float32 audio at
    ``SAMPLE_RATE``, pre-processes it, runs the model, and prints the
    top-scoring label with its score. A KeyboardInterrupt ends the loop.
    """
    print("Listening...")
    try:
        while True:
            # Capture one clip and block until the recording has finished.
            frame_count = int(SAMPLE_RATE * DURATION)
            recording = sd.rec(frame_count, samplerate=SAMPLE_RATE, channels=1, dtype='float32')
            sd.wait()

            # Pre-process the single recorded channel.
            mono_samples = recording[:, 0]
            model_input = preprocess_audio(mono_samples, SAMPLE_RATE)

            # Run inference.
            best_index, scores = predict(model_input)

            # Report the result.
            best_label = labels[best_index]
            best_score = scores[best_index]
            print(f"Predicted: {best_label} (Confidence: {best_score:.2f})")

            time.sleep(0.1)  # adjust as needed
    except KeyboardInterrupt:
        print("Stopped by user.")

# Run: start listening; classification continues until the kernel is interrupted.
classify_audio()