In [1]:
# lyon_loader_torch.py
import os
import glob
import re
import numpy as np
import torch
from scipy.io import loadmat


import os, glob, re
import numpy as np
import torch
from typing import Optional
from scipy.io import loadmat

def _parse_filename_strict_compat(path: str):
    utterance = int(path[-8])
    digit = int(path[-5])
    return utterance, digit

def _parse_filename_regex(path: str):
    base = os.path.basename(path)
    m = re.search(r"s(\d+)_u(\d+)_d(\d)\.mat$", base)
    if m is None:
        raise ValueError(f"Unexpected filename format: {base}")
    utterance = int(m.group(2))
    digit = int(m.group(3))
    return utterance, digit

def read_lyon_decimation_128_torch(
    dir_name: str,
    utterance_train_list=(1, 2, 3, 4, 5),
    n_channel: int = 77,
    n_label: int = 10,
    strict_compat_filename: bool = True,
    sort_files: bool = True,
    device: Optional[torch.device] = None,   # Optional[...] kept for Python 3.9 compatibility
    dtype: torch.dtype = torch.float32,
):
    """Load all ``*.mat`` files in ``dir_name`` into concatenated train/test tensors.

    Every file must hold a ``spec`` array with ``n_channel`` rows; its columns
    are used as time steps. For each file a ``[n_tau, n_channel]`` input and a
    ``[n_tau, n_label]`` target (``-1`` everywhere, ``+1`` in the column of the
    file's digit) are built. Files whose utterance number is in
    ``utterance_train_list`` go to the training split, all others to the test
    split, and each split is concatenated along the time axis.

    Relies on ``_parse_filename_strict_compat`` / ``_parse_filename_regex``
    being defined in the surrounding namespace.

    Args:
        dir_name: directory searched (non-recursively) for ``*.mat`` files.
        utterance_train_list: utterance numbers assigned to the training split.
        n_channel: required number of rows of ``spec``.
        n_label: number of target columns.
        strict_compat_filename: if true, parse file names by fixed character
            offsets; otherwise use the regex parser.
        sort_files: if true, process files in sorted path order; otherwise in
            the order returned by ``glob``.
        device: if not ``None``, every returned tensor is moved to this device.
        dtype: dtype of the input and target tensors.

    Returns:
        ``(train_input, train_output, train_length, train_label,
        test_input, test_output, test_length, test_label)`` where ``*_length``
        and ``*_label`` are ``long`` tensors with one entry per utterance.

    Raises:
        FileNotFoundError: no ``.mat`` file was found.
        KeyError: a file has no ``spec`` variable.
        ValueError: ``spec`` has the wrong number of rows (or the regex parser
            rejects a file name).
        RuntimeError: concatenated rows disagree with the summed lengths.
    """
    mat_paths = glob.glob(os.path.join(dir_name, "*.mat"))
    if not mat_paths:
        raise FileNotFoundError(f"No .mat files found in: {dir_name}")
    if sort_files:
        mat_paths.sort()

    if strict_compat_filename:
        parse_name = _parse_filename_strict_compat
    else:
        parse_name = _parse_filename_regex
    train_utterances = set(utterance_train_list)

    # Per-split accumulators: frames, per-frame targets, utterance lengths, labels.
    train_parts = {"x": [], "y": [], "len": [], "lab": []}
    test_parts = {"x": [], "y": [], "len": [], "lab": []}

    print(f"{len(mat_paths)} files in {dir_name} を読み込んでいます...")

    for mat_path in mat_paths:
        mat = loadmat(mat_path)
        if "spec" not in mat:
            raise KeyError(f"'spec' not found in mat file: {mat_path}")

        spec = mat["spec"]  # [n_channel, n_tau]
        if spec.shape[0] != n_channel:
            raise ValueError(f"spec.shape[0] != {n_channel}: {mat_path}, shape={spec.shape}")

        utterance, digit = parse_name(mat_path)
        n_tau = spec.shape[1]

        # Time-major copy of the spectrogram: [n_tau, n_channel].
        frames = torch.from_numpy(np.ascontiguousarray(spec.T)).to(dtype=dtype)
        # Per-frame target: -1 for every class except the spoken digit (+1).
        targets = -torch.ones((n_tau, n_label), dtype=dtype)
        targets[:, digit] = 1.0

        bucket = train_parts if utterance in train_utterances else test_parts
        bucket["x"].append(frames)
        bucket["y"].append(targets)
        bucket["len"].append(n_tau)
        bucket["lab"].append(digit)

    def _concat_rows(chunks, width):
        # An empty split still yields a correctly shaped [0, width] tensor.
        if chunks:
            return torch.cat(chunks, dim=0)
        return torch.empty((0, width), dtype=dtype)

    train_input = _concat_rows(train_parts["x"], n_channel)
    train_output = _concat_rows(train_parts["y"], n_label)
    test_input = _concat_rows(test_parts["x"], n_channel)
    test_output = _concat_rows(test_parts["y"], n_label)

    train_length = torch.tensor(train_parts["len"], dtype=torch.long)
    train_label = torch.tensor(train_parts["lab"], dtype=torch.long)
    test_length = torch.tensor(test_parts["len"], dtype=torch.long)
    test_label = torch.tensor(test_parts["lab"], dtype=torch.long)

    # Sanity check: concatenated rows must equal the sum of utterance lengths.
    if train_input.shape[0] != int(train_length.sum().item()):
        raise RuntimeError("train_input rows != sum(train_length)")
    if test_input.shape[0] != int(test_length.sum().item()):
        raise RuntimeError("test_input rows != sum(test_length)")

    result = (train_input, train_output, train_length, train_label,
              test_input, test_output, test_length, test_label)
    if device is not None:
        result = tuple(tensor.to(device) for tensor in result)
    return result


# Smoke test: load the dataset once and print the shape of every returned tensor.
if __name__ == "__main__":
    torch.manual_seed(0)
    np.random.seed(0)

    # Utterance numbers that go to the training split; the rest become test data.
    train_list = [1, 2, 3, 4, 5]

    (train_input, train_output, train_length, train_label,
     test_input,  test_output,  test_length,  test_label) = read_lyon_decimation_128_torch(
        dir_name="./Lyon_decimation_128",
        utterance_train_list=train_list,
        strict_compat_filename=True,   # compatible with the reference code (default)
        sort_files=True,
        device=None,                   # recommended: verify agreement on CPU first
    )

    print("データ読み込み完了．")
    print("train_input :", tuple(train_input.shape))
    print("train_output:", tuple(train_output.shape))
    print("train_length:", tuple(train_length.shape), "sum=", int(train_length.sum().item()))
    print("train_label :", tuple(train_label.shape))

    print("test_input  :", tuple(test_input.shape))
    print("test_output :", tuple(test_output.shape))
    print("test_length :", tuple(test_length.shape), "sum=", int(test_length.sum().item()))
    print("test_label  :", tuple(test_label.shape))


500 files in ./Lyon_decimation_128 を読み込んでいます...
データ読み込み完了．
train_input : (16957, 77)
train_output: (16957, 10)
train_length: (250,) sum= 16957
train_label : (250,)
test_input  : (17150, 77)
test_output : (17150, 10)
test_length : (250,) sum= 17150
test_label  : (250,)


In [2]:
# check_lyon_esn_ridge_with_helpers.py
# (1)逐次更新→(2)状態全結合→(3)既存のリッジ回帰更新→(4)多数決WER
# 参考コードと同じ「全発話を連結」前提。混同行列は見ない。

import numpy as np
import torch
import torch.nn.functional as F

import os, glob, re
import numpy as np
import torch
from typing import Optional
from scipy.io import loadmat

# --- あなたの環境に合わせて import を調整してください ---

# あなたの ESN 実装（ReadOut クラスを含む）
from esn_model import ESN, ReadOut

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# ヘルパー
from rc_timeseries_helpers import (
    infer_device_from_model,
    extract_states_time_major,
    extract_logits_time_major,
    apply_time_selection,
    prepare_time_distributed_targets,
)

def read_lyon_decimation_128_torch(
    dir_name: str,
    utterance_train_list=(1, 2, 3, 4, 5),
    n_channel: int = 77,
    n_label: int = 10,
    strict_compat_filename: bool = True,
    sort_files: bool = True,
    device: Optional[torch.device] = None,   # Optional[...] kept for Python 3.9 compatibility
    dtype: torch.dtype = torch.float32,
):
    """Load all ``*.mat`` files in ``dir_name`` into concatenated train/test tensors.

    Every file must hold a ``spec`` array with ``n_channel`` rows; its columns
    are used as time steps. For each file a ``[n_tau, n_channel]`` input and a
    ``[n_tau, n_label]`` target (``-1`` everywhere, ``+1`` in the column of the
    file's digit) are built. Files whose utterance number is in
    ``utterance_train_list`` go to the training split, all others to the test
    split, and each split is concatenated along the time axis.

    Relies on ``_parse_filename_strict_compat`` / ``_parse_filename_regex``
    already being defined in the notebook namespace (they are not defined in
    this cell).

    Args:
        dir_name: directory searched (non-recursively) for ``*.mat`` files.
        utterance_train_list: utterance numbers assigned to the training split.
        n_channel: required number of rows of ``spec``.
        n_label: number of target columns.
        strict_compat_filename: if true, parse file names by fixed character
            offsets; otherwise use the regex parser.
        sort_files: if true, process files in sorted path order; otherwise in
            the order returned by ``glob``.
        device: if not ``None``, every returned tensor is moved to this device.
        dtype: dtype of the input and target tensors.

    Returns:
        ``(train_input, train_output, train_length, train_label,
        test_input, test_output, test_length, test_label)`` where ``*_length``
        and ``*_label`` are ``long`` tensors with one entry per utterance.

    Raises:
        FileNotFoundError: no ``.mat`` file was found.
        KeyError: a file has no ``spec`` variable.
        ValueError: ``spec`` has the wrong number of rows (or the regex parser
            rejects a file name).
        RuntimeError: concatenated rows disagree with the summed lengths.
    """
    mat_paths = glob.glob(os.path.join(dir_name, "*.mat"))
    if not mat_paths:
        raise FileNotFoundError(f"No .mat files found in: {dir_name}")
    if sort_files:
        mat_paths.sort()

    if strict_compat_filename:
        parse_name = _parse_filename_strict_compat
    else:
        parse_name = _parse_filename_regex
    train_utterances = set(utterance_train_list)

    # Per-split accumulators: frames, per-frame targets, utterance lengths, labels.
    train_parts = {"x": [], "y": [], "len": [], "lab": []}
    test_parts = {"x": [], "y": [], "len": [], "lab": []}

    print(f"{len(mat_paths)} files in {dir_name} を読み込んでいます...")

    for mat_path in mat_paths:
        mat = loadmat(mat_path)
        if "spec" not in mat:
            raise KeyError(f"'spec' not found in mat file: {mat_path}")

        spec = mat["spec"]  # [n_channel, n_tau]
        if spec.shape[0] != n_channel:
            raise ValueError(f"spec.shape[0] != {n_channel}: {mat_path}, shape={spec.shape}")

        utterance, digit = parse_name(mat_path)
        n_tau = spec.shape[1]

        # Time-major copy of the spectrogram: [n_tau, n_channel].
        frames = torch.from_numpy(np.ascontiguousarray(spec.T)).to(dtype=dtype)
        # Per-frame target: -1 for every class except the spoken digit (+1).
        targets = -torch.ones((n_tau, n_label), dtype=dtype)
        targets[:, digit] = 1.0

        bucket = train_parts if utterance in train_utterances else test_parts
        bucket["x"].append(frames)
        bucket["y"].append(targets)
        bucket["len"].append(n_tau)
        bucket["lab"].append(digit)

    def _concat_rows(chunks, width):
        # An empty split still yields a correctly shaped [0, width] tensor.
        if chunks:
            return torch.cat(chunks, dim=0)
        return torch.empty((0, width), dtype=dtype)

    train_input = _concat_rows(train_parts["x"], n_channel)
    train_output = _concat_rows(train_parts["y"], n_label)
    test_input = _concat_rows(test_parts["x"], n_channel)
    test_output = _concat_rows(test_parts["y"], n_label)

    train_length = torch.tensor(train_parts["len"], dtype=torch.long)
    train_label = torch.tensor(train_parts["lab"], dtype=torch.long)
    test_length = torch.tensor(test_parts["len"], dtype=torch.long)
    test_label = torch.tensor(test_parts["lab"], dtype=torch.long)

    # Sanity check: concatenated rows must equal the sum of utterance lengths.
    if train_input.shape[0] != int(train_length.sum().item()):
        raise RuntimeError("train_input rows != sum(train_length)")
    if test_input.shape[0] != int(test_length.sum().item()):
        raise RuntimeError("test_input rows != sum(test_length)")

    result = (train_input, train_output, train_length, train_label,
              test_input, test_output, test_length, test_label)
    if device is not None:
        result = tuple(tensor.to(device) for tensor in result)
    return result

# -------------------------
# small helpers (length-vote)
# -------------------------
@torch.no_grad()
def predict_labels_by_lengths_majority_vote(logits_BTC, lengths):
    """
    Predict one label per utterance by majority vote over its time steps.

    The concatenated sequence is cut into consecutive segments whose sizes are
    given by ``lengths``. Within a segment every time step votes for its argmax
    class, and the class with the most votes is the segment's prediction.

    logits_BTC: [B=1, T, C]  (concatenated sequence)
    lengths:    [num_utterances]  (length of each utterance)
    return pred: [num_utterances] long, on the same device as ``logits_BTC``

    Raises RuntimeError if the lengths do not sum to T.
    """
    assert logits_BTC.dim() == 3 and logits_BTC.shape[0] == 1
    frame_logits = logits_BTC[0]  # [T, C]
    total_steps, n_classes = frame_logits.shape

    votes = []
    cursor = 0
    for seg_len in lengths.tolist():
        stop = cursor + seg_len
        frame_winners = frame_logits[cursor:stop].argmax(dim=-1)       # [seg_len]
        counts = torch.bincount(frame_winners, minlength=n_classes)    # [C]
        votes.append(int(counts.argmax().item()))
        cursor = stop

    # The segments must tile the whole sequence exactly.
    if cursor != total_steps:
        raise RuntimeError(f"sum(lengths)={cursor} != T={total_steps} (lengths/concat mismatch)")
    return torch.tensor(votes, dtype=torch.long, device=logits_BTC.device)

def wer_from_preds(pred, true):
    """Return the fraction of entries where ``pred`` differs from ``true`` as a Python float."""
    mismatches = pred != true
    error_rate = mismatches.float().mean()
    return error_rate.item()

def to_BcTD(x_TD):
    """Prepend two singleton dimensions: [T,D] -> [B=1,ch=1,T,D]."""
    with_one_axis = x_TD.unsqueeze(0)      # [1, T, D]
    return with_one_axis.unsqueeze(0)      # [1, 1, T, D]

# -------------------------
# main check pipeline
# -------------------------
@torch.no_grad()
def run_lyon_check(
    model,
    train_input_TD, train_output_TC, train_length, train_label,
    test_input_TD,  test_output_TC,  test_length,  test_label,
    alpha=0.0,
    washout_steps=0,
    time_stride=1,
):
    """
    Fit the readout by ridge regression on the training sequence and report
    the majority-vote error rate on the training and test sequences.

    Pipeline: (1) run the reservoir over the concatenated training sequence,
    (2) collect all states, (3) update the readout weights through
    ``ReadOut.ridge_regression_update``, (4) score each utterance by majority
    vote over its time steps.

    train_input_TD : [T_train, D]
    train_output_TC: [T_train, C]  (Lyon: -1/+1)
    train_length   : [num_train_utts]
    train_label    : [num_train_utts] (0..9)
    test_*         : same layout for the test split. ``test_output_TC`` is
                     accepted for symmetry but is not used.
    alpha          : regularisation value forwarded to the ridge update.
    washout_steps, time_stride : only 0 and 1 are supported.

    Returns ``(train_WER, test_WER)`` as Python floats.

    Raises NotImplementedError when ``washout_steps != 0`` or
    ``time_stride != 1``: per-utterance lengths are not converted for a
    washed-out / strided sequence. The check runs before any computation so an
    unsupported call neither pays for the reservoir pass nor modifies the
    model's readout.
    """
    # Fail fast: reject unsupported settings before touching the model.
    if washout_steps != 0 or time_stride != 1:
        raise NotImplementedError("washout/stride を使う場合は発話ごとの length 変換も必要です。まずは 0/1 で一致確認してください。")

    model.eval()
    device = next(model.parameters()).device

    # ---- (1) sequential update -> (2) collect all states (shape stays [B,T,H]) ----
    xtr_BcTD = to_BcTD(train_input_TD).to(device)     # [1,1,T,D]
    states_tr_BcTH = model.ESN(xtr_BcTD)              # [1,1,T,H]
    states_tr_BTH = extract_states_time_major(states_tr_BcTH)  # [1,T,H]

    # ---- build targets as [B,T,C] ----
    ytr_BTC_in = train_output_TC.unsqueeze(0).to(device)  # [1,T,C]
    # Unpacking three values also enforces that the targets are 3-D here.
    _, _, C = ytr_BTC_in.shape

    # Apply the same time selection to states and targets so they stay aligned.
    states_tr_BTH = apply_time_selection(states_tr_BTH, washout_steps, time_stride)
    ytr_BTC_in    = apply_time_selection(ytr_BTC_in,    washout_steps, time_stride)

    B2, T2, H = states_tr_BTH.shape
    assert (B2, T2) == ytr_BTC_in.shape[:2]

    targets_tr_BTC, _ = prepare_time_distributed_targets(
        labels=ytr_BTC_in,
        batch_size=B2,
        time_steps=T2,
        num_classes=C,
        device=device
    )  # [1,T2,C] (expected to equal the input here)

    # ---- (3) update the readout weights with the existing ridge-regression routine ----
    # The existing implementation is called with X as [H,N] and Y as [C,N] (N = B*T).
    X_HN = states_tr_BTH.reshape(B2 * T2, H).T.contiguous()     # [H,N]
    Y_CN = targets_tr_BTC.reshape(B2 * T2, C).T.contiguous()    # [C,N]

    # alpha=0 means no regularisation; use a small positive value if the solve is unstable.
    ReadOut.ridge_regression_update(outputs=X_HN, targets=Y_CN, model=model, alpha=alpha)

    # ---- recompute logits with the updated readout, keeping the [B,T,C] layout ----
    logits_tr_BTC = extract_logits_time_major(model.ReadOut(states_tr_BcTH))  # [1,T,C]
    logits_tr_BTC = apply_time_selection(logits_tr_BTC, washout_steps, time_stride)

    # ---- (4) majority vote per utterance on the training sequence ----
    pred_train = predict_labels_by_lengths_majority_vote(logits_tr_BTC, train_length.to(device))
    train_WER = wer_from_preds(pred_train, train_label.to(device))

    # ---- test side (readout weights stay as fitted on train) ----
    xte_BcTD = to_BcTD(test_input_TD).to(device)
    states_te_BcTH = model.ESN(xte_BcTD)
    logits_te_BTC = extract_logits_time_major(model.ReadOut(states_te_BcTH))  # [1,T,C]

    pred_test = predict_labels_by_lengths_majority_vote(logits_te_BTC, test_length.to(device))
    test_WER = wer_from_preds(pred_test, test_label.to(device))

    return train_WER, test_WER


# End-to-end check: load Lyon data, build an ESN, fit the readout and print both error rates.
if __name__ == "__main__":
    torch.manual_seed(0)
    np.random.seed(0)

    # --- Load Lyon data (train_list=[1..5], same as the reference code) ---
    train_list = [1, 2, 3, 4, 5]
    (train_input, train_output, train_length, train_label,
     test_input,  test_output,  test_length,  test_label) = read_lyon_decimation_128_torch(
        dir_name="./Lyon_decimation_128",
        utterance_train_list=train_list,
        strict_compat_filename=True,
        # NOTE(review): with sort_files=False the utterances are concatenated in
        # whatever order glob returns, which may differ between machines — confirm
        # this is intended for reproducibility.
        sort_files=False,   # the reference code does not fix the glob order, so False is recommended first
        device=None,
    )

    # --- Build the ESN (example for Lyon; adjust to your own param names) ---
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_params = {
        "reservoir_size": 100,
        "reservoir_weights_scale": 1.0,
        "input_size": 77,
        "channel_size": 1,
        "input_weights_scale": 1.0e4,     # reference: input_scale
        "spectral_radius": 0.9,           # reference: rho
        "reservoir_density": 0.05,        # reference: density
        "leak_rate": 1.0,                 # 1.0 recommended to match the reference code
        "ReadOut_output_size": 10,
        "Batch_Training": True,
    }
    dataset_params = {"sequence_length": 1, "slicing_size": 1}
    training_params = {}

    model = ESN(model_params, training_params, dataset_params).to(device)

    # --- Run (first verify agreement with washout=0, stride=1) ---
    train_WER, test_WER = run_lyon_check(
        model=model,
        train_input_TD=train_input, train_output_TC=train_output,
        train_length=train_length, train_label=train_label,
        test_input_TD=test_input, test_output_TC=test_output,
        test_length=test_length, test_label=test_label,
        alpha=0.0,           # if the solve is unstable, try 1e-6 to 1e-2
        washout_steps=0,
        time_stride=1,
    )

    print(f"訓練誤差： WER = {train_WER:.4f}")
    print(f"検証誤差： WER = {test_WER:.4f}")


500 files in ./Lyon_decimation_128 を読み込んでいます...
訓練誤差： WER = 0.0120
検証誤差： WER = 0.0760
