In [4]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
# Re-create the `np.complex` attribute on the NumPy module as the built-in `complex`.
# NOTE(review): presumably a compatibility shim for a dependency that still
# references this alias (librosa is imported in the next cell) — confirm it is
# still required with the installed versions.
np.complex = complex

In [5]:
import torch
from torch import nn
import librosa
import soundfile as sf
import os

In [6]:
# Apply seaborn's theme and make 9x6 the default figure size for later plots.
sns.set(rc={"figure.figsize":(9,6)})

In [7]:
# Load the training table; the preview below shows an `id` column (audio file
# name) and a `message` column (transcript).
data = pd.read_csv("train.csv")

In [8]:
# Preview the first rows of the training table.
data.head()

Unnamed: 0,id,message
0,1.opus,03ЩУЫЛПИГХ
1,2.opus,ЪЛТ0ДС6А3Г
2,3.opus,5ЭКЫБЗХЯН
3,4.opus,ЖЫЦОИ68КФ
4,5.opus,32Ю7МЫ ЗЛ


In [9]:
# Build the character vocabulary from every transcript in the training table.
all_text = "".join(data["message"].astype(str).tolist())
unique_chars = sorted(set(all_text))

# Index 0 is reserved for the blank symbol; real characters follow in sorted order.
blank_token = "_"
vocab = [blank_token, *unique_chars]

# Forward (character -> index) and reverse (index -> character) lookup tables.
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = dict(zip(char2idx.values(), char2idx.keys()))

In [10]:
# Extract Tensor
def extract_mel_spectrogram(path, sr=8000, n_mels=64, n_fft=512, hop_length=128):
    """Load an audio file and return its log-mel spectrogram as a float tensor.

    The waveform is loaded at sample rate ``sr``, trimmed with
    ``librosa.effects.trim(top_db=30)``, peak-normalised, converted to a mel
    power spectrogram and then to decibels relative to its own maximum
    (clipped 40 dB below the peak).

    Note: a later cell in this notebook defines another function with the same
    name that returns a fixed-width NumPy array instead.

    Args:
        path: Path of the audio file to read.
        sr: Sample rate passed to ``librosa.load`` and the mel transform.
        n_mels: Number of mel bands.
        n_fft: FFT window size.
        hop_length: Hop between successive frames, in samples.

    Returns:
        ``torch.FloatTensor`` with a leading singleton dimension added in
        front of the spectrogram returned by librosa.
    """
    waveform, _ = librosa.load(path, sr=sr)
    waveform, _ = librosa.effects.trim(waveform, top_db=30)

    # Peak-normalise; the epsilon keeps the division finite for an all-zero signal.
    peak = np.max(np.abs(waveform))
    waveform = waveform / (peak + 1e-6)

    mel_kwargs = dict(sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
    mel_power = librosa.feature.melspectrogram(y=waveform, **mel_kwargs)
    mel_db = librosa.power_to_db(mel_power, ref=np.max, top_db=40)

    spectrogram = torch.from_numpy(mel_db)
    return spectrogram.unsqueeze(0).float()

import time

# Time the feature extraction for a single audio file.
# perf_counter() is a monotonic high-resolution clock meant for measuring
# intervals, so the result is not affected by system clock adjustments.
start = time.perf_counter()
spec = extract_mel_spectrogram("morse_dataset/morse_dataset/34999.opus")
print(f"Mel-spectrogram time: {time.perf_counter() - start:.3f}s")

Mel-spectrogram time: 1.520s


In [None]:
from tqdm import tqdm

def extract_mel_spectrogram(path, sr=8000, n_mels=64, n_fft=512, hop_length=128, max_len=512):
    """Load an audio file and return a fixed-width log-mel spectrogram.

    The waveform is loaded at sample rate ``sr``, trimmed with
    ``librosa.effects.trim(top_db=30)``, peak-normalised, converted to a mel
    power spectrogram and then to decibels relative to its own maximum. The
    time axis is finally right-padded or cropped to exactly ``max_len`` frames.

    Args:
        path: Path of the audio file to read.
        sr: Sample rate passed to ``librosa.load`` and the mel transform.
        n_mels: Number of mel bands.
        n_fft: FFT window size.
        hop_length: Hop between successive frames, in samples.
        max_len: Number of time frames in the returned array.

    Returns:
        ``np.ndarray`` of dtype ``float32`` with shape ``(n_mels, max_len)``.
    """
    # Dynamic range kept below the peak by power_to_db.
    top_db = 40.0

    y, _ = librosa.load(path, sr=sr)
    y, _ = librosa.effects.trim(y, top_db=30)
    # Peak-normalise; the epsilon keeps the division finite for an all-zero signal.
    y = y / (np.max(np.abs(y)) + 1e-6)
    mel = librosa.feature.melspectrogram(
        y=y, sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels
    )
    mel_db = librosa.power_to_db(mel, ref=np.max, top_db=top_db)

    n_frames = mel_db.shape[1]
    if n_frames < max_len:
        # With ref=np.max the loudest bin sits at 0 dB and everything else is
        # clipped to at most `top_db` below it. Padding with 0 would therefore
        # append frames at peak level; pad with the floor value instead so the
        # padded tail reads as silence.
        mel_db = np.pad(
            mel_db,
            ((0, 0), (0, max_len - n_frames)),
            mode='constant',
            constant_values=-top_db,
        )
    else:
        mel_db = mel_db[:, :max_len]

    return mel_db.astype(np.float32)  # (n_mels, max_len)

# Directory that holds the .opus recordings.
input_folder = "morse_dataset/morse_dataset/"

# Every spectrogram is padded/cropped to this many time frames.
max_len = 512
mel_list = []
file_names = []

# Process the recordings in sorted file-name order.
file_list = sorted(f for f in os.listdir(input_folder) if f.endswith(".opus"))

for filename in tqdm(file_list):
    path = os.path.join(input_folder, filename)
    try:
        mel = extract_mel_spectrogram(path, max_len=max_len)
    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print(f"❌ Ошибка при обработке {filename}: {e}")
    else:
        # Keep the two lists aligned: entry i of one describes entry i of the other.
        mel_list.append(mel)
        file_names.append(filename)

# Everything is now held in memory.
# Stack the per-file arrays along a new leading axis: one row per processed file.
mel_array = np.stack(mel_list)

# The same data as a DataFrame, one row per file; the "mel" cell holds the array.
import pandas as pd
df = pd.DataFrame(dict(filename=file_names, mel=mel_list))

print("✅ Готово! Кол-во спектрограмм:", len(df))

100%|█████████████████████████████████████| 35000/35000 [23:54<00:00, 24.41it/s]


In [1]:
from torch.utils.data import Dataset
import torch

class MorseDataset(Dataset):
    """Pairs precomputed spectrograms with their index-encoded transcripts.

    Rows come from a CSV file; each row's ``id`` is looked up in ``filenames``
    to find the matching entry of ``mel_array``.
    """

    def __init__(self, mel_array, filenames, csv_path, char2idx):
        """
        Args:
            mel_array: Stacked spectrograms, indexed along the first axis;
                either a NumPy array (converted to a float tensor) or a
                ``torch.Tensor`` (used as is). Documented by the notebook as
                ``[N, 64, 512]``.
            filenames: File names in the same order as ``mel_array``.
            csv_path: Path to a CSV file with ``id`` and ``message`` columns.
            char2idx: Mapping from character to class index.
        """
        # Read transcripts as text: without this an all-digit column would be
        # parsed as numbers, dropping leading zeros and breaking iteration
        # over characters in __getitem__.
        self.df = pd.read_csv(csv_path, dtype={"message": str})
        if isinstance(mel_array, np.ndarray):
            self.mel_array = torch.from_numpy(mel_array).float()
        else:
            self.mel_array = mel_array
        self.filenames = filenames
        self.char2idx = char2idx

        # Map file name -> position in mel_array.
        self.name_to_idx = {fname: i for i, fname in enumerate(filenames)}

    def __len__(self):
        """Number of rows in the CSV file."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(spec, target)`` for CSV row ``idx``.

        Returns:
            spec: The spectrogram stored for the row's ``id``.
            target: 1-D ``torch.long`` tensor of character indices.

        Raises:
            ValueError: If the row's ``id`` is not present in ``filenames``.
            KeyError: If the transcript contains a character missing from
                ``char2idx``.
        """
        row = self.df.iloc[idx]
        fname = row["id"]
        # The vocabulary cell builds its character set from
        # `message.astype(str)`; coerce the same way here so non-string cells
        # (e.g. missing values) are encoded consistently.
        text = str(row["message"])

        mel_idx = self.name_to_idx.get(fname)
        if mel_idx is None:
            raise ValueError(f"Файл {fname} не найден в массиве спектрограмм")

        spec = self.mel_array[mel_idx]
        target = torch.tensor([self.char2idx[c] for c in text], dtype=torch.long)
        return spec, target

In [2]:
def collate_fn(batch):
    """Collate ``(spectrogram, target)`` pairs into padded batch tensors.

    Args:
        batch: Sequence of ``(spec, target)`` pairs. ``spec`` is a tensor of
            shape ``[n_mels, T]`` or ``[1, n_mels, T]``; ``target`` is a 1-D
            tensor of class indices.

    Returns:
        specs_padded: ``[B, 1, n_mels, T_max]``; shorter spectrograms are
            right-padded along time with ``pad_sequence``'s default value 0.
        spec_lens: ``[B]`` original number of time frames of each spectrogram.
        targets_concat: 1-D tensor with all targets concatenated.
        target_lens: ``[B]`` length of each target.

    NOTE(review): ``spec_lens`` counts input frames. The model in this notebook
    halves the time axis twice, so lengths handed to a CTC loss presumably need
    to be divided by 4 — confirm in the training loop.
    """
    # Local import: the notebook's import cells do not bring this name in.
    from torch.nn.utils.rnn import pad_sequence

    specs, targets = zip(*batch)

    # Lengths along the time axis, before padding.
    spec_lens = torch.tensor([s.shape[-1] for s in specs], dtype=torch.long)

    # pad_sequence pads along the first dimension, so go time-major first.
    # Only drop the channel axis when it is actually there: an unconditional
    # squeeze(0) would collapse a 2-D spec whose n_mels happens to be 1.
    specs_seq = [
        (s.squeeze(0) if s.dim() == 3 else s).transpose(0, 1)  # [T, n_mels]
        for s in specs
    ]
    specs_padded = pad_sequence(specs_seq, batch_first=True)   # [B, T_max, n_mels]
    specs_padded = specs_padded.transpose(1, 2).unsqueeze(1)   # [B, 1, n_mels, T_max]

    # Targets: flat concatenation plus per-sample lengths.
    target_lens = torch.tensor([t.numel() for t in targets], dtype=torch.long)
    targets_concat = torch.cat(targets)

    return specs_padded, spec_lens, targets_concat, target_lens

In [3]:
class MorseCTCModel(nn.Module):
    """CNN encoder followed by a bidirectional LSTM and a per-frame classifier.

    The network maps a spectrogram batch ``[B, 1, n_mels, T]`` to per-frame
    log-probabilities ``[B, T', num_classes]``, where both the frequency and
    the time axis are halved twice by the max-pooling layers.
    """

    def __init__(self, n_mels=64, num_classes=None, lstm_hidden=128, lstm_layers=2):
        """
        Args:
            n_mels: Number of mel bands in the input.
            num_classes: Size of the output alphabet. When omitted, the length
                of the notebook-level ``vocab`` list is used; it is looked up
                when the model is constructed rather than when the class is
                defined, so re-running the vocabulary cell is picked up.
            lstm_hidden: Hidden size of each LSTM direction.
            lstm_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        if num_classes is None:
            num_classes = len(vocab)

        # CNN encoder: two conv blocks, each halving frequency and time.
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2)),  # n_mels/2, T/2

            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2)),  # n_mels/4, T/4
        )
        # Per-frame feature size fed to the LSTM: channels * pooled mel bins.
        self.lstm_input_size = (n_mels // 4) * 32

        self.lstm = nn.LSTM(
            input_size=self.lstm_input_size,
            hidden_size=lstm_hidden,
            num_layers=lstm_layers,
            batch_first=True,
            bidirectional=True,
            # Inter-layer dropout is only applied when there is more than one layer.
            dropout=0.3 if lstm_layers > 1 else 0
        )
        self.classifier = nn.Linear(lstm_hidden * 2, num_classes)

    def forward(self, x):
        """Compute per-frame log-probabilities.

        Args:
            x: Input batch of shape ``[B, 1, n_mels, T]``.

        Returns:
            Tensor of shape ``[B, T', num_classes]`` holding log-softmax
            scores over the last dimension.
        """
        x = self.cnn(x)                        # [B, C, n_mels/4, T/4]
        # Make time the sequence axis and merge channels with frequency.
        x = x.permute(0, 3, 1, 2).flatten(2)   # [B, T', C*F]

        x, _ = self.lstm(x)                    # [B, T', 2*hidden]
        x = self.classifier(x)                 # [B, T', num_classes]
        return x.log_softmax(dim=-1)

NameError: name 'nn' is not defined