В этом ноутбуке представлен пример предсказания, а также развертывание сервиса по генерации аккордов.

In [1]:
import numpy as np
import polars as pl
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from tqdm import tqdm

import re
from typing import List, Dict, Any, Tuple, Optional, Mapping, Set, Self, NamedTuple, TypedDict
from utils import *

In [23]:
class ChordTokenizer:
    def __init__(self):
        self._padding_token = "[PAD]"
        self._unknown_token = "[UNK]"
        self._cls_token = "[CLS]"
        self._sep_token = "[SEP]"
        self._mask_token = "[MASK]"
        
        # Special tokens IDs
        self._padding_id = 0
        self._cls_id = 1
        self._sep_id = 2
        self._mask_token_id = 3
        self._unknown_token_id = 4
        
        # –ú—É–∑—ã–∫–∞–ª—å–Ω—ã–µ —ç–ª–µ–º–µ–Ω—Ç—ã
        self.notes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
        self.moods = ['m', 'maj', 'min', 'aug', 'dim', 'sus2', 'sus4', 'sus']
        self.extensions = [
            '5', '6', '7', '9', '11', '13', 
            'add9', 'add11', 'add13'
        ]
        self.symbols = ['/', 'b', '#', '(', ')', ' ']
        
        # –°–ª–æ–∂–Ω—ã–µ –∞–∫–∫–æ—Ä–¥—ã –¥–ª—è –¥–æ–±–∞–≤–ª–µ–Ω–∏—è –≤ —Å–ª–æ–≤–∞—Ä—å
        self.complex_chords = [
            'A5(9)', 'Cadd9', 'Dsus4', 'Emadd9', 'G5(11)',
            'Fmaj7', 'G9', 'Am11', 'C7(9)', 'Dsus2',
            'Cmaj9', 'F#m7', 'Bbmaj7', 'E7sus4', 'Aadd9'
        ]
        
        self._init_vocab()

    @property
    def vocab(self) -> Mapping[int, str]:
        return self._vocab
    
    @property
    def reverse_vocab(self) -> Mapping[str, int]:
        return {token: idx for idx, token in self._vocab.items()}
    
    @property
    def cls_id(self) -> int:
        return self._cls_id
    
    @property
    def mask_token_id(self) -> int:
        return self._mask_token_id
    
    @property
    def padding_id(self) -> int:
        return self._padding_id
    
    @property
    def sep_id(self) -> int:
        return self._sep_id
    
    @property
    def unknown_token_id(self) -> int:
        return self._unknown_token_id

    def _init_vocab(self) -> None:
        """–ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è —Å–ª–æ–≤–∞—Ä—è —Å —Å–ø–µ—Ü–∏–∞–ª—å–Ω—ã–º–∏ —Ç–æ–∫–µ–Ω–∞–º–∏"""
        self._vocab = {
            self._padding_id: self._padding_token,
            self._cls_id: self._cls_token,
            self._sep_id: self._sep_token,
            self._mask_token_id: self._mask_token,
            self._unknown_token_id: self._unknown_token,
        }
    
    def fit(self, corpus: List[str]) -> Self:
        """–°–æ–∑–¥–∞–Ω–∏–µ —Å–ª–æ–≤–∞—Ä—è –Ω–∞ –æ—Å–Ω–æ–≤–µ –∫–æ—Ä–ø—É—Å–∞"""
        self._init_vocab()
        
        # –î–æ–±–∞–≤–ª—è–µ–º –±–∞–∑–æ–≤—ã–µ –º—É–∑—ã–∫–∞–ª—å–Ω—ã–µ —ç–ª–µ–º–µ–Ω—Ç—ã
        all_elements = (self.notes + self.moods + self.extensions + 
                       self.symbols + self.complex_chords)
        
        for element in all_elements:
            if element not in self._vocab.values():
                self._vocab[len(self._vocab)] = element
        
        # –û–±—Ä–∞–±–∞—Ç—ã–≤–∞–µ–º –∫–æ—Ä–ø—É—Å –¥–ª—è –∏–∑–≤–ª–µ—á–µ–Ω–∏—è –¥–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã—Ö –∞–∫–∫–æ—Ä–¥–æ–≤
        for text in corpus:
            chords = text.split()
            for chord in chords:
                if chord not in self.reverse_vocab and chord not in self._vocab.values():
                    self._vocab[len(self._vocab)] = chord
        
        return self
    
    def tokenize_text(self, text: str | List[str]) -> List[str] | List[List[str]]:
        """–¢–æ–∫–µ–Ω–∏–∑–∞—Ü–∏—è —Ç–µ–∫—Å—Ç–∞ –≤ —Å—Ç—Ä–æ–∫–æ–≤—ã–µ —Ç–æ–∫–µ–Ω—ã"""
        if isinstance(text, str):
            return self._tokenize_text(text)
        assert isinstance(text, list), "`text` should be str or List[str]"
        return [self._tokenize_text(chunk) for chunk in text]
 
    def tokenize_ids(self, text: str | List[str]) -> List[int] | List[List[int]]:
        """–¢–æ–∫–µ–Ω–∏–∑–∞—Ü–∏—è —Ç–µ–∫—Å—Ç–∞ –≤ ID —Ç–æ–∫–µ–Ω–æ–≤"""
        if isinstance(text, str):
            return self._tokenize_ids(text)
        assert isinstance(text, list), "`text` should be str or List[str]"
        return [self._tokenize_ids(chunk) for chunk in text]
    
    def decode(self, tokens: List[int]) -> str:
        """–î–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–µ ID —Ç–æ–∫–µ–Ω–æ–≤ –æ–±—Ä–∞—Ç–Ω–æ –≤ —Å—Ç—Ä–æ–∫—É"""
        content = []
        reverse_vocab = self.reverse_vocab
        
        for token_id in tokens:
            if token_id in [self._padding_id, self._cls_id, self._sep_id, self._mask_token_id]:
                continue
            
            token = self._vocab.get(token_id, self._unknown_token)
            if token == self._unknown_token:
                continue
                
            content.append(token)
        
        # –°–æ–±–∏—Ä–∞–µ–º –∞–∫–∫–æ—Ä–¥—ã –∏–∑ —Ç–æ–∫–µ–Ω–æ–≤
        result = []
        current_chord = []
        
        for token in content:
            if token == ' ':
                if current_chord:
                    result.append(''.join(current_chord))
                    current_chord = []
            else:
                current_chord.append(token)
        
        if current_chord:
            result.append(''.join(current_chord))
            
        return ' '.join(result)

    def _tokenize_text(self, text: str) -> List[str]:
        """–í–Ω—É—Ç—Ä–µ–Ω–Ω–∏–π –º–µ—Ç–æ–¥ –¥–ª—è —Ç–æ–∫–µ–Ω–∏–∑–∞—Ü–∏–∏ —Å—Ç—Ä–æ–∫–∏ –≤ —Ç–µ–∫—Å—Ç–æ–≤—ã–µ —Ç–æ–∫–µ–Ω—ã"""
        tokens = [self._cls_token]
        reverse_vocab = self.reverse_vocab
        
        chords = text.split()
        
        for i, chord in enumerate(chords):
            # –ü—ã—Ç–∞–µ–º—Å—è –Ω–∞–π—Ç–∏ —Ü–µ–ª—ã–π –∞–∫–∫–æ—Ä–¥ –≤ —Å–ª–æ–≤–∞—Ä–µ
            if chord in reverse_vocab:
                tokens.append(chord)
            else:
                # –†–∞–∑–±–∏–≤–∞–µ–º –∞–∫–∫–æ—Ä–¥ –Ω–∞ —Å–æ—Å—Ç–∞–≤–ª—è—é—â–∏–µ
                chord_parts = self._split_chord(chord)
                for part in chord_parts:
                    if part in reverse_vocab:
                        tokens.append(part)
                    else:
                        tokens.append(self._unknown_token)
            
            # –î–æ–±–∞–≤–ª—è–µ–º –ø—Ä–æ–±–µ–ª –º–µ–∂–¥—É –∞–∫–∫–æ—Ä–¥–∞–º–∏ (–∫—Ä–æ–º–µ –ø–æ—Å–ª–µ–¥–Ω–µ–≥–æ)
            if i < len(chords) - 1:
                tokens.append(' ')
        
        tokens.append(self._sep_token)
        return tokens
    
    def _tokenize_ids(self, text: str) -> List[int]:
        """–í–Ω—É—Ç—Ä–µ–Ω–Ω–∏–π –º–µ—Ç–æ–¥ –¥–ª—è —Ç–æ–∫–µ–Ω–∏–∑–∞—Ü–∏–∏ —Å—Ç—Ä–æ–∫–∏ –≤ ID —Ç–æ–∫–µ–Ω–æ–≤"""
        text_tokens = self._tokenize_text(text)
        reverse_vocab = self.reverse_vocab
        return [reverse_vocab.get(token, self._unknown_token_id) for token in text_tokens]
    
    def _split_chord(self, chord: str) -> List[str]:
        """–†–∞–∑–±–∏–≤–∞–µ—Ç –∞–∫–∫–æ—Ä–¥ –Ω–∞ —Å–æ—Å—Ç–∞–≤–ª—è—é—â–∏–µ —ç–ª–µ–º–µ–Ω—Ç—ã"""
        # –†–µ–≥—É–ª—è—Ä–Ω–æ–µ –≤—ã—Ä–∞–∂–µ–Ω–∏–µ –¥–ª—è —Ä–∞–∑–±–æ—Ä–∞ –∞–∫–∫–æ—Ä–¥–æ–≤
        pattern = r'[A-G][#b]?|[a-z]+|\d+|[\/\(\)#b]'
        parts = re.findall(pattern, chord)
        return parts
    
    def __len__(self) -> int:
        return len(self._vocab)

    
class ChordTokenizerHF:
    """Callable wrapper that gives a ChordTokenizer a dict-of-batches output.

    Calling the wrapper returns ``{'input_ids': ..., 'attention_mask': ...}``
    with one row per input text.
    """

    def __init__(self, chord_tokenizer: ChordTokenizer):
        self.chord_tokenizer = chord_tokenizer

    def __call__(self, texts, padding=True, truncation=True, max_length=128, return_tensors=None):
        """Encode one text or a list of texts.

        Args:
            texts: A single string or a list of strings.
            padding: When true, every row is padded up to ``max_length`` with
                the tokenizer's padding ID (mask value 0 for padded slots).
            truncation: When true, rows longer than ``max_length`` are cut.
            max_length: Target row length for truncation and padding.
            return_tensors: ``'pt'`` converts both outputs to torch tensors;
                any other value leaves them as nested lists.

        Returns:
            Dict with ``'input_ids'`` and ``'attention_mask'``.
        """
        batch = [texts] if isinstance(texts, str) else texts

        id_rows = []
        mask_rows = []
        for text in batch:
            ids = self.chord_tokenizer.tokenize_ids(text)

            # Cut rows that are too long
            if truncation and len(ids) > max_length:
                ids = ids[:max_length]

            mask = [1] * len(ids)

            # Fill rows that are too short
            if padding:
                missing = max_length - len(ids)
                ids = ids + [self.chord_tokenizer.padding_id] * missing
                mask = mask + [0] * missing

            id_rows.append(ids)
            mask_rows.append(mask)

        output = {'input_ids': id_rows, 'attention_mask': mask_rows}

        if return_tensors == 'pt':
            import torch
            for key in ('input_ids', 'attention_mask'):
                output[key] = torch.tensor(output[key])

        return output

    def decode(self, token_ids: List[int]) -> str:
        """Decode token IDs back into a string via the wrapped tokenizer."""
        return self.chord_tokenizer.decode(token_ids)

### Загружаем модель

In [24]:
def load_tokenizer(filepath):
    """Load a ChordTokenizer from a file written with ``torch.save``.

    The file must hold a dict with the keys ``'_vocab'``, ``'notes'``,
    ``'moods'``, ``'extensions'``, ``'symbols'`` and ``'complex_chords'``;
    each value is copied onto a freshly constructed tokenizer.

    Args:
        filepath: Path of the saved tokenizer state.

    Returns:
        The restored ``ChordTokenizer``.

    Raises:
        KeyError: If one of the expected keys is missing from the file.
    """
    # NOTE(security): torch.load is pickle-based; only load files you trust.
    # map_location='cpu' matches load_model, so a state saved on another
    # device still loads on a CPU-only host.
    tokenizer_data = torch.load(filepath, map_location='cpu')

    tokenizer = ChordTokenizer()

    # Restore the saved state
    tokenizer._vocab = tokenizer_data['_vocab']
    for field in ('notes', 'moods', 'extensions', 'symbols', 'complex_chords'):
        setattr(tokenizer, field, tokenizer_data[field])

    print(f"–¢–æ–∫–µ–Ω–∞–π–∑–µ—Ä –∑–∞–≥—Ä—É–∂–µ–Ω –∏–∑ {filepath}")
    return tokenizer
# Tokenizer instance shared by the prediction helpers and the Gradio handler below.
loaded_tokenizer = load_tokenizer('chord_tokenizer.pth')

–¢–æ–∫–µ–Ω–∞–π–∑–µ—Ä –∑–∞–≥—Ä—É–∂–µ–Ω –∏–∑ chord_tokenizer.pth


In [25]:
# Hyperparameters; the ones collected into `encoder_config` presumably have to
# match the values used when the checkpoint was trained — TODO confirm.
VOCAB_SIZE = len(loaded_tokenizer.vocab)  # number of entries in the loaded vocabulary
BATCH_SIZE = 128  # not referenced elsewhere in the visible code
MAX_SEQ_LEN = 50  # not referenced elsewhere in the visible code
N_LAYERS = 6
EMBEDDING_SIZE = 64
NUM_HEADS = 8
HEAD_EMBEDDING_SIZE = EMBEDDING_SIZE // NUM_HEADS  # per-head share of the embedding size
FCCN_HIDDEN_SIZE = EMBEDDING_SIZE * 4  # passed to the encoder as 'fcnn_hidden_size'
n_epoch = 10  # not referenced elsewhere in the visible code

In [26]:
def load_model(filepath, encoder_class, encoder_config):
    """Rebuild a BERTLM model and restore its weights from a checkpoint.

    Args:
        filepath: Path of the checkpoint; it must contain a
            ``'model_state_dict'`` entry.
        encoder_class: Callable that builds the encoder.
        encoder_config: Keyword arguments passed to ``encoder_class``.

    Returns:
        The ``BERTLM`` instance with the checkpoint weights loaded (on CPU).
    """
    saved = torch.load(filepath, map_location='cpu')

    # Recreate the architecture first, then pour the stored weights into it.
    model = BERTLM(encoder_class(**encoder_config))
    weights = saved['model_state_dict']
    model.load_state_dict(weights)

    print(f"–ú–æ–¥–µ–ª—å –∑–∞–≥—Ä—É–∂–µ–Ω–∞ –∏–∑ {filepath}")

    return model


# Keyword arguments that `load_model` forwards to the encoder constructor.
encoder_config = {
    'vocab_size': VOCAB_SIZE,
    'n_layers': N_LAYERS,
    'embedding_size': EMBEDDING_SIZE,
    'num_heads': NUM_HEADS,
    'head_embedding_size': HEAD_EMBEDDING_SIZE,
    'fcnn_hidden_size': FCCN_HIDDEN_SIZE,
}

# `Encoder` is not defined in this notebook; presumably it is provided by
# `from utils import *` — TODO confirm.
loaded_model = load_model('chord_bert_model.pth', Encoder, encoder_config)

–ú–æ–¥–µ–ª—å –∑–∞–≥—Ä—É–∂–µ–Ω–∞ –∏–∑ chord_bert_model.pth


In [27]:
# Switch the model to evaluation mode before inference (its repr shows Dropout layers).
loaded_model.eval()

BERTLM(
  (_encoder): Encoder(
    (_embeddings): BERTEmbedding(
      (_embeddings): Embedding(1213, 64, padding_idx=0)
      (_segment_embeddings): Embedding(3, 64, padding_idx=0)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (_positional_embeddings): RotaryPositionEmbedding()
    (_layers): ModuleList(
      (0-5): 6 x EncoderLayer(
        (_mha): RoPEMultiHeadedAttention(
          (_positional_embedding): RotaryPositionEmbedding()
          (_Q): Linear(in_features=64, out_features=64, bias=True)
          (_K): Linear(in_features=64, out_features=64, bias=True)
          (_V): Linear(in_features=64, out_features=64, bias=True)
          (_W_proj): Linear(in_features=64, out_features=64, bias=True)
          (_dropout): Dropout(p=0.1, inplace=False)
          (_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
        (_fcnn): FCNNBlock(
          (_linear1): Linear(in_features=64, out_features=256, bias=False)
          (_linear2): Linear(in_f

In [28]:
def predict_masked_chord(model, tokenizer, chord_sequence):
    """Fill every ``?`` placeholder in a chord sequence with a predicted chord.

    Example: ``"A B ? B D"`` -> the chord predicted for the ``?`` position is
    substituted. When the input contains no placeholder, one is appended at
    the end, so the function predicts the next chord.

    Args:
        model: Callable invoked as ``model(inputs, segment_label)``. Element
            ``[1]`` of its output (or of its ``.logits`` attribute when
            present) is indexed as ``[batch][position]`` and arg-maxed —
            presumably per-token vocabulary scores; TODO confirm.
        tokenizer: Tokenizer exposing ``tokenize_ids``, ``vocab``,
            ``mask_token_id``, ``_mask_token`` and ``_unknown_token``.
        chord_sequence: Whitespace-separated chords as one string, or a list
            of chord strings.

    Returns:
        Tuple ``(predicted_chord, result, predictions)``: the chord predicted
        for the first placeholder, the completed sequence joined with spaces,
        and the raw model output.

    Raises:
        ValueError: If the tokenizer does not map its mask token to
            ``mask_token_id``.
    """
    print(f'Input {chord_sequence}')
    if isinstance(chord_sequence, str):
        chords = chord_sequence.split()
    else:
        chords = list(chord_sequence)

    mask_token = tokenizer._mask_token
    masked_sequence = [mask_token if chord == '?' else chord for chord in chords]
    if mask_token not in masked_sequence:
        masked_sequence.append(mask_token)

    # Each chord is tokenized on its own; index 1 is the first token of each
    # per-chord ID list (with ChordTokenizer, the one right after [CLS]), so
    # the model input holds exactly one ID per chord.
    input_ids = [ids[1] for ids in tokenizer.tokenize_ids(masked_sequence)]

    mask_positions = [
        pos for pos, token_id in enumerate(input_ids)
        if token_id == tokenizer.mask_token_id
    ]
    if not mask_positions:
        raise ValueError("mask token is not present in the tokenized sequence")

    inputs = torch.tensor([input_ids])
    segment_label = torch.zeros_like(inputs)

    # Prediction
    with torch.no_grad():
        outputs = model(inputs, segment_label)
        predictions = outputs.logits if hasattr(outputs, 'logits') else outputs

    # Fill all placeholders from the same forward pass; previously only the
    # first one was replaced and later ones leaked out as literal "[MASK]".
    token_scores = predictions[1][0]
    predicted_chords = []
    for pos in mask_positions:
        predicted_index = torch.argmax(token_scores[pos]).item()
        chord = tokenizer.vocab.get(predicted_index, tokenizer._unknown_token)
        masked_sequence[pos] = chord
        predicted_chords.append(chord)

    predicted_chord = predicted_chords[0]
    result = " ".join(masked_sequence)
    print(f'Result {result}')
    return predicted_chord, result, predictions

In [29]:
# Example: predict the chord hidden behind the single '?' placeholder.
predicted_chord, result, predictions = predict_masked_chord(loaded_model, loaded_tokenizer, 'D C D F B B F B B F B ? F D F C D D C D D F')

Input D C D F B B F B B F B ? F D F C D D C D D F
Result D C D F B B F B B F B G F D F C D D C D D F


### Поднимаем gradio

### для предсказания жанра

In [30]:
from transformers import AutoModel
# Pretrained backbone handed to the genre classifier below.
bert_model = AutoModel.from_pretrained("prajjwal1/bert-mini")

hidden_dim = 128
num_classes = 15  # same as the number of genre names listed in predict_genres

# `ChordBERTMiniLSTMClassifier` is not defined in this notebook; presumably it
# is provided by `from utils import *` — TODO confirm.
model_genre = ChordBERTMiniLSTMClassifier(
    bert_model=bert_model,
    hidden_dim=hidden_dim,
    num_classes=num_classes,
    dropout=0.3
)

# Restore the trained classifier weights on CPU.
MODEL_PATH = 'bert_model_genre.pt'
state_dict = torch.load(MODEL_PATH, map_location="cpu")
model_genre.load_state_dict(state_dict)

# Evaluation mode for inference.
model_genre.eval()

def predict_genres(model, tokenizer, chord_sequence, threshold=0.5):
    """Predict the genres of a chord sequence with a multilabel classifier.

    Args:
        model: Classifier called with ``input_ids`` and ``attention_mask``;
            element ``[0]`` of its output is treated as one score per genre.
        tokenizer: Callable that encodes ``chord_sequence`` into a dict with
            ``"input_ids"`` and ``"attention_mask"``.
        chord_sequence: The chords to classify.
        threshold: A genre is reported when its sigmoid score exceeds this.

    Returns:
        List of genre names whose score is above ``threshold``.
    """
    genre_names = [
        'children / family', 'classical', 'electronic / edm', 'folk / country',
        'hip hop / rap', 'jazz / blues', 'latin / world', 'metal',
        'other / misc', 'pop', 'punk / hardcore', 'r&b / soul',
        'religious / worship', 'rock', 'soundtrack / score / instrumental',
    ]

    model.eval()
    encoded = tokenizer(
        chord_sequence,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=128
    )

    with torch.no_grad():
        batch_scores = model(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"]
        )

    # Independent per-genre probabilities for the first item of the output.
    probabilities = torch.sigmoid(batch_scores[0])
    flags = (probabilities > threshold).int().tolist()

    return [name for idx, name in enumerate(genre_names) if flags[idx] == 1]

Input D ? D F B B F B B F B B F D F C D D C D F
Result D G D F B B F B B F B B F D F C D D C D F


In [33]:
import gradio as gr
import numpy as np
import io
import wave
import random


# Guitar-sound synthesis
SAMPLE_RATE = 22050  # samples per second of the generated audio

# Root-note frequencies in Hz (A = 440.0); sharps and flats that name the same
# pitch share a value.
NOTE_FREQS = {
    'C': 261.6, 'C#': 277.2, 'Db': 277.2,
    'D': 293.7, 'D#': 311.1, 'Eb': 311.1,
    'E': 329.6, 'F': 349.2, 'F#': 370.0,
    'Gb': 370.0, 'G': 392.0, 'G#': 415.3,
    'Ab': 415.3, 'A': 440.0, 'A#': 466.2,
    'Bb': 466.2, 'B': 493.9
}

# Chord quality -> semitone offsets from the root.
INTERVALS = {
    'maj': [0, 4, 7],
    'min': [0, 3, 7],
    'dim': [0, 3, 6],
    'aug': [0, 4, 8],
    '7': [0, 4, 7, 10],
    'maj7': [0, 4, 7, 11],
    'm7': [0, 3, 7, 10],
    'sus2': [0, 2, 7],
    'sus4': [0, 5, 7],
    '5': [0, 7]
}

# Two-character roots ('C#', 'Bb') must be tried before one-character ones.
_ROOTS_LONGEST_FIRST = sorted(NOTE_FREQS, key=len, reverse=True)

# Spellings that stand for a quality stored under another key in INTERVALS.
_QUALITY_ALIASES = {'': 'maj', 'm': 'min', 'min7': 'm7', 'sus': 'sus4'}


def parse_chord(chord):
    """Split a chord name into its root note and a quality known to INTERVALS.

    The root is the longest ``NOTE_FREQS`` key the chord starts with. The rest
    of the name is reduced to a supported quality: a slash bass (``Am/G``) is
    dropped, then the longest prefix that is an ``INTERVALS`` key (or an alias
    of one) wins, so ``Am11`` resolves to ``'min'`` and ``E7sus4`` to ``'7'``
    instead of both collapsing to major. Names with no usable prefix, and
    names without a recognisable root, resolve to ``'maj'``.

    Args:
        chord: Chord name such as ``"F#m7"``; surrounding whitespace is ignored.

    Returns:
        Tuple ``(root, quality)``. ``root`` is ``''`` when no note name
        matches; ``quality`` is always a key of ``INTERVALS``.
    """
    chord = chord.strip()
    root = ''
    quality = ''
    for note in _ROOTS_LONGEST_FIRST:
        if chord.startswith(note):
            root = note
            quality = chord[len(note):]
            break

    # The bass note of a slash chord does not change the chord's quality.
    quality = quality.split('/', 1)[0]

    # Longest known prefix; the empty prefix maps to 'maj', so this always returns.
    for end in range(len(quality), -1, -1):
        candidate = quality[:end]
        candidate = _QUALITY_ALIASES.get(candidate, candidate)
        if candidate in INTERVALS:
            return root, candidate
    return root, 'maj'

def chord_to_wave_guitar(chord, duration=1.1):
    """Render one chord as a mono 16-bit WAV file held in memory.

    Each chord tone is a sine wave at the root frequency shifted by the
    tone's semitone offset, shaped by an exponential decay; the tones are
    summed and normalised to full scale.

    Args:
        chord: Chord name understood by ``parse_chord``. An unknown root falls
            back to 261.6 Hz.
        duration: Length of the rendered sound in seconds. A non-positive
            duration yields a valid WAV file with no frames.

    Returns:
        ``bytes`` of a complete WAV file (header included) at ``SAMPLE_RATE``.
    """
    root, quality = parse_chord(chord)
    base_freq = NOTE_FREQS.get(root, 261.6)
    intervals = INTERVALS[quality]

    n_samples = max(int(SAMPLE_RATE * duration), 0)
    t = np.linspace(0, duration, n_samples, False)
    envelope = np.exp(-3 * t)  # guitar-like decay, identical for every tone
    wave_data = np.zeros_like(t)
    for interval in intervals:
        freq = base_freq * 2 ** (interval / 12)
        wave_data += 0.3 * np.sin(2 * np.pi * freq * t) * envelope

    # Normalise only when there is a signal: an empty buffer has no maximum
    # and an all-zero one would divide by zero.
    peak = np.max(np.abs(wave_data)) if wave_data.size else 0.0
    if peak > 0:
        wave_data = wave_data / peak
    pcm = (wave_data * 32767).astype(np.int16)

    buf = io.BytesIO()
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(pcm.tobytes())
    return buf.getvalue()

def sequence_to_audio(chords: str):
    """Render a whitespace-separated chord sequence as one in-memory WAV file.

    Every chord is rendered with ``chord_to_wave_guitar`` and its audio frames
    are appended, in order, to a single mono 16-bit stream.

    Args:
        chords: Whitespace-separated chord names; an empty string yields a
            WAV file with no frames.

    Returns:
        ``bytes`` of a complete WAV file (header included) at ``SAMPLE_RATE``.
    """
    full = io.BytesIO()
    with wave.open(full, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(SAMPLE_RATE)
        for chord in chords.split():
            # Read exactly the frames the chunk declares and close the reader
            # afterwards, instead of leaking it and asking for a magic count.
            with wave.open(io.BytesIO(chord_to_wave_guitar(chord)), "rb") as reader:
                wf.writeframes(reader.readframes(reader.getnframes()))
    return full.getvalue()


def predict_chords(user_input, predict_genre_flag):
    """Gradio handler: complete the chords, synthesise audio, optionally add genres.

    Args:
        user_input: Chord sequence typed by the user; ``?`` marks the chord to
            predict.
        predict_genre_flag: When true, genres are predicted as well.

    Returns:
        Tuple ``(result_chords, (SAMPLE_RATE, samples), genre)``: the completed
        chord string, the audio as an int16 NumPy array with its sample rate,
        and either the list of predicted genres or a fixed placeholder message.
    """
    result_chords = predict_masked_chord(loaded_model, loaded_tokenizer, user_input)[1]
    audio_bytes = sequence_to_audio(result_chords)

    # sequence_to_audio returns a whole WAV file. Decode it to raw frames first:
    # viewing the file bytes directly would play the header as audio samples.
    with wave.open(io.BytesIO(audio_bytes), "rb") as reader:
        frames = reader.readframes(reader.getnframes())
    audio = (SAMPLE_RATE, np.frombuffer(frames, dtype=np.int16))

    # NOTE(review): genres are predicted from the raw user input (still holding
    # '?'), not from the completed sequence — confirm this is intended.
    if predict_genre_flag:
        genre = predict_genres(model_genre, ChordTokenizerHF(loaded_tokenizer), user_input)
    else:
        genre = '–ñ–∞–Ω—Ä –Ω–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω'
    return result_chords, audio, genre



# Web UI: a chord text box and a genre checkbox feed `predict_chords`; its three
# return values fill the outputs below in the same order.
iface = gr.Interface(
    fn=predict_chords,
    inputs=[
        gr.Textbox(label="–í–≤–µ–¥–∏—Ç–µ –∞–∫–∫–æ—Ä–¥—ã", placeholder="Am ? Dm E"),
        gr.Checkbox(label="–ü—Ä–µ–¥—Å–∫–∞–∑–∞—Ç—å –∂–∞–Ω—Ä", value=False)
    ],
    outputs=[
        gr.Textbox(label="–°–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ –∞–∫–∫–æ—Ä–¥—ã"),
        gr.Audio(label="–ê—É–¥–∏–æ", type="numpy"),
        gr.Textbox(label="–ü—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–Ω—ã–π –∂–∞–Ω—Ä")
    ],
    title="üé∏ –ì–µ–Ω–µ—Ä–∞—Ç–æ—Ä –∞–∫–∫–æ—Ä–¥–æ–≤", 
    # NOTE(review): newer Gradio releases may have renamed this argument — verify
    # against the installed version.
    allow_flagging='never'
)
# Start the local web server (the recorded cell output shows the URL it binds to).
iface.launch()



* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.




Input D C D F B B F B B F B B F D F C D D C ? F
Result D C D F B B F B B F B B F D F C D D C C F
Input D C D F B B F B B F B B F D F C D D C ? F
Result D C D F B B F B B F B B F D F C D D C C F
