Skip to content

Commit

Permalink
tokenization tests added
Browse files Browse the repository at this point in the history
  • Loading branch information
susnato committed Mar 23, 2023
1 parent 4d9fcc3 commit 5dd2d02
Show file tree
Hide file tree
Showing 11 changed files with 604 additions and 35 deletions.
2 changes: 1 addition & 1 deletion docs/source/en/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ Flax), PyTorch, and/or TensorFlow.
| Pix2Struct | | | | | |
| PLBart | | | | | |
| PoolFormer | | | | | |
| Pop2Piano | | | | | |
| Pop2Piano | | | | | |
| ProphetNet | | | | | |
| QDQBert | | | | | |
| RAG | | | | | |
Expand Down
33 changes: 32 additions & 1 deletion docs/source/en/model_doc/pop2piano.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,38 @@ Tips:

1. Pop2Piano is an Encoder-Decoder based model like T5.
2. Pop2Piano can be used to generate midi-audio files for a given audio sequence. This HuggingFace implementation allows saving the midi_output as well as the stereo-mix output of the audio sequence.
3. Choosing different composers in Pop2PianoForConditionalGeneration.generate can lead to variety of different results.
3. Choosing different composers in `Pop2PianoForConditionalGeneration.generate()` can lead to a variety of different results.
4. Please note that HuggingFace implementation of Pop2Piano(both Pop2PianoForConditionalGeneration and Pop2PianoFeatureExtractor) can only work with one raw_audio sequence at a time. So if you want to process multiple files, please feed them one by one.

This model was contributed by [Susnato Dhar](https://huggingface.co/susnato).
The original code can be found [here](https://github.com/sweetcocoa/pop2piano).

Example:
```python
import librosa
from transformers import Pop2PianoFeatureExtractor, Pop2PianoForConditionalGeneration, Pop2PianoTokenizer

raw_audio, sr = librosa.load("audio.mp3", sr=44100)
model = Pop2PianoForConditionalGeneration.from_pretrained("susnato/pop2piano_dev")
feature_extractor = Pop2PianoFeatureExtractor.from_pretrained("susnato/pop2piano_dev")
tokenizer = Pop2PianoTokenizer.from_pretrained("susnato/pop2piano_dev")

model.eval()

feature_extractor_outputs = feature_extractor(raw_audio=raw_audio, audio_sr=sr, return_tensors="pt")
model_outputs = model.generate(feature_extractor_outputs, composer="composer1")

opt_postprocess = tokenizer(relative_tokens=model_outputs,
beatsteps=feature_extractor_outputs["beatsteps"],
ext_beatstep=feature_extractor_outputs["ext_beatstep"],
raw_audio=raw_audio,
sampling_rate=sr,
save_path="./Music/Outputs/",
audio_file_name="filename",
save_midi=True
)
```


## Pop2PianoConfig

Expand All @@ -59,3 +85,8 @@ The original code can be found [here](https://github.com/sweetcocoa/pop2piano).
- forward
- generate

## Pop2PianoTokenizer

[[autodoc]] Pop2PianoTokenizer
- __call__

3 changes: 2 additions & 1 deletion src/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3443,6 +3443,7 @@
]
else:
_import_structure["models.pop2piano"].append("Pop2PianoFeatureExtractor")
_import_structure["models.pop2piano"].append("Pop2PianoTokenizer")


# FLAX-backed objects
Expand Down Expand Up @@ -6554,7 +6555,7 @@
except OptionalDependencyNotAvailable:
from .utils.dummy_music_objects import *
else:
from .models.pop2piano import Pop2PianoFeatureExtractor
from .models.pop2piano import Pop2PianoFeatureExtractor, Pop2PianoTokenizer

try:
if not is_flax_available():
Expand Down
2 changes: 2 additions & 0 deletions src/transformers/models/pop2piano/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
pass
else:
_import_structure["feature_extraction_pop2piano"] = ["Pop2PianoFeatureExtractor"]
_import_structure["tokenization_pop2piano"] = ["Pop2PianoTokenizer"]


if TYPE_CHECKING:
Expand Down Expand Up @@ -89,6 +90,7 @@
pass
else:
from .feature_extraction_pop2piano import Pop2PianoFeatureExtractor
from .tokenization_pop2piano import Pop2PianoTokenizer

else:
import sys
Expand Down
18 changes: 0 additions & 18 deletions src/transformers/models/pop2piano/feature_extraction_pop2piano.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,6 @@ class Pop2PianoFeatureExtractor(SequenceFeatureExtractor):
Whether to preprocess for `LogMelSpectrogram` or not. For the current implementation this must be `True`.
padding_value (`int`, *optional*, defaults to 0):
Padding value used to pad the audio. Should correspond to silences.
vocab_size_special (`int`, *optional*, defaults to 4):
Number of special values.
vocab_size_note (`int`, *optional*, defaults to 128):
This represents the number of Note Values. Note values indicate a pitch event for one of the MIDI pitches.
But only the 88 pitches corresponding to piano keys are actually used.
vocab_size_velocity (`int`, *optional*, defaults to 2):
Number of Velocity tokens.
vocab_size_time (`int`, *optional*, defaults to 100):
This represents the number of Beat Shifts. Beat Shift [100 values] Indicates the relative time shift within
the segment quantized into 8th-note beats(half-beats).
n_fft (`int`, *optional*, defaults to 4096):
Size of Fast Fourier Transform, creates n_fft // 2 + 1 bins.
hop_length (`int`, *optional*, defaults to 1024):
Expand All @@ -78,10 +68,6 @@ def __init__(
sampling_rate: int = 22050,
use_mel: int = True,
padding_value: int = 0,
vocab_size_special: int = 4,
vocab_size_note: int = 128,
vocab_size_velocity: int = 2,
vocab_size_time: int = 100,
n_fft: int = 4096,
hop_length: int = 1024,
f_min: float = 10.0,
Expand All @@ -99,10 +85,6 @@ def __init__(
self.sampling_rate = sampling_rate
self.use_mel = use_mel
self.padding_value = padding_value
self.vocab_size_special = vocab_size_special
self.vocab_size_note = vocab_size_note
self.vocab_size_velocity = vocab_size_velocity
self.vocab_size_time = vocab_size_time
self.n_fft = n_fft
self.hop_length = hop_length
self.f_min = f_min
Expand Down
Loading

0 comments on commit 5dd2d02

Please sign in to comment.