In [94]:
import numpy as np
import scipy.io.wavfile

def additive_synthesis(frequencies, amplitudes, duration, samplerate=16000):
    """Synthesize a signal as a sum of sine partials.

    Each partial is ``a * sin(2 * pi * f * t)``, where ``f`` and ``a`` are
    paired positionally from ``frequencies`` and ``amplitudes``.

    Parameters
    ----------
    frequencies : iterable of float
        Frequency of each partial (Hz when ``duration`` is in seconds and
        ``samplerate`` is in samples per second).
    amplitudes : iterable of float
        Amplitude of each partial, in the same order as ``frequencies``.
    duration : float
        Length of the generated signal.
    samplerate : int, optional
        Number of samples per unit of ``duration`` (default 16000).

    Returns
    -------
    numpy.ndarray
        1-D float array with ``int(samplerate * duration)`` samples. It is
        all zeros when no partials are given.

    Warns
    -----
    UserWarning
        If ``frequencies`` and ``amplitudes`` differ in length. The extra
        trailing entries of the longer one are ignored, as before, but the
        mismatch is no longer silent.
    """
    import warnings  # local import so this cell does not depend on another cell

    # Materialize once so generators can be both measured and iterated.
    freqs = list(frequencies)
    amps = list(amplitudes)
    if len(freqs) != len(amps):
        warnings.warn(
            f"additive_synthesis got {len(freqs)} frequencies but {len(amps)} "
            "amplitudes; unmatched trailing entries are ignored",
            stacklevel=2,
        )

    t = np.linspace(0, duration, int(samplerate * duration), endpoint=False)
    # Start from an explicit zero array so an empty partial list still yields
    # an array of the right length instead of the integer 0.
    signal = np.zeros_like(t)
    for f, a in zip(freqs, amps):
        signal += a * np.sin(2 * np.pi * f * t)
    return signal

# Example usage
# One sample rate shared by the synthesis and the WAV header, so the two
# cannot drift apart.
samplerate = 16000
freqs = [40, 320, 440, 660]
# One amplitude per frequency. The list previously had only three entries, so
# the 660 Hz partial was silently dropped when the two lists were zipped.
# The explicit 0.0 keeps the generated audio unchanged while making that visible.
# TODO(review): set the intended amplitude for the 660 Hz partial.
amps = [0.5, 0.3, 0.2, 0.0]
duration = 3.0
sound = additive_synthesis(freqs, amps, duration, samplerate=samplerate)

# Write as 32-bit float: float64 input would produce a 64-bit float WAV,
# which is a far less widely supported format.
scipy.io.wavfile.write('sound.wav', samplerate, sound.astype(np.float32))




In [None]:
#N=10 freqs

In [79]:
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
import librosa
import torch

# Class index -> genre name lookup, used to turn the argmax of the
# classifier's logits into a readable label.
# NOTE(review): presumably this order matches the label order of the
# classifier checkpoint — verify against the model's own id2label config.
genre_mapping = dict(enumerate([
    "Electronic",
    "Rock",
    "Punk",
    "Experimental",
    "Hip-Hop",
    "Folk",
    "Chiptune / Glitch",
    "Instrumental",
    "Pop",
    "International",
]))

# Checkpoint identifiers, named once so they are easy to find and swap.
CLASSIFIER_CHECKPOINT = "gastonduault/music-classifier"
EXTRACTOR_CHECKPOINT = "facebook/wav2vec2-large"

# Classifier whose logits are decoded through genre_mapping, and the
# feature extractor that turns raw waveforms into its input tensors.
model = Wav2Vec2ForSequenceClassification.from_pretrained(CLASSIFIER_CHECKPOINT)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(EXTRACTOR_CHECKPOINT)

# Function for preprocessing audio for prediction
def preprocess_audio(audio_path, sampling_rate=16000):
    """Load an audio file and convert it into model inputs.

    Parameters
    ----------
    audio_path : str or path-like
        Audio file to load with ``librosa.load``.
    sampling_rate : int, optional
        Rate the audio is resampled to on load (default 16000). The rate
        reported back by librosa is forwarded to the feature extractor, so
        the two values cannot disagree.

    Returns
    -------
    The output of the module-level ``feature_extractor``, called with
    ``return_tensors="pt"`` and ``padding=True``. Callers in this notebook
    unpack it into the model with ``model(**inputs)``.
    """
    audio_array, loaded_rate = librosa.load(audio_path, sr=sampling_rate)
    return feature_extractor(
        audio_array,
        sampling_rate=loaded_rate,
        return_tensors="pt",
        padding=True,
    )


In [84]:
# Audio file to classify.
# NOTE(review): this is an absolute, machine-specific path; the synthesis
# cell writes 'sound.wav' relative to the working directory — confirm that
# both refer to the same file.
audio_path = "/home/flowers-user/adtool/examples/synth/sound.wav"

# Turn the file into model-ready tensors
inputs = preprocess_audio(audio_path)

# Forward pass without gradient tracking; keep the raw class scores
with torch.no_grad():
    logits = model(**inputs).logits

# Index of the highest-scoring class, as a plain Python int
predicted_class = logits.argmax(dim=-1).item()

# Report the result
print(f"song analized:{audio_path}")
print(f"Predicted genre: {genre_mapping[predicted_class]}")

song analized:/home/flowers-user/Music/Vrais.mp3
Predicted genre: Hip-Hop


In [3]:
# Load model directly
from transformers import AutoProcessor, AutoModelForAudioClassification
import librosa

# NOTE(review): this rebinds the global `model`, which another cell assigns to
# a Wav2Vec2ForSequenceClassification checkpoint. Whichever cell ran last
# decides which network later cells use, and the 10-entry genre_mapping may
# not describe this checkpoint's classes — confirm before decoding with it.
GENREVIM_CHECKPOINT = "MarekCech/GenreVim-Music-Classification-DistilHuBERT"
model = AutoModelForAudioClassification.from_pretrained(GENREVIM_CHECKPOINT)

In [28]:
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor


import torch

# Feature extractor that converts raw waveforms into model input tensors.
# NOTE(review): it is loaded from the wav2vec2-large checkpoint regardless of
# which checkpoint `model` currently holds — confirm the two are compatible.
EXTRACTOR_CHECKPOINT = "facebook/wav2vec2-large"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(EXTRACTOR_CHECKPOINT)


# Function for preprocessing audio for prediction
def preprocess_audio(audio_path, sampling_rate=16000):
    """Load an audio file and convert it into model inputs.

    Parameters
    ----------
    audio_path : str or path-like
        Audio file to load with ``librosa.load``.
    sampling_rate : int, optional
        Rate the audio is resampled to on load (default 16000). The rate
        reported back by librosa is forwarded to the feature extractor, so
        the two values cannot disagree.

    Returns
    -------
    The output of the module-level ``feature_extractor``, called with
    ``return_tensors="pt"`` and ``padding=True``. Callers in this notebook
    unpack it into the model with ``model(**inputs)``.
    """
    audio_array, loaded_rate = librosa.load(audio_path, sr=sampling_rate)
    return feature_extractor(
        audio_array,
        sampling_rate=loaded_rate,
        return_tensors="pt",
        padding=True,
    )


# Audio file to score
audio_path = "/home/flowers-user/adtool/examples/synth/sound.wav"

# Turn the file into model-ready tensors
inputs = preprocess_audio(audio_path)

# Forward pass without gradient tracking.
# NOTE(review): `model` is whichever checkpoint was most recently assigned in
# the kernel; this cell does not load one itself.
with torch.no_grad():
    logits = model(**inputs).logits

# Softmax over the last (class) axis, flattened to a 1-D NumPy array
probs = torch.softmax(logits, dim=-1).flatten().numpy()


# The goal space is not a hypercube but a simplex: softmax outputs are non-negative and sum to 1.



In [29]:
# Display the probability vector produced by the softmax over the model's logits.
probs

array([3.7884933e-03, 1.2772303e-04, 1.6620138e-04, 4.3238746e-03,
       3.4952513e-04, 7.3576248e-03, 2.7683962e-03, 3.0574331e-04,
       6.7073375e-04, 3.5359649e-04, 4.4623679e-01, 4.8845625e-03,
       1.0407319e-02, 8.4842811e-04, 1.7685121e-03, 5.1541668e-01,
       2.2582195e-04], dtype=float32)