<a href="https://colab.research.google.com/github/ThaDuyx/Classify/blob/dev/classify.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preliminary tasks

In [None]:
# install libraries
%%capture
!pip install datasets
!pip install transformers
!pip install evaluate
!pip install accelerate -U

# Main tasks




In [None]:
# import libraries
import inspect

import evaluate
import numpy as np
import torch
from datasets import load_dataset
from huggingface_hub import notebook_login
from transformers import Trainer, TrainingArguments, AutoModelForAudioClassification, AutoProcessor, AutoFeatureExtractor, Wav2Vec2Tokenizer, Wav2Vec2ForCTC

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# Presumably required by the later `push_to_hub` steps — confirm the token has write access.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
%%capture
# Load the dataset by its Hub id; `%%capture` hides the cell's output.
dataset = load_dataset("TheDuyx/bass_design_encoded")

In [None]:
# Split the loaded "test" split into train/test parts (10% held out), shuffled with a fixed seed.
# NOTE: this rebinds `dataset`, so the cell is not idempotent — re-running it without
# reloading the dataset first would split the already-reduced "test" part again.
dataset = dataset["test"].train_test_split(seed=42, shuffle=True, test_size=0.1)

In [None]:
# Display the first training example to check which fields it contains.
dataset["train"][0]

In [None]:
# Hub id of the pretrained checkpoint that gets fine-tuned below.
model_id = "ntu-spml/distilhubert"
# Load the checkpoint's feature extractor, asking it to normalize its inputs
# and to return an attention mask alongside them.
feature_extractor = AutoFeatureExtractor.from_pretrained(
    model_id, do_normalize=True, return_attention_mask=True
)

preprocessor_config.json:   0%|          | 0.00/214 [00:00<?, ?B/s]

In [None]:
# Read the sampling rate the feature extractor is configured for; the training
# audio is expected to use this rate. The bare expression displays the value.
sampling_rate = feature_extractor.sampling_rate
sampling_rate

16000

In [None]:
# Take the audio of the first training example and run it through the feature extractor.
sample = dataset["train"][0]["audio"]
inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
# Mean/variance of the raw audio array, before feature extraction.
print(f"Mean: {np.mean(sample['array']):.3}, Variance: {np.var(sample['array']):.3}")


# Names of the fields returned by the feature extractor.
print(f"inputs keys: {list(inputs.keys())}")

# Mean/variance of the extractor's `input_values`. The extractor was created with
# do_normalize=True, so these are presumably near 0 and 1 — compare with the raw
# statistics printed first (both lines share the same "Mean/Variance" label).
print(f"Mean: {np.mean(inputs['input_values']):.3}, Variance: {np.var(inputs['input_values']):.3}")

In [None]:
# Build the two lookup tables handed to the model config:
#   id2label: class id (as a string) -> label name
#   label2id: label name -> class id (as a string)
label_feature = dataset["train"].features["label"]
id2label_fn = label_feature.int2str

id2label = dict(
    (str(class_idx), id2label_fn(class_idx))
    for class_idx in range(len(label_feature.names))
)

# Invert the mapping so label names can be looked up as well.
label2id = dict((name, class_id) for class_id, name in id2label.items())

# Sanity check: show the label name stored for class "0".
id2label["0"]

In [None]:
# The number of output classes equals the number of entries in the label mapping.
num_labels = len(id2label)

# Load the pretrained checkpoint as an audio-classification model, passing the
# class count and both label mappings.
model = AutoModelForAudioClassification.from_pretrained(
    model_id,
    num_labels=num_labels,
    label2id=label2id,
    id2label=id2label,
)

In [None]:
# Training configuration for the fine-tuning run.
model_name = model_id.split("/")[-1]  # checkpoint name without the Hub namespace
batch_size = 8
gradient_accumulation_steps = 1
num_train_epochs = 1  # usually set to 10

# transformers renamed `evaluation_strategy` to `eval_strategy`. The library is
# installed unpinned, so pick whichever keyword the installed version accepts
# instead of hard-coding one that may be missing.
eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    f"{model_name}-finetuned-bass-test",  # output directory
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_train_epochs,
    warmup_ratio=0.1,
    logging_steps=5,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # fp16 mixed precision needs a CUDA device; fall back to full precision
    # instead of failing when the notebook runs without a GPU.
    fp16=torch.cuda.is_available(),
    push_to_hub=True,
    # Evaluate once per epoch, matching `save_strategy`.
    **{eval_strategy_kwarg: "epoch"},
)

In [None]:
# Load the "accuracy" metric through the `evaluate` library; used by `compute_metrics` below.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score model outputs with the module-level accuracy ``metric``.

    Args:
        eval_pred: object exposing ``predictions`` (per-class scores, one row
            per example) and ``label_ids`` (the reference class ids).

    Returns:
        Whatever ``metric.compute`` returns for the predicted vs. reference ids.
    """
    class_scores = eval_pred.predictions
    # The predicted class is the index of the highest score in each row.
    predicted_ids = np.argmax(class_scores, axis=1)
    reference_ids = eval_pred.label_ids
    return metric.compute(predictions=predicted_ids, references=reference_ids)

In [None]:
from transformers import Trainer

# Assemble the Trainer from the model, training configuration, both dataset
# splits, the feature extractor and the accuracy callback.
# NOTE(review): newer transformers releases deprecate `tokenizer=` in favour of
# `processing_class=` — confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
)

# Run fine-tuning; the returned training summary is displayed as the cell output.
trainer.train()

In [None]:
%%capture
# Metadata handed to `trainer.push_to_hub` in the next cell: the dataset id and
# name, the model name, the base checkpoint and the task.
kwargs = {
    "dataset_tags": "TheDuyx/bass_design_encoded",
    "dataset": "bass_design_encoded",
    "model_name": f"{model_name}-finetuned-bass-test",
    "finetuned_from": model_id,
    "tasks": "audio-classification",
}

In [None]:
%%capture
# Upload the trained model to the Hugging Face Hub with the metadata defined in `kwargs`;
# `%%capture` hides the upload output.
trainer.push_to_hub(**kwargs)

In [None]:
# Reload the model from the Hub by repo id, replacing the in-memory `model`.
# NOTE(review): the repo id is hard-coded; presumably it is where the push above published — confirm.
model = AutoModelForAudioClassification.from_pretrained("TheDuyx/distilhubert-finetuned-bass-test")

In [None]:
from transformers import AutoFeatureExtractor

# NOTE: this rebinds `model_id` from the base checkpoint to the fine-tuned repo, so
# re-running earlier cells that read `model_id` would then use the fine-tuned id.
model_id = "TheDuyx/distilhubert-finetuned-bass-test"
# Load the feature extractor stored with the fine-tuned repo, using the same
# options as for the base checkpoint.
feature_extractor = AutoFeatureExtractor.from_pretrained(
    model_id, do_normalize=True, return_attention_mask=True
)