<a href="https://colab.research.google.com/github/ThaDuyx/Classify/blob/dev/classify.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preliminary tasks

In [None]:
# install libraries
%%capture
!pip install datasets
!pip install transformers
!pip install evaluate
!pip install accelerate -U

# Main tasks




In [None]:
# import libraries
import numpy as np
import evaluate
from datasets import load_dataset
from transformers import Trainer, TrainingArguments, AutoModelForAudioClassification, AutoFeatureExtractor
from huggingface_hub import notebook_login

In [None]:
# Authenticate against the Hugging Face Hub before training: the training cells
# below set `push_to_hub=True` and call `trainer.push_to_hub(...)`.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
%%capture
datasetName = "TheDuyx/augmented_bass_sounds"
datasetTag = "augmented_bass_sounds"
pre_name="bass4"

dataset = load_dataset(datasetName)

model_id = "ntu-spml/distilhubert"
feature_extractor = AutoFeatureExtractor.from_pretrained(
    model_id, do_normalize=True, return_attention_mask=True
)

id2label_fn = dataset["train"].features["label"].int2str

id2label = {
    str(i): id2label_fn(i)
    for i in range(len(dataset["train"].features["label"].names))
}

label2id = {v: k for k, v in id2label.items()}

num_labels = len(id2label)

model = AutoModelForAudioClassification.from_pretrained(
    model_id,
    num_labels=num_labels,
    label2id=label2id,
    id2label=id2label,
)

# Training on CPU

In [None]:
# Hyperparameters for the CPU run.
batch_size = 256
gradient_accumulation_steps = 1
num_train_epochs = 3  # usually set to 10

# Checkpoint name without the owner prefix, e.g. "owner/name" -> "name".
model_name = model_id.rsplit("/", 1)[-1]

training_args = TrainingArguments(
    output_dir=f"{model_name}-{pre_name}",
    # --- optimisation ---
    learning_rate=5e-6,
    warmup_ratio=0.1,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    fp16=False,
    # --- evaluate and checkpoint once per epoch, select by accuracy ---
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # --- logging / publishing ---
    logging_steps=5,
    push_to_hub=True,
)

# Metric object used by `compute_metrics`.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level `metric`.

    Takes the arg-max over axis 1 of `eval_pred.predictions` as the predicted
    class id and compares it against `eval_pred.label_ids`.
    Returns whatever `metric.compute` returns.
    """
    scores = eval_pred.predictions
    expected = eval_pred.label_ids
    predicted = np.argmax(scores, axis=1)
    return metric.compute(predictions=predicted, references=expected)

# Wire the model, arguments, data splits and metric function into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
)

trainer.train()

# Extra metadata forwarded to the final Hub upload.
kwargs = dict(
    dataset_tags=datasetTag,
    dataset=datasetName,
    model_name=f"{model_name}-{pre_name}",
    finetuned_from=model_id,
    tasks="audio-classification",
)

trainer.push_to_hub(**kwargs)

# Training on GPU

In [None]:
# Hyperparameters for the GPU run (mixed precision enabled via fp16).
batch_size = 256
gradient_accumulation_steps = 1
num_train_epochs = 4  # usually set to 10

# Checkpoint name without the owner prefix, e.g. "owner/name" -> "name".
model_name = model_id.rsplit("/", 1)[-1]

training_args = TrainingArguments(
    output_dir=f"{model_name}-{pre_name}",
    # --- optimisation ---
    learning_rate=5e-4,
    warmup_ratio=0.1,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    fp16=True,
    # --- evaluate and checkpoint once per epoch, select by accuracy ---
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # --- logging / publishing ---
    logging_steps=5,
    push_to_hub=True,
)

# Metric object used by `compute_metrics`.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Return the module-level `metric` evaluated on one set of predictions.

    The predicted class for each row is the index of the largest value along
    axis 1 of `eval_pred.predictions`; references are `eval_pred.label_ids`.
    """
    predicted_ids = np.argmax(eval_pred.predictions, axis=1)
    result = metric.compute(
        predictions=predicted_ids,
        references=eval_pred.label_ids,
    )
    return result

# Wire the model, arguments, data splits and metric function into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
)

trainer.train()

# Extra metadata forwarded to the final Hub upload.
kwargs = dict(
    dataset_tags=datasetTag,
    dataset=datasetName,
    model_name=f"{model_name}-{pre_name}",
    finetuned_from=model_id,
    tasks="audio-classification",
)

trainer.push_to_hub(**kwargs)

Epoch,Training Loss,Validation Loss,Accuracy
1,0.0395,0.055208,0.984421
2,0.0045,0.005416,0.998236
3,0.0028,0.005085,0.99853
4,0.0005,0.001316,0.999706


CommitInfo(commit_url='https://huggingface.co/TheDuyx/distilhubert-bass4/commit/79036861cdc3011781ebfceab241f5508b2a6476', commit_message='End of training', commit_description='', oid='79036861cdc3011781ebfceab241f5508b2a6476', pr_url=None, pr_revision=None, pr_num=None)