<a href="https://colab.research.google.com/github/hameedolaniyi97/Classifying-Musical-Instruments-with-Transfer-Learning/blob/main/musical_instrument_Transfer_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch import nn, optim
import numpy as np
import timm
import wandb
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

In [None]:
import kagglehub

# Kaggle dataset identifier, in "<owner>/<dataset>" form.
dataset_handle = "nikolasgegenava/music-instruments"

# Download latest version; `path` is whatever location kagglehub returns for it.
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/music-instruments


In [None]:
# Directory used as the dataset root: the `music_instruments` folder inside
# the downloaded dataset location.
dataset_subdir = 'music_instruments'
data_path = os.path.join(path, dataset_subdir)

In [None]:
# setup the transforms
#
# Order matters here:
#   1. Resize first, so every later step works on a small 200x200 image.
#   2. Random flip / colour jitter are applied BEFORE normalisation.
#      ColorJitter on a float tensor assumes values in [0, 1] and clamps its
#      result to that range, so running it after Normalize (as was done
#      previously) clips the normalised, partly negative pixel values.
#   3. ToTensor + Normalize come last, so the model receives inputs scaled
#      with the per-channel mean/std below (the widely used ImageNet values).

transform = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.RandomHorizontalFlip(p=0.4),
    transforms.ColorJitter(brightness=0.2, contrast=0.1,
                           saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [None]:
# load the dataset
# ImageFolder reads the images found under `data_path` and runs `transform`
# on each one as it is fetched.
data = datasets.ImageFolder(data_path, transform=transform)

In [None]:
# split the data into train and test set
#
# Only 40% of the images are used (the other 60% is set aside and never
# touched); that 40% working subset is then divided 80/20 into train and test.
# A seeded generator makes both splits reproducible across
# "Restart kernel -> Run all".
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

# step 1: carve the 40% working subset out of the full dataset
unused_size = int(0.6 * len(data))
subset_size = len(data) - unused_size
_unused_data, subset_data = random_split(
    dataset=data, lengths=[unused_size, subset_size], generator=split_generator
)

# step 2: split the working subset into train (80%) and test (20%)
train_size = int(0.8 * len(subset_data))
test_size = len(subset_data) - train_size
train_data, test_data = random_split(
    dataset=subset_data, lengths=[train_size, test_size], generator=split_generator
)

# NOTE: both subsets wrap the same `data` object, so they share its
# `transform` -- any random augmentation in it also runs at evaluation time.

# setup the data loader
BATCH_SIZE = 32
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation does not depend on sample order, so keep it deterministic.
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# setup the model architecture and hyperparameters

# Training hyperparameters.
LEARNING_RATE = 1e-4
EPOCHS = 10

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pretrained EfficientNet-B3 from timm; its `classifier` layer is replaced by
# a new linear layer with the same input width and 10 outputs.
model = timm.create_model(model_name='efficientnet_b3', pretrained=True)
head_in_features = model.classifier.in_features
model.classifier = nn.Linear(in_features=head_in_features, out_features=10)
model = model.to(device)

# Loss, optimizer and learning-rate schedule (StepLR: lr * 0.1 every 2 steps).
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=2, gamma=0.1)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

In [None]:
# Human-readable class names; the prediction cell indexes this list with the
# class index predicted by the model.
music_ins_list = [
    'accordion',
    'banjo',
    'drum',
    'flute',
    'guitar',
    'harmonica',
    'saxophone',
    'sitar',
    'tabla',
    'violin',
]
len(music_ins_list)

10

In [None]:
# init wandb for tracking

# Settings recorded with the run so the logged metrics can be traced back to
# the configuration that produced them.
run_config = {
    'model_name': 'efficientnet-b3',
    'device': device,
    'data_path': data_path,
    'learning rate': LEARNING_RATE,
    'optimizer': 'AdamW',
    'weight decay': 0.01,
    'scheduler': {'step': 2, 'gamma': 0.1},
    'epochs': EPOCHS,
}

run = wandb.init(
    project='music_instrument Classification With Fine Tuned EfficientNet-B3',
    name='second run',
    config=run_config,
)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdatawhizcoder[0m ([33mdatawhizcoder-apple[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# begin the training loop


def _weighted_scores(labels, preds):
  """Return (f1, precision, recall), each computed with average='weighted'."""
  return (
      f1_score(labels, preds, average='weighted'),
      precision_score(labels, preds, average='weighted'),
      recall_score(labels, preds, average='weighted'),
  )


for epoch in range(EPOCHS):
  # Per-epoch accumulators: batch losses, true labels and predicted classes.
  train_epoch_loss, train_epoch_labels, train_epoch_preds = [], [], []
  test_epoch_loss, test_epoch_labels, test_epoch_preds = [], [], []

  # ---- training pass ----
  model.train()
  train_batch = tqdm(train_loader, desc= f'Training Epoch: {epoch+1}/{EPOCHS}')
  for image, label in train_batch:
    image, label = image.to(device), label.to(device)
    train_logits = model(image)
    loss = criterion(train_logits, label)

    # back propagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Record this batch's loss, labels and arg-max predictions.
    batch_loss = loss.item()
    train_epoch_loss.append(batch_loss)
    train_epoch_labels.extend(label.cpu().detach().numpy())
    train_preds = torch.max(train_logits, 1)[1]
    train_epoch_preds.extend(train_preds.cpu().detach().numpy())

    train_batch.set_postfix(loss=batch_loss)

  # The learning-rate schedule advances once per epoch.
  scheduler.step()

  # Mean batch loss plus weighted f1 / precision / recall for the training set.
  train_final_loss = sum(train_epoch_loss)/len(train_epoch_loss)
  train_f1, train_precision, train_recall = _weighted_scores(
      train_epoch_labels, train_epoch_preds)

  # ---- evaluation pass (no gradients, eval mode) ----
  model.eval()
  with torch.no_grad():
    test_batch = tqdm(test_loader, desc= f'Evaluating tests for Epoch: {epoch+1}/{EPOCHS}')
    for test_image, test_labels in test_batch:
      test_image, test_labels = test_image.to(device), test_labels.to(device)
      test_logits = model(test_image)

      test_loss = criterion(test_logits, test_labels)
      test_epoch_loss.append(test_loss.item())
      test_epoch_labels.extend(test_labels.cpu().detach().numpy())
      test_preds = torch.max(test_logits, 1)[1]
      test_epoch_preds.extend(test_preds.cpu().detach().numpy())

  # Same summary statistics for the test set.
  test_final_loss = sum(test_epoch_loss)/len(test_epoch_loss)
  test_f1, test_precision, test_recall = _weighted_scores(
      test_epoch_labels, test_epoch_preds)

  # Each printed pair is (train, test).
  print(f'Completed training Epoch: {epoch+1}..f1: {train_f1, test_f1}, loss: {train_final_loss, test_final_loss}')
  print(f'Completed training Epoch: {epoch+1}..recall: {train_recall, test_recall}, precision: {train_precision, test_precision}')

  # append result in wandb
  epoch_metrics = {
      'epochs': epoch + 1,
      'train loss': train_final_loss,
      'test loss': test_final_loss,
      'train f1': train_f1,
      'train precision': train_precision,
      'train recall': train_recall,
      'test f1': test_f1,
      'test precision': test_precision,
      'test recall': test_recall
  }
  run.log(epoch_metrics)

# Close the wandb run once every epoch has been logged.
run.finish()


Training Epoch: 1/10: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s, loss=2.1]
Evaluating tests for Epoch: 1/10: 100%|██████████| 5/5 [00:02<00:00,  2.50it/s]


Completed training Epoch: 1..f1: (0.3141418324432297, 0.6284903071997995), loss: (2.1649285617627596, 1.9268031358718871)
Completed training Epoch: 1..recall: (0.3494809688581315, 0.6413793103448275), precision: (0.4032133603720137, 0.6750130419501617)


Training Epoch: 2/10: 100%|██████████| 19/19 [00:07<00:00,  2.38it/s, loss=2.04]
Evaluating tests for Epoch: 2/10: 100%|██████████| 5/5 [00:01<00:00,  4.49it/s]


Completed training Epoch: 2..f1: (0.7753826445209852, 0.7940261221414876), loss: (1.681069581132186, 1.4647547721862793)
Completed training Epoch: 2..recall: (0.7785467128027682, 0.7931034482758621), precision: (0.7910715781703527, 0.8112777840820437)


Training Epoch: 3/10: 100%|██████████| 19/19 [00:08<00:00,  2.27it/s, loss=1.95]
Evaluating tests for Epoch: 3/10: 100%|██████████| 5/5 [00:01<00:00,  4.72it/s]


Completed training Epoch: 3..f1: (0.8403357763968259, 0.7549891404899385), loss: (1.3454328775405884, 1.50155029296875)
Completed training Epoch: 3..recall: (0.842560553633218, 0.7517241379310344), precision: (0.8503330144635104, 0.7817265513865279)


Training Epoch: 4/10: 100%|██████████| 19/19 [00:08<00:00,  2.16it/s, loss=1.97]
Evaluating tests for Epoch: 4/10: 100%|██████████| 5/5 [00:01<00:00,  3.29it/s]


Completed training Epoch: 4..f1: (0.8622127015205767, 0.7908895512677494), loss: (1.2983701856512773, 1.4087446451187133)
Completed training Epoch: 4..recall: (0.8633217993079585, 0.7931034482758621), precision: (0.870591138270405, 0.8025396459524248)


Training Epoch: 5/10: 100%|██████████| 19/19 [00:09<00:00,  2.11it/s, loss=2.28]
Evaluating tests for Epoch: 5/10: 100%|██████████| 5/5 [00:01<00:00,  4.71it/s]


Completed training Epoch: 5..f1: (0.8509475179191435, 0.7916502168496172), loss: (1.283408798669514, 1.4232098579406738)
Completed training Epoch: 5..recall: (0.8529411764705882, 0.7931034482758621), precision: (0.8584027311376221, 0.8094152185430076)


Training Epoch: 6/10: 100%|██████████| 19/19 [00:08<00:00,  2.29it/s, loss=2.12]
Evaluating tests for Epoch: 6/10: 100%|██████████| 5/5 [00:01<00:00,  3.78it/s]


Completed training Epoch: 6..f1: (0.8580440653417901, 0.8003457738915262), loss: (1.287690049723575, 1.4231035947799682)
Completed training Epoch: 6..recall: (0.8598615916955017, 0.8), precision: (0.8663722030181764, 0.8351143813661163)


Training Epoch: 7/10: 100%|██████████| 19/19 [00:08<00:00,  2.25it/s, loss=2.01]
Evaluating tests for Epoch: 7/10: 100%|██████████| 5/5 [00:01<00:00,  4.70it/s]


Completed training Epoch: 7..f1: (0.8795463305958714, 0.7715933115210091), loss: (1.2511997975801166, 1.4422648429870606)
Completed training Epoch: 7..recall: (0.8806228373702422, 0.7724137931034483), precision: (0.883797947188812, 0.7868431081391474)


Training Epoch: 8/10: 100%|██████████| 19/19 [00:08<00:00,  2.20it/s, loss=2.06]
Evaluating tests for Epoch: 8/10: 100%|██████████| 5/5 [00:01<00:00,  4.65it/s]


Completed training Epoch: 8..f1: (0.8749368070566995, 0.7666610302352431), loss: (1.2535004678525423, 1.4213567972183228)
Completed training Epoch: 8..recall: (0.8771626297577855, 0.7655172413793103), precision: (0.8819048243906947, 0.7857106720189886)


Training Epoch: 9/10: 100%|██████████| 19/19 [00:08<00:00,  2.20it/s, loss=2.04]
Evaluating tests for Epoch: 9/10: 100%|██████████| 5/5 [00:01<00:00,  4.57it/s]


Completed training Epoch: 9..f1: (0.8660739241992864, 0.789635730612585), loss: (1.248556181004173, 1.423336958885193)
Completed training Epoch: 9..recall: (0.8685121107266436, 0.7931034482758621), precision: (0.8723891312418691, 0.8017264429282684)


Training Epoch: 10/10: 100%|██████████| 19/19 [00:08<00:00,  2.30it/s, loss=1.83]
Evaluating tests for Epoch: 10/10: 100%|██████████| 5/5 [00:01<00:00,  3.95it/s]

Completed training Epoch: 10..f1: (0.8778881960673635, 0.7797370739124863), loss: (1.2394441303453947, 1.404770851135254)
Completed training Epoch: 10..recall: (0.8788927335640139, 0.7793103448275862), precision: (0.8833300262680382, 0.8007193752832699)





0,1
epochs,▁▂▃▃▄▅▆▆▇█
test f1,▁█▆███▇▇█▇
test loss,█▂▂▁▁▁▂▁▁▁
test precision,▁▇▆▇▇█▆▆▇▆
test recall,▁█▆███▇▆█▇
train f1,▁▇████████
train loss,█▄▂▁▁▁▁▁▁▁
train precision,▁▇████████
train recall,▁▇▇███████

0,1
epochs,10.0
test f1,0.77974
test loss,1.40477
test precision,0.80072
test recall,0.77931
train f1,0.87789
train loss,1.23944
train precision,0.88333
train recall,0.87889


In [None]:
from PIL import Image

# Softmax over dim=1, the class dimension of the (batch, classes) model output.
# Passing `dim` explicitly avoids the implicit-dimension deprecation warning.
softmax = nn.Softmax(dim=1)


def predict_music_ins(image_path=None) -> None:
  """Classify a single image file and print the predicted instrument.

  Args:
    image_path: Path of the image file to classify. When omitted (None), the
      path is read interactively with `input()`. Despite the wording of the
      prompt, the value is passed to `Image.open`, so it must be something
      PIL can open (e.g. a local file path).

  Side effects:
    Puts the global `model` in eval mode and prints the predicted class name
    from `music_ins_list` together with its softmax probability.
  """
  if image_path is None:
    image_path = input('Enter image url: ')

  # Close the file handle as soon as the pixel data has been converted to RGB.
  with Image.open(image_path) as image_file:
    new_image = image_file.convert('RGB')

  # NOTE: `transform` is the same pipeline used to build the dataset, so its
  # random flip / colour jitter also run here and the reported probability can
  # vary slightly between calls on the same file.
  new_image = transform(new_image).unsqueeze(0)  # add the batch dimension
  new_image = new_image.to(device)

  # Inference only: eval mode for the model's layers, no autograd bookkeeping.
  model.eval()
  with torch.no_grad():
    preds = softmax(model(new_image))
  probability, prediction = torch.max(preds, 1)

  print(f'This is an image of {music_ins_list[prediction.item()]} with probability of {probability.item()}')




predict_music_ins()

Enter image url: /content/OIP (1).webp
This is an image of tabla with probability of 0.17019538581371307


  return self._call_impl(*args, **kwargs)
