# Deep Learning Guitar Classification Project

This is a deep learning project that classifies images of guitars into two classes using a limited amount of data:

*   Acoustic Guitar
*   Electric Guitar




In [None]:
#Import Libraries 
import os
import glob
import random
import zipfile
import shutil
from tqdm import tqdm
import IPython.display as display
from PIL import Image
import numpy as np
from sklearn.metrics import confusion_matrix

In [None]:
# Load IPython's autoreload extension (reload_ext is safe to run repeatedly).
%reload_ext autoreload
# Mode 2: reload all imported modules before executing each cell.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# using fastai vision library
from fastai.vision import *
from fastai.metrics import error_rate

In [None]:
# Unpack the zipped guitar dataset into /data
with zipfile.ZipFile('image_data/GuitarDatasetChallenge.zip', mode='r') as archive:
    archive.extractall(path='/data')

In [None]:
# Root folder the archive was extracted into; the dataset itself lives in the
# 'GuitarDatasetChallenge' sub-folder beneath it.
path = '/data/'

# **Make data folder function**
This function performs the following tasks to arrange the directories in an ImageNet-style layout:

*   Splits the data into standard train, validation and test sets
*   Creates new directories for the split data

*   Removes the initial directories that held the data





In [None]:
def _progress(iterable):
    """Wrap *iterable* in a tqdm progress bar when tqdm is installed."""
    try:
        from tqdm import tqdm
    except ImportError:
        # The progress bar is purely cosmetic; fall back to plain iteration.
        return iterable
    return tqdm(iterable)


def _split_and_move(files, n_train, n_valid, train_dir, valid_dir, test_dir):
    """Move the first n_train files to train, the next n_valid to valid, the rest to test."""
    for index, src in enumerate(_progress(files)):
        # Strict '<' keeps the train/valid sizes at exactly n_train / n_valid.
        if index < n_train:
            destination = train_dir
        elif index < n_train + n_valid:
            destination = valid_dir
        else:
            destination = test_dir
        shutil.move(src, destination)


def make_data_folders(path, train_percentage=0.75, valid_percentage=0.15, seed=55):
    """Rearrange the guitar dataset into train/valid/test folders.

    Expects ``<path>/GuitarDatasetChallenge/acoustic`` and
    ``<path>/GuitarDatasetChallenge/electric`` to hold the images. Files
    matching ``*jpg`` in each class folder are shuffled and moved into
    ``train/<class>``, ``valid/<class>`` and a single unlabelled ``test``
    folder; the two original class folders are then deleted.

    The function can be re-run safely: existing target folders are reused and
    already-removed class folders are skipped.

    NOTE: files in the class folders that do not match ``*jpg`` are deleted
    together with those folders. Because ``test`` is shared by both classes,
    two images with the same file name in different classes would collide
    there (``shutil.move`` raises in that case).

    Args:
        path: Directory containing the ``GuitarDatasetChallenge`` folder.
        train_percentage: Fraction of each class moved to ``train``.
        valid_percentage: Fraction of each class moved to ``valid``; whatever
            remains goes to ``test``.
        seed: Seed for the shuffle, so the split is reproducible.

    Raises:
        ValueError: If a fraction is negative or the two sum to more than 1.
    """
    if train_percentage < 0 or valid_percentage < 0:
        raise ValueError('train_percentage and valid_percentage must be non-negative')
    if train_percentage + valid_percentage > 1:
        raise ValueError('train_percentage + valid_percentage must not exceed 1')

    root = os.path.join(path, 'GuitarDatasetChallenge')
    classes = ('acoustic', 'electric')
    delimeter = '/*jpg'

    # exist_ok lets the notebook cell be executed more than once.
    test_dir = os.path.join(root, 'test')
    os.makedirs(test_dir, exist_ok=True)
    for split in ('train', 'valid'):
        for label in classes:
            os.makedirs(os.path.join(root, split, label), exist_ok=True)

    # A private generator keeps the split reproducible without reseeding the
    # global `random` state used by the rest of the notebook.
    rng = random.Random(seed)

    for position, label in enumerate(classes):
        # glob order depends on the filesystem; sort first so the seeded
        # shuffle yields the same split on every machine.
        files = sorted(glob.glob(os.path.join(root, label) + delimeter))
        rng.shuffle(files)

        lead = '\n' if position == 0 else ''
        print(f'{lead}splitting and moving {label} guitar images to train, test and valid sub-directories')
        _split_and_move(
            files,
            int(len(files) * train_percentage),
            int(len(files) * valid_percentage),
            os.path.join(root, 'train', label),
            os.path.join(root, 'valid', label),
            test_dir,
        )

    print('finished moving acoustic and electric guitar images')
    # Report in the same order as before: electric first, then acoustic.
    for label in reversed(classes):
        for split in ('train', 'valid'):
            count = len(glob.glob(os.path.join(root, split, label) + delimeter))
            print(f'There are {count} {label} {split} data')

    # Remove the now-redundant class folders (skip them if already gone).
    for label in reversed(classes):
        leftover = os.path.join(root, label)
        if os.path.isdir(leftover):
            shutil.rmtree(leftover)
    


In [None]:
# Build the train/valid/test folder layout under `path`; this moves the images
# and removes the original acoustic/electric folders.
make_data_folders(path)

In [None]:
# Batch size handed to the DataBunch (4 images per batch).
bs = 4

In [None]:
# Dataset root as a plain string: <path>/GuitarDatasetChallenge
path_2 =os.path.join(path, 'GuitarDatasetChallenge' )


In [None]:
# Path to the images: the dataset root wrapped in a Path object
# (Path is presumably provided by the fastai star import — confirm).
path_to_im = Path(path_2)

In [None]:
# Echo the path to confirm where the images are read from.
path_to_im

In [None]:

# Build the fastai DataBunch one step at a time, rebinding `data` after each stage.
# Collect every image found under the dataset root.
data = ImageList.from_folder(path_to_im)
# Split into training and validation sets according to the sub-folder names.
data = data.split_by_folder()
# Label each image with the name of the folder it sits in.
data = data.label_from_folder()
# Attach the images in 'test' as the test set.
data = data.add_test_folder('test')
# Random data augmentation, with every image resized to 224.
data = data.transform(tfms=get_transforms(), size=224)
# Batch the items using the batch size chosen earlier.
data = data.databunch(bs=bs)
# Normalize with the ImageNet statistics.
data = data.normalize(imagenet_stats)

In [None]:
# view a few pictures just to get a sense of how the images look
data.show_batch(rows=3, figsize=(7,6))

In [None]:
# view the data classes available in the dataset
print(data.classes)
# class count shown two ways; data.c is presumably the number of classes, so both should match
len(data.classes),data.c

In [None]:
# load a pretrained ResNet conv net with 18 layers and wrap it in a learner
# that reports the error rate as its metric
learn = cnn_learner(data, models.resnet18, metrics=error_rate)

In [None]:
# display the model's layers to see the overall architecture
learn.model

In [None]:
??learn.fit_one_cycle()

In [None]:
# train the model with the one-cycle policy (argument 3: presumably the number of epochs)
learn.fit_one_cycle(3)

In [None]:
# Build an interpretation object from the trained learner.
interp = ClassificationInterpretation.from_learner(learn)

# Losses and their matching sample indices, as returned by top_losses().
losses,idxs = interp.top_losses()

# Sanity check: one loss and one index per validation item (expected to show True).
len(data.valid_ds)==len(losses)==len(idxs)

In [None]:
# Plot the 9 samples with the highest loss to see where the model struggles.
interp.plot_top_losses(9, figsize=(15,11))

In [None]:
# Plot the confusion matrix for the interpreted predictions.
interp.plot_confusion_matrix(figsize=(12,12), dpi=60)

In [None]:
# Echo the dataset path again before saving and exporting the model.
path_to_im

## Save the model and make predictions on the test set

In [None]:
# Save the trained model under the name 'guitar_model'.
learn.save('guitar_model')

In [None]:
# Export the learner so it can be reloaded with load_learner
# (presumably written into the learner's own folder — confirm).
learn.export()


In [None]:
# Reload the exported learner from the dataset folder, replacing the in-memory `learn`.
learn = load_learner(path_to_im)


In [None]:
# Sanity-check the reloaded learner on the first training image.
img = data.train_ds[0][0]
learn.predict(img)

In [None]:
# Reload the exported learner again, this time attaching the images under 'test' as its test set.
learn = load_learner(path_to_im, test=ImageList.from_folder(path_to_im/'test'))


In [None]:
# Run the learner over the test set and collect its raw outputs.
# NOTE(review): the test folder carries no labels, so `y` and `losses` are
# presumably placeholders here — confirm before relying on them.
preds, y, losses = learn.get_preds(with_loss=True, ds_type=DatasetType.Test)
# Predicted class index for each test image: the highest-scoring column per row.
y_preds = preds.argmax(dim=1)