# Instrument recognition


## `Import packages & mount google drive`

In [1]:
import os       # For path-manipulations etc
import librosa  # For all kinds of audio manipulations
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Mount google drive
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


## Import data

In [14]:
def import_data(data_dir, ins_annotations, max_songs_per_annotation = None):
  """
  Imports the data.

  Parameters
  ----------

  data_dir : string
    Path of the directory that contains the data. It is assumed that 
    this directory contains subdirectories, whose titles are given by
    the annotations of the various instruments. The actual sound-files 
    are inside these subfolders.

  ins_annotations : array of strings
    List of the annotations of the various annotations - these strings 
    also the names of the subfolders that contain the data with that 
    annotation.

  max_songs_per_annotations : int, optional
    Maximum number of songs to include per annotation. Useful to test my code
    while working only a smaller dataset.
  Returns
  -------

  sample_rates : np.array of ints
    List of the sample-rates (in Hz) of all the music-files

  data : list of np.arrays
    List of time-domain data (in np.array format) of all music snippets.

  classifications : np.array of strings
    List of the annotation for every song - the annotation is for
    the predominant instrument.

  """

  # Initalize the to-be-returned np.arrays
  sample_rates = np.array([])
  data = [] # note: its type is list, not np.array, because the data-signals 
            # it contains may - in principle - be of different length
  classifications = np.array([])

  # Import
  for annotation in ins_annotations:

    print(f"Importing data from annotation '{annotation}'")
    data_subdir = os.path.join(data_dir,annotation)

    for filename in os.listdir(data_subdir)[:max_songs_per_annotation]:

      dt, sr = librosa.load( os.path.join(data_subdir,filename), sr=None, mono = False )

      data.append(dt)
      sample_rates = np.append(sample_rates,sr)
      classifications = np.append(classifications,annotation)

  return sample_rates, data, classifications

In [15]:
# Set working directory for this project
base_dir = '/content/drive/My Drive/Colab Notebooks/instrument_recognition'
os.chdir(base_dir)

# Data directory:
data_dir = os.path.join(base_dir,'data','IRMAS','trainingdata')
# List of the strings used as instrument annotations
ins_annotations = ['cel','cla','flu','gac','gel','org','pia','sax','tru','vio','voi']

#Import data
sample_rates, data, classifications = import_data(data_dir, ins_annotations, max_songs_per_annotation = 20)

Importing data from annotation 'cel'
Importing data from annotation 'cla'
Importing data from annotation 'flu'
Importing data from annotation 'gac'
Importing data from annotation 'gel'
Importing data from annotation 'org'
Importing data from annotation 'pia'
Importing data from annotation 'sax'
Importing data from annotation 'tru'
Importing data from annotation 'vio'
Importing data from annotation 'voi'


## Preprocessing

In [20]:
data[1][0]

array([ 0.00869751,  0.0098877 ,  0.01147461, ..., -0.0201416 ,
       -0.01913452, -0.0178833 ], dtype=float32)

In [55]:
range(0,)

TypeError: ignored

In [46]:
plt.figure()
plt.plot(range(0,3,1./sample_rates[3]), data[3][0])
plt.plot(data[3][1])

AttributeError: ignored

<Figure size 432x288 with 0 Axes>

## Train a classifier