<a href="https://colab.research.google.com/github/florian-frey/AudioSimilarity/blob/main/src/DataPreprocessing/SpectrogramPreprocessingASS2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Convert Music to Spectrograms

This notebook further preprocesses the spectrograms that were created from the FMA music dataset.

This includes resizing all data to one specific shape (x\*1290\*3) and splitting the data into images representing three seconds each.

## 1 Imports

In [1]:
# import necessary libraries
import numpy as np
import pandas as pd
import cv2
import shutil
from tqdm import tqdm
import os

In [2]:
# Mount Google Drive at /content/drive so the notebook can read the spectrogram
#   archives stored under MyDrive and write its results back there.
# NOTE(review): the earlier wording mentioned a kaggle.json API token, but no
#   Kaggle access is visible in this notebook -- confirm whether it is still needed.

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 2 Preprocess data further

In [3]:
# unpack the archives with the indices 0, 1 and 2 from the first Drive folder
#   into /content/data/

first_archive_dir = "/content/drive/MyDrive/All_Spectrogramms/"
for archive_index in range(3):
  archive_file = f"{first_archive_dir}All_Spectrogramms{archive_index}.zip"
  shutil.unpack_archive(archive_file, "/content/data/")

In [4]:
# unpack the archives with the indices 4 to 15 from the second Drive folder
#   into /content/data/
# NOTE(review): index 3 is covered neither by this loop nor by range(3) in the
#   cell for the first folder -- confirm that no All_Spectrogramms3.zip exists.

second_archive_dir = "/content/drive/MyDrive/All_Spectrogramms (1)/"
for archive_index in range(4, 16):
  archive_file = f"{second_archive_dir}All_Spectrogramms{archive_index}.zip"
  shutil.unpack_archive(archive_file, "/content/data/")

In [5]:
# delete all empty folders directly below DATA_DIR
#   (only the first directory level is checked, nested folders are left alone)

DATA_DIR = "/content/data/"
for item in os.listdir(DATA_DIR):
  item_path = os.path.join(DATA_DIR, item)
  # a plain file at the top level would make os.listdir raise
  #   NotADirectoryError, so only directories are inspected
  if os.path.isdir(item_path) and not os.listdir(item_path):
    os.rmdir(item_path)

In [6]:
# walk DATA_DIR and collect the path of every .jpg file together with its label
#   (the label is the name of the folder that directly contains the image)

total_images = []
total_labels = []

for folder, _subfolders, filenames in os.walk(DATA_DIR):
    # the last component of the folder path is used as the class name
    folder_label = folder.split(os.path.sep)[-1]
    for filename in filenames:
        if not filename.endswith(".jpg"):
            continue
        total_images.append(os.path.join(folder, filename))
        total_labels.append(folder_label)

print('Image Root: ', total_images[0])
print('Label: ', total_labels[0])

Image Root:  /content/data/spectral_contrast/Rock/137632_spectral_contrast.jpg
Label:  Rock


In [7]:
# check the number of .jpg image paths collected in total_images

len(total_images)

22491

In [8]:
# record the shape and the aspect ratio (height / width) of every image
#   both lists hold one entry per image and stay index-aligned with total_images

list_of_shapes = list()
list_of_formats = list()
for path in total_images:
  image_temp = cv2.imread(path)
  # cv2.imread reports an unreadable file by returning None instead of raising;
  #   fail with the offending path rather than an opaque AttributeError
  #   (skipping the file would break the index alignment with total_images)
  if image_temp is None:
    raise OSError(f"Could not read image: {path}")
  image_shape_temp = image_temp.shape
  list_of_shapes.append(image_shape_temp)
  list_of_formats.append(image_shape_temp[0]/image_shape_temp[1])

In [9]:
# print the smallest and the largest shape
#   (shape tuples are ordered element by element, so the first and last entry
#   of the sorted list are the minimum and the maximum)

shapes_in_order = sorted(list_of_shapes)
smallest_shape = shapes_in_order[0]
largest_shape = shapes_in_order[-1]
print(f"Smallest detected shape: {smallest_shape}\nLargest detected shape: {largest_shape}")

Smallest detected shape: (7, 22, 3)
Largest detected shape: (128, 1293, 3)


In [10]:
# get an overview of the different existing shapes:
#   count how often each (height, width, channels) shape occurs

pd.Series(list_of_shapes).value_counts()

(12, 1291, 3)     3963
(128, 1291, 3)    3963
(7, 1291, 3)      3963
(7, 1293, 3)      3142
(12, 1293, 3)     3142
(128, 1293, 3)    3142
(7, 1292, 3)       389
(128, 1292, 3)     389
(12, 1292, 3)      389
(12, 66, 3)          1
(12, 70, 3)          1
(128, 66, 3)         1
(128, 70, 3)         1
(128, 22, 3)         1
(7, 22, 3)           1
(7, 66, 3)           1
(7, 70, 3)           1
(12, 22, 3)          1
dtype: int64

In [11]:
# function to resize images to one specific size

def resize_image(image_path, new_width, new_height):
    """Resize the image stored at ``image_path`` and overwrite the file in place.

    Args:
        image_path: Path of the image file that is read and then overwritten.
        new_width: Target width in pixels.
        new_height: Target height in pixels.

    Raises:
        OSError: If the image cannot be read or the resized image cannot be
            written back.
    """
    # Load the image; cv2.imread returns None instead of raising on failure
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(f"Could not read image: {image_path}")

    # Resize the image (cv2.resize expects the size as (width, height))
    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)

    # Save the resized image; cv2.imwrite reports failure via its return value
    if not cv2.imwrite(image_path, resized_image):
        raise OSError(f"Could not write image: {image_path}")

In [12]:
# bring all images to a common width of TARGET_WIDTH pixels:
#   wider images are resized (height unchanged), narrower outliers are deleted,
#   images that already have exactly TARGET_WIDTH columns are left untouched

TARGET_WIDTH = 1290

for image_path, shape in zip(total_images, list_of_shapes):
  height, width = shape[0], shape[1]
  if width > TARGET_WIDTH:
    resize_image(image_path, TARGET_WIDTH, height)
  elif width < TARGET_WIDTH and os.path.exists(image_path):
    # the exists-check keeps this cell re-runnable: list_of_shapes still lists
    #   files that an earlier run of this cell has already removed
    os.remove(image_path)

In [13]:
# copy the resized images from /content/data/ to Google Drive, but only when
#   the target folder does not exist yet
# to force a fresh copy, remove the target first:
#   shutil.rmtree(target_path_spectrograms)

target_path_spectrograms = "/content/drive/MyDrive/AllSpectrogramsASS/"
already_copied = os.path.exists(target_path_spectrograms)
if not already_copied:
  shutil.copytree("/content/data/", target_path_spectrograms)

In [14]:
# collect the .jpg paths and their labels again
#   same scan as earlier in the notebook, repeated so that images which were
#   deleted in the meantime are no longer part of the lists

total_images = []
total_labels = []

for folder, _subfolders, filenames in os.walk(DATA_DIR):
    jpg_names = [name for name in filenames if name.endswith(".jpg")]
    total_images.extend(os.path.join(folder, name) for name in jpg_names)
    # the name of the containing folder is the class label of each image in it
    total_labels.extend([folder.split(os.path.sep)[-1]] * len(jpg_names))

print('Image Root: ', total_images[0])
print('Label: ', total_labels[0])

Image Root:  /content/data/spectral_contrast/Rock/137632_spectral_contrast.jpg
Label:  Rock


In [15]:
# display new length --> 9 images less (22491 before, 22482 now)

len(total_images)

22482

In [28]:
# split every image into 20 overlapping snippets --> 3 second snippets
#   windows start at column 0, 64, ..., 1216 and are SNIPPET_WIDTH columns wide,
#   so neighbouring windows overlap by 64 columns
# NOTE(review): for an image that is 1290 columns wide the last window
#   (columns 1216 to 1343) is cut off by the slicing and is only 74 columns
#   wide -- confirm that downstream code can handle the narrower last snippet.
# NOTE(review): the target directory is relative to the current working
#   directory, while the archiving step reads /content/data_3_seconds/ --
#   this only matches when the working directory is /content.

SNIPPET_WIDTH = 128
SNIPPET_STRIDE = 64
LAST_START_LIMIT = 1280  # window start columns stay below this value

for path in tqdm(total_images):
  image_temp = cv2.imread(path)
  # cv2.imread returns None for unreadable files; report the path explicitly
  if image_temp is None:
    raise OSError(f"Could not read image: {path}")

  # everything below depends only on the path, so it is computed once per
  #   image instead of once per snippet
  path_parts = path.split("/")
  target_dir = "data_3_seconds/"+path_parts[2]+"/"+path_parts[3]+"/"+path_parts[4]+"/"
  os.makedirs(target_dir, exist_ok = True)
  file_stem = path_parts[-1].split(".")[0]

  for counter, i in enumerate(range(0, LAST_START_LIMIT, SNIPPET_STRIDE)):
    # the snippet index is appended to the original file name
    target_path = file_stem + str(counter) + ".jpg"
    cv2.imwrite(target_dir + target_path, image_temp[:, i:(i+SNIPPET_WIDTH), :])

100%|██████████| 22482/22482 [03:45<00:00, 99.87it/s] 


In [30]:
# pack the three-second snippets into one zip archive on Google Drive so that
#   they survive the end of the Colab session
#   (make_archive appends the ".zip" extension to output_dir itself)

output_dir = "/content/drive/MyDrive/AllSpectrograms3Seconds"
shutil.make_archive(
    base_name=output_dir,
    format="zip",
    root_dir="/content/data_3_seconds/data/",
)

'/content/drive/MyDrive/AllSpectrograms3Seconds.zip'