# Spectrogram vectorization

- Functions for converting spectrograms to feature vectors (the average of each frequency row, and the average of each window)

In [1]:
# libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
import matplotlib
import random
import time

In [10]:
# Loading of datasets


def _read_path_list(list_file):
    """Return the image paths stored in ``list_file`` as a plain list.

    The first CSV column is used as the index; the paths are taken from the
    first remaining column.
    """
    return pd.read_csv(list_file, index_col=0).iloc[:, 0].values.tolist()


def _load_spectrogram(path):
    """Read the image at ``path`` and return it min-max scaled to [0, 255].

    The image is averaged over its last axis, rescaled so that its minimum
    maps to 0 and its maximum to 255, and truncated to integers.
    """
    rgb = matplotlib.image.imread(path)
    gray = np.mean(rgb, -1)
    value_range = np.ptp(gray)
    # A constant image has a zero range; dividing by it would give NaN, which
    # astype(int) turns into a meaningless value. The numerator is all zeros
    # in that case, so dividing by 1 yields an all-zero spectrogram instead.
    denominator = value_range if value_range != 0 else 1
    return (255 * (gray - np.min(gray)) / denominator).astype(int)


def _load_split(paths):
    """Return ``(labels, spectrograms)`` for the given image paths.

    The label of an image is the second "/"-separated component of its path.
    """
    labels = []
    spectrograms = []
    for path in paths:
        labels.append(path.split("/")[1])
        spectrograms.append(_load_spectrogram(path))
    return labels, spectrograms


spectr_train_list = _read_path_list("spectr_train_list.txt")
spectr_test_list = _read_path_list("spectr_test_list.txt")
spectr_valid_list = _read_path_list("spectr_valid_list.txt")

# labels_* hold one label per image, spectrograms_* the matching scaled arrays
labels_train, spectrograms_train = _load_split(spectr_train_list)
labels_test, spectrograms_test = _load_split(spectr_test_list)
labels_valid, spectrograms_valid = _load_split(spectr_valid_list)


In [6]:
# first training spectrogram, used below as a sample input for the vectorizers
spectro = spectrograms_train[0]

In [4]:
# # each value as a separate feature: NOT USED, because flattening discards the pixels' 2D positions
# def s_vectorize1(s):
#     return s.reshape(s.shape[0]*s.shape[1])


In [17]:
# s_vectorize1(spectro) # 4096 values

array([0, 0, 0, ..., 0, 0, 0])

In [5]:
# row-wise averages of a spectrogram
def s_vectorize2(s):
    """Return the mean of each row of ``s``.

    ``s`` is reduced along axis 1, so the result holds one value per row.
    """
    row_means = np.mean(s, axis=1)
    return row_means


In [27]:
s_vectorize2(spectro) # one mean per row --> 64 values

array([0.0000000e+00, 0.0000000e+00, 1.5625000e-02, 1.5625000e-02,
       4.6875000e-02, 3.5937500e-01, 3.2812500e-01, 1.3281250e+00,
       3.0937500e+00, 5.3125000e-01, 7.8125000e-02, 3.1250000e-02,
       3.1250000e-02, 4.6875000e-02, 1.5625000e-02, 0.0000000e+00,
       1.5625000e-02, 1.5625000e-02, 3.1250000e-02, 1.7187500e-01,
       1.7187500e-01, 1.4218750e+00, 2.3281250e+00, 1.5312500e+00,
       8.9062500e-01, 7.9687500e-01, 1.1406250e+00, 2.0468750e+00,
       2.3593750e+00, 2.5468750e+00, 6.9843750e+00, 6.3750000e+00,
       4.2812500e+00, 7.9843750e+00, 1.2234375e+01, 1.1796875e+01,
       1.8828125e+01, 1.0578125e+01, 1.1875000e+01, 4.7812500e+00,
       7.2031250e+00, 6.0156250e+00, 5.6718750e+00, 8.0156250e+00,
       6.1093750e+00, 8.3593750e+00, 7.2968750e+00, 8.7656250e+00,
       4.6406250e+00, 4.0156250e+00, 3.3750000e+00, 6.4062500e-01,
       3.4375000e-01, 2.9687500e-01, 9.6875000e-01, 1.5625000e+00,
       2.5468750e+00, 3.4062500e+00, 3.0781250e+00, 2.6718750e

In [6]:
# means of values inside each window (4x4 by default)
def s_vectorize3(s, window_step_x=4, window_step_y=4):
    """Return the mean of each non-overlapping window of ``s`` as a 1-D array.

    Windows are visited in row-major order: all windows of the first band of
    rows from left to right, then the next band, and so on. When a dimension
    of ``s`` is not a multiple of the step, the last window along that
    dimension is smaller and its mean is taken over the values that exist.

    Args:
        s: 2-D array.
        window_step_x: window size (and stride) along axis 0.
        window_step_y: window size (and stride) along axis 1.

    Returns:
        1-D array with one mean per window.

    Raises:
        ValueError: if either step is smaller than 1.
    """
    if window_step_x < 1 or window_step_y < 1:
        raise ValueError("window steps must be positive integers")

    output_list = []
    for i in range(0, s.shape[0], window_step_x):
        for j in range(0, s.shape[1], window_step_y):
            # Window size follows the step, so windows tile the input
            # without gaps or overlap whatever step is chosen.
            window = s[i:i + window_step_x, j:j + window_step_y]
            output_list.append(np.mean(window))

    return np.array(output_list)


In [35]:
s_vectorize3(spectro) # one mean per 4x4 window: 4096 values / (4 * 4) values per window --> 256 values

array([0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 6.25000e-02, 0.00000e+00, 6.25000e-02, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 1.50000e+00, 3.50000e+00,
       2.31250e+00, 9.37500e-01, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       1.31250e+00, 9.93750e+00, 3.68750e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 3.12500e-01, 6.25000e-02, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.000

### VECTORIZATION 1 - too many features

In [11]:
# # train
# listka = []

# for s in spectrograms_train:
#     listka.append(s_vectorize1(s))

# data = pd.DataFrame(listka)
# data.to_csv("train1s.csv", index = None, header = None)

# # test
# listka = []

# for s in spectrograms_test:
#     listka.append(s_vectorize1(s))

# data = pd.DataFrame(listka)
# data.to_csv("test1s.csv", index = None, header = None)

# # valid
# listka = []

# for s in spectrograms_valid:
#     listka.append(s_vectorize1(s))

# data = pd.DataFrame(listka)
# data.to_csv("valid1s.csv", index = None, header = None)

KeyboardInterrupt: 

### VECTORIZATION 2

In [8]:
# # train
# listka = []

# for s in spectrograms_train:
#     listka.append(s_vectorize2(s))

# data = pd.DataFrame(listka)
# data.to_csv("train2s.csv", index = None, header = None)

# test: one row of row-means per spectrogram, written without index or header
listka = [s_vectorize2(s) for s in spectrograms_test]
data = pd.DataFrame(listka)
data.to_csv("test2s.csv", index=None, header=None)

# valid: same export for the validation spectrograms
listka = [s_vectorize2(s) for s in spectrograms_valid]
data = pd.DataFrame(listka)
data.to_csv("valid2s.csv", index=None, header=None)

### VECTORIZATION 3

In [12]:
# train: one row of window-means per spectrogram, written without index or header
listka = [s_vectorize3(s) for s in spectrograms_train]
data = pd.DataFrame(listka)
data.to_csv("train3s.csv", index=None, header=None)

# test: same export for the test spectrograms
listka = [s_vectorize3(s) for s in spectrograms_test]
data = pd.DataFrame(listka)
data.to_csv("test3s.csv", index=None, header=None)

# valid: same export for the validation spectrograms
listka = [s_vectorize3(s) for s in spectrograms_valid]
data = pd.DataFrame(listka)
data.to_csv("valid3s.csv", index=None, header=None)

