In [16]:
import pandas as pd
import os
import cv2
import numpy as np
import random
import pickle

from sklearn.model_selection import train_test_split

In [2]:
# Load the UrbanSound8K metadata table and preview its first rows.
df = pd.read_csv(
    "urbanSoundDataset/UrbanSound8K.csv",
)
df.head(n=5)

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [3]:
# Build `images`: a list of [pixel_array, class_id] pairs, one per metadata row
# whose image could be read. Each image is read with flag 0 (grayscale),
# resized to 128x128 and scaled by 1/255.
images = []
numberOfErrors = 0
numberOfProcessed = 0

# Iterate the three needed columns directly instead of df.iterrows(), which
# builds a full Series per row.
for fileName, fold, classID in zip(df["slice_file_name"], df["fold"], df["classID"]):
    pathToImage = "urbanSoundDataset/images/fold{}/{}.png".format(fold, fileName.split(".")[0])

    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    # Check for that explicitly rather than relying on a later call blowing up.
    img = cv2.imread(pathToImage, 0)
    if img is None:
        numberOfErrors += 1
        continue

    try:
        img = cv2.resize(img, (128, 128))
    except cv2.error:
        # Only OpenCV failures are counted as per-image errors; anything else
        # (including KeyboardInterrupt) propagates instead of being swallowed.
        numberOfErrors += 1
        continue

    images.append([img / 255, int(classID)])
    numberOfProcessed += 1
print("Processed: {}\nErrors: {}".format(numberOfProcessed, numberOfErrors))

Processed: 8732
Errors: 0


In [4]:
# Sanity check: number of successfully loaded images.
image_count = len(images)
print(image_count)

8732


In [5]:
# Shuffle the loaded samples in place with a fixed seed so that the order (and
# therefore the seeded train/val/test split that follows) is reproducible after
# Restart Kernel -> Run All. A dedicated Random instance leaves the global RNG
# state untouched. One shuffle already yields a uniformly random permutation;
# repeating it adds nothing.
random.Random(1).shuffle(images)

In [6]:
# Split the [image, label] pairs into two parallel lists.
X = [pixels for pixels, _ in images]
Y = [label for _, label in images]

In [7]:
# Hold out 20% of the samples; the held-out part is divided into validation
# and test sets in a later step.
X_train, X_temporary, Y_train, Y_temporary = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)

In [8]:
# Halve the held-out samples into validation and test sets.
X_val, X_test, Y_val, Y_test = train_test_split(
    X_temporary,
    Y_temporary,
    test_size=0.5,
    random_state=1,
)

In [9]:
# Report the size of the full dataset and of each split, one per line.
print(
    f"Length of the dataset: {len(X)}",
    f"Length of the training dataset: {len(X_train)}",
    f"Length of the validation dataset: {len(X_val)}",
    f"Length of the test dataset: {len(X_test)}",
    sep="\n",
)

Length of the dataset: 8732
Length of the training dataset: 6985
Length of the validation dataset: 873
Length of the test dataset: 874


In [10]:
# Convert every split from Python lists to NumPy arrays.
X_train, Y_train = np.array(X_train), np.array(Y_train)
X_val, Y_val = np.array(X_val), np.array(Y_val)
X_test, Y_test = np.array(X_test), np.array(Y_test)

In [12]:
# Reshape each image array to (n_samples, 128, 128, 1); the trailing axis of
# length 1 is presumably the single grayscale channel expected downstream.
X_train, X_val, X_test = (
    split.reshape(-1, 128, 128, 1) for split in (X_train, X_val, X_test)
)

In [18]:
# Persist every split as pickles/<name>.pickle.
# makedirs(exist_ok=True) creates the directory if needed and is a no-op when
# it already exists, replacing the separate listdir() check + mkdir().
os.makedirs("pickles", exist_ok=True)

# Keys are the output file stems (note the lower-case "y" in label file names).
splits_to_save = {
    "X_train": X_train,
    "y_train": Y_train,
    "X_val": X_val,
    "y_val": Y_val,
    "X_test": X_test,
    "y_test": Y_test,
}

for split_name, split_array in splits_to_save.items():
    with open(f"pickles/{split_name}.pickle", "wb") as f:
        pickle.dump(split_array, f)