In [None]:
import io
import os
import tarfile
import zipfile

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import requests
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models

In [None]:
# Fixed-size MFCC feature extraction for a single audio file
def extract_features(file_path, n_mfcc=40, max_len=130):
    """Load an audio file and return its MFCC matrix with exactly ``max_len`` frames.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients requested from ``librosa.feature.mfcc``.
        max_len: Number of frames (columns) in the returned matrix.

    Returns:
        The MFCC array cut down to its first ``max_len`` columns, or zero-padded
        on the right along the frame axis when it has fewer columns than that.
    """
    # At most the first 30 seconds of the file are read
    signal, sample_rate = librosa.load(file_path, duration=30)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)

    n_frames = coefficients.shape[1]
    if n_frames >= max_len:
        # Long enough already: keep only the leading frames
        return coefficients[:, :max_len]

    # Too short: append zero-valued frames until the target width is reached
    missing = max_len - n_frames
    return np.pad(coefficients, pad_width=((0, 0), (0, missing)), mode='constant')

In [None]:
# URL of the GTZAN dataset archive (a gzip-compressed tarball, not a zip file)
url = "http://opihi.cs.uvic.ca/sound/genres.tar.gz"

# Directory the archive is unpacked into
extract_dir = "/content/gtzan"

# Skip the download when a previous run already unpacked the data, so that
# "Restart & Run All" does not fetch the whole archive again.
# NOTE(review): assumes the tarball's top-level folder is "genres" — confirm.
if os.path.isdir(os.path.join(extract_dir, "genres")):
    print("Dataset already present; skipping download.")
else:
    try:
        # Without a timeout the request can hang forever on an unreachable host:
        # allow 10 s to connect and 300 s between received chunks.
        response = requests.get(url, timeout=(10, 300))
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

        # The payload is .tar.gz, so it must be opened with tarfile;
        # zipfile.ZipFile rejects it with BadZipFile.
        with tarfile.open(fileobj=io.BytesIO(response.content), mode="r:gz") as archive:
            # Use the "data" extraction filter when this Python version has it, so
            # archive members cannot be written outside extract_dir.
            extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
            archive.extractall(extract_dir, **extract_kwargs)

        print("Dataset downloaded and extracted successfully.")

    except requests.exceptions.RequestException as e:
        print(f"Error downloading dataset: {e}")
    except (tarfile.TarError, OSError, EOFError) as e:
        # Corrupt/truncated archive or a filesystem problem while unpacking
        print(f"Error extracting dataset: {e}")


Error downloading dataset: HTTPConnectionPool(host='opihi.cs.uvic.ca', port=80): Max retries exceeded with url: /sound/genres.tar.gz (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7fd611d98250>, 'Connection to opihi.cs.uvic.ca timed out. (connect timeout=None)'))


In [None]:
# Path to the extracted dataset
data_path = "/content/gtzan/genres"

# Genre names are the sub-directories of the dataset root. Plain files are skipped
# (len(genres) is later used as the number of classes) and the list is sorted
# because os.listdir returns entries in arbitrary order.
genres = sorted(
    entry for entry in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, entry))
)

# Accumulators for the per-file feature matrices and their genre labels
features = []
labels = []

# Extract MFCC features from every audio file, genre by genre
for genre in genres:
    genre_path = os.path.join(data_path, genre)
    for file in sorted(os.listdir(genre_path)):
        file_path = os.path.join(genre_path, file)
        mfcc = extract_features(file_path)
        features.append(mfcc)
        labels.append(genre)

# Convert to numpy arrays: features is (n_files, n_mfcc, max_len)
features = np.array(features)
labels = np.array(labels)

# Encode the genre names as integer class ids
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split into training and test sets BEFORE scaling, so that no statistics of the
# test set leak into the preprocessing
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Standardise the data. The scaler is fitted on the training split only and then
# applied unchanged to the test split.
# NOTE(review): flattening to (-1, max_len) standardises each frame position across
# all files and coefficients, as the original code did — confirm this is intended
# rather than per-coefficient scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# Add the trailing channel dimension expected by Conv2D
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

In [None]:


# Sanity checks on the train/test splits prepared in the previous cell.
#
# This cell used to repeat the preprocessing on a variable named `data` that is
# never defined in the notebook, so it failed with NameError on a fresh kernel;
# had it run, it would have re-split and re-scaled the data and overwritten
# X_train / X_test. The duplicate work is replaced by invariant checks.

# Every sample must match the input shape the CNN is built with: (40, 130, 1)
assert X_train.shape[1:] == (40, 130, 1), X_train.shape
assert X_test.shape[1:] == X_train.shape[1:], X_test.shape

# Features and labels must stay aligned within each split
assert len(X_train) == len(y_train), (len(X_train), len(y_train))
assert len(X_test) == len(y_test), (len(X_test), len(y_test))

# sparse_categorical_crossentropy expects integer class ids
assert np.issubdtype(y_train.dtype, np.integer), y_train.dtype
assert np.issubdtype(y_test.dtype, np.integer), y_test.dtype

FileNotFoundError: [Errno 2] No such file or directory: 'path_to_gtzan_dataset'

In [None]:
# Build the CNN layer by layer: three convolution stages (the first two followed
# by 2x2 max-pooling), then a dense classifier head with one output per genre
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(40, 130, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(len(genres), activation='softmax'))

# Compile the model; the sparse loss takes integer class ids as targets
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the model; 20% of the training data is held out for validation
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.2,
)


In [None]:
# Evaluate the trained model on the held-out test split
test_loss, test_acc = model.evaluate(
    x=X_test,
    y=y_test,
    verbose=2,
)
print(f"Test accuracy: {test_acc}")


In [None]:
# Accuracy and loss curves, side by side
plt.figure(figsize=(12, 4))

# One entry per panel: (title, training key, validation key, training label, validation label)
panels = (
    ('Accuracy', 'accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy'),
    ('Loss', 'loss', 'val_loss', 'Training Loss', 'Validation Loss'),
)

for position, (panel_title, train_key, val_key, train_label, val_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(panel_title)
    plt.legend()

plt.show()

In [None]:
# Helper that renders the log-scaled mel spectrogram of one audio file
def plot_spectrogram(file_path, title):
    """Display the mel spectrogram (in dB) of an audio file.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        title: Title drawn above the figure.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # At most the first 30 seconds of the file are read
    waveform, sample_rate = librosa.load(file_path, duration=30)

    # 128-band mel power spectrogram limited to 8 kHz, converted to dB relative to its peak
    mel_power = librosa.feature.melspectrogram(
        y=waveform,
        sr=sample_rate,
        n_mels=128,
        fmax=8000,
    )
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(
        mel_db,
        sr=sample_rate,
        x_axis='time',
        y_axis='mel',
        fmax=8000,
        cmap='coolwarm',
    )
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()

# Example files for the spectrogram display: the first file (in sorted order, so
# the choice is reproducible) of each of the first two genres.
# The dataset root is `data_path`; the previously used `DATA_PATH` is never
# defined in this notebook and raised NameError.
example_files = [
    os.path.join(data_path, genre, sorted(os.listdir(os.path.join(data_path, genre)))[0])
    for genre in genres[:2]
]

# Display one spectrogram per example file
for genre, example_file in zip(genres, example_files):
    plot_spectrogram(example_file, f"Spectrogram: {genre}")
