In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np


In [None]:

# Hyperparameters shared by the cells below.
BATCH_SIZE = 32    # samples per mini-batch
NUM_CLASSES = 4    # number of genre classes the classifiers output
IMAGE_SIZE = 128   # side length (pixels) the CNN's first Linear layer is sized for
# Root folder of the spectrogram images. The original absolute path stays the
# default; set the GENRE_DATA_DIR environment variable to run the notebook on
# another machine without editing this cell.
DATA_PATH = os.environ.get(
    'GENRE_DATA_DIR',
    '/Users/abynaya/code/project_akhir_uas/dataset dl/bener_output',
)


***OLD MODEL***

In [None]:
# CNN Model (updated for 3-channel input)
class GenreCNN(nn.Module):
    """Three-stage convolutional classifier for 3-channel (RGB) images.

    Each stage is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2), so the
    spatial size is halved three times (divided by 8 overall). The flattened
    feature map then goes through a 256-unit hidden layer with dropout and a
    final linear layer that produces one logit per class.

    Args:
        num_classes: number of output logits. Defaults to the notebook-level
            ``NUM_CLASSES`` when left as ``None``.
        image_size: side length of the square input images. Defaults to the
            notebook-level ``IMAGE_SIZE`` when left as ``None``. Inputs must
            actually have this size, otherwise the flattened features will not
            match the first Linear layer.

    Raises:
        ValueError: if ``image_size`` is smaller than 8, which would leave no
            spatial positions after the three pooling stages.
    """

    def __init__(self, num_classes=None, image_size=None):
        super().__init__()
        # Resolve the defaults lazily so the class can be defined before the
        # hyperparameter cell has been (re-)run.
        if num_classes is None:
            num_classes = NUM_CLASSES
        if image_size is None:
            image_size = IMAGE_SIZE

        pooled_side = image_size // 8  # three MaxPool2d(2) stages
        if pooled_side < 1:
            raise ValueError(
                f"image_size must be at least 8, got {image_size}"
            )

        # Attribute names `conv` and `fc` are kept so existing state_dict
        # checkpoints remain loadable.
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),  # now using 3 input channels
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Linear(128 * pooled_side * pooled_side, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.conv(x)
        # Flatten everything except the batch dimension (same idiom as the
        # AlexNet classifier in this notebook).
        x = torch.flatten(x, 1)
        return self.fc(x)


In [None]:

# Initialize model
# NOTE(review): this cell relies on names that are not defined before it in the
# notebook: `resnet18_genre_classifier` (no definition is visible anywhere),
# `EPOCHS` (only assigned in the later AlexNet training cell), `train_loader`
# (created in a later cell) and `val_loader` (the data cell creates
# `test_loader`, not `val_loader`). It will raise NameError on a fresh
# Restart & Run All - confirm which model/loader were intended.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = resnet18_genre_classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop: one optimisation pass over train_loader per epoch, followed by
# an accuracy measurement on val_loader.
for epoch in range(EPOCHS):
    model.train()  # enable training-mode layers (e.g. dropout)
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Mean of the per-batch losses (not weighted by batch size).
    avg_loss = total_loss / len(train_loader)

    # Validation
    model.eval()  # switch layers such as dropout to inference behaviour
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed while measuring accuracy
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Index of the largest logit along dim 1 is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}/{EPOCHS} | Loss: {avg_loss:.4f} | Validation Accuracy: {val_accuracy:.2f}%")

# Save model
# Saves the weights from the final epoch (no best-checkpoint selection here).
# NOTE(review): the file name says "cnn" while the model above is built by
# `resnet18_genre_classifier` - confirm which architecture this checkpoint holds.
torch.save(model.state_dict(), 'genre_cnn_model_rgb.pth')
print("Model saved as 'genre_cnn_model_rgb.pth'")

In [None]:
# Persist the current model weights to the Drive folder.
# NOTE(review): '/content/drive/...' looks like a Colab Drive mount, while other
# cells use a local '/Users/...' path - confirm where this cell is meant to run.
drive_checkpoint_path = '/content/drive/MyDrive/genre_cnn_baru.pth'
trained_weights = model.state_dict()
torch.save(trained_weights, drive_checkpoint_path)
print("Model saved to Google Drive")

In [19]:
import torch
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader, random_split

import os

# Data directory: the original absolute path remains the default; set the
# GENRE_DATA_DIR environment variable to point at the dataset on another machine.
DATA_DIR = os.environ.get(
    'GENRE_DATA_DIR',
    '/Users/abynaya/code/project_akhir_uas/dataset dl/bener_output',
)
BATCH_SIZE = 32        # You can adjust your batch size here
TRAIN_FRACTION = 0.6   # share of the images used for training
SPLIT_SEED = 42        # fixes the train/test membership across kernel restarts

# 1. Images are only converted to tensors; no resizing or normalisation is applied.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# 2. Load the full dataset using ImageFolder (one sub-folder per class)
full_dataset = ImageFolder(DATA_DIR, transform=transform)

# 3. Calculate split lengths
dataset_size = len(full_dataset)
train_size = int(TRAIN_FRACTION * dataset_size)
test_size = dataset_size - train_size  # The remaining for the test set

# 4. Perform the random split.
# A seeded generator makes the split reproducible. Without it every run draws a
# different partition, so a checkpoint trained in one session could be evaluated
# on images it was trained on in another.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, test_dataset = random_split(
    full_dataset, [train_size, test_size], generator=split_generator
)

# 5. Create DataLoaders for both train and test sets
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)  # Shuffle training data
# shuffle=False keeps the batches in the order of test_dataset.indices.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Total dataset size: {dataset_size}")
print(f"Training dataset size: {len(train_dataset)}")
print(f"Testing dataset size: {len(test_dataset)}")
print(f"Number of training batches: {len(train_loader)}")
print(f"Number of testing batches: {len(test_loader)}")

# You can iterate through your loaders like this:
# for images, labels in train_loader:
#     # Your training logic here
#     pass

# for images, labels in test_loader:
#     # Your testing/evaluation logic here
#     pass

Total dataset size: 430
Training dataset size: 258
Testing dataset size: 172
Number of training batches: 9
Number of testing batches: 6


***NEW MODEL***

In [20]:
import torch
import torch.nn as nn
import os

# --- Global variables (replace with your actual values) ---
IMAGE_SIZE = 128  # Example image size
NUM_CLASSES = 4  # Example number of genre classes
# -----------------------------------------------------------

class AlexNetGenreClassifier(nn.Module):
    """AlexNet-style network that maps RGB images to genre logits.

    Layout: five convolutions (each followed by ReLU) with three max-pooling
    steps, an adaptive average pool to a fixed 6x6 map, then a three-layer
    fully connected head with dropout before the first two linear layers.

    Args:
        num_classes: width of the final linear layer (one logit per class).
    """

    def __init__(self, num_classes=NUM_CLASSES):
        super().__init__()

        # Convolutional trunk; layers are listed in execution order.
        trunk = [
            nn.Conv2d(3, 64, 11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
            nn.Conv2d(64, 192, 5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
            nn.Conv2d(192, 384, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
        ]
        self.features = nn.Sequential(*trunk)

        # Fixed 6x6 output makes the head independent of the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(6, 6))

        flattened_width = 256 * 6 * 6
        hidden_width = 4096
        head = [
            nn.Dropout(),
            nn.Linear(flattened_width, hidden_width),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_width, hidden_width),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_width, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run the trunk, pool to 6x6, flatten per sample and classify."""
        pooled = self.avgpool(self.features(x))
        flat = pooled.flatten(start_dim=1)
        return self.classifier(flat)

def alexnet_genre_classifier():
    """Build an ``AlexNetGenreClassifier`` using its default arguments.

    Returns:
        AlexNetGenreClassifier: a newly constructed model instance.
    """
    classifier = AlexNetGenreClassifier()
    return classifier



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
# Relies on alexnet_genre_classifier, train_loader and test_loader from earlier cells.

EPOCHS = 160
PATIENCE = 50  # Number of epochs to wait for improvement
# Checkpoint paths live in one place so the save, load and log messages cannot
# drift apart (the best weights used to be saved under one name and reloaded
# under another, which failed as soon as early stopping triggered).
BEST_CHECKPOINT_PATH = 'best_genre_alexnet.pth'
FINAL_EARLY_STOPPED_PATH = 'final_early_stopped_genre_alexnet18_model.pth'
# File name kept unchanged for existing artifacts; the weights are AlexNet's.
FULL_EPOCHS_PATH = 'genre_resnet18_model_full_epochs.pth'

min_val_loss = float('inf')  # Best (lowest) validation loss seen so far
epochs_no_improve = 0  # Counter for epochs without improvement
early_stop = False

# Initialize model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = alexnet_genre_classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)  # mean of per-batch losses

    # Validation
    # NOTE: test_loader doubles as the validation set that drives checkpoint
    # selection, so accuracy later reported on the same loader is not an
    # independent estimate.
    model.eval()
    val_correct = 0
    val_total = 0
    val_loss = 0  # Accumulated validation loss for the current epoch
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    avg_val_loss = val_loss / len(test_loader)  # Average validation loss
    val_accuracy = 100 * val_correct / val_total

    print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Validation Accuracy: {val_accuracy:.2f}%")

    # Early Stopping Logic
    if avg_val_loss < min_val_loss:
        min_val_loss = avg_val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), BEST_CHECKPOINT_PATH)
        print("Validation loss improved. Saving best model.")
    else:
        epochs_no_improve += 1
        print(f"Validation loss did not improve for {epochs_no_improve} epoch(s).")
        if epochs_no_improve >= PATIENCE:
            # Stop right away; epoch + 1 is the number of completed epochs.
            early_stop = True
            print(f"Early stopping triggered after {epoch + 1} epochs.")
            break

if early_stop:
    # Restore the best weights (lowest validation loss) before the final save.
    print("Loading best model weights before final save.")
    model.load_state_dict(torch.load(BEST_CHECKPOINT_PATH, map_location=device))
    torch.save(model.state_dict(), FINAL_EARLY_STOPPED_PATH)
    print(f"Final model saved as '{FINAL_EARLY_STOPPED_PATH}'")
else:
    # Training ran all epochs: this saves the last-epoch weights. The best
    # weights by validation loss remain available in BEST_CHECKPOINT_PATH.
    torch.save(model.state_dict(), FULL_EPOCHS_PATH)
    print(f"Model saved as '{FULL_EPOCHS_PATH}'")

Epoch 1/160 | Train Loss: 1.4308 | Val Loss: 1.3899 | Validation Accuracy: 19.77%
Validation loss improved. Saving best model.
Epoch 2/160 | Train Loss: 1.3941 | Val Loss: 1.3715 | Validation Accuracy: 23.84%
Validation loss improved. Saving best model.


KeyboardInterrupt: 

**Convert To Spectrogram**

In [None]:

# Input and output folders
input_root = '/content/drive/MyDrive/dataset dl/data primer'
output_root = '/content/drive/MyDrive/dataset dl/waveform secondary data'

# Audio file extensions that are converted; everything else is skipped.
AUDIO_EXTENSIONS = ('.wav', '.mp3', '.au')

# Loop through genre folders: each sub-directory of input_root is mirrored
# under output_root with one PNG per audio file.
for genre in os.listdir(input_root):
    genre_path = os.path.join(input_root, genre)
    if not os.path.isdir(genre_path):
        continue

    output_genre_path = os.path.join(output_root, genre)
    os.makedirs(output_genre_path, exist_ok=True)

    # Loop through each audio file
    for filename in os.listdir(genre_path):
        if not filename.lower().endswith(AUDIO_EXTENSIONS):
            continue

        filepath = os.path.join(genre_path, filename)
        fig = None
        try:
            # sr=None keeps the file's native sampling rate; mono=True mixes down.
            y, sr = librosa.load(filepath, sr=None, mono=True)

            # Mel power spectrogram converted to dB relative to its maximum.
            S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
            S_DB = librosa.power_to_db(S, ref=np.max)

            # 2.56 in x 50 dpi = 128x128 pixel canvas.
            # NOTE(review): bbox_inches='tight' below may crop the saved PNG to
            # less than 128x128 - confirm the size of the written files.
            fig = plt.figure(figsize=(2.56, 2.56), dpi=50)
            librosa.display.specshow(S_DB, sr=sr, cmap='magma')
            plt.axis('off')

            output_file = os.path.join(output_genre_path, filename.rsplit('.', 1)[0] + '.png')
            plt.savefig(output_file, bbox_inches='tight', pad_inches=0)

            print(f"[✔] Saved: {output_file}")

        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(f"[✘] Failed to process {filepath}: {e}")

        finally:
            # Always release the figure, including when plotting or saving
            # failed, so figures do not pile up in memory across the dataset.
            if fig is not None:
                plt.close(fig)

In [18]:
import torch
import os
# Per-file evaluation on the test split.
# Relies on 'model', 'device', 'test_loader', 'test_dataset' and 'full_dataset'
# created by earlier cells.
# NOTE(review): this cell's execution count (18) is lower than that of the data
# and model cells (19, 20), so the recorded output came from an out-of-order run;
# confirm which weights 'model' held at that point.

# Evaluation mode
model.eval()
correct = 0
total = 0

# Get class names from the original full_dataset
class_names = full_dataset.classes

# test_dataset is a Subset: it only stores indices into full_dataset, so file
# paths are looked up through full_dataset.samples.
original_file_paths = [path for path, _ in full_dataset.samples]
test_indices = test_dataset.indices
test_file_paths = [original_file_paths[i] for i in test_indices]

# test_loader is built with shuffle=False, so samples arrive in the order of
# test_indices and a running counter lines predictions up with file paths.
file_idx = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)

        for i in range(images.size(0)):
            filename = os.path.basename(test_file_paths[file_idx])
            true_idx = labels[i].item()
            pred_idx = predicted[i].item()
            true_class = class_names[true_idx]
            pred_class = class_names[pred_idx]
            # Mark the row by correctness; previously every row showed a
            # check mark, including wrong predictions.
            marker = '✓' if pred_idx == true_idx else '✘'
            print(f"[{marker}] File: {filename} | Predicted: {pred_class} | Actual: {true_class}")
            file_idx += 1

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'\nTest Accuracy: {accuracy:.2f}%')

[✓] File: rock.00073.png | Predicted: rock | Actual: rock
[✓] File: rock.00091.png | Predicted: rock | Actual: rock
[✓] File: Oasis - Don’t Look Back In Anger.png | Predicted: rock | Actual: rock
[✓] File: hiphop.00069.png | Predicted: pop | Actual: hiphop
[✓] File: rock.00015.png | Predicted: hiphop | Actual: rock
[✓] File: rock.00034.png | Predicted: rock | Actual: rock
[✓] File: hiphop.00009.png | Predicted: hiphop | Actual: hiphop
[✓] File: pop.00009.png | Predicted: pop | Actual: pop
[✓] File: hiphop.00042.png | Predicted: hiphop | Actual: hiphop
[✓] File: rock.00059.png | Predicted: rock | Actual: rock
[✓] File: classical.00081.png | Predicted: classical | Actual: classical
[✓] File: rock.00021.png | Predicted: rock | Actual: rock
[✓] File: classical.00092.png | Predicted: classical | Actual: classical
[✓] File: rock.00084.png | Predicted: rock | Actual: rock
[✓] File: hiphop.00085.png | Predicted: hiphop | Actual: hiphop
[✓] File: classical.00009.png | Predicted: classical | Act