In [1]:
import torch
import matplotlib.pyplot as plt
import librosa
import json
from torch import nn
import numpy as np
import d2l.torch as d2l
from torch.utils.data import DataLoader,TensorDataset
import math
import random


In [2]:
# --- Paths -----------------------------------------------------------------
DATA_DIR = './genres'
JSON_PATH = 'data.json'   # JSON file read by load_data()

# --- Optimisation / split settings -----------------------------------------
# NOTE(review): LEARNING_RATE and WEIGHT_DECAY are not referenced by the cells
# visible in this notebook; the optimizer cell passes literal values
# (lr=3e-4, weight_decay=1e-3) instead. Confirm which values are intended.
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 0.9
# Passed as `ratio` to split_train_test(). The name looks like a typo of
# TRAIN_OVER_ALL; it is kept because another cell references it.
TRAIN_OVDER_ALL = 0.7
BATCH_SIZE = 32

# --- Audio settings ----------------------------------------------------------
SAMPLE_RATE = 22050
TRACK_DURATION = 30  # seconds
SAMPLE_PER_TRACK = SAMPLE_RATE * TRACK_DURATION

# --- Compute device ----------------------------------------------------------
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
def load_data(json_path):
    """Load features and labels from a JSON file.

    The file must contain a top-level object with the keys ``'mfcc'`` and
    ``'labels'``; each is converted to a NumPy array.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Tuple ``(X, y)`` where ``X = np.array(data['mfcc'])`` and
        ``y = np.array(data['labels'])``.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If ``'mfcc'`` or ``'labels'`` is missing.
    """
    # The encoding must be given to open(): json.load() no longer accepts an
    # ``encoding`` keyword (removed in Python 3.9, ignored before that), so
    # passing it there raised TypeError on current interpreters.
    with open(json_path, 'r', encoding='gb18030') as f:
        data = json.load(f)
    X, y = np.array(data['mfcc']), np.array(data['labels'])
    return X, y

# Read the feature / label arrays from disk.
X, y = load_data(JSON_PATH)

# Insert a new axis at position 1 (used as the Conv2d channel axis by the
# model defined below) and cast the features to float32 before handing them
# to torch.
X = torch.from_numpy(X[:, np.newaxis].astype(np.float32))
y = torch.from_numpy(y)

# NOTE: despite its name this is a TensorDataset, not a DataLoader; the name
# is kept because the split cell refers to it.
data_loader = TensorDataset(X, y)

In [None]:
def split_train_test(data_loader, ratio=0.7, seed=None):
    """Randomly split an indexable dataset into per-class stratified lists.

    For every distinct label, ``floor(ratio * class_count)`` samples go to the
    training list and the remaining samples of that class go to the test
    list. Samples are visited in a random order.

    Args:
        data_loader: Object supporting ``len()`` and integer indexing that
            yields ``(X, y)`` pairs, where ``y.item()`` returns the label.
        ratio: Fraction of each class assigned to the training list.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            drives the shuffle so the split is reproducible; when ``None``
            the global ``random`` module state is used (original behaviour).

    Returns:
        Tuple ``(train_samples, test_samples)`` of lists of ``(X, y)`` pairs.
    """
    n_samples = len(data_loader)

    # Count samples per class with integers. The quota is then derived with a
    # single multiplication: accumulating ``ratio`` once per sample (as the
    # previous implementation did) lets float rounding drift below the exact
    # value, e.g. ten additions of 0.1 give 0.9999999999999999, which silently
    # dropped one training sample for that class.
    totals = {}
    for i in range(n_samples):
        label = data_loader[i][1].item()
        totals[label] = totals.get(label, 0) + 1
    # The small epsilon absorbs representation error in ``total * ratio``.
    quota = {label: int(total * ratio + 1e-9) for label, total in totals.items()}

    order = list(range(n_samples))
    shuffler = random.Random(seed) if seed is not None else random
    shuffler.shuffle(order)

    train_samples, test_samples = [], []
    for idx in order:
        X, y = data_loader[idx]
        label = y.item()
        if quota[label] > 0:
            quota[label] -= 1
            train_samples.append((X, y))
        else:
            test_samples.append((X, y))
    return train_samples, test_samples

In [None]:
# Split the dataset, then wrap each resulting sample list in a DataLoader that
# batches BATCH_SIZE items at a time (no per-epoch reshuffling is requested).
train_dataloader, test_dataloader = (
    DataLoader(subset, batch_size=BATCH_SIZE)
    for subset in split_train_test(data_loader, TRAIN_OVDER_ALL)
)

In [None]:
def train(model, train_dataloader, test_dataloader, optimizer, loss_fn, device,
          epochs):
    """Train ``model`` and plot train / test accuracy with a d2l animator.

    For every epoch the model is put in training mode and optimised over
    ``train_dataloader``; the running training accuracy is plotted about five
    times per epoch. After each epoch the test accuracy returned by
    ``d2l.evaluate_accuracy_gpu`` is plotted. The final accuracies are printed.

    Args:
        model: Module to optimise; called as ``model(X)``.
        train_dataloader: Sized iterable of ``(X, y)`` training batches.
        test_dataloader: Iterable handed to ``d2l.evaluate_accuracy_gpu``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        loss_fn: Callable ``loss_fn(predictions, y.long())`` returning a tensor.
        device: Device each batch is moved to before the forward pass.
        epochs: Number of passes over ``train_dataloader``.

    Returns:
        None.
    """
    animator = d2l.Animator(xlabel='epoch',
                            xlim=[1, epochs],
                            ylim=[0, 1],
                            legend=['train acc', 'test acc'])
    n_batches = len(train_dataloader)
    # Plot roughly five points per epoch. Clamp to 1: with fewer than five
    # batches ``n_batches // 5`` is 0 and the modulo below would raise
    # ZeroDivisionError.
    plot_every = max(1, n_batches // 5)
    # Defined up front so the final print works even if no epoch/batch ran.
    train_acc = test_acc = float('nan')
    for epoch in range(epochs):
        # Slot 0: summed d2l.accuracy() values, slot 1: number of samples seen.
        metric = d2l.Accumulator(2)
        model.train()
        for i, (X, y) in enumerate(train_dataloader):
            X, y = X.to(device), y.to(device)
            predict = model(X)
            optimizer.zero_grad()
            loss = loss_fn(predict, y.long()).sum()
            loss.backward()
            with torch.no_grad():
                metric.add(d2l.accuracy(predict, y), X.shape[0])
            optimizer.step()
            train_acc = metric[0] / metric[1]
            if (i + 1) % plot_every == 0 or i == n_batches - 1:
                animator.add(epoch + (i + 1) / n_batches, (train_acc, None))
        test_acc = d2l.evaluate_accuracy_gpu(model, test_dataloader)
        animator.add(epoch + 1, (None, test_acc))
    print(f'train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')


In [None]:
class CNN(nn.Module):
    """Small convolutional classifier with two residual stages.

    Layout (all convolutions are 3x3 with padding 1):
      * ``conv1``: 1 -> 16 -> 64 channels, followed by a 2x2 max-pool.
      * ``conv2`` / ``conv3``: two 64 -> 64 conv units each; their output is
        added to their input (residual connection) and then max-pooled.
      * ``classifier``: flatten, then 1024 -> 512 -> 128 -> ``num_classes``.

    The first linear layer is fixed at 1024 input features, so the input's
    spatial size must reduce to 16 positions (64 channels x 16) after the
    three 2x2 poolings.
    """

    @staticmethod
    def _conv_unit(in_channels, out_channels):
        """Return the layers of one Conv2d(3x3, pad 1) -> BatchNorm2d -> ReLU unit."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]

    def __init__(self, num_classes):
        """Build the network; ``num_classes`` is the size of the output layer."""
        super().__init__()
        unit = self._conv_unit

        # Stem: widen to 64 channels and halve the spatial resolution.
        self.conv1 = nn.Sequential(
            *unit(1, 16),
            *unit(16, 64),
            nn.MaxPool2d(kernel_size=2),
        )
        # Residual bodies: shape-preserving so their output can be added to
        # their input in forward().
        self.conv2 = nn.Sequential(*unit(64, 64), *unit(64, 64))
        self.conv3 = nn.Sequential(*unit(64, 64), *unit(64, 64))
        # Shared pooling layer applied after each residual stage.
        self.pool = nn.MaxPool2d(kernel_size=2)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return class scores for a batch ``x`` of single-channel inputs."""
        stem = self.conv1(x)
        # Residual connection; remove "+ stem" to drop the residual.
        stage2 = self.pool(self.conv2(stem) + stem)
        stage3 = self.pool(self.conv3(stage2) + stage2)
        return self.classifier(stage3)

In [None]:
# Ten-way classifier, moved to the device selected in the config cell.
model = CNN(num_classes=10)
model = model.to(device)

# AdamW with decoupled weight decay over all model parameters.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=3e-4,
    weight_decay=1e-3,
)

# Cross-entropy over the raw class scores produced by the model.
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Run the full training loop for 100 epochs.
train(
    model,
    train_dataloader,
    test_dataloader,
    optimizer,
    loss_fn,
    device,
    epochs=100,
)