# 导入所需要的库

In [1]:
import os
import torch
from torch.utils.data import Dataset,DataLoader
import torch.nn as nn 

import pandas as pd
import numpy as np
import csv

import math

from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

# 参数

In [2]:
# Hyper-parameters and paths shared by the cells below.
config = dict(
    data_path='./timit_11/',          # directory holding the .npy files
    seed=0,                           # random seed
    batch_size=64,                    # samples per DataLoader batch
    learning_rate=0.0001,             # step size handed to the Adam optimizer
    n_epochs=20,                      # upper bound on training epochs
    save_path='./models/model.ckpt',  # where the best checkpoint is written
    early_stop=5,                     # epochs without improvement before stopping
    valid_ratio=0.2,                  # share of the training data held out for validation
)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
print(device)

cuda


# 一些有用的函数

In [3]:
def same_seeds(seed):
    """Seed NumPy and PyTorch and switch cuDNN to deterministic mode.

    Args:
        seed: integer passed to every random number generator touched here.
    """
    # Global NumPy generator.
    np.random.seed(seed)

    # PyTorch generators; the CUDA ones are only touched when a GPU exists.
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Disable cuDNN auto-tuning and request deterministic kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# 数据模块
## 提取数据

In [4]:
# Read the training features, training labels and test features from the
# directory named by config['data_path'].
train_audio = np.load(f"{config['data_path']}train_11.npy")
train_label = np.load(f"{config['data_path']}train_label_11.npy")
test_data = np.load(f"{config['data_path']}test_11.npy")

# Report the array shapes as a quick sanity check.
print(f'the shape of train_audio:{train_audio.shape}')
print(f'the shape of train_label:{train_label.shape}')
print(f'the shape of test_data:{test_data.shape}')

the shape of train_audio:(1229932, 429)
the shape of train_label:(1229932,)
the shape of test_data:(451552, 429)


## 划分训练集与验证集

In [5]:
def train_valid_set(train_audio,train_label, valid_ratio):
    """Split features and labels into a leading training part and a trailing validation part.

    The split is positional (no shuffling): the first
    ``int(len * (1 - valid_ratio))`` rows go to training, the rest to
    validation.

    Args:
        train_audio: array of features; its first axis indexes samples.
        train_label: labels aligned with ``train_audio``.
        valid_ratio: fraction of the samples reserved for validation.

    Returns:
        Tuple ``(x_train, y_train, x_valid, y_valid)``.
    """
    split_at = int(train_audio.shape[0] * (1 - valid_ratio))
    features = (train_audio[:split_at], train_audio[split_at:])
    labels = (train_label[:split_at], train_label[split_at:])
    return features[0], labels[0], features[1], labels[1]

## Dataset

In [6]:
class Phoneme_Classification(Dataset):
    """Dataset wrapping a feature array and, optionally, its integer labels.

    With labels, items are ``(features, label)`` pairs; without labels
    (e.g. for a test set) items are the feature tensor alone.
    """

    def __init__(self, X, y=None):
        """Convert the NumPy inputs to tensors.

        Args:
            X: NumPy array of features; stored as a float32 tensor.
            y: optional NumPy array of labels; cast to int and stored as
                a ``LongTensor``. ``None`` means the data is unlabeled.
        """
        self.data = torch.from_numpy(X).float()
        self.label = None if y is None else torch.LongTensor(y.astype(int))

    def __getitem__(self, idx):
        """Return the sample at ``idx``, paired with its label when labels exist."""
        sample = self.data[idx]
        if self.label is None:
            return sample
        return sample, self.label[idx]

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.data)

## 选择特征值（可选）

## DataLoader

In [7]:
# Seed every random number generator before the shuffling loader is created,
# so that the batch order (and everything seeded after it) is repeatable
# across runs.
same_seeds(config['seed'])

# Positional split of the training arrays into training and validation parts.
x_train,y_train,x_valid,y_valid = train_valid_set(train_audio,train_label, config['valid_ratio'])
print(x_train.shape)
print(x_valid.shape)

# The test set has no labels, so its dataset yields features only.
train_dataset = Phoneme_Classification(x_train, y_train)
valid_dataset = Phoneme_Classification(x_valid, y_valid)
test_dataset = Phoneme_Classification(test_data)
print(len(train_dataset))

train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
# Accuracy does not depend on sample order, so only the training data is shuffled.
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=False)
# The test order must be preserved because predictions are written out by index.
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False)

(983945, 429)
(245987, 429)
983945


In [8]:
import gc

# The split arrays and the raw training arrays are not referenced by name
# after the datasets were built; remove those names and run a collection pass.
del x_train, y_train
del x_valid, y_valid
del train_audio, train_label
gc.collect()

23

# Neural Network Model

In [14]:
class My_Model(nn.Module):
    """Fully connected classifier: 429 input features -> 39 class scores.

    Three hidden linear layers (1024, 512 and 128 units), each followed by a
    sigmoid, and a final linear layer that produces one raw score per class.
    This is the class the prediction cell instantiates with ``My_Model()``.
    """

    def __init__(self):
        super(My_Model, self).__init__()
        self.layer1 = nn.Linear(429, 1024)
        self.layer2 = nn.Linear(1024, 512)
        self.layer3 = nn.Linear(512, 128)
        self.out = nn.Linear(128, 39)

        self.act_fn = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of feature vectors to unnormalised class scores.

        Args:
            x: tensor whose last dimension has 429 features.

        Returns:
            Tensor with 39 scores along the last dimension.
        """
        x = self.layer1(x)
        x = self.act_fn(x)

        x = self.layer2(x)
        x = self.act_fn(x)

        x = self.layer3(x)
        x = self.act_fn(x)

        # No activation on the output: the scores are returned as raw logits.
        x = self.out(x)

        return x

# Train loop

In [15]:
def _class_scores(output):
    """Return the class-score tensor from a model's forward output.

    A model may return the scores alone, or a tuple/list whose first item is
    the scores (for example ``(scores, hidden_state)``).
    """
    if isinstance(output, (tuple, list)):
        return output[0]
    return output


def trainer(train_loader,valid_loader,model,device,config):
    """Train ``model`` and keep the checkpoint with the best validation accuracy.

    Each epoch runs one optimisation pass over ``train_loader`` and one
    evaluation pass over ``valid_loader``. Both accuracies are logged to
    TensorBoard and printed. The model's ``state_dict`` is saved to
    ``config['save_path']`` whenever the number of correct validation
    predictions exceeds the best seen so far. Training stops early after
    ``config['early_stop']`` consecutive epochs without improvement.

    Args:
        train_loader: iterable of ``(x, y)`` batches used for optimisation.
        valid_loader: iterable of ``(x, y)`` batches used for evaluation.
        model: ``nn.Module`` whose forward pass returns class scores, or a
            tuple/list whose first element is the class scores. It is not
            moved to ``device`` here; the caller does that.
        device: device the batches are moved to.
        config: dict providing ``learning_rate``, ``n_epochs``,
            ``early_stop`` and ``save_path``.

    Returns:
        None.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])

    # Create the directory the checkpoint is actually written to, instead of
    # assuming it is always './models'.
    save_dir = os.path.dirname(config['save_path'])
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    n_epochs, best_acc, step, early_stop_count = config['n_epochs'], 0, 0, 0

    writer = SummaryWriter()
    try:
        for epoch in range(n_epochs):
            # ---- training pass ----
            model.train()
            train_correct, train_seen = 0, 0
            for x, y in train_loader:
                x, y = x.to(device), y.to(device)
                optimizer.zero_grad()
                pred = _class_scores(model(x))
                loss = criterion(pred, y)
                loss.backward()
                optimizer.step()

                step += 1
                # Index of the highest score is the predicted class.
                train_correct += (pred.argmax(dim=1) == y).sum().item()
                train_seen += y.size(0)
            # Normalise by the samples actually seen rather than by a global
            # dataset, so the function only depends on its arguments.
            mean_train_acc = train_correct / max(train_seen, 1)
            writer.add_scalar('Acc/train', mean_train_acc, step)

            # ---- validation pass ----
            model.eval()
            valid_correct, valid_seen = 0, 0
            with torch.no_grad():
                for x, y in valid_loader:
                    x, y = x.to(device), y.to(device)
                    pred = _class_scores(model(x))
                    valid_correct += (pred.argmax(dim=1) == y).sum().item()
                    valid_seen += y.size(0)
            mean_valid_acc = valid_correct / max(valid_seen, 1)
            # Separate tag so the validation curve does not mix with training.
            writer.add_scalar('Acc/valid', mean_valid_acc, step)

            print('[{:03d}/{:03d}] Train Acc: {:3.6f}  | Val Acc: {:3.6f} '.format(epoch + 1, n_epochs, mean_train_acc, mean_valid_acc))

            if valid_correct > best_acc:
                best_acc = valid_correct
                torch.save(model.state_dict(), config['save_path'])
                print('saving model with acc {:.4f}'.format(mean_valid_acc))
                early_stop_count = 0
            else:
                early_stop_count += 1

            if early_stop_count >= config['early_stop']:
                print('\nModel is not improving, so we halt the training session.')
                return
    finally:
        # Flush and release the event file on every exit path.
        writer.close()

# Start training!

In [16]:
# Build the network, move it to the selected device and run the training loop.
# NOTE(review): `Net`, `input_size`, `hidden_size` and `num_layers` are not
# defined in any cell shown in this notebook, and the recorded output of this
# cell is a NameError. The prediction cell builds `My_Model()` instead, so the
# trained and the loaded architecture may not match — confirm which model is
# intended and make sure it is defined before this cell runs.
model = Net(input_size,hidden_size,num_layers).to(device)
trainer(train_loader,valid_loader,model,device,config)

NameError: name 'output_size' is not defined

# Plot learning curves with tensorboard

In [None]:
# Load (or reload) the TensorBoard notebook extension and open it on ./runs/.
# NOTE(review): trainer() creates SummaryWriter() without a log_dir; presumably
# its default output lands under ./runs/ — confirm.
%reload_ext tensorboard
%tensorboard --logdir=./runs/

# Predict

In [None]:
def save_pred(preds,file):
    """Write predictions to a CSV file with an ``id,class`` header.

    Each prediction becomes one row ``<index>,<prediction>``, where the index
    is the prediction's position in ``preds`` starting at 0.

    Args:
        preds: iterable of predicted class values.
        file: path of the CSV file to create or overwrite.
    """
    # newline='' is what the csv module asks for; without it, platforms that
    # translate line endings end up with a blank line after every row.
    with open(file, 'w', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'class'])
        writer.writerows(enumerate(preds))

In [None]:
def predict(test_loader,model,device):
    """Predict a class index for every sample yielded by ``test_loader``.

    Args:
        test_loader: iterable of feature batches (no labels).
        model: ``nn.Module`` whose forward pass returns class scores, or a
            tuple/list whose first element is the class scores.
        device: device each batch is moved to before the forward pass.

    Returns:
        List with one predicted class index per sample, in loader order.
    """
    model.eval()
    preds = []
    with torch.no_grad():
        for x in tqdm(test_loader):
            output = model(x.to(device))
            # Accept models that return scores alone as well as models that
            # return (scores, extra_state); only the scores are needed.
            scores = output[0] if isinstance(output, (tuple, list)) else output
            # Index of the highest score is the predicted class.
            preds.extend(scores.argmax(dim=1).cpu().numpy())
    return preds

In [None]:
# Rebuild the network, restore the weights written by trainer(), and write
# the test-set predictions to pred.csv.
# NOTE(review): the training cell builds `Net(...)` while this cell builds
# `My_Model()`; the checkpoint only loads if both are the same architecture.
model = My_Model().to(device)
# map_location lets a checkpoint saved on a GPU be restored on a CPU-only machine.
model.load_state_dict(torch.load(config['save_path'], map_location=device))
preds = predict(test_loader,model,device)
save_pred(preds,'pred.csv')