# 训练篇

1. 选择损失函数（torch.nn 中的 loss 类，例如 nn.CrossEntropyLoss）
2. 选择torch.optim优化算法
3. 设置超参数
4. 设置tensorboard进行可视化

In [1]:
import torch 
import torch.nn as nn # 包含loss和各种blocks, layers
from torch.utils.data import * # 包括Dataset和DataLoader
from tensorboardX import SummaryWriter 

from resnet import * 
from generate_dataset import * 

1. settings: 训练集地址和测试集地址（对应代码中的 train_data_path / test_data_path）
2. writer for tensorboard
3. prepare dataloader
4. load model
5. loss func 
6. optim 
7. let's go 

In [2]:
def _prepare_batch(x, y):
    """Move one batch to the GPU and convert labels to 0-based int64 indices.

    Labels arrive 1-based (1..9) and are shifted to 0..8, which is the
    class-index form expected by ``nn.CrossEntropyLoss``.
    """
    # Local import so the helper does not rely on ``np`` leaking in through
    # a wildcard import elsewhere in the file.
    import numpy as np

    # ``np.long`` was removed in NumPy 1.24; ``np.int64`` is the dtype that
    # CrossEntropyLoss targets require.
    y = torch.from_numpy(np.asarray(y, dtype=np.int64))
    y -= 1
    return x.cuda(), y.cuda()


def _evaluate(model, loader):
    """Return the classification accuracy of ``model`` over ``loader`` as a float.

    Leaves the model in eval mode; the caller is responsible for switching
    back to train mode before the next training pass.
    """
    correct = 0
    total = 0
    model.eval()
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for x, y in loader:
            x, y = _prepare_batch(x, y)
            prediction = torch.argmax(model(x), 1)
            # .item() keeps the counter a plain Python int instead of a
            # CUDA tensor, so the accuracy below is a true float division.
            correct += (prediction == y).sum().item()
            total += len(y)
    # Guard against an empty evaluation set.
    return correct / total if total else 0.0


def train(model_path,
          train_data_path="/share/mal/malware/data/image_train",
          test_data_path="/share/mal/malware/data/image_train",
          epochs=10,
          batch_size=64,
          lr=1e-3):
    """Train a ResNet classifier and save checkpoints.

    Steps: build the data loaders, create the model, then for each epoch run
    one training pass followed by one evaluation pass. Loss (per step) and
    accuracy (per epoch) are logged to TensorBoard.

    Args:
        model_path: Where the final model is saved. The best-accuracy model
            is additionally saved next to it as ``<stem>_best.pth``.
        train_data_path: Directory passed to ``generate_dataset`` for training.
        test_data_path: Directory passed to ``generate_dataset`` for evaluation.
            NOTE(review): the default is the same directory as the training
            data (kept from the original code), so the reported accuracy is
            measured on training samples. Pass a held-out directory here.
        epochs: Number of passes over the training set.
        batch_size: Batch size for both loaders.
        lr: Learning rate for Adam.
    """
    import os

    # prepare dataloader
    train_set = generate_dataset(train_data_path)
    test_set = generate_dataset(test_data_path)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    # Accuracy does not depend on sample order, so no shuffling for evaluation.
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    # load model
    model = ResNet(num_block=[3, 4, 6, 3])
    model.cuda()

    # loss func
    criterion = nn.CrossEntropyLoss()

    # optim
    optim = torch.optim.Adam(model.parameters(), lr=lr)

    # splitext only strips the final extension, so dots elsewhere in the
    # path (directories, file stem) are preserved.
    best_model_path = os.path.splitext(model_path)[0] + "_best.pth"

    # writer for debug
    writer = SummaryWriter(comment="Resnet")

    step = 0
    best_acc = 0.0

    try:
        for ep in range(epochs):
            # training
            # Re-enable train mode every epoch: the evaluation pass below
            # switches the model to eval mode.
            model.train()
            for x, y in train_loader:
                x, y = _prepare_batch(x, y)

                optim.zero_grad()
                loss = criterion(model(x), y)
                loss.backward()
                optim.step()

                # Log a plain float rather than a tensor tied to the graph.
                loss_value = loss.item()
                if step % 10 == 0:
                    print("epoch {} step {}: loss={}".format(ep, step, loss_value))
                writer.add_scalar("Loss", loss_value, step)
                step += 1

            # testing
            acc = _evaluate(model, test_loader)
            print("epoch {}: acc={}".format(ep, acc))
            writer.add_scalar("acc", acc, ep)
            if acc > best_acc:
                torch.save(model, best_model_path)
                best_acc = acc

        torch.save(model, model_path)
    finally:
        # Flush and close the event file even if training is interrupted.
        writer.close()

In [3]:
# Sanity check: train() moves the model and every batch to the GPU with
# .cuda(), so confirm that a CUDA device is visible before starting.
torch.cuda.is_available()

True

In [None]:
# Destination of the final checkpoint; train() also saves the best-accuracy
# model alongside it with a "_best.pth" suffix.
model_path = "/root/paperwithcode/第三周-训练篇/malware_dpcnn/A.pth" 
train(model_path)

epoch 0 step 0: loss=2.1984505653381348
epoch 0 step 10: loss=1.789381742477417
epoch 0 step 20: loss=1.4160553216934204
epoch 0 step 30: loss=0.8571893572807312
epoch 0 step 40: loss=1.2058979272842407
epoch 0 step 50: loss=0.6912479400634766
epoch 0 step 60: loss=0.9555216431617737
epoch 0 step 70: loss=0.818180501461029
epoch 0 step 80: loss=0.6732709407806396
epoch 0 step 90: loss=0.6725689768791199
epoch 0 step 100: loss=0.4437403082847595
epoch 0 step 110: loss=0.5286104679107666
epoch 0 step 120: loss=0.5549123883247375
epoch 0 step 130: loss=0.6708193421363831
epoch 0 step 140: loss=0.3919709026813507
epoch 0 step 150: loss=0.39155325293540955
epoch 0 step 160: loss=0.5423536896705627
