In [None]:
import torch
import numpy as np
import pandas as pd
import os
import sys
import multiprocessing
import importlib
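# Workflow: data preparation (via the dataset helpers) -> model instantiation, loss function, optimizer ->
# DataLoader construction -> training loop (fitting the parameters)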

# Dataset directories and the CSV file that receives the test-set predictions.
TRAIN_DIR = "./data/training"
VALIDATION_DIR = "./data/validation"
TEST_DIR = "./data/testing"
TEST_REULST_PATH = "./data/result.csv"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
if use_cuda:
    print("Use Cuda.")
else:
    print("Use cpu.")


In [None]:
# Select the model class and instantiate the model.
import image_classification  # classification model implementations
import image_set  # image dataset implementations
# Reload the local modules so edits to them take effect without restarting the kernel.
importlib.reload(image_classification)
importlib.reload(image_set)

# Choose the architecture; any model defined in image_classification can be used here.
model_class = image_classification.GYHF_AlexNet

# Training dataset, built with the input size declared by the chosen model class.
data_train = image_set.LearningSet(TRAIN_DIR, model_class.input_size)
class_count = data_train.GetClassNum()  # number of classes

# Restore a previously saved model when a checkpoint file exists; otherwise start fresh.
# NOTE(review): str(model_class) is the class repr (e.g. "<class '...'>"), so the file
# name contains characters that are not valid on every filesystem. It is left unchanged
# here so that checkpoints written by earlier runs are still found.
SAVE_PATH = str(model_class) + ".pkl"
if os.path.exists(SAVE_PATH):
    print("model has been loaded from file.")
    # map_location remaps the stored tensors onto the device selected for this session,
    # so a checkpoint written on a CUDA machine still loads on a CPU-only one.
    # SECURITY: torch.load unpickles the whole model object; only load trusted files.
    # NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects fully
    # pickled models - pass weights_only=False there if this file is trusted.
    model = torch.load(SAVE_PATH, map_location=device)
else:
    print("create a new model.")
    model = model_class(class_count)

In [None]:
import model_manager
# Reload the custom module so that changes to its code take effect.
importlib.reload(model_manager)

def calc_right_percent(dir):
    """Return the fraction of images under ``dir`` that ``model`` classifies correctly.

    Ground-truth labels are read through ``image_set.LearningSet(dir, ...).GetLabels()``
    and predictions come from ``model_manager.predict``; the two sequences are compared
    position by position. Uses the notebook-level ``model`` and ``device``.

    Args:
        dir: Directory of labelled images to evaluate. (The name shadows the builtin
            ``dir``; it is kept so existing callers are unaffected.)

    Returns:
        Number of matching predictions divided by the number of labels, or ``0.0``
        when the directory yields no labels (previously a ZeroDivisionError).
    """
    # Build a dataset over the directory only to obtain its ground-truth labels.
    labelled_set = image_set.LearningSet(dir, model.input_size)
    labels = labelled_set.GetLabels()
    num = len(labels)

    # Nothing to score: avoid dividing by zero and skip the pointless inference pass.
    if num == 0:
        return 0.0

    # Predict every image in the directory.
    # NOTE(review): assumes predict() returns results in the same order as GetLabels()
    # - confirm against model_manager / image_set.
    y_pred = model_manager.predict(model, device, dir, model.input_size)

    # Count the positions where prediction and label agree.
    right_count = sum(1 for i in range(num) if y_pred[i] == labels[i])
    return right_count / num

def print_accuracy():
    """Print the model's accuracy on the training set and on the validation set.

    Each accuracy is obtained from ``calc_right_percent`` and shown as a percentage.
    """
    print("waiting for validation...")

    # Training-set accuracy first, then validation-set accuracy.
    train_ratio = calc_right_percent(TRAIN_DIR)
    print("train accuracy: %f%%" % (100 * train_ratio))

    validation_ratio = calc_right_percent(VALIDATION_DIR)
    print("validation accuracy: %f%%" % (100 * validation_ratio))

# Report train/validation accuracy of the current model before the training cell below runs.
print_accuracy()

In [None]:
# Train the model.
print("waiting for training...")
iters = 20
for i in range(iters):
    # Report progress 1-based so the final round reads "20/20" instead of "19/20".
    print("[iters %d/%d]:" % (i + 1, iters))
    model = model_manager.train_model(
        model,
        data_train,
        device=device,
        lr=0.001,
        epochs=5,
        nbatch=128,  # batch size; adjust to the available GPU memory and model size
    )
    # Each train_model call is given epochs=5, so the model is checkpointed every
    # 5 epochs and its accuracy is re-checked right after saving.
    torch.save(model, SAVE_PATH)
    print_accuracy()
 

In [None]:
# Write the test-set predictions to a CSV file with one row per prediction.
y_test = model_manager.predict(model, device, TEST_DIR, model.input_size)
pd.DataFrame({"Id": list(range(len(y_test))), "Category": y_test}).to_csv(TEST_REULST_PATH, index=False)
# Report the path that was actually written instead of a hard-coded copy of it,
# so the message stays correct if TEST_REULST_PATH is changed.
print("test result has been written into %s" % TEST_REULST_PATH)