In [2]:
import torch
import torch.nn as nn 
import numpy as np 
import pandas as pd 
from collections import Counter
import os 
from tqdm.autonotebook import tqdm 

In [11]:
from typing import List, Tuple, Dict
from torch.optim.lr_scheduler import StepLR, MultiStepLR
import torch.nn.functional as F

from sklearn.preprocessing import StandardScaler

In [14]:
import gc 

In [None]:
# 设定模型使用的设备
device = torch.device("cuda:0") if torch.cuda.is_available() else "cpu"

# 搭建模型

## 宽度模型

In [3]:
class Wide(nn.Module):
    def __init__(self, wide_dim, output_dim):
        '''
        wide_dim表示输入神经元数
        output_dim表示输出神经元数
        '''
        super().__init__()
        self.wlinear = nn.Linear(wide_dim, output_dim)
        
    def forward(self, X):
        out = self.wlinear(X)
        return out

## 深度模型

In [110]:
class DeepDense(nn.Module):
    def __init__(self, embeddings_input, continuous_cols, 
                deep_column_idx, hidden_layers, dropout, output_dim):
        '''
        embedding_input: list型，其中每个元素都是tuple类型，tuple[0]表示column name，
                        tuple[1]表示不同值的数量，tuple[2]表示embedding size
        
        embeddings_encoding_dict: dict型，包含了编码映射　
        
        continouous_cols: list型，所有连续型特征的列　
        
        deep_column_idx: dict型，所有需要embedding列的索引
        
        hidden_layers: list型，每个dense层的神经元数
        
        dropout: list型，两个dense层直接的dropout
        
        output_dim: int型，如果是lr和回归问题是1，多分类问题是n
        '''
        super().__init__()
        
        self.embeddings_input = embeddings_input
        #self.embeddings_encoding_dict = embeddings_encoding_dict
        self.continuous_cols = continuous_cols
        self.deep_column_idx = deep_column_idx
        
        ## 定义编码层
        for col, val, dim in embeddings_input:
            setattr(self, "emb_layer_"+col, nn.Embedding(val, dim))
        
        ## 计算输入的所有神经元数
        input_emb_dim = np.sum([emb[2] for emb in embeddings_input]) + len(continuous_cols)
        hidden_layers = [input_emb_dim] + hidden_layers
        dropout = [0.0] + dropout     ## 对输入层不使用droprout
        self.dense = nn.Sequential()
        for i in range(1, len(hidden_layers)):
            self.dense.add_module(
                f'den_layer_{i-1}', 
                self.dense_layer(hidden_layers[i-1], hidden_layers[i], dropout[i-1])
            )
        self.dense.add_module('last_linear', nn.Linear(hidden_layers[-1], output_dim))
            
    def dense_layer(self, inp, outp, dropout):
        return nn.Sequential(
            nn.Linear(inp, outp),
            nn.LeakyReLU(inplace=True),
            nn.Dropout(dropout)
        )
    
    def forward(self, X):
        emb = [getattr(self, 'emb_layer_'+col)(X[:, self.deep_column_idx[col]].long())
              for col, _, _ in self.embeddings_input]
        if self.continuous_cols:
            cont_idx = [self.deep_column_idx[col] for col in self.continuous_cols]
            cont = [X[:, cont_idx].float()]
            inp = torch.cat(emb+cont, 1)
        else:
            inp = torch.cat(emb, 1)
        out = self.dense(inp)
        return out 

## 深宽模型

In [61]:
def set_optimizer(model_params, opt_params):
    '''
    model_params: 表示模型参数 
    opt_params: 表示优化器参数
    '''
    try:
        opt, lr, m = opt_params
    except:
        opt, lr = opt_params
    
    if opt == "Adam":
        return torch.optim.Adam(model_params, lr=lr)
    if opt == "SGD":
        return torch.optim.SGD(model_params, lr=lr, momentum=m)

In [62]:
def set_scheduler(optimizer, sch_params):
    sch, s, g = sch_params
    if sch == "StepLR":
        return StepLR(optimizer, step_size=s, gamma=g)
    if sch == "MultiStepLR":
        return MultiStepLR(optimizer, milestones=s, gamma=g)

In [63]:
class MultipleOptimizer(object):
    def __init__(self, opts):
        self.optimizers= opts 
    
    def zero_grad(self):
        for op in self.optimizers:
            op.zero_grad()
    
    def step(self):
        for op in self.optimizers:
            op.step()
            

            
class MultipleLRScheduler(object):
    def __init__(self, scheds):
        self.schedulers = scheds
    
    def step(self):
        for sc in self.schedulers:
            sc.step()

In [132]:
class WideDeep(nn.Module):
    def __init__(self, output_dim, **params):
        super().__init__()
        
        self.datasets = {}
        self.n_datasets = 1 
        self.output_dim = output_dim
        
        ## 宽度模型
        for k, v in params['wide'].items():
            setattr(self, k, v)
        self.wide = Wide(self.wide_dim, self.output_dim)
        
        ## 深度模型
        if "deep_dense" in params.keys():
            self.datasets['deep_dense'] = self.n_datasets
            self.n_datasets += 1 
            for k, v in params['deep_dense'].items():
                setattr(self, k, v)
            
            self.deep_dense = DeepDense(
                self.embeddings_input,
                #self.embeddings_encoding_dict, 
                self.continuous_cols,
                self.deep_column_idx,
                self.hidden_layers,
                self.dropout,
                self.output_dim
            )
            
    def compile(self, optimizer, lr_scheduler=None):
        '''
        optimizer: dict型，需要制定不同模型的参数，比如 optimizer={'wide': ['SGD', 0.001, 0.3],
                                                              'deep': ['Adam', 0.001]}
        lr_shcheduler用于梯度衰减
        '''
        ## 定义模型的训练方法和损失函数
        ### 由于是二分类的任务
        #self.method = method
        self.activation, self.criterion = torch.sigmoid, F.binary_cross_entropy
        
        ## 定义优化器和学习率下降方法
        wide_opt = set_optimizer(self.wide.parameters(), optimizer['wide'])
        wide_sch = set_scheduler(wide_opt, lr_scheduler['wide']) if lr_scheduler else None
        optimizers, schedulers = [wide_opt], [wide_sch]
        if "deep_dense" in optimizer.keys():
            deep_dense_opt = set_optimizer(self.deep_dense.parameters(), optimizer['deep_dense'])
            deep_dense_sch = set_scheduler(deep_dense_opt, lr_scheduler['deep_dense']) if lr_scheduler else None 
            optimizers += [deep_dense_opt]
            schedulers += [deep_dense_sch]
            
        self.optimizer = MultipleOptimizer(optimizers)
        self.lr_scheduler = MultipleLRScheduler(optimizers)
        
    def forward(self, X):
        ## 宽度模型输出
        wide_inp = X[0]
        wide_deep = self.wide(wide_inp)
        
        
        ## 深度模型输出
        if 'deep_dense' in self.datasets.keys():
            deep_dense_idx = self.datasets['deep_dense']
            deep_dense_out = self.deep_dense(X[deep_dense_idx])
            wide_deep.add_(deep_dense_out)
        
        if not self.activation:
            return wide_deep
        
        else:
            if (self.activation == F.softmax):
                out = self.activation(wide_deep, dim=1)
            else:
                out = self.activation(wide_deep)
        return out
    
    def fit(self, n_epochs, train_loader, eval_loader=None):
        train_steps = (len(train_loader.dataset) // train_loader.batch_size) + 1 
        if eval_loader:
            eval_steps = (len(eval_loader.dataset) // eval_loader.batch_size) + 1 
        
        for epoch in range(n_epochs):
            print(f"正在进行第{epoch+1}轮迭代")
            ## 学习率衰减
            if self.lr_scheduler:
                self.lr_scheduler.step()
            net = self.train()
            net = net.to(device)
            total, correct, running_loss = 0, 0, 0 
            with tqdm(range(train_steps)) as t: 
                for i, (data, target) in zip(t, train_loader):
                    X = tuple(x.to(device) for x in data)
                    y = target.float().to(device)
                    
                    self.optimizer.zero_grad()
                    y_pred = net(X)
                    if (self.criterion == F.cross_entropy):
                        loss = self.criterion(y_pred, y)
                    else:
                        loss = self.criterion(y_pred, y.view(-1, 1))
                    loss.backward()
                    self.optimizer.step()
                    
                    running_loss += loss.item()
                    avg_loss = running_loss / (i+1)
                    ## 得到预测的值
                    y_pred_cat = (y_pred > 0.5).squeeze(1).float()
                    ### 用于计算准确率
                    correct += float((y_pred_cat == y).sum().item())
                    total += y.size(0)
            
            
                    if eval_loader:
                        if i >0 and i % 2000 == 0:
                            print(f"Train —— Loss :{avg_loss} | Accuracy: {correct/total}")
                            total, correct, running_loss = 0, 0, 0 
                            net = self.eval()
                            net = net.to(device)
                            with torch.no_grad():
                                with tqdm(range(eval_steps)) as v: 
                                    for j, (data, target) in zip(v, eval_loader):
                                        X = tuple(x.to(device) for x in data)
                                        y = target.float().to(device)
                                        y_pred = net(X)

                                        loss = self.criterion(y_pred, y.view(-1, 1))
                                        running_loss += loss.item()
                                        avg_loss = running_loss / (i+1)

                                        y_pred_cat = (y_pred > 0.5).squeeze(1).float()

                                        correct += float((y_pred_cat == y).sum().item())
                                        total += y.size(0)

                            print(f"Eval —— Loss: {avg_loss} | Accuracy: {correct / total}")

# 准备数据

In [15]:
class FieldHandler(object):
    def __init__(self, train_file_path, test_file_path=None, category_columns=[], continuation_columns=[]):
        '''
        :param train_file_path: 训练集文件文件名
        :param test_file_path: 测试集文件文件名
        :param category_columns: 类别型特征, list型
        :param continuation_columns: 连续型特征, list型
        '''
        self.train_file_path = None
        self.test_file_path = None
        #self.feature_nums = 1   ## 预留一位，第0号特征表示缺失值
        self.field_dict = {}
        self.cat_nums = {}
        

        self.category_columns = category_columns
        self.continuation_columns = continuation_columns

        if not isinstance(train_file_path, str):
            raise ValueError("train file path must str")
        if os.path.exists(train_file_path):
            self.train_file_path = train_file_path
        else:
            raise OSError("train file path isn't exists!")

        if test_file_path:
            if os.path.exists(test_file_path):
                self.test_file_path = test_file_path
            else:
                raise OSError("test file path isn't exists!")
        ## 读取数据
        self.read_data()

        ## 构建场到特征的字典
        self.build_field_dict()
        self.build_standard_scaler()
        ## 该模型只对类别型特征进行embedding
        self.cat_feats = len(self.category_columns)
        self.num_feats = len(self.continuation_columns)
        

    def read_data(self):
        '''
        读取数据
        '''
        if self.train_file_path and self.test_file_path:
            train_df = pd.read_csv(self.train_file_path)[self.category_columns + self.continuation_columns]
            test_df = pd.read_csv(self.test_file_path)[self.category_columns + self.continuation_columns]
            self.df = pd.concat([train_df, test_df])
        else:
            self.df = pd.read_csv(self.train_file_path)[self.category_columns + self.continuation_columns]


    def build_field_dict(self):
        '''
        构建场到特征的映射关系
        '''
        for column in self.df.columns:
            if column in self.category_columns:
                ## 类别型特征中所有不同的值
                ## 去掉缺失值
                cv = [f for f in self.df[column].unique() if str(f) != "nan"]
                ## 不同场的特征对应的类型都是从1开始，0用于填充缺失值
                self.field_dict[column] = dict(zip(cv, range(1, 1+len(cv))))
                self.cat_nums[column] = len(cv) + 1


    def build_standard_scaler(self):
        '''
        对连续型特征进行标准化
        '''
        if self.continuation_columns:
            self.standard_scaler = StandardScaler()
            self.standard_scaler.fit(self.df[self.continuation_columns].values)
        else:
            self.standard_scaler = None

In [31]:
def transformation_data(file_path, field_hander, label=None):
    '''
    返回准备好的数据
    :param label: 目标值对应的列名
    :return:
    '''
    df_ = pd.read_csv(file_path)
    if label:
        if label in df_.columns:
            ## 获取对应的目标值
            labels = df_[[label]].values.astype("float32")
        else:
            raise KeyError(f"label '{label}' isn\'t exists!")
    df_v = df_[field_hander.category_columns]
    num_features = df_[field_hander.continuation_columns]
    
    del df_ 
    gc.collect()
    
    ## 对连续型特征和类别型特征的缺失值进行填充
    ## 对连续型特征进行归一化
    if field_hander.standard_scaler:
        num_features[field_hander.continuation_columns] = field_hander.standard_scaler.transform(num_features.values)
    ## 对连续型特征的缺失值进行填充
    num_features.fillna(0, inplace=True)

    ## 这个DataFrame用于记录每个类别型特征值和连续型特征对应的特征标号
    df_i = df_v.copy()

    for column in df_v.columns:
        print("cat: ", column)
        df_i[column] = df_i[column].map(field_hander.field_dict[column])
        ## 对于测试集，可能有的特征值没有在训练集中出现
        ## 第0号特征留给缺失值
        df_i[column].fillna(0, inplace=True)
        ## 对非缺失值赋值为1
        df_v.loc[df_v[column].apply(lambda x: str(x) != "nan").values, column] = 1
        ## 对值序列的缺失值用0填充
        df_v[column].fillna(0, inplace=True)
    
    
    cat_index = dict(zip(df_v.columns, range(len(df_v.columns))))
    con_index = dict(zip(num_features.columns, range(len(num_features.columns))))
    cat_v = df_v.values.astype("float32")
    cat_i = df_i.values.astype("int32")
    num_features = num_features.values.astype("float32")
    features = {
        "cat_i": cat_i,
        "cat_v": cat_v,
        "num_feats": num_features,
        "cat_index": cat_index,
        "con_index": con_index
    }

    if label:
        return features, labels
    return features, None

In [17]:
data = pd.read_csv("../data/criteo/criteo_data.csv")

## 取出连续型特征和类别型特征
con = [f for f in data.columns if f.startswith("I")]
cat = [f for f in data.columns if f.startswith("C")]

In [20]:
# 定义fieldhandler对象
field_handler = FieldHandler(train_file_path="../data/criteo/criteo_data.csv", continuation_columns=con,
                           category_columns=cat)

In [32]:
# 获取要输入的特征和标签值
features, labels = transformation_data(file_path="../data/criteo/criteo_data.csv",
                                      field_hander=field_handler, label="Label")

cat:  C1
cat:  C2
cat:  C3
cat:  C4
cat:  C5
cat:  C6
cat:  C7
cat:  C8
cat:  C9
cat:  C10
cat:  C11
cat:  C12
cat:  C13
cat:  C14
cat:  C15
cat:  C16
cat:  C17
cat:  C18
cat:  C19
cat:  C20
cat:  C21
cat:  C22
cat:  C23
cat:  C24
cat:  C25
cat:  C26


In [33]:
# 取出各种类型的特征
con_fe = features["num_feats"]
cat_fe = features["cat_i"]
con_index = features["con_index"]
cat_index = features["cat_index"]

In [35]:
## 将连续性和类别型特征拼接作为deep的输入
deep_input = np.concatenate([con_fe, cat_fe], axis=1)
for key in cat_index.keys():
    cat_index[key] += len(con_index)

In [37]:
deep_column_index = {}
deep_column_index.update(con_index)
deep_column_index.update(cat_index)

In [38]:
# 划分训练集和测试集
train_idx = slice(0, int(len(labels)*0.8))
val_idx = slice(int(len(labels)*0.8), int(len(labels)))

train_wide, train_deep, train_y = con_fe[train_idx], deep_input[train_idx], labels[train_idx]
val_wide, val_deep, val_y = con_fe[val_idx], deep_input[val_idx], labels[val_idx]

train_data = {"wide": train_wide, "deep_dense": train_deep, "target": train_y}
val_data = {"wide": val_wide, "deep_dense": val_deep, "target": val_y}

data = {"train": train_data, "val": val_data}

## 定义数据封装类

In [41]:
from torch.utils.data import Dataset, DataLoader

In [43]:
class WideDeepLoader(Dataset):
    def __init__(self, data, mode="train"):
        self.mode = mode 
        self.input_types = list(data.keys())
        self.X_wide = data['wide']
        if "deep_dense" in self.input_types:
            self.X_deep_dense = data["deep_dense"]
        
        if self.mode is "train":
            self.Y = data['target']
        elif self.mode is "test":
            self.Y = None
            
    def __getitem__(self, idx):
        xw = self.X_wide[idx]
        X = (xw,)
        if "deep_dense" in self.input_types:
            xdd = self.X_deep_dense[idx]
            X += (xdd,)
        
        if self.mode is "train":
            y = self.Y[idx]
            return X, y 
        elif self.mode is "test":
            return X
        
    def __len__(self):
        return len(self.X_wide)

In [44]:
train_set = WideDeepLoader(data['train'], mode='train')
valid_set = WideDeepLoader(data['val'], mode='train')

## 设置参数

In [50]:
## 定义每个类别型特征的输入维度和隐向量维度
embeddings_input = []
for key, value in field_handler.cat_nums.items():
    if value >= 100:
        emb_dim = 20
    elif value >= 20: 
        emb_dim = 10
    else:
        emb_dim = 5 
    item = (key, value, emb_dim)
    embeddings_input.append(item)

In [69]:
params = dict()

params['wide'] = dict(wide_dim=field_handler.num_feats)

params['deep_dense'] = dict(
    embeddings_input = embeddings_input,
    continuous_cols = con,
    deep_column_idx=deep_column_index,
    hidden_layers = [64, 32],
    dropout=[0.5]
)

In [133]:
model = WideDeep(output_dim=1, **params)

In [134]:
# 定义优化器和学习率衰减
optimizer = dict(
    wide=["Adam", 0.1],
    deep_dense=["Adam", 0.01]
)

lr_scheduler = dict(
    wide=["StepLR", 3, 0.1],
    deep_dense=["StepLR", 3, 0.1]
)

In [135]:
model.compile(optimizer=optimizer, lr_scheduler=lr_scheduler)

In [137]:
train_loader = DataLoader(dataset=train_set, batch_size=128, num_workers=4, shuffle=True)
valid_loader = DataLoader(dataset=valid_set, batch_size=128, num_workers=4, shuffle=False)

In [138]:
model.fit(n_epochs=10, train_loader=train_loader, eval_loader=valid_loader)

正在进行第1轮迭代


HBox(children=(IntProgress(value=0, max=6251), HTML(value='')))

Train —— Loss :0.5151050503941431 | Accuracy: 89.77086456771615


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.388253600537211 | Accuracy: 89.93517
Train —— Loss :0.44126166197336547 | Accuracy: 89.18160526315789


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.19069799216232786 | Accuracy: 87.61846
Train —— Loss :0.29008364713186024 | Accuracy: 87.89494736842106


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.1272702196939173 | Accuracy: 90.81823

正在进行第2轮迭代


HBox(children=(IntProgress(value=0, max=6251), HTML(value='')))

Train —— Loss :0.45040036321639537 | Accuracy: 85.48756871564218


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.3969581995321357 | Accuracy: 86.47612
Train —— Loss :0.41618000836081576 | Accuracy: 85.4378201754386


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.2018098768607523 | Accuracy: 89.20461
Train —— Loss :0.27645505077539734 | Accuracy: 86.56595614035088


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.13762878081914168 | Accuracy: 85.88481

正在进行第3轮迭代


HBox(children=(IntProgress(value=0, max=6251), HTML(value='')))

Train —— Loss :0.3775008406670674 | Accuracy: 83.95431971514243


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.4609490311753446 | Accuracy: 86.00929
Train —— Loss :0.4106842063682075 | Accuracy: 84.66174122807017


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.26008533974106207 | Accuracy: 82.48266
Train —— Loss :0.29410316474664966 | Accuracy: 83.21069736842105


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.15016495767801408 | Accuracy: 84.8858

正在进行第4轮迭代


HBox(children=(IntProgress(value=0, max=6251), HTML(value='')))

Train —— Loss :0.34399336240608536 | Accuracy: 83.56673225887056


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.532102852002315 | Accuracy: 86.54369
Train —— Loss :0.43091200834004706 | Accuracy: 84.69817105263158


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.2646579755466898 | Accuracy: 81.99866
Train —— Loss :0.288902428063864 | Accuracy: 82.7747149122807


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.16697292031099192 | Accuracy: 82.9404

正在进行第5轮迭代


HBox(children=(IntProgress(value=0, max=6251), HTML(value='')))

Train —— Loss :0.3254817540886282 | Accuracy: 83.04169790104947


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.6711847766049799 | Accuracy: 83.93384
Train —— Loss :0.49186845202724266 | Accuracy: 83.2628947368421


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.31624078437019304 | Accuracy: 79.43772
Train —— Loss :0.31726004743730996 | Accuracy: 81.5718552631579


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.17808174225334883 | Accuracy: 81.57734

正在进行第6轮迭代


HBox(children=(IntProgress(value=0, max=6251), HTML(value='')))

Train —— Loss :0.31233641195452133 | Accuracy: 82.59617847326336


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.843932226799179 | Accuracy: 82.22409
Train —— Loss :0.571223736449916 | Accuracy: 82.49966228070176


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.2988073540922702 | Accuracy: 82.45345
Train —— Loss :0.3007196737932893 | Accuracy: 82.55760087719298


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.18833049461357276 | Accuracy: 81.84122

正在进行第7轮迭代


HBox(children=(IntProgress(value=0, max=6251), HTML(value='')))

Train —— Loss :0.302502596336624 | Accuracy: 82.23849793853073


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.996330663718205 | Accuracy: 78.62033
Train —— Loss :0.6405067614758501 | Accuracy: 80.76768421052631


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.3223456324144948 | Accuracy: 79.91648
Train —— Loss :0.3123720161836558 | Accuracy: 81.2518201754386


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.2408879016403595 | Accuracy: 78.0584

正在进行第8轮迭代


HBox(children=(IntProgress(value=0, max=6251), HTML(value='')))

Train —— Loss :0.2843734566820436 | Accuracy: 82.07568871814092


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 1.0484538955250005 | Accuracy: 78.87948
Train —— Loss :0.6632098289627011 | Accuracy: 80.54392543859649


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.4262785829922105 | Accuracy: 79.64195
Train —— Loss :0.37697628520117504 | Accuracy: 81.08609649122808


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.3293951388260381 | Accuracy: 77.95703

正在进行第9轮迭代


HBox(children=(IntProgress(value=0, max=6251), HTML(value='')))

Train —— Loss :0.2746424403028569 | Accuracy: 81.76287637431284


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 1.6256983145840582 | Accuracy: 74.62709
Train —— Loss :0.9450258329804914 | Accuracy: 78.65723245614035


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.35887231031408073 | Accuracy: 78.3993
Train —— Loss :0.32884399297385986 | Accuracy: 80.36802631578948


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.46688741927265304 | Accuracy: 79.50935

正在进行第10轮迭代


HBox(children=(IntProgress(value=0, max=6251), HTML(value='')))

Train —— Loss :0.2629533233619046 | Accuracy: 81.65399331584207


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 1.2648145658322896 | Accuracy: 78.56187
Train —— Loss :0.7597171249232629 | Accuracy: 80.28966666666666


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.655410549858754 | Accuracy: 74.17361
Train —— Loss :0.5247043628649719 | Accuracy: 78.33515789473684


HBox(children=(IntProgress(value=0, max=1563), HTML(value='')))

Eval —— Loss: 0.38211267265830906 | Accuracy: 74.72354



随着训练的加深，出现了过拟合