In [2]:
# 导入使用的库
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split


定义 load_data 函数：加载数据集并进行预处理，返回处理之后的结果（特征矩阵 X 与对应的标签 y）。

In [3]:
def load_data(path):
    """Load a header-less CSV dataset and one-hot encode its features.

    Parameters
    ----------
    path : str
        Path of the dataset file. It is read with ``header=None``, so the
        first row of the file is treated as data, not as column titles.

    Returns
    -------
    (X, y) : tuple
        ``X`` is a DataFrame with the one-hot encoded features, whose
        columns are named ``c0``, ``c1``, ...; ``y`` is the last column of
        the file, returned as read.
    """
    frame = pd.read_csv(path, header=None)

    # Every column but the last is a feature; the last column is the label.
    raw_features = frame.iloc[:, :-1]
    target = frame.iloc[:, -1]

    # Step 1: replace the values of each feature column by integer codes.
    label_encoder = LabelEncoder()

    def _to_codes(column):
        # The encoder is re-fitted on every column it is applied to.
        return label_encoder.fit_transform(column)

    coded_features = raw_features.apply(_to_codes)

    # Step 2: expand the integer codes into a dense one-hot matrix.
    one_hot_encoder = OneHotEncoder()
    dense_matrix = one_hot_encoder.fit_transform(coded_features).toarray()
    encoded = pd.DataFrame(dense_matrix)

    # Per the original note, TensorFlow does not accept numeric column names,
    # so the integer labels are turned into strings ("c0", "c1", ...).
    encoded.columns = encoded.columns.map(lambda position: f"c{position}")
    return encoded, target

定义 train_input_fn 函数，用于构建训练数据集。

In [4]:
def train_input_fn(features, labels, batch_size=50, num_epochs=10,
                   shuffle_buffer_size=10000, seed=0):
    """Build the shuffled, repeated and batched dataset used for training.

    The defaults of the keyword parameters reproduce the values that were
    previously hard-coded, so ``train_input_fn(features, labels)`` behaves
    exactly as before.

    Parameters
    ----------
    features : array-like, shape [n_samples, n_features]
        Feature matrix used for training. It is converted with
        ``dict(features)``, so it must be dict-convertible
        (e.g. a pandas DataFrame, giving a column-name -> values mapping).
    labels : array-like, shape [n_samples]
        Label (class) of each sample.
    batch_size : int, default 50
        Number of samples per batch.
    num_epochs : int, default 10
        Number of times the dataset is repeated.
    shuffle_buffer_size : int, default 10000
        Size of the buffer used by ``Dataset.shuffle``.
    seed : int, default 0
        Random seed passed to ``Dataset.shuffle``.

    Returns
    -------
    dataset : tf.data.Dataset
        Dataset of ``(features, labels)`` batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    # Order matters: shuffle first, then repeat, then cut into batches.
    dataset = dataset.shuffle(shuffle_buffer_size, seed=seed)
    dataset = dataset.repeat(num_epochs).batch(batch_size)
    return dataset

定义eval_input_fn评估函数，用于评估模型效果，或对新数据进行预测

In [6]:
def eval_input_fn(features, labels=None, batch_size=100):
    """Build the batched dataset used for evaluation or prediction.

    Unlike the training input function, the data is neither shuffled nor
    repeated. The default ``batch_size`` reproduces the value that was
    previously hard-coded.

    Parameters
    ----------
    features : array-like, shape [n_samples, n_features]
        Feature matrix to evaluate or predict on. It is converted with
        ``dict(features)``, so it must be dict-convertible
        (e.g. a pandas DataFrame).
    labels : array-like, shape [n_samples], optional
        Label (class) of each sample. Leave as ``None`` when predicting on
        unlabeled data.
    batch_size : int, default 100
        Number of samples per batch.

    Returns
    -------
    dataset : tf.data.Dataset
        Dataset of feature batches, or of ``(features, labels)`` batches
        when ``labels`` is given.
    """
    # Convert the features to a dict (column name -> values).
    features = dict(features)
    # Prediction on unknown data has no labels, so only features are sliced.
    if labels is None:
        inputs = features
    else:
        inputs = (features, labels)
    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    # Group the records into batches of `batch_size`.
    dataset = dataset.batch(batch_size)
    return dataset

In [9]:
# Load the encoded dataset and hold out 25% of it for evaluation.
X, y = load_data(r'data.csv')
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# One numeric TensorFlow feature column per column of the training frame.
my_feature_columns = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in train_X.keys()
]

# Two hidden layers of 512 units, 10 output classes, SGD optimizer.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[512, 512],
    n_classes=10,
    optimizer="SGD",
)
classifier.train(input_fn=lambda: train_input_fn(train_X, train_y))
# Kept as the last expression so the evaluation metrics show as cell output.
classifier.evaluate(input_fn=lambda: eval_input_fn(test_X, test_y))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\ADMINI~1\\AppData\\Local\\Temp\\tmp_drkknh8', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000234E98045C0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create Che

{'accuracy': 0.99200386,
 'average_loss': 0.04789417,
 'loss': 4.7536864,
 'global_step': 3752}