In [36]:
%matplotlib inline
%config Completer.use_jedi = False
import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils import data
from matplotlib_inline import backend_inline
from matplotlib import pyplot as plt
import math
# from transformers import pipeline
#!pip install d2l
#from d2l import torch as d2l

In [37]:
class Net(nn.Module):
    '''
    Three-layer fully connected regression network with optional dropout.

    Layout: Linear(num_inputs, num_hiddens1) -> ReLU -> dropout1
            -> Linear(num_hiddens1, num_hiddens2) -> ReLU -> dropout2
            -> Linear(num_hiddens2, num_outputs).

    Parameters
    ----------
    num_inputs : int
        Width of one flattened input sample.
    num_outputs : int
        Width of the network output.
    num_hiddens1, num_hiddens2 : int
        Widths of the first and second hidden layers.
    dropout1, dropout2 : float
        Dropout probabilities applied after the first and second hidden
        layers. They are only active while the module is in training mode.
    '''
    def __init__(self, num_inputs, num_outputs, num_hiddens1, num_hiddens2, dropout1, dropout2):
        super().__init__()
        self.num_inputs = num_inputs
        self.lin1 = nn.Linear(num_inputs, num_hiddens1)
        self.lin2 = nn.Linear(num_hiddens1, num_hiddens2)
        self.lin3 = nn.Linear(num_hiddens2, num_outputs)
        self.relu = nn.ReLU()
        self.dropout1 = dropout1
        self.dropout2 = dropout2

    def forward(self, X):
        """Flatten X to (-1, num_inputs) and run it through the three layers."""
        X = self.relu(self.lin1(X.reshape((-1, self.num_inputs))))
        # Dropout follows each hidden layer. F.dropout defaults to
        # training=True, so the module's own mode flag has to be passed
        # explicitly; otherwise dropout would stay on after net.eval().
        X = F.dropout(X, self.dropout1, training=self.training)
        X = self.relu(self.lin2(X))
        X = F.dropout(X, self.dropout2, training=self.training)
        X = self.lin3(X)
        return X



In [38]:
# Read the Kaggle house-price tables and carve a validation split out of the
# training table.
train_data = pd.read_csv('../input/house-prices-advanced-regression-techniques/train.csv')
test_data = pd.read_csv('../input/house-prices-advanced-regression-techniques/test.csv')

# The first 20% of the training rows become the validation set; the remaining
# rows stay as the training set. Both slices are taken from the full table
# (val_data is assigned first, train_data is narrowed afterwards).
val_data = train_data.iloc[:round(0.2 * train_data.shape[0]), :]
train_data = train_data.iloc[round(0.2 * train_data.shape[0]):, :]

# Numerical features are pre-processed separately for each split below.

def numeric_pretreatment_fillna(all_features):
    """Log-scale, standardize and impute the numeric columns of a feature table.

    Steps, applied to a copy (the caller's frame is left untouched):
      1. 'LotArea' and 'MiscVal' are replaced by log10(x + 0.001) to tame
         their large range.
      2. Every numeric column is standardized with its own mean and sample
         standard deviation, computed on the frame passed in. A column with
         zero (or undefined) spread is only centred, so it becomes 0 instead
         of NaN from a 0/0 division.
      3. Missing numeric values are filled with the column median; a column
         with no observed value at all is filled with 0.

    Non-numeric columns are returned unchanged, and the set of columns is
    the same as in the input.

    Parameters
    ----------
    all_features : pandas.DataFrame
        Feature table that contains at least 'LotArea' and 'MiscVal'.

    Returns
    -------
    pandas.DataFrame
        A new frame with the processed numeric columns.
    """
    features = all_features.copy()
    numeric_features = features.select_dtypes(include='number').columns

    # Apply log10 to the features with a very large range.
    big_feature = ['LotArea', 'MiscVal']
    features[big_feature] = np.log10(features[big_feature].astype(float) + 0.001)

    # An earlier revision called DataFrame.drop (for 'MoSold'/'YrSold') and
    # DataFrame.dropna here without keeping the returned frames, so neither
    # call ever removed a column. They are omitted; actually dropping columns
    # would change the feature count the rest of the notebook relies on.

    def _standardize(col):
        spread = col.std()
        if not spread > 0:  # also true when spread is NaN
            return col - col.mean()
        return (col - col.mean()) / spread

    features[numeric_features] = features[numeric_features].apply(_standardize)

    features[numeric_features] = features[numeric_features].fillna(
        features[numeric_features].median())
    # The median of an all-NaN column is NaN; fall back to 0 (the mean of a
    # standardized column) so no NaN reaches the model.
    features[numeric_features] = features[numeric_features].fillna(0)
    return features

# Feature columns only: drop the leading Id column everywhere and the trailing
# SalePrice column from the two labelled splits.
train_features, val_features, test_features = train_data.iloc[:, 1:-1], val_data.iloc[:, 1:-1], test_data.iloc[:, 1:]
train_features, val_features, test_features = numeric_pretreatment_fillna(train_features), numeric_pretreatment_fillna(val_features), numeric_pretreatment_fillna(test_features)

# Non-numerical feature pre-processing.
# The three splits are stacked so that one-hot encoding produces the same
# columns for all of them. dummy_na=True treats a missing value as a valid
# category and creates an indicator column for it.
all_features = pd.concat((train_features, val_features, test_features))
# dtype=float: recent pandas versions emit bool indicator columns by default,
# which turns `.values` into an object array that torch.tensor cannot convert.
all_features = pd.get_dummies(all_features, dummy_na=True, dtype=float)
#all_features.shape

# Cut the stacked table back into train / validation / test, in the same
# order in which the splits were concatenated.
n_train = train_data.shape[0]
n_val = val_data.shape[0]
train_features = torch.tensor(all_features[:n_train].values, dtype=torch.float32)
val_features = torch.tensor(all_features[n_train:n_train+n_val].values, dtype=torch.float32)
test_features = torch.tensor(all_features[n_train+n_val:].values, dtype=torch.float32)

def label_zscore(label, mean=None, std=None):
    """Standardize labels to zero mean and unit variance.

    Parameters
    ----------
    label : numpy.ndarray
        Label values.
    mean, std : float, optional
        Statistics to standardize with. When omitted they are computed from
        `label` itself (population standard deviation via np.std). Pass the
        training-set statistics to put another split on the same scale.

    Returns
    -------
    numpy.ndarray
        (label - mean) / std. If std is 0 (constant labels) the labels are
        only centred, avoiding a division by zero.
    """
    label_mean = label.mean() if mean is None else mean
    label_std = np.std(label) if std is None else std
    if label_std == 0:
        label_std = 1.0
    return (label - label_mean) / label_std
# Statistics of the training labels; they are also used later to map model
# outputs back to prices.
train_label_mean = (train_data.SalePrice.values.reshape(-1, 1)).mean()
train_label_std = np.std(train_data.SalePrice.values.reshape(-1, 1))
train_labels = torch.tensor(label_zscore(train_data.SalePrice.values.reshape(-1, 1)), dtype=torch.float32)
# Validation labels are standardized with the TRAINING statistics: the model
# is fitted in training-label space, so validation targets have to live on
# the same scale for the validation error to be meaningful.
val_labels = torch.tensor(
    (val_data.SalePrice.values.reshape(-1, 1) - train_label_mean) / train_label_std,
    dtype=torch.float32)

In [57]:
'''
some small and limited functions from Li Mu 
'''
def load_array(data_arrays, batch_size, is_train=True):
    """Build a PyTorch data iterator over the given tensors.

    `data_arrays` is a sequence of tensors that is unpacked into a
    TensorDataset. The returned DataLoader yields batches of `batch_size`
    and shuffles the samples when `is_train` is true.
    """
    return data.DataLoader(
        data.TensorDataset(*data_arrays),
        batch_size=batch_size,
        shuffle=is_train,
    )


def log_rmse(net, features, labels):
    """Root-mean-square error of `net` on (features, labels), as a float.

    The name is kept for the existing callers, but no logarithm is taken:
    the error is measured directly in the space of `labels`. The notebook
    feeds z-scored labels, which are centred on 0, so the previous
    clamp-to-at-least-1 (a stabilizer meant for raw prices before a log)
    would have overwritten most predictions and is not applied. The error
    is a true RMSE (square root of the mean squared error) and does not
    depend on which training loss is configured elsewhere.

    Side effect: `net` is switched to evaluation mode and left there.
    """
    net.eval()
    with torch.no_grad():  # metric only; no autograd graph needed
        rmse = torch.sqrt(F.mse_loss(net(features), labels))
    return rmse.item()


def d2l_plot(X, Y=None, xlabel=None, ylabel=None, legend=None, xlim=None,
         ylim=None, xscale='linear', yscale='linear',
         fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):
    """Plot one or several data series on a matplotlib axes.

    X and Y may each be a single series or a list of series. When only X is
    given it is treated as the y-values and plotted against the default
    index. A single X series is reused for every Y series. The axes is
    cleared before drawing; when `axes` is not supplied the current axes
    (plt.gca()) is used. One format string from `fmts` is consumed per
    series.
    """
    def _is_single_series(seq):
        # True for a 1-D array-like, or for a list whose first element has
        # no length (i.e. a flat list of scalars).
        if hasattr(seq, "ndim") and seq.ndim == 1:
            return True
        return isinstance(seq, list) and not hasattr(seq[0], "__len__")

    legend = [] if legend is None else legend

    # Render figures as SVG in the notebook and set the default figure size.
    backend_inline.set_matplotlib_formats('svg')
    plt.rcParams['figure.figsize'] = figsize
    target = axes if axes else plt.gca()

    # Normalize X and Y into equally long lists of series.
    if _is_single_series(X):
        X = [X]
    if Y is None:
        X, Y = [[]] * len(X), X
    elif _is_single_series(Y):
        Y = [Y]
    if len(X) != len(Y):
        X = X * len(Y)

    target.cla()
    for xs, ys, fmt in zip(X, Y, fmts):
        if len(xs):
            target.plot(xs, ys, fmt)
        else:
            target.plot(ys, fmt)

    # Axis decoration.
    target.set_xlabel(xlabel)
    target.set_ylabel(ylabel)
    target.set_xscale(xscale)
    target.set_yscale(yscale)
    target.set_xlim(xlim)
    target.set_ylim(ylim)
    if legend:
        target.legend(legend)
    target.grid()


In [58]:
def train(model, input_optimizer,train_features, train_labels, val_features, val_labels,
          num_epochs, learning_rate, batch_size,loss, weight_decay=0):
    '''
    Train `model` and keep a snapshot of the epoch with the best validation score.

    model: network to train; data is moved to the device its parameters are on
    input_optimizer: optimizer class (not an instance), e.g. torch.optim.Adam;
        it must accept `lr` and `weight_decay` keyword arguments
    train_features, train_labels, val_features, val_labels: tensors with the data
    num_epochs: int, number of epochs
    learning_rate: function mapping the epoch index to the learning rate for
        that epoch
    batch_size: int, the size of a batch
    loss: loss function used for the gradient step
    weight_decay: weight decay handed to the optimizer (default 0)

    Returns (train_ls, val_ls, best_net): the per-epoch training and
    validation scores from log_rmse, and a deep copy of the model taken at
    the epoch with the lowest validation score. If no epoch yields a
    comparable score (for example num_epochs == 0), best_net is `model`.

    The optimizer is built once, so its internal state (e.g. Adam moments)
    persists across steps and `weight_decay` is applied on every step; only
    the learning rate is refreshed at the start of each epoch.
    '''
    import copy  # local import: only needed for the best-model snapshot

    device = next(model.parameters()).device
    train_ls, val_ls = [], []
    train_iter = load_array((train_features, train_labels), batch_size)
    optimizer = input_optimizer(model.parameters(), lr=learning_rate(0), weight_decay=weight_decay)

    # Full splits on the model's device, reused for the per-epoch scores.
    train_eval = (train_features.to(device), train_labels.to(device))
    val_eval = (val_features.to(device), val_labels.to(device))

    best_val_score = float('inf')
    best_net = model
    for epoch in range(num_epochs):
        for group in optimizer.param_groups:
            group['lr'] = learning_rate(epoch)
        # log_rmse leaves the model in eval mode, so switch back every epoch.
        model.train()
        for X, y in train_iter:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            l = loss(model(X), y)
            l.backward()
            optimizer.step()
        train_ls.append(log_rmse(model, *train_eval))
        val_score = log_rmse(model, *val_eval)
        val_ls.append(val_score)
        if val_score < best_val_score:
            best_val_score = val_score
            # Snapshot the weights; keeping a reference to `model` would
            # just track the final epoch.
            best_net = copy.deepcopy(model)

    return train_ls, val_ls, best_net


def train_and_pred(optimizer,train_features, val_features, test_features, train_labels, val_labels, test_data,
                   num_epochs, learning_rate, batch_size, loss, weight_decay):
    """Train a fresh Net, plot its learning curves and write 'submission.csv'.

    Reads these module-level names: num_inputs, num_outputs, num_hiddens1,
    num_hiddens2, dropout1, dropout2 (network shape), device, and
    train_label_mean / train_label_std (used to map standardized predictions
    back to prices).

    Parameters
    ----------
    optimizer : optimizer class forwarded to train()
    train_features, val_features, test_features : feature tensors
    train_labels, val_labels : label tensors
    test_data : pandas.DataFrame with an 'Id' column; a 'SalePrice' column
        holding the predictions is added to it in place
    num_epochs, learning_rate, batch_size, loss, weight_decay :
        forwarded to train()

    Side effects: draws the train/validation curves, prints the final
    training score and the best validation score, and writes the Id /
    SalePrice table to 'submission.csv' in the working directory.
    """
    net = Net(num_inputs, num_outputs, num_hiddens1, num_hiddens2, dropout1, dropout2).to(device)
    train_ls, val_ls, best_net = train(net, optimizer, train_features, train_labels, val_features, val_labels,
                                       num_epochs, learning_rate, batch_size, loss, weight_decay)
    d2l_plot(np.arange(1, num_epochs + 1), [train_ls, val_ls], xlabel='epoch',
             ylabel='log rmse', xlim=[1, num_epochs],
             legend=['train', 'valid'], yscale='log')

    print(f'训练log rmse：{float(train_ls[-1]):f}')
    print(f'验证log rmse：{float(min(val_ls)):f}')
    # Apply the selected network to the test set.
    best_net.eval()
    with torch.no_grad():
        preds = best_net(test_features.to(device)).cpu().numpy()
    # Undo the label standardization and flatten to one value per test row.
    preds = train_label_std * preds.reshape(-1) + train_label_mean
    # Assign the raw array so values are placed by position; wrapping it in
    # a Series would align on index labels and could misplace predictions
    # if test_data does not carry a default 0..n-1 index.
    test_data['SalePrice'] = preds
    # Reformat for export to Kaggle.
    submission = test_data[['Id', 'SalePrice']]
    submission.to_csv('submission.csv', index=False)

In [41]:
'''
k-fold function, written by Li Mu
'''
def get_k_fold_data(k, i, X, y):
    """Return the training and validation tensors for fold `i` of `k`.

    The rows of X / y are cut into k consecutive folds of X.shape[0] // k
    rows each; the last fold additionally takes the rows left over when the
    row count is not divisible by k, so every sample belongs to exactly one
    fold. Fold `i` is the validation part, the remaining folds are
    concatenated (in order) as the training part.

    Parameters
    ----------
    k : int
        Number of folds, must be greater than 1.
    i : int
        Index of the validation fold, 0 <= i < k.
    X, y : torch.Tensor
        Features (2-D) and labels with matching first dimension.

    Returns
    -------
    (X_train, y_train, X_valid, y_valid)
    """
    assert k > 1
    assert 0 <= i < k, 'fold index i must satisfy 0 <= i < k'
    n_rows = X.shape[0]
    fold_size = n_rows // k
    X_parts, y_parts = [], []
    for j in range(k):
        start = j * fold_size
        # The final fold absorbs the remainder rows.
        stop = n_rows if j == k - 1 else start + fold_size
        idx = slice(start, stop)
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            X_valid, y_valid = X_part, y_part
        else:
            X_parts.append(X_part)
            y_parts.append(y_part)
    X_train = torch.cat(X_parts, 0)
    y_train = torch.cat(y_parts, 0)
    return X_train, y_train, X_valid, y_valid


def k_fold(k, X_train, y_train, num_epochs, learning_rate, input_optimizer, loss,
           batch_size, weight_decay=0):
    """Run k-fold cross-validation and return the mean final scores.

    For every fold a fresh Net is built from the module-level settings
    (num_inputs, num_outputs, num_hiddens1, num_hiddens2, dropout1,
    dropout2, device) and trained with train(). The learning curves of the
    first fold are plotted, and the last-epoch scores of every fold are
    printed.

    Parameters
    ----------
    k : int
        Number of folds.
    X_train, y_train : torch.Tensor
        Full training features and labels to be split into folds.
    num_epochs, learning_rate, input_optimizer, loss, batch_size :
        forwarded to train()
    weight_decay : float, optional
        Weight decay forwarded to train() (default 0).

    Returns
    -------
    (mean_train_score, mean_valid_score) over the k folds, each taken from
    the last epoch of the fold.
    """
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        # Named fold_data so the torch.utils.data module imported as `data`
        # is not shadowed.
        fold_data = get_k_fold_data(k, i, X_train, y_train)
        net = Net(num_inputs, num_outputs, num_hiddens1, num_hiddens2, dropout1, dropout2).to(device)
        # train() returns (train scores, validation scores, best model) and
        # needs the weight decay; the best model is not used here.
        train_ls, valid_ls, _ = train(net, input_optimizer, *fold_data, num_epochs, learning_rate,
                                      batch_size, loss=loss, weight_decay=weight_decay)
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            d2l_plot(list(range(1, num_epochs + 1)), [train_ls, valid_ls],
                     xlabel='epoch', ylabel='rmse', xlim=[1, num_epochs],
                     legend=['train', 'valid'], yscale='log')
        print(f'折{i + 1}，训练log rmse{float(train_ls[-1]):f}, '
              f'验证log rmse{float(valid_ls[-1]):f}')
    return train_l_sum / k, valid_l_sum / k

In [162]:
# Hyperparameter settings and the final training / prediction run.

device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
epochs, start_decline_epochs = 400, 30
# NOTE(review): 5 is an unusually strong L2 penalty for Adam - confirm this
# value is intended wherever the optimizer actually applies it.
weight_decay = 5
# The input width is read from the prepared feature tensor instead of being
# hard-coded, so it stays correct if the one-hot encoding yields a different
# number of columns.
num_inputs = train_features.shape[1]
num_outputs, num_hiddens1, num_hiddens2, dropout1, dropout2 = 1, 512, 64, 0, 0
# NOTE(review): train_and_pred builds its own network, so this instance is
# not used by the code visible here; kept in case another cell refers to it.
model = Net(num_inputs, num_outputs, num_hiddens1, num_hiddens2, dropout1, dropout2).to(device)
optimizer = torch.optim.Adam
loss = nn.SmoothL1Loss()
batch_size = 128
k = 5


def cosine_learning_rate(temp_epochs):
    """Alternative schedule: 5e-6 until `start_decline_epochs`, then a
    quarter-cosine decay that reaches 0 at `epochs`.

    This was previously also named `learning_rate` and was silently
    replaced by the definition below; it now has its own name so either
    schedule can be selected explicitly.
    """
    if temp_epochs < start_decline_epochs:
        return 5e-6
    return 5e-6*math.cos((temp_epochs-start_decline_epochs)/(epochs-start_decline_epochs)*math.pi/2)


def learning_rate(temp_epochs):
    """Schedule in use: 1e-5 until `start_decline_epochs`, afterwards
    1e-5 * (1 - temp_epochs / epochs), i.e. a linear decay towards 0 at
    `epochs`."""
    if temp_epochs < start_decline_epochs:
        return 1e-5
    return 1e-5-1e-5 *(temp_epochs)/epochs

#train_l, valid_l = k_fold(k, train_features, train_labels, epochs, learning_rate, optimizer,
#                         loss, batch_size)
#print(f'{k}-折验证: 平均训练log rmse: {float(train_l):f}, '
#      f'平均验证log rmse: {float(valid_l):f}')

train_and_pred(optimizer,train_features, val_features, test_features, train_labels, val_labels, test_data,
               epochs, learning_rate, batch_size, loss,weight_decay)

在label做zscore处理后，dropout会使学习曲线波动明显
好看的学习率曲线