In [1]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso,ElasticNet
from sklearn.neural_network import MLPRegressor
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from pprint import pprint

In [2]:
import matplotlib.pyplot as plt
# Select a CJK-capable sans-serif font so Chinese labels render properly.
# NOTE(review): True is believed to be matplotlib's default for
# 'axes.unicode_minus'; the usual CJK-font recipe sets it to False so the
# minus sign falls back to an ASCII hyphen - confirm which one is intended.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft YaHei'],
    'axes.unicode_minus': True,
})

In [3]:
# Load the regression dataset from a NumPy .npy file (path is relative to the
# notebook's working directory). The shape displayed in a later cell is
# (3629, 68); split_data uses the first 17 columns as features and the last
# column as the target.
np_CHI = np.load('./np_regression/np_CHI.npy')

In [22]:
# Display the (rows, columns) dimensions of the loaded array.
np_CHI.shape

(3629, 68)

In [4]:
def split_data(data, test_size=0.2, random_state=None):
    """Split a raw (un-standardized) 2-D array into train and test partitions.

    The first 17 columns of ``data`` are used as the feature matrix and the
    last column as the target.

    Parameters
    ----------
    data : 2-D array
        Raw samples, one row per sample.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``; the default keeps the original
        80/20 behaviour.
    random_state : int or None, default None
        Forwarded to ``train_test_split``. ``None`` keeps the original
        unseeded behaviour; pass an int to make the split reproducible so
        that several models can be scored on the same test rows.

    Returns
    -------
    The value returned by ``train_test_split(X, y, ...)``, which callers
    unpack as ``train_x, test_x, train_y, test_y``.
    """
    X = data[:, 0:17]
    y = data[:, -1]
    return train_test_split(X, y, test_size=test_size, random_state=random_state)

def standard_data(train_data,test_data):
    """Standardize the first two feature columns of both partitions.

    Both inputs are feature matrices without the dependent variable. A
    ``StandardScaler`` is fitted on columns 0-1 of ``train_data`` only and
    then applied to the same columns of both partitions; columns 2-16 are
    copied through unscaled. The shapes before and after are printed.

    Returns
    -------
    (train, test) : tuple of arrays
        Each is the scaled columns 0-1 horizontally stacked with the
        untouched columns 2-16 of the corresponding input.
    """
    print('train_data_size:{}'.format(train_data.shape))
    print('test_data_size:{}'.format(test_data.shape))

    # Fit on the training rows only so no test-set statistics leak in.
    fitted = StandardScaler().fit(train_data[:, :2])

    assembled = []
    for part in (train_data, test_data):
        scaled_head = fitted.transform(part[:, :2])
        passthrough = part[:, 2:17]
        assembled.append(np.hstack((scaled_head, passthrough)))
    scaled_train, scaled_test = assembled

    print('train_data_scale_size:{}'.format(scaled_train.shape))
    print('test_data_scale_size:{}'.format(scaled_test.shape))
    return scaled_train, scaled_test


def get_model_score(train_x,train_y,test_x,test_y,model):
    """Fit ``model`` on the training data and score it on the test data.

    The feature matrices are expected to be already standardized by the
    caller.

    Parameters
    ----------
    train_x, train_y : training features and targets passed to ``model.fit``.
    test_x, test_y : held-out features and targets used for scoring.
    model : estimator whose ``fit(X, y)`` returns an object with
        ``predict(X)``. It is fitted as a side effect of this call.

    Returns
    -------
    dict
        Keys ``'MSE'``, ``'MAE'`` and ``'R2'`` (in that order) holding the
        mean squared error, mean absolute error and R^2 on the test set.
    """
    rgs = model.fit(train_x, train_y)
    # Predict once and reuse the result for all three metrics instead of
    # running inference on the test set three times.
    predictions = rgs.predict(test_x)
    return {
        'MSE': mean_squared_error(test_y, predictions),
        'MAE': mean_absolute_error(test_y, predictions),
        'R2': r2_score(test_y, predictions),
    }

In [10]:
# Linear baseline: ElasticNet with penalty strength alpha=0.01.
model_el = ElasticNet(alpha=0.01)

# Neural-network regressor with one hidden layer of 100 ReLU units trained
# with Adam in mini-batches of 128. early_stopping=True together with
# validation_fraction=0.1 holds out part of the training data to decide when
# to stop (see the scikit-learn MLPRegressor docs). random_state=None leaves
# the model unseeded, so repeated fits are not expected to be identical.
model_mlp = MLPRegressor(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    alpha=1,
    batch_size=128,
    max_iter=4000,
    early_stopping=True,
    validation_fraction=0.1,
    random_state=None,
)

In [6]:
# Evaluate the ElasticNet baseline:
# 1) draw a random 80/20 split of the raw array,
# 2) standardize the first two feature columns using training statistics,
# 3) fit the model and display its MSE / MAE / R2 on the test partition.
# Note that train_x / test_x are rebound to their standardized versions here.
train_x,test_x,train_y,test_y = split_data(np_CHI)
train_x,test_x = standard_data(train_x,test_x)
get_model_score(train_x,train_y,test_x,test_y,model_el)

train_data_size:(2903, 17)
test_data_size:(726, 17)
train_data_scale_size:(2903, 17)
test_data_scale_size:(726, 17)


{'MSE': 1.9768592485081293,
 'MAE': 1.0198765103185319,
 'R2': 0.27329565205177686}

In [14]:
# Evaluate the MLP regressor with the same split -> standardize -> score steps.
# NOTE(review): split_data is called again without a fixed seed, so this is
# presumably a different random split than the one the ElasticNet cell used;
# the two sets of metrics are then not measured on the same test rows.
# TODO: reuse one split (or a fixed seed) for a like-for-like comparison.
# The train_x / test_x globals are overwritten and used by later cells.
train_x,test_x,train_y,test_y = split_data(np_CHI)
train_x,test_x = standard_data(train_x,test_x)
get_model_score(train_x,train_y,test_x,test_y,model_mlp)

train_data_size:(2903, 17)
test_data_size:(726, 17)
train_data_scale_size:(2903, 17)
test_data_scale_size:(726, 17)


{'MSE': 1.6028371656603406,
 'MAE': 0.9457783749752335,
 'R2': 0.3100540076295225}

In [16]:
import joblib

In [17]:
# Save the fitted MLP model to disk.
# NOTE(review): the StandardScaler fitted inside standard_data is neither
# returned nor saved, so raw inputs cannot be standardized the same way when
# this model is loaded elsewhere - consider persisting the scaler as well.

joblib.dump(model_mlp,'./rgs_model/mlp.model')

['./rgs_model/mlp.model']

In [18]:
# Reload the model that was just saved to check the save/load round trip.
# joblib files are pickle-based, so only load files from a trusted source.
r = joblib.load('./rgs_model/mlp.model')

In [19]:
# Sanity-check the reloaded model by predicting on the current standardized
# test_x. This displays the entire prediction array; slice the result if only
# a preview is needed.
r.predict(test_x)

array([2.0399856 , 2.08491172, 3.258272  , 2.51765998, 2.36259856,
       2.05743892, 3.21427092, 1.98960632, 1.75191808, 2.48136066,
       2.83078072, 3.8402771 , 5.04394118, 5.05722546, 3.93214458,
       3.8370912 , 3.05821731, 5.01518019, 4.18086661, 2.95039453,
       2.80654227, 2.33633682, 2.9547546 , 2.96270327, 1.95924806,
       3.11023956, 3.86226621, 3.33389737, 3.05516783, 3.21518111,
       3.14379421, 3.40051412, 2.00637309, 3.11935733, 1.82081273,
       3.00293972, 3.0819188 , 5.00374975, 3.27505758, 3.07924812,
       3.83195533, 4.82902616, 3.30959636, 3.87054206, 2.55194383,
       2.47397912, 2.79244684, 3.96411616, 3.85316649, 3.93444997,
       1.91143434, 2.7754991 , 5.09102782, 3.13771759, 5.29053955,
       3.0978746 , 3.96820504, 2.96105537, 3.18131371, 5.20798113,
       3.99234225, 3.33413048, 3.62200742, 3.20516344, 2.75675622,
       2.69060354, 3.26913282, 4.82890015, 3.78451789, 3.02044023,
       3.926736  , 3.83290959, 3.16090488, 3.48407956, 1.95894

In [20]:
# Inspect one prepared test row: the first two values were standardized by
# standard_data, while the remaining 15 columns were passed through unscaled.
test_x[0]

array([-0.78176227, -0.04389012,  1.        ,  0.        ,  0.        ,
        1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ])

In [21]:
# Display the (rows, columns) dimensions of the prepared test feature matrix.
test_x.shape

(726, 17)