In [1]:
import pandas as pd
from sklearn.neural_network import MLPRegressor
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from pprint import pprint

In [2]:
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['Microsoft YaHei'] #用来正常显示中文标签
plt.rcParams['axes.unicode_minus']=True

In [3]:
np_CHI = np.load('./np_regression/np_CHI.npy')
np_CHI.shape

(3629, 68)

In [40]:
age_bmi_mean = []
age_bmi_var = []
text_mean = []
text_var = []

In [44]:
# 把数据切分成训练集和测试集 输入没有标准化的数据
def split_data(data):
    X = data[:,0:-1]
    y = data[:,-1]
    return train_test_split(X,y,test_size=0.2)



# 输入不包含因变量的训练数据和测试数据
def standard_data(train_data,test_data):
    print('train_data_size:{}'.format(train_data.shape))
    print('test_data_size:{}'.format(test_data.shape))
    scaler = StandardScaler()
    
    scaler1 = scaler.fit(train_data[:,0:2])
    age_bmi_mean = scaler1.mean_
    print(type(scaler1.var_))
    np1_train = scaler1.transform(train_data[:,0:2])
    np1_test = scaler1.transform(test_data[:,0:2])
    
    np2_train = train_data[:,2:17]
    np2_test = test_data[:,2:17]
    
    scaler2 = scaler.fit(train_data[:,17:67])
    print(scaler2.mean_)
    print(scaler2.var_)
    np3_train = scaler2.transform(train_data[:,17:67])
    np3_test = scaler2.transform(test_data[:,17:67])

    np_train_data = np.hstack((np1_train,np2_train,np3_train))
    np_test_data = np.hstack((np1_test,np2_test,np3_test))
    print('train_data_scale_size:{}'.format(np_train_data.shape))
    print('test_data_scale_size:{}'.format(np_test_data.shape))
    return np_train_data,np_test_data,scaler1.mean_,scaler1.var_,scaler2.mean_,scaler2.var_

# 传入标准化的数据
def get_model_score(train_x,train_y,test_x,test_y,model):
    ans = {}
    rgs = model.fit(train_x,train_y)
    ans['MSE'] = mean_squared_error(test_y,rgs.predict(test_x))
    ans['MAE'] = mean_absolute_error(test_y,rgs.predict(test_x))
    ans['R2'] = r2_score(test_y,rgs.predict(test_x))
    return ans

In [9]:
model_mlp = MLPRegressor(hidden_layer_sizes=(100,),
                      activation='relu',
                      solver='adam',
                      alpha=1,
                      batch_size=128,
                      random_state=None,
                      early_stopping=True,
                      validation_fraction=0.1,
                      max_iter=4000)

In [45]:
train_x,test_x,train_y,test_y = split_data(np_CHI)
train_x,test_x,age_bmi_mean,age_bmi_var,text_mean,text_var = standard_data(train_x,test_x)
get_model_score(train_x,train_y,test_x,test_y,model_mlp)

train_data_size:(2903, 67)
test_data_size:(726, 67)
<class 'numpy.ndarray'>
[  5.87281939  -8.78933527 -10.81610807  13.75853465 -13.9883556
   6.1053949   -0.77055654   2.72025796  10.91618044  -3.02764052
   3.4761151   -7.6233918    7.08175578  11.20428136  10.65745715
  -7.27558488   7.02625977   8.5244003  -13.01030673  14.47961694
   7.82200346 -14.77682297  -4.73750742  -6.76079966   4.55272313
  -3.97981785   6.05763288  -9.09029018  -5.51616275   0.9313837
  -0.61697792   9.8896995   -3.03119805 -13.97031274  -6.22534528
   3.04788654  19.76248889   6.96420868  -0.61707741  -8.95801352
 -16.44305336   5.33100474  -6.90545336  -4.79778223  -0.74866953
   7.52611596   1.09834257  11.35318681   5.97498879  -7.81807625]
[ 532.63146166  346.91877739  537.37173504  585.91973793  878.32758325
  905.08016428  397.30555371  431.34722278  580.32128744 1209.37060286
  708.11646711  289.91867844  497.28966959  525.97908194  441.86554425
  480.88330454  379.86920661  653.19315321  218.9762

{'MSE': 1.7531557408402696,
 'MAE': 0.8952045054435566,
 'R2': 0.4322882735602531}

In [11]:
import joblib
# 保存模型

joblib.dump(model_mlp,'./rgs_model/mlp.model')

['./rgs_model/mlp.model']

In [85]:
m = joblib.load('./rgs_model/mlp.model')

In [50]:
age_bmi_mean.shape

(50,)

In [37]:
# 保存 scaler

age_bmi_mean = [46.71271099 26.78170167]
age_bmi_var = [307.05008645 4380.11560942]
text_mean = [  5.78319234  -8.85477524 -11.18332533  13.24430953 -14.03840216
   5.92872342  -0.5807985    2.67184518  10.91372395  -2.16600298
   3.31011785  -7.6499757    6.63622798  10.87115673  10.93886998
  -7.0071298    6.68922889   8.74470932 -12.91713456  14.58682235
   7.9348512  -14.92744062  -4.73395167  -6.81328155   4.57999136
  -4.44998419   5.9468436   -8.84345986  -5.69941261   0.79656961
  -0.58977357   9.42828669  -2.44308429 -13.70565802  -6.06578553
   2.38506312  19.8871731    6.75727422  -0.65908221  -8.72554222
 -16.10573274   5.42644868  -6.5042737   -4.91789346  -1.06052643
   7.19115892   1.13553465  10.98653116   6.26498818  -8.18497908]
text_var = [ 503.51649304  344.32430952  536.19400135  560.34977723  874.78181541
  905.50137033  398.88862333  432.50655695  586.68475922 1227.7418232
  696.08758903  276.90478734  480.20584714  523.37080939  451.52410704
  470.21142519  380.501073    663.95959396  232.96644368  437.31514092
  562.74919731  507.62319427  221.62505034  217.23789535  387.11119729
 1103.72244245  529.95430078  256.29155489  264.96408922  411.14543277
  238.49549306  564.68296709  567.00890131  667.18494736  289.66726188
  397.05006239  518.48949348  815.84752616  470.94153791  460.09651697
  297.80183164  385.79203351  538.88753298  681.61806547  828.82614269
  770.83409569  670.78671903  561.38805954  409.42725297  629.05855452]

SyntaxError: invalid syntax (<ipython-input-37-78c7fedf9a0e>, line 3)

In [33]:
age_bmi_scaler.mean_

array([  5.84079513,  -8.84689548, -10.72505208,  13.82386703,
       -14.00457768,   5.91110695,  -0.84449987,   2.50960331,
        10.52024393,  -2.49240035,   3.76873588,  -7.444968  ,
         6.79722519,  10.96277227,  10.85981216,  -7.48260831,
         6.81437958,   8.42056938, -13.09843401,  14.24801775,
         7.75055138, -14.91547501,  -5.19436745,  -7.14034177,
         4.69590376,  -3.99407311,   6.07042464,  -8.62430031,
        -5.91273242,   0.85766981,  -0.52861836,   9.24375171,
        -2.3917797 , -13.88422053,  -6.06211267,   2.74825986,
        19.5384501 ,   6.28884635,  -1.11936565,  -9.12071486,
       -16.30765784,   5.90933978,  -6.18307583,  -4.52527773,
        -0.47603031,   7.6678084 ,   0.68939883,  10.9491873 ,
         6.41532994,  -7.93376163])

In [34]:
train_x[:,0:2]

array([[81.        , 20.42241946],
       [55.        , 21.77112188],
       [26.        , 19.140625  ],
       ...,
       [61.        , 28.99013879],
       [51.        , 22.19103223],
       [55.        , 20.87198516]])

In [58]:
np_CHI.shape

(3629, 68)

In [72]:
scaler = StandardScaler()
X,_,_,_ = split_data(np_CHI)
age_bmi_scaler = scaler5.fit(X[:,0:2])

In [73]:
age_bmi_scaler.mean_

array([46.25180847, 29.55155469])

In [74]:
age_bmi_scaler.var_

array([  309.09436032, 36663.91601128])

In [75]:

joblib.dump(age_bmi_scaler,'./rgs_model/age_bmi_scaler.joblib')

['./rgs_model/age_bmi_scaler.joblib']

In [76]:
text_scaler = scaler5.fit(X[:,17:67])

In [79]:
joblib.dump(age_bmi_scaler,'./rgs_model/text_scaler.joblib')

['./rgs_model/text_scaler.joblib']

In [80]:
sc1 = joblib.load('./rgs_model/age_bmi_scaler.joblib')


In [83]:
sc1.transform(np.array([[50,24]]))

array([[ 0.21319462, -0.02899314]])