# 一、导入数据

In [None]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.linear_model import LassoCV
from scipy.stats import ttest_ind, levene
from tensorflow import keras

# Read the LA and XA tables from disk; the first CSV column becomes the row index.
data_LA, data_XA = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("input/LA_total.csv", "input/XA_total.csv")
)


def _float_columns(frame):
    """Return the columns of *frame* that can be cast to float64.

    Each column is cast with ``astype(np.float64)``.  Columns for which the
    cast raises ``ValueError`` or ``TypeError`` (for example free-text
    columns) are left out; the remaining columns keep their original order.
    An empty DataFrame is returned when no column can be cast.
    """
    numeric = []
    for name in frame.columns:
        try:
            numeric.append(frame[name].astype(np.float64))
        except (ValueError, TypeError):
            # Not numeric: skip this column instead of aborting the whole load.
            continue
    # Concatenate once rather than growing a DataFrame column by column.
    return pd.concat(numeric, axis=1) if numeric else pd.DataFrame()


# Keep only the float-castable columns of each table.
columns_LA = data_LA.columns
data_LA_ = _float_columns(data_LA)

columns_XA = data_XA.columns
data_XA_ = _float_columns(data_XA)


# Univariate screening: keep the columns whose two-sample t-test between the
# LA and XA tables gives p < 0.05.  Levene's test (homogeneity of variance)
# picks the t-test variant: p > 0.05 -> equal-variance test, otherwise the
# unequal-variance (Welch) test.
# NOTE(review): columns are taken from data_LA_ only; a column that is absent
# from data_XA_ raises KeyError here.
index_ = []
for col in data_LA_.columns:
    sample_la, sample_xa = data_LA_[col], data_XA_[col]
    variances_equal = bool(levene(sample_la, sample_xa)[1] > 0.05)
    p_value = ttest_ind(sample_la, sample_xa, equal_var=variances_equal)[1]
    if p_value < 0.05:
        index_.append(col)


# Restrict both tables to the columns that passed the screening step.
data_L_T = data_LA_.loc[:, index_]
data_X_T = data_XA_.loc[:, index_]

# Stack the LA rows on top of the XA rows, then randomise the row order.
# No random_state is passed, so the order differs from run to run.
data = shuffle(pd.concat([data_L_T, data_X_T]))

# The first remaining column is the prediction target, all others are features.
# NOTE(review): this relies on the label column being first in `index_`, i.e.
# on it having passed the screening above — confirm against the CSV layout.
target, features = data.iloc[:, 0], data.iloc[:, 1:]
features_bk = features.copy()

# Standardise every feature before the Lasso fit; the original author notes
# that this normalisation step is necessary.
# NOTE(review): the scaler and the Lasso are both fitted on all rows, before
# the train/test split further down, so the held-out rows influence the
# scaling and the feature selection — consider fitting on training rows only.
transfer = StandardScaler()
features_SS = transfer.fit_transform(features_bk)

# Candidate regularisation strengths: 300 values log-spaced over [1e-2, 1].
alphas_ = np.logspace(-2, 0, 300)

# 10-fold cross-validated Lasso; the chosen strength is `lassocv.alpha_`.
lassocv = LassoCV(alphas=alphas_, cv=10, max_iter=100000).fit(features_SS, target)

# Keep only the features with a non-zero Lasso coefficient.  The mask is
# computed once and applied to both the DataFrame and the scaled array.
# NOTE(review): if every coefficient is zero, `features_len` is 0 and the
# models built later get an empty input.
selected = lassocv.coef_ != 0
features = data[features.columns[selected]]
features_len = len(features.columns)
print(features_len)
features_SS = features_SS[:, selected]

# 三、拆分数据集

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 15% of the rows as the test set (unseeded, so the split differs
# between runs).
train_features_full, test_features, train_target_full, test_target = (
    train_test_split(features_SS, target, test_size=0.15)
)

In [None]:
# Report the shapes of the train(+validation) and test partitions.
for _part in (train_features_full, train_target_full, test_features, test_target):
    print(_part.shape)

In [None]:
# Carve a validation set (15%) out of the non-test rows (unseeded split).
train_features, validation_features, train_target, validation_target = (
    train_test_split(train_features_full, train_target_full, test_size=0.15)
)

In [None]:
# Report the shapes of the final train / validation / test partitions.
for _part in (
    train_features,
    train_target,
    validation_features,
    validation_target,
    test_features,
    test_target,
):
    print(_part.shape)

# 四、建立模型 

## 1. Wide & Deep 模型

In [None]:
# Functional-API network: the raw inputs skip the two hidden SELU layers and
# are concatenated with their output before the single sigmoid unit.
input_ = keras.layers.Input(shape=train_features.shape[1:])
hidden1 = keras.layers.Dense(units=10, activation="selu")(input_)
hidden2 = keras.layers.Dense(units=5, activation="selu")(hidden1)
concat = keras.layers.concatenate([input_, hidden2])
output = keras.layers.Dense(units=1, activation="sigmoid")(concat)
model_wide = keras.Model(inputs=[input_], outputs=[output])

In [None]:
# Print the layer-by-layer summary of the single-input model.
model_wide.summary()

In [None]:
# keras.utils.plot_model(model_wide, to_file='output/model_wide.png')

In [None]:
# Sigmoid output trained with binary cross-entropy and RMSprop.
# `metrics` is passed as a list, the form documented for Model.compile
# (a bare string is rejected by some Keras versions).
model_wide.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='rmsprop')

In [None]:
# Train for 200 epochs with mini-batches of 20, evaluating on the validation
# split during training.
model_wide.fit(
    x=train_features,
    y=train_target,
    batch_size=20,
    epochs=200,
    validation_data=(validation_features, validation_target),
)

## 2. 多输入模型

In [None]:
# Split the selected features column-wise: the first third (integer division)
# feeds input A, the remaining columns feed input B.
wide_width = features_len // 3
train_features_A, train_features_B = (
    train_features[:, :wide_width],
    train_features[:, wide_width:],
)
validation_features_A, validation_features_B = (
    validation_features[:, :wide_width],
    validation_features[:, wide_width:],
)

In [None]:
# Two-input network: input A ("wide_input") goes straight to the output layer,
# input B ("deep_input") first passes through two hidden SELU layers.
wide_width = features_len // 3
deep_width = features_len - wide_width
input_A = keras.layers.Input(shape=[wide_width], name="wide_input")
input_B = keras.layers.Input(shape=[deep_width], name="deep_input")
hidden1_ = keras.layers.Dense(units=5, activation="selu")(input_B)
hidden2_ = keras.layers.Dense(units=3, activation="selu")(hidden1_)
concat_ = keras.layers.Concatenate()([input_A, hidden2_])
output_ = keras.layers.Dense(units=1, activation="sigmoid")(concat_)
model_combine = keras.Model(inputs=[input_A, input_B], outputs=[output_])

In [None]:
# Print the layer-by-layer summary of the two-input model.
model_combine.summary()

In [None]:
# keras.utils.plot_model(model_combine,to_file='output/model_combine.png')

In [None]:
# Sigmoid output trained with binary cross-entropy and RMSprop.
# `metrics` is passed as a list, the form documented for Model.compile
# (a bare string is rejected by some Keras versions).
model_combine.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='rmsprop')

In [None]:
# Train the two-input model for 150 epochs with mini-batches of 20; the A/B
# feature slices are fed to the wide and deep inputs respectively.
model_combine.fit(
    x=[train_features_A, train_features_B],
    y=train_target,
    batch_size=20,
    epochs=150,
    validation_data=(
        (validation_features_A, validation_features_B),
        validation_target,
    ),
)

In [None]:
# TensorBoard callback writing its logs under output/logs, with histogram and
# embedding logging both set to a frequency of 1.
tensorboard_cb = keras.callbacks.TensorBoard(log_dir='output/logs',histogram_freq=1,embeddings_freq=1)

In [None]:
# Train the two-input model for 200 epochs while logging to TensorBoard.
# `callbacks` is passed as a list, the form documented for Model.fit.
# NOTE(review): if the earlier 150-epoch fit cell has already run, this call
# continues from those weights rather than starting from scratch.
model_combine.fit(
    (train_features_A, train_features_B),
    train_target,
    batch_size=20,
    epochs=200,
    validation_data=(
        (validation_features_A, validation_features_B),
        validation_target,
    ),
    callbacks=[tensorboard_cb],
)