# 一、导入数据

In [1]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.linear_model import LassoCV
from scipy.stats import ttest_ind, levene
from tensorflow import keras

# Load the two cohort tables; the first CSV column is used as the row index.
data_LA, data_XA = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("input/LA_total.csv", "input/XA_total.csv")
)


def _numeric_columns(frame):
    """Return the columns of ``frame`` that can be cast to float64.

    Each column is cast on its own; columns whose values cannot be converted
    (``ValueError`` / ``TypeError`` from ``astype``) are left out. Column
    order is preserved. An empty ``DataFrame`` is returned when no column
    converts.
    """
    converted = []
    for name in frame.columns:
        try:
            converted.append(frame[name].astype(np.float64))
        except (ValueError, TypeError):
            # Non-numeric column: skip it, but let unrelated errors surface
            # instead of hiding them behind a bare ``except``.
            continue
    if not converted:
        return pd.DataFrame()
    # One concat at the end instead of re-copying the frame for every column.
    return pd.concat(converted, axis=1)


# Keep only the float-convertible columns of each cohort table.
columns_LA = data_LA.columns
data_LA_ = _numeric_columns(data_LA)

columns_XA = data_XA.columns
data_XA_ = _numeric_columns(data_XA)


# Univariate screening between the two cohorts.
# Levene's test checks homogeneity of variance: p > 0.05 selects the pooled
# (Student) t-test, anything else selects Welch's t-test. A NaN Levene p-value
# compares False and therefore also falls through to Welch. Columns whose
# t-test p-value is below 0.05 are kept.
# Only columns present in both numeric tables are tested, so a column that
# failed numeric conversion in one cohort cannot raise a KeyError here.
shared_columns = [col for col in data_LA_.columns if col in data_XA_.columns]

index_ = []
for col in shared_columns:
    la_values, xa_values = data_LA_[col], data_XA_[col]
    equal_variances = bool(levene(la_values, xa_values)[1] > 0.05)
    if ttest_ind(la_values, xa_values, equal_var=equal_variances)[1] < 0.05:
        index_.append(col)


# Restrict both cohorts to the columns that passed the univariate screening.
data_L_T = data_LA_[index_]
data_X_T = data_XA_[index_]

# Stack the cohorts and shuffle the rows. The seed is fixed because LassoCV's
# integer `cv` uses unshuffled folds, so the row order decides which samples
# land in which fold (and therefore which features survive).
data = pd.concat([data_L_T, data_X_T])
data = shuffle(data, random_state=42)

# The first retained column is used as the target, the rest as predictors.
# NOTE(review): this assumes the label is the first screened column — confirm.
target = data.iloc[:, 0]
features = data.iloc[:, 1:]
features_bk = features.copy()

# Standardise the predictors: the L1 penalty is sensitive to feature scale.
# NOTE(review): the scaler and LassoCV are fit on all rows before the later
# train/test split, so held-out rows influence scaling and feature selection.
# Consider fitting them on the training portion only.
transfer = StandardScaler()
features_SS = transfer.fit_transform(features_bk)

# 300 candidate penalties, log-spaced between 1e-2 and 1.
alphas_ = np.logspace(-2, 0, 300)

# The chosen penalty is available afterwards as `lassocv.alpha_`.
lassocv = LassoCV(alphas=alphas_, cv=10, max_iter=100000).fit(features_SS, target)

# Keep the predictors with a non-zero LASSO coefficient, both as named columns
# (`features`) and as standardised array columns (`features_SS`).
selected_mask = lassocv.coef_ != 0
features = data[features.columns[selected_mask]]
print(len(features.columns))

features_SS = features_SS[:, selected_mask]

2024-04-01 18:41:23.645123: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  W = numer / denom
  if ttest_ind(data_LA_[col],data_XA_[col],equal_var=False)[1] < 0.05:


8


# 三、拆分数据集

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
# Hold out 15% of the rows as the test split. The seed is fixed so the same
# rows are held out on every Restart & Run All.
train_features_full, test_features, train_target_full, test_target = train_test_split(
    features_SS,
    target,
    test_size=0.15,
    random_state=42,
)

In [4]:
# Report the shapes of the full-training and test splits, one per line.
for split_part in (train_features_full, train_target_full, test_features, test_target):
    print(split_part.shape)

(117, 8)
(117,)
(21, 8)
(21,)


In [5]:
# Carve a validation split (15%) out of the remaining training rows. The seed
# is fixed so the train/validation membership is reproducible.
train_features, validation_features, train_target, validation_target = train_test_split(
    train_features_full,
    train_target_full,
    test_size=0.15,
    random_state=42,
)

In [6]:
# Report the shapes of the train, validation and test splits, one per line.
for split_part in (
    train_features,
    train_target,
    validation_features,
    validation_target,
    test_features,
    test_target,
):
    print(split_part.shape)

(99, 8)
(99,)
(18, 8)
(18,)
(21, 8)
(21,)


# 四、建立模型

In [7]:
# Small fully connected binary classifier: two SELU hidden layers (10 and 5
# units) followed by a single sigmoid output unit. The input width is taken
# from the training feature matrix.
clf = keras.Sequential(
    [
        keras.layers.InputLayer(input_shape=train_features.shape[1:]),
        keras.layers.Dense(10, activation='selu'),
        keras.layers.Dense(5, activation='selu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ]
)

2024-04-01 18:41:29.577865: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [8]:
# Print the layer-by-layer architecture and parameter counts of the network.
clf.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                90        
                                                                 
 dense_1 (Dense)             (None, 5)                 55        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 151
Trainable params: 151
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Optional: render the architecture diagram to an image file (currently disabled).
# NOTE(review): plot_model typically needs pydot and graphviz installed — confirm before enabling.
# keras.utils.plot_model(clf,to_file='output/nn.png')

# 五、编译并训练模型

In [10]:
# Configure training: binary cross-entropy loss, RMSprop optimizer, accuracy
# as the reported metric. `metrics` is documented as a list; a bare string is
# only tolerated by some Keras versions, so pass a list explicitly.
clf.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='rmsprop')

In [11]:
# Baseline training run: 100 epochs, mini-batches of 10, scored against the
# validation split after every epoch. The History object is the cell output.
clf.fit(
    x=train_features,
    y=train_target,
    batch_size=10,
    epochs=100,
    validation_data=(validation_features, validation_target),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f26203e0e50>

In [12]:
# Score the held-out test split; with the loss and single metric configured in
# compile(), the result is [loss, accuracy].
clf.evaluate(test_features, test_target)



[0.4706073999404907, 0.8571428656578064]

In [13]:
# Raw sigmoid outputs for the test split: one value between 0 and 1 per row.
# NOTE(review): presumably the predicted probability of the class encoded as 1 — confirm label coding.
clf.predict(test_features)



array([[0.70817214],
       [0.8570714 ],
       [0.71779776],
       [0.9750911 ],
       [0.3568454 ],
       [0.94999677],
       [0.9793155 ],
       [0.35433143],
       [0.7132036 ],
       [0.6652212 ],
       [0.6025863 ],
       [0.4319135 ],
       [0.59809804],
       [0.51310515],
       [0.3814942 ],
       [0.47189143],
       [0.4207037 ],
       [0.5277746 ],
       [0.90612197],
       [0.5110991 ],
       [0.7407737 ]], dtype=float32)

In [14]:
# Hand-written replacement for the old `clf.predict_classes(test_features)`
# helper, which has been removed from recent TensorFlow releases.
# The network ends in a single sigmoid unit, so `predict` returns one column;
# argmax over that axis is always 0. Threshold the sigmoid output at 0.5
# instead. The result stays a 1-D int64 array, as argmax produced.
predict_x = clf.predict(test_features)
classes_x = (predict_x[:, 0] > 0.5).astype(np.int64)



In [15]:
# Turn the sigmoid outputs into 0/1 labels with a 0.5 cut-off.
test_scores = clf.predict(test_features)
(test_scores > 0.5).astype("int32")



array([[1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1]], dtype=int32)

In [16]:
from sklearn.metrics import accuracy_score

In [17]:
# Accuracy of the 0.5-thresholded predictions on the test split.
test_labels_pred = (clf.predict(test_features) > 0.5).astype("int32")
accuracy_score(test_target, test_labels_pred)



0.8571428571428571

# 六、callbacks

In [18]:
# Continue training while checkpointing: the model is written to
# output/clf.h5 whenever val_loss improves (save_best_only=True).
# NOTE(review): assumes the output/ directory already exists — confirm.
checkpoint_cb = keras.callbacks.ModelCheckpoint("output/clf.h5", save_best_only=True, monitor='val_loss')
# `callbacks` is documented as a list; pass one explicitly, as the later
# multi-callback fit call does, rather than relying on implicit flattening.
clf.fit(train_features, train_target, batch_size=10, epochs=100,
        validation_data=(validation_features, validation_target),
        callbacks=[checkpoint_cb])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f26201ea0d0>

In [19]:
# Continue training with early stopping: stop once the monitored quantity
# (Keras default: val_loss) has not improved for 10 epochs, then roll the
# weights back to the best epoch of this run.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
# `callbacks` is documented as a list; pass one explicitly, as the later
# multi-callback fit call does.
clf.fit(train_features, train_target, batch_size=20, epochs=200,
        validation_data=(validation_features, validation_target),
        callbacks=[early_stopping_cb])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200


<keras.callbacks.History at 0x7f26255ec590>

In [20]:
# Re-compile with an explicit RMSprop optimizer at a small learning rate
# (1e-4) for the fine-tuning run that follows. `metrics` is passed as a list,
# its documented type, instead of a bare string.
clf.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=keras.optimizers.RMSprop(learning_rate=0.0001),
)

In [21]:
# Fine-tuning run: halve the learning rate after 5 epochs without improvement
# in the monitored quantity (Keras default: val_loss), and keep checkpointing
# through the ModelCheckpoint callback created earlier.
lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)
clf.fit(
    x=train_features,
    y=train_target,
    batch_size=10,
    epochs=100,
    validation_data=(validation_features, validation_target),
    callbacks=[lr_scheduler, checkpoint_cb],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f2618191d50>

In [22]:
# Persist the model as it stands after the last training run (HDF5 file).
# This is a different file from the best-val_loss checkpoint in output/clf.h5.
clf.save("output/clf_1.h5")

In [23]:
# Replace the in-memory model with the file written by the ModelCheckpoint
# callback (saved on val_loss improvement while that callback was attached).
# NOTE(review): the callback object is reused across two fit calls; whether its
# "best" value carries over between them should be confirmed for the Keras version in use.
clf = keras.models.load_model("output/clf.h5")