# 机器学习大作业：基于多种降维方法的心脏病诊断算法研究
## 概况：
## 八种机器学习模型（逻辑回归、决策树、随机森林、KNN、NB、SVM、XGBoost、LightGBM）
## 四种降维可视化方法（PCA、NCA、UMAP、t-SNE）
## 尝试与展望：神经网络/深度学习（MLP、CNN…）
## From：吴泽澜-2023217454

#### 调用必要的包

### 尝试与展望（神经网络/深度学习）
#### 数据集准备

In [198]:
# Append a trailing axis of size 1 so the data has the 3-D layout
# (samples, features, 1) intended for 1D convolution layers.
X_cnn = X.reshape((X.shape[0], X.shape[1], 1))
# Hold out 30% of the samples as a test set; the fixed seed makes the
# split reproducible.
X_cnn_train, X_cnn_test, y_cnn_train, y_cnn_test = train_test_split(
    X_cnn,
    y,
    test_size=0.3,
    random_state=42,
)

#### 定义神经网络模型（MLP和CNN并尝试多种神经网络）

In [199]:

def create_mlp_model(input_dim):
    """Build and compile a two-hidden-layer MLP for binary classification.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model: Dense(128, relu) -> Dropout(0.2)
        -> Dense(64, relu) -> Dropout(0.2) -> Dense(1, sigmoid), trained
        with Adam (learning rate 0.001) on binary cross-entropy and
        reporting accuracy.
    """
    stack = []
    # Only the first Dense layer declares the input shape.
    layer_kwargs = {'input_shape': (input_dim,)}
    for units in (128, 64):
        stack.append(Dense(units, activation='relu', **layer_kwargs))
        stack.append(Dropout(0.2))
        layer_kwargs = {}
    # Single sigmoid unit: the output is one value in (0, 1) per sample.
    stack.append(Dense(1, activation='sigmoid'))

    network = Sequential(stack)
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

def create_cnn_model(input_dim):
    """Build and compile a small 1D CNN for binary classification.

    The model takes flat feature vectors of length ``input_dim``; its first
    layer reshapes each one to ``(input_dim, 1)`` before the convolutions,
    so callers do not need to add the channel axis themselves.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model with two Conv1D(kernel 3, relu) +
        MaxPooling1D(2) stages (32 then 64 filters), followed by Flatten,
        Dense(128, relu), Dropout(0.2) and a single sigmoid output unit.
        Compiled with Adam (learning rate 0.001), binary cross-entropy loss
        and the accuracy metric.
    """
    # NOTE(review): with Keras' default 'valid' padding and unit stride the
    # two conv/pool stages appear to need input_dim >= 10 -- confirm before
    # reusing this on narrower feature sets.
    feature_extractor = [Reshape((input_dim, 1), input_shape=(input_dim,))]
    for filters in (32, 64):
        feature_extractor.append(Conv1D(filters, 3, activation='relu'))
        feature_extractor.append(MaxPooling1D(2))

    classifier_head = [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ]

    network = Sequential(feature_extractor + classifier_head)
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

def create_model_1(input_dim):
    """Build and compile a four-hidden-layer tapering dense network.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model whose hidden Dense layers have
        50, 40, 30 and 20 ReLU units, each followed by Dropout(0.2), ending
        in a single sigmoid unit. Compiled with Adam (learning rate 0.001),
        binary cross-entropy loss and the accuracy metric.
    """
    hidden_widths = (50, 40, 30, 20)

    stack = []
    for position, units in enumerate(hidden_widths):
        if position == 0:
            # The first layer is the only one that declares the input shape.
            dense = Dense(units, activation='relu', input_shape=(input_dim,))
        else:
            dense = Dense(units, activation='relu')
        stack.append(dense)
        stack.append(Dropout(0.2))
    stack.append(Dense(1, activation='sigmoid'))

    network = Sequential(stack)
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

def create_model_2(input_dim):
    """Build and compile a wide two-hidden-layer dense network.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model: Dense(300, relu) -> Dropout(0.2)
        -> Dense(100, relu) -> Dropout(0.2) -> Dense(1, sigmoid). Compiled
        with Adam (learning rate 0.001), binary cross-entropy loss and the
        accuracy metric.
    """
    # Layers are created in network order, then handed to Sequential.
    wide_hidden = Dense(300, activation='relu', input_shape=(input_dim,))
    wide_dropout = Dropout(0.2)
    narrow_hidden = Dense(100, activation='relu')
    narrow_dropout = Dropout(0.2)
    output_unit = Dense(1, activation='sigmoid')

    network = Sequential(
        [wide_hidden, wide_dropout, narrow_hidden, narrow_dropout, output_unit]
    )
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

def create_model_3(input_dim):
    """Build and compile a compact two-hidden-layer dense network.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model: Dense(50, relu) -> Dropout(0.2)
        -> Dense(20, relu) -> Dropout(0.2) -> Dense(1, sigmoid). Compiled
        with Adam (learning rate 0.001), binary cross-entropy loss and the
        accuracy metric.
    """
    stack = [Dense(50, activation='relu', input_shape=(input_dim,))]
    stack.append(Dropout(0.2))
    stack.extend([Dense(20, activation='relu'), Dropout(0.2)])
    # Single sigmoid unit: the output is one value in (0, 1) per sample.
    stack.append(Dense(1, activation='sigmoid'))

    network = Sequential(stack)
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

def create_model_4(input_dim):
    """Build and compile the deepest dense network of the comparison.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model with seven hidden Dense layers of
        50, 50, 50, 40, 40, 30 and 20 ReLU units, each followed by
        Dropout(0.2), ending in a single sigmoid unit. Compiled with Adam
        (learning rate 0.001), binary cross-entropy loss and the accuracy
        metric.
    """
    first_width, *remaining_widths = (50, 50, 50, 40, 40, 30, 20)

    # The first hidden layer is the only one that declares the input shape.
    stack = [
        Dense(first_width, activation='relu', input_shape=(input_dim,)),
        Dropout(0.2),
    ]
    for units in remaining_widths:
        stack.append(Dense(units, activation='relu'))
        stack.append(Dropout(0.2))
    stack.append(Dense(1, activation='sigmoid'))

    network = Sequential(stack)
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

#### 神经网络模型效果评估

In [218]:
def evaluate_nn_models(X, y, model_fn, *, n_splits=10, epochs=100,
                       batch_size=10, random_state=42):
    """Estimate a network's accuracy with stratified k-fold cross-validation.

    For every fold a fresh model is built with ``model_fn``, trained on the
    training part of the fold and scored on the held-out part.

    Args:
        X: Feature data indexed by sample along the first axis. It is
            converted with ``np.asarray`` so that the fold indices are
            always applied positionally (a pandas object indexed with
            ``[]`` would be looked up by label instead).
        y: Class labels, one per sample; converted the same way as ``X``.
        model_fn: Callable that receives ``X.shape[1]`` and returns a
            compiled model. The model's ``evaluate`` must return a sequence
            whose element at index 1 is the accuracy (i.e. the model was
            compiled with accuracy as its first metric).
        n_splits: Number of cross-validation folds.
        epochs: Training epochs per fold.
        batch_size: Mini-batch size used for training.
        random_state: Seed for the shuffled fold assignment.

    Returns:
        Tuple ``(mean_accuracy, std_accuracy)`` computed over the folds
        with ``np.mean`` and ``np.std``.
    """
    # Force plain arrays: StratifiedKFold yields positional indices, and
    # `obj[indices]` is only positional for numpy arrays.
    X = np.asarray(X)
    y = np.asarray(y)

    skf = StratifiedKFold(
        n_splits=n_splits, shuffle=True, random_state=random_state
    )
    accuracy_scores = []

    for train_index, val_index in skf.split(X, y):
        X_train_fold, X_val_fold = X[train_index], X[val_index]
        y_train_fold, y_val_fold = y[train_index], y[val_index]

        # Build a new, untrained model for each fold so that no weights
        # carry over between folds.
        model = model_fn(X_train_fold.shape[1])
        model.fit(
            X_train_fold,
            y_train_fold,
            epochs=epochs,
            batch_size=batch_size,
            verbose=0,
        )

        # evaluate() returns [loss, accuracy]; keep only the accuracy.
        score = model.evaluate(X_val_fold, y_val_fold, verbose=0)[1]
        accuracy_scores.append(score)

    mean_accuracy = np.mean(accuracy_scores)
    std_accuracy = np.std(accuracy_scores)
    return mean_accuracy, std_accuracy

# Registry of the candidate architectures: display name -> builder function.
nn_models = {}
nn_models['MLP'] = create_mlp_model
nn_models['CNN'] = create_cnn_model
nn_models['NN_Model_1'] = create_model_1
nn_models['NN_Model_2'] = create_model_2
nn_models['NN_Model_3'] = create_model_3
nn_models['NN_Model_4'] = create_model_4

# Cross-validate every architecture on the training data and record the
# (mean accuracy, standard deviation) pair under the model's name.
results_nn = {}
for name, model_fn in nn_models.items():
    fold_summary = evaluate_nn_models(X_train, y_train, model_fn)
    mean_accuracy, std_accuracy = fold_summary
    results_nn[name] = (mean_accuracy, std_accuracy)
    print(f"{name}: Mean accuracy={mean_accuracy:.4f}, Std={std_accuracy:.4f}")

MLP: Mean accuracy=0.9819, Std=0.0153
CNN: Mean accuracy=0.9791, Std=0.0143
NN_Model_1: Mean accuracy=0.9721, Std=0.0216
NN_Model_2: Mean accuracy=0.9819, Std=0.0140
NN_Model_3: Mean accuracy=0.9666, Std=0.0217
NN_Model_4: Mean accuracy=0.9819, Std=0.0153
