In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Load the clustering result that serves as ground truth.
# Forward slashes are used so the relative path resolves on Windows and POSIX
# alike (the previous backslash-separated path only worked on Windows).
# To evaluate the K-means++ result instead, read "../temp/K-means++聚类.csv";
# NOTE(review): its label column appears to be 'Cluster(K-means)' — confirm.
df_0 = pd.read_csv("../temp/DBSCAN聚类.csv")

# The original (un-perturbed) rows are kept as the evaluation set:
# features are every column except 'Cluster', labels are the 'Cluster' column.
X_0_test = df_0.drop(columns='Cluster')
y_0_test = df_0['Cluster']

# Helper that creates noisy copies of a single labelled row.
def add_normal_disturbances(row, num_samples=50, scale=0.5, rng=None):
    """Return noisy copies of the feature part of ``row``.

    The last element of ``row`` is treated as the label and is dropped; every
    remaining value gets independent zero-mean Gaussian noise added to it.

    Parameters
    ----------
    row : sequence
        One record whose last element is the label (excluded from the result).
    num_samples : int, default 50
        Number of noisy copies to generate.
    scale : float, default 0.5
        Standard deviation of the Gaussian noise.
    rng : numpy.random.RandomState or numpy.random.Generator, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed`` still controls the output.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_samples, len(row) - 1)``.
    """
    noise_source = np.random if rng is None else rng
    features = row[:-1]
    # Mean 0, standard deviation ``scale``; one noise value per feature per copy.
    disturbances = noise_source.normal(0, scale, (num_samples, len(row) - 1))
    return np.tile(features, (num_samples, 1)) + disturbances


# Augment the data: create 10 noisy copies of every original row, each copy
# keeping the cluster label of the row it was derived from.
label_col = 'Cluster'

# Select the features by name and place the label last. add_normal_disturbances
# drops the last element of the row it is given, so the label must sit there.
# When 'Cluster' is already the last column this leaves the order unchanged.
feature_cols = [col for col in df_0.columns if col != label_col]
labelled_rows = df_0[feature_cols + [label_col]]

new_data = []

for i in range(len(labelled_rows)):
    row = labelled_rows.iloc[i].values
    disturbances = add_normal_disturbances(row, num_samples=10)
    # Re-attach the original label to each perturbed feature vector.
    new_data.extend(list(noisy) + [row[-1]] for noisy in disturbances)

# Assemble the augmented samples into a new data frame.
new_columns = feature_cols + [label_col]
df = pd.DataFrame(new_data, columns=new_columns)


In [None]:
#   SVM

# Define the model
model = SVC(random_state=42)

# Monte Carlo (bootstrap) resampling
results = []

num_iterations = 500
sample_size = len(df)  # each bootstrap sample has as many rows as df

# Seeded generator so the sequence of bootstrap samples (and therefore the
# reported averages) is identical on every Restart & Run All.
bootstrap_rng = np.random.RandomState(42)

# Every iteration is scored on the original, un-perturbed data.
X_test = X_0_test
y_test = y_0_test

for _ in range(num_iterations):
    # Draw a bootstrap sample (with replacement) from the augmented data
    sampled_data = df.sample(n=sample_size, replace=True, random_state=bootstrap_rng)

    # Separate features and labels
    X = sampled_data.drop('Cluster', axis=1)
    y = sampled_data['Cluster']

    # train_test_split returns [X_train, X_test, y_train, y_test]; only the
    # training parts are kept because evaluation uses X_0_test / y_0_test.
    X_train, y_train = train_test_split(X, y, test_size=0.2, random_state=42)[::2]

    # Train the model
    model.fit(X_train, y_train)

    # Predict on the evaluation set
    y_pred = model.predict(X_test)

    # Store confusion matrix and support-weighted metrics for this iteration
    results.append({
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1': f1_score(y_test, y_pred, average='weighted'),
        'confusion_matrix': confusion_matrix(y_test, y_pred)
    })

# Average accuracy over all Monte Carlo iterations
monte_carlo_avg_accuracy = np.mean([result['accuracy'] for result in results])

# Report the result
print(f"Monte Carlo Average Accuracy: {monte_carlo_avg_accuracy}")

# Averages of the remaining performance metrics
avg_precision = np.mean([result['precision'] for result in results])
avg_recall = np.mean([result['recall'] for result in results])
avg_f1 = np.mean([result['f1'] for result in results])

print(f"Average Precision: {avg_precision}")
print(f"Average Recall: {avg_recall}")
print(f"Average F1 Score: {avg_f1}")


In [None]:
#  Decision tree

# Define the model
model = DecisionTreeClassifier(random_state=42)

# Monte Carlo (bootstrap) resampling
results = []

num_iterations = 500
sample_size = len(df)  # each bootstrap sample has as many rows as df

# Seeded generator so the sequence of bootstrap samples (and therefore the
# reported averages) is identical on every Restart & Run All.
bootstrap_rng = np.random.RandomState(42)

# Every iteration is scored on the original, un-perturbed data.
X_test = X_0_test
y_test = y_0_test

for _ in range(num_iterations):
    # Draw a bootstrap sample (with replacement) from the augmented data
    sampled_data = df.sample(n=sample_size, replace=True, random_state=bootstrap_rng)

    # Separate features and labels
    X = sampled_data.drop('Cluster', axis=1)
    y = sampled_data['Cluster']

    # train_test_split returns [X_train, X_test, y_train, y_test]; only the
    # training parts are kept because evaluation uses X_0_test / y_0_test.
    X_train, y_train = train_test_split(X, y, test_size=0.2, random_state=42)[::2]

    # Train the model
    model.fit(X_train, y_train)

    # Predict on the evaluation set
    y_pred = model.predict(X_test)

    # Store confusion matrix and support-weighted metrics for this iteration
    results.append({
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1': f1_score(y_test, y_pred, average='weighted'),
        'confusion_matrix': confusion_matrix(y_test, y_pred)
    })

# Average accuracy over all Monte Carlo iterations
monte_carlo_avg_accuracy = np.mean([result['accuracy'] for result in results])

# Report the result
print(f"Monte Carlo Average Accuracy: {monte_carlo_avg_accuracy}")

# Averages of the remaining performance metrics
avg_precision = np.mean([result['precision'] for result in results])
avg_recall = np.mean([result['recall'] for result in results])
avg_f1 = np.mean([result['f1'] for result in results])

print(f"Average Precision: {avg_precision}")
print(f"Average Recall: {avg_recall}")
print(f"Average F1 Score: {avg_f1}")


In [None]:
#  SVM + decision tree ensemble

# Define the SVM model
svm_model = SVC(random_state=42)

# Define the decision tree model
tree_model = DecisionTreeClassifier(random_state=42)

# Hard-voting ensemble of the two models.
# NOTE(review): with only two voters a hard vote can tie; per the scikit-learn
# documentation the tie is resolved by ascending class-label order — confirm
# this is acceptable for the comparison.
voting_model = VotingClassifier(estimators=[('svm', svm_model), ('tree', tree_model)], voting='hard')

# Monte Carlo (bootstrap) resampling
results = []

num_iterations = 500
sample_size = len(df)  # each bootstrap sample has as many rows as df

# Seeded generator so the sequence of bootstrap samples (and therefore the
# reported averages) is identical on every Restart & Run All.
bootstrap_rng = np.random.RandomState(42)

# Score on the original, un-perturbed data, exactly like the single-model
# SVM and decision-tree cells. Scoring on a split of the bootstrap sample
# would put duplicates of the same row in both train and test (sampling is
# done with replacement) and would not be comparable with those cells.
X_test = X_0_test
y_test = y_0_test

for _ in range(num_iterations):
    # Draw a bootstrap sample (with replacement) from the augmented data
    sampled_data = df.sample(n=sample_size, replace=True, random_state=bootstrap_rng)

    # Separate features and labels
    X = sampled_data.drop('Cluster', axis=1)
    y = sampled_data['Cluster']

    # train_test_split returns [X_train, X_test, y_train, y_test]; only the
    # training parts are kept because evaluation uses X_0_test / y_0_test.
    X_train, y_train = train_test_split(X, y, test_size=0.2, random_state=42)[::2]

    # Train the voting ensemble
    voting_model.fit(X_train, y_train)

    # Predict on the evaluation set
    y_pred = voting_model.predict(X_test)

    # Store confusion matrix and support-weighted metrics for this iteration
    results.append({
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1': f1_score(y_test, y_pred, average='weighted'),
        'confusion_matrix': confusion_matrix(y_test, y_pred)
    })

# Average accuracy over all Monte Carlo iterations
monte_carlo_avg_accuracy = np.mean([result['accuracy'] for result in results])

# Report the result
print(f"Monte Carlo Average Accuracy: {monte_carlo_avg_accuracy}")

# Averages of the remaining performance metrics
avg_precision = np.mean([result['precision'] for result in results])
avg_recall = np.mean([result['recall'] for result in results])
avg_f1 = np.mean([result['f1'] for result in results])

print(f"Average Precision: {avg_precision}")
print(f"Average Recall: {avg_recall}")
print(f"Average F1 Score: {avg_f1}")


In [None]:
#  SVM + decision tree + naive Bayes ensemble

# Define the base models
svm_model = SVC(random_state=42)
nb_model = GaussianNB()
dt_model = DecisionTreeClassifier(random_state=42)

# Hard-voting ensemble of the three models
voting_model = VotingClassifier(estimators=[
    ('svm', svm_model),
    ('naive_bayes', nb_model),
    ('decision_tree', dt_model)
], voting='hard')

# Monte Carlo (bootstrap) resampling
results = []

num_iterations = 500
sample_size = len(df)  # each bootstrap sample has as many rows as df

# Seeded generator so the sequence of bootstrap samples (and therefore the
# reported averages) is identical on every Restart & Run All.
bootstrap_rng = np.random.RandomState(42)

# Score on the original, un-perturbed data, exactly like the single-model
# SVM and decision-tree cells. Scoring on a split of the bootstrap sample
# would put duplicates of the same row in both train and test (sampling is
# done with replacement) and would not be comparable with those cells.
X_test = X_0_test
y_test = y_0_test

for _ in range(num_iterations):
    # Draw a bootstrap sample (with replacement) from the augmented data
    sampled_data = df.sample(n=sample_size, replace=True, random_state=bootstrap_rng)

    # Separate features and labels
    X = sampled_data.drop('Cluster', axis=1)
    y = sampled_data['Cluster']

    # train_test_split returns [X_train, X_test, y_train, y_test]; only the
    # training parts are kept because evaluation uses X_0_test / y_0_test.
    X_train, y_train = train_test_split(X, y, test_size=0.2, random_state=42)[::2]

    # Train the voting ensemble
    voting_model.fit(X_train, y_train)

    # Predict on the evaluation set
    y_pred = voting_model.predict(X_test)

    # Store confusion matrix and support-weighted metrics for this iteration
    results.append({
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1': f1_score(y_test, y_pred, average='weighted'),
        'confusion_matrix': confusion_matrix(y_test, y_pred)
    })

# Average accuracy over all Monte Carlo iterations
monte_carlo_avg_accuracy = np.mean([result['accuracy'] for result in results])

# Report the result
print(f"Monte Carlo Average Accuracy: {monte_carlo_avg_accuracy}")

# Averages of the remaining performance metrics
avg_precision = np.mean([result['precision'] for result in results])
avg_recall = np.mean([result['recall'] for result in results])
avg_f1 = np.mean([result['f1'] for result in results])

print(f"Average Precision: {avg_precision}")
print(f"Average Recall: {avg_recall}")
print(f"Average F1 Score: {avg_f1}")
