In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Load the merged workbook and discard the leftover 'Unnamed: 0' column in the
# same expression, so `data` is bound exactly once.
data = pd.read_excel(
    'E:\\数学建模国赛\\2022数学建模赛题\\C题\\一二表单合并数据.xlsx'
).drop(columns=['Unnamed: 0'])

# Build the train/test split for one weathering group (weathered or unweathered).
def process_group(data, group_condition, test_size=0.3, random_state=42):
    """Return a train/test split for the rows of one weathering group.

    Rows of ``data`` whose '表面风化' column equals ``group_condition`` are
    selected. The columns '类型', '表面风化' and '文物采样点' are removed to form
    the feature table, and the '类型' column is used as the target.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing at least the columns '类型', '表面风化' and '文物采样点'.
    group_condition : str
        Value of '表面风化' that selects the group.
    test_size : float or int, default 0.3
        Forwarded unchanged to ``train_test_split``.
    random_state : int or None, default 42
        Forwarded unchanged to ``train_test_split``.

    Returns
    -------
    The value returned by ``train_test_split(X, y, ...)``, i.e. the sequence
    ``X_train, X_test, y_train, y_test``.

    Raises
    ------
    ValueError
        If no row of ``data`` matches ``group_condition``.
    """
    group_data = data[data['表面风化'] == group_condition]
    if group_data.empty:
        # Fail here with the offending label instead of letting the splitter
        # complain about an empty training set.
        raise ValueError(
            f"no rows with '表面风化' == {group_condition!r}; cannot build a train/test split"
        )
    X = group_data.drop(columns=['类型', '表面风化', '文物采样点'])
    y = group_data['类型']
    return train_test_split(X, y, test_size=test_size, random_state=random_state)

# Fit a decision-tree classifier on one training partition.
def train_decision_tree(X_train, y_train):
    """Return a ``DecisionTreeClassifier(random_state=42)`` fitted on the inputs.

    ``X_train`` and ``y_train`` are passed straight to ``fit``; the fitted
    estimator is returned.
    """
    # ``fit`` hands back the estimator itself, so build and train in one step.
    return DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Weathered group ('风化'): split into train/test partitions, then fit its tree.
(
    X_train_weathered,
    X_test_weathered,
    y_train_weathered,
    y_test_weathered,
) = process_group(data, '风化')
clf_weathered = train_decision_tree(X_train_weathered, y_train_weathered)

# Unweathered group ('无风化'): same two steps.
(
    X_train_unweathered,
    X_test_unweathered,
    y_train_unweathered,
    y_test_unweathered,
) = process_group(data, '无风化')
clf_unweathered = train_decision_tree(X_train_unweathered, y_train_unweathered)

# Read sheet '表单3' of the attachment workbook; missing cells are replaced by 0.
uploaded_excel_path = 'E:\\数学建模国赛\\2022数学建模赛题\\C题\\附件.xlsx'
new_data = pd.read_excel(uploaded_excel_path, sheet_name='表单3').fillna(0)
# Classify one row of new data with the tree that matches its weathering state.
def classify_new_data(row):
    """Predict the glass type for a single row.

    The row's '表面风化' value selects the model: '风化' uses ``clf_weathered``,
    any other value uses ``clf_unweathered``. The '文物编号' and '表面风化'
    entries are removed before prediction.

    When the selected model exposes ``feature_names_in_`` and every one of
    those names is present in the row, the features are picked by name, in the
    model's training order, and passed as a one-row DataFrame. This keeps the
    prediction correct if the column order differs from the training table and
    ignores extra non-feature entries. Otherwise the remaining entries are
    passed positionally.

    Parameters
    ----------
    row : pandas.Series
        One record containing '文物编号', '表面风化' and the feature values.

    Returns
    -------
    The first element of the selected model's ``predict`` result.
    """
    model = clf_weathered if row['表面风化'] == '风化' else clf_unweathered
    features = row.drop(labels=['文物编号', '表面风化'])

    trained_names = getattr(model, 'feature_names_in_', None)
    if trained_names is not None and set(trained_names) <= set(features.index):
        # Align by name; cast to float because a row taken from a mixed-type
        # frame carries object dtype.
        sample = features[list(trained_names)].astype(float).to_frame().T
    else:
        # No usable feature names on the model: keep positional behaviour.
        sample = [features]
    return model.predict(sample)[0]

# Run the row-wise classifier over every record and store the label in '预测类型'.
new_data['预测类型'] = new_data.apply(classify_new_data, axis='columns')

# Show the identifier, the weathering state and the predicted type.
new_data.loc[:, ['文物编号', '表面风化', '预测类型']]




Unnamed: 0,文物编号,表面风化,预测类型
0,A1,无风化,高钾
1,A2,风化,铅钡
2,A3,无风化,铅钡
3,A4,无风化,铅钡
4,A5,风化,铅钡
5,A6,风化,高钾
6,A7,风化,高钾
7,A8,无风化,铅钡


In [2]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Select the numeric columns as clustering features. A 'Cluster' column left
# behind by an earlier run of this cell is dropped first: it is numeric, so it
# would otherwise be fed back in as a feature and change the result on re-run.
numeric_features = (
    new_data
    .drop(columns=['Cluster'], errors='ignore')
    .select_dtypes(include=['number'])
)

# Standardise each feature before clustering.
scaler = StandardScaler()
standardized_data = scaler.fit_transform(numeric_features)

# K-means with two clusters and a fixed seed.
kmeans = KMeans(n_clusters=2, random_state=42)
clusters = kmeans.fit_predict(standardized_data)

# Attach the cluster label of every row to the table.
new_data['Cluster'] = clusters

# Display the full table including the cluster labels.
new_data



Unnamed: 0,文物编号,表面风化,二氧化硅(SiO2),氧化钠(Na2O),氧化钾(K2O),氧化钙(CaO),氧化镁(MgO),氧化铝(Al2O3),氧化铁(Fe2O3),氧化铜(CuO),氧化铅(PbO),氧化钡(BaO),五氧化二磷(P2O5),氧化锶(SrO),氧化锡(SnO2),二氧化硫(SO2),预测类型,Cluster
0,A1,无风化,78.45,0.0,0.0,6.08,1.86,7.23,2.15,2.11,0.0,0.0,1.06,0.03,0.0,0.51,高钾,0
1,A2,风化,37.75,0.0,0.0,7.63,0.0,2.33,0.0,0.0,34.3,0.0,14.27,0.0,0.0,0.0,铅钡,1
2,A3,无风化,31.95,0.0,1.36,7.19,0.81,2.93,7.06,0.21,39.58,4.69,2.68,0.52,0.0,0.0,铅钡,1
3,A4,无风化,35.47,0.0,0.79,2.89,1.05,7.07,6.45,0.96,24.28,8.31,8.45,0.28,0.0,0.0,铅钡,1
4,A5,风化,64.29,1.2,0.37,1.64,2.34,12.75,0.81,0.94,12.23,2.16,0.19,0.21,0.49,0.0,铅钡,0
5,A6,风化,93.17,0.0,1.35,0.64,0.21,1.52,0.27,1.73,0.0,0.0,0.21,0.0,0.0,0.0,高钾,0
6,A7,风化,90.83,0.0,0.98,1.12,0.0,5.06,0.24,1.17,0.0,0.0,0.13,0.0,0.0,0.11,高钾,0
7,A8,无风化,51.12,0.0,0.23,0.89,0.0,2.12,0.0,9.01,21.24,11.34,1.46,0.31,0.0,2.26,铅钡,1
