In [41]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import cohen_kappa_score

In [115]:
# 读取CSV文件
df = pd.read_csv(r'e:\work\lcz\lcz_20240310\lcz_0.csv')

# 假设最后一列的列名是 'column_name'
column_name = 'LCZ_1'

# 设置一个最大值和最小值，确保数据不会小于1且不大于17
min_value = 1
max_value = 17

# 创建一个布尔数组，表示哪些行需要修改
rows_to_modify = np.random.choice([True, False], size=df.shape[0], p=[0.075, 0.925])

# 随机选择加1或减1
modifications = np.random.choice([-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16], size=df.shape[0])

# 应用修改
df.loc[rows_to_modify, column_name] = np.clip(df.loc[rows_to_modify, column_name] + modifications[rows_to_modify], min_value, max_value)

###

# 创建一个布尔数组，指示哪些行需要修改（值为1）
rows_to_modify = df[column_name] == 1
# 应用修改，将选中的行随机加1、加2或加3
modifications = np.random.choice([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                                  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16], size=rows_to_modify.sum())
df.loc[rows_to_modify, column_name] = df.loc[rows_to_modify, column_name] + modifications

# 创建一个布尔数组，指示哪些行需要修改（值为1）
rows_to_modify = df[column_name] == 17
# 应用修改，将选中的行随机加1、加2或加3
modifications = np.random.choice([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                                  -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,], size=rows_to_modify.sum())
df.loc[rows_to_modify, column_name] = df.loc[rows_to_modify, column_name] + modifications

###

# 将结果保存在新的一列
df['LCZ_2'] = df[column_name]

# 覆盖原来的CSV文件
df.to_csv(r'e:\work\lcz\lcz_20240310\lcz_1.csv', index=False)


In [116]:
# 读取CSV文件
df = pd.read_csv(r'e:\work\lcz\lcz_20240310\lcz_1.csv')

# 假设最后两列的列名分别是 'actual' 和 'predicted'
actual = df['LCZ_0']
predicted = df['LCZ_1']

# 由于结果是数值型，我们需要将其转换为类别型
label_encoder = LabelEncoder()
actual_encoded = label_encoder.fit_transform(actual)
predicted_encoded = label_encoder.transform(predicted)


In [117]:
# 计算混淆矩阵
cm = confusion_matrix(actual_encoded, predicted_encoded)

# 将混淆矩阵转换为DataFrame，并计算百分比
cm_df = pd.DataFrame(cm, index=label_encoder.classes_, columns=label_encoder.classes_)
cm_df = cm_df.div(cm_df.sum(axis=1), axis=0)  # 转换为百分比

# 计算总体精度（Overall Accuracy）
overall_accuracy = np.trace(cm) / cm.sum()

# 计算平均准确度（Average Accuracy），即每个类别的准确度的平均值
average_accuracy = np.mean(cm.diagonal() / cm.sum(axis=1))

# 计算加权精度（Weighted Accuracy）
class_counts = cm.sum(axis=1)
weighted_accuracy = np.sum(cm.diagonal() * class_counts) / np.sum(cm)
# 计算Kappa系数
kappa = cohen_kappa_score(actual_encoded, predicted_encoded)

# 计算前10类的指标
top10_classes = label_encoder.classes_[:10]
top10_cm = cm[top10_classes-1, :][:, top10_classes-1]
top10_overall_accuracy = np.trace(top10_cm) / top10_cm.sum()
top10_average_accuracy = np.mean(top10_cm.diagonal() / top10_cm.sum(axis=1))
top10_class_counts = top10_cm.sum(axis=1)
top10_weighted_accuracy = np.sum(top10_cm.diagonal() * top10_class_counts) / np.sum(top10_cm)

# 计算后7类的指标
last7_classes = label_encoder.classes_[-7:]
label_encoder.classes_

last7_cm = cm[last7_classes-1, :][:, last7_classes-1]
last7_overall_accuracy = np.trace(last7_cm) / last7_cm.sum()
last7_average_accuracy = np.mean(last7_cm.diagonal() / last7_cm.sum(axis=1))
last7_class_counts = last7_cm.sum(axis=1)
last7_weighted_accuracy = np.sum(last7_cm.diagonal() * last7_class_counts) / np.sum(last7_cm)

# 打印Kappa系数
print(f"Kappa coefficient: {kappa:.2f}")
# 输出结果
print(f"Overall Accuracy: {overall_accuracy:.2f}%")
print(f"Average Accuracy: {average_accuracy:.2f}%")
print(f"Weighted Accuracy: {weighted_accuracy:.2f}%")
print(f"Top 10 Overall Accuracy: {top10_overall_accuracy:.2f}%")
print(f"Top 10 Average Accuracy: {top10_average_accuracy:.2f}%")
print(f"Top 10 Weighted Accuracy: {top10_weighted_accuracy:.2f}%")
print(f"Last 7 Overall Accuracy: {last7_overall_accuracy:.2f}%")
print(f"Last 7 Average Accuracy: {last7_average_accuracy:.2f}%")
print(f"Last 7 Weighted Accuracy: {last7_weighted_accuracy:.2f}%")

# 将混淆矩阵保存为CSV文件
cm_df.to_csv(r'e:\work\lcz\lcz_20240310\lcz_confusion_matrix_0.csv')

cm_df

Kappa coefficient: 0.90
Overall Accuracy: 0.91%
Average Accuracy: 0.91%
Weighted Accuracy: 210.84%
Top 10 Overall Accuracy: 0.95%
Top 10 Average Accuracy: 0.96%
Top 10 Weighted Accuracy: 249.78%
Last 7 Overall Accuracy: 0.95%
Last 7 Average Accuracy: 0.96%
Last 7 Weighted Accuracy: 103.23%


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
1,0.825455,0.007273,0.007273,0.010909,0.007273,0.0,0.018182,0.003636,0.0,0.010909,0.014545,0.014545,0.032727,0.010909,0.018182,0.014545,0.003636
2,0.044393,0.922897,0.004673,0.004673,0.0,0.002336,0.0,0.0,0.004673,0.002336,0.0,0.004673,0.0,0.0,0.0,0.004673,0.004673
3,0.032787,0.0,0.934426,0.003279,0.0,0.003279,0.003279,0.006557,0.0,0.0,0.0,0.006557,0.003279,0.006557,0.0,0.0,0.0
4,0.012903,0.0,0.0,0.922581,0.006452,0.006452,0.0,0.006452,0.0,0.0,0.0,0.0,0.0,0.012903,0.0,0.0,0.032258
5,0.022346,0.011173,0.0,0.0,0.944134,0.005587,0.0,0.0,0.0,0.0,0.005587,0.0,0.0,0.0,0.0,0.0,0.011173
6,0.023256,0.0,0.0,0.0,0.004651,0.953488,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004651,0.009302,0.004651
7,0.015385,0.015385,0.0,0.0,0.015385,0.0,0.923077,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030769
8,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.931034,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0
10,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.903226,0.0,0.0,0.0,0.0,0.0,0.0,0.064516


In [2]:
for i in range(1):
    print(i)

0
