In [None]:
import time
# Wall-clock start of the run; the timing cell near the end subtracts this value
# to report the total elapsed time.
start_time = time.time()

# 在原始空间中求解

In [None]:
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score

### 设置训练集和测试集数据

In [None]:
# Which sample to process: 1, 2 or 3 (see TARGET_TO_DATA_NAME).
target = 3
# True -> LinearSVC solves the dual problem; False -> the primal problem.
is_dual = True

# Maps each supported target to the numeric stem shared by its CSV and image files.
TARGET_TO_DATA_NAME = {1: '0618', 2: '0854', 3: '1066'}

# Fail here with a clear message instead of a NameError on `data_name` further down.
if target not in TARGET_TO_DATA_NAME:
    raise ValueError(
        f'Unsupported target {target!r}; expected one of {sorted(TARGET_TO_DATA_NAME)}'
    )
data_name = TARGET_TO_DATA_NAME[target]

# Training data: the CSV file(s) matching the selected sample.
csv_files = glob.glob(f'./RGB_data/data_{data_name}.csv')

# Test data: the image to classify pixel by pixel.
new_image_path = f'./input_data/{data_name}.png'

# TRAIN

In [None]:
# Read every matched CSV and concatenate once; calling pd.concat inside the loop
# re-copies the accumulated frame on every iteration.
frames = [pd.read_csv(file) for file in csv_files]
if not frames:
    # Without this check an empty match only fails later, when the 'Label' column is accessed.
    raise FileNotFoundError('No training CSV files matched the configured pattern')
data = pd.concat(frames, ignore_index=True)

# `data` now holds the rows of all matched CSV files.
print(data.shape)

In [None]:
# Separate the target column from the predictors.
labels = data['Label']
features = data.drop(columns='Label')  # every column except 'Label'
features.shape, labels.shape, type(features), type(labels)

In [None]:
# Standardise the feature columns with StandardScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split in the
# next cell, so held-out rows influence the scaling statistics — confirm this is acceptable.
scaler = StandardScaler()

print(features.shape)
print(type(features))

# fit_transform returns a NumPy array; wrap it back into a DataFrame.
features = scaler.fit_transform(features)
features = pd.DataFrame(features)

print(features.shape)
print(type(features))

# Kept as one parenthesised tuple so the notebook displays all four values together.
(features.shape, labels.shape, type(features), type(labels))

In [None]:
# Hold out a quarter of the samples for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.25, random_state=42
)

## 线性SVM 原始空间

In [None]:
# Candidate values for the regularisation parameter C.
param_grid = {'C': [1e-4, 1e-3, 1e-2, 0.1, 1, 10, 100]}

# Linear SVM classifier. `dual` selects which formulation is solved:
# True -> dual problem, False -> primal problem.
clf = svm.LinearSVC(dual=is_dual, random_state=42)

# Search over param_grid, scored by accuracy with 10-fold cross-validation.
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    cv=10,
    scoring='accuracy',
)

# Train the model.
grid_search.fit(X_train, y_train)

# Report the best parameters and the best cross-validation score.
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

# Predict on the held-out split with the best estimator
# (this split effectively serves as a validation set).
clf = grid_search.best_estimator_
y_pred = clf.predict(X_test)

In [None]:
# Summarise the training data: total rows and the number of rows per class.
positive_count = data.loc[data['Label'] == 1, 'Label'].count()
negative_count = data.loc[data['Label'] == 0, 'Label'].count()

print(f'共{data.shape[0]}个数据, {positive_count}个正样本，{negative_count}个负样本\n')

# Accuracy on the held-out split.
accuracy = accuracy_score(y_test, y_pred)
print(f'Model accuracy: {accuracy:.2f}\n')

# Per-class classification report for the same split.
report = classification_report(y_test, y_pred)
print('Classification Report:\n', report)

In [None]:
import os

from joblib import dump, load

# Persist the fitted classifier selected by the grid search.
# NOTE(review): only `clf` is saved; the fitted `scaler` is not, so a fresh session
# that loads this file cannot reproduce the feature scaling — confirm whether the
# scaler should be saved alongside it.
# Create the target directory first: dump() fails if the parent directory is missing.
os.makedirs('./model', exist_ok=True)
dump(clf, './model/svm_model_origin.joblib')

# Read the saved file back into `clf_loaded`.
clf_loaded = load('./model/svm_model_origin.joblib')

# TEST

In [None]:
import cv2
import os
from joblib import load

In [None]:
def load_model(model_path):
    """Load a trained SVM model from a joblib file.

    Args:
        model_path: Path of the file to load.

    Returns:
        The object stored in the file, as returned by joblib's ``load``.
    """
    # joblib files are pickle-based: only load files that come from a trusted source.
    model = load(model_path)
    return model

In [None]:
def preprocess_image(image):
    """Flatten an H x W x C image into one row of three channel values per pixel.

    Args:
        image: 3-D array indexed as (row, column, channel); channels 0, 1 and 2
            are used (named blue, green and red in the original code).

    Returns:
        Array of shape (H * W, 3) with pixels in row-major order and columns
        holding channels 0, 1 and 2. The shape is also printed.
    """
    rows, cols, _ = image.shape
    n_pixels = rows * cols

    # One flattened plane per channel, stacked side by side as columns.
    planes = [image[:, :, channel].reshape(n_pixels) for channel in range(3)]
    img_array = np.stack(planes, axis=1)

    print(img_array.shape)

    return img_array

In [None]:
# Reload the classifier from disk. The fitted StandardScaler `scaler` from the
# training section is still expected to be in memory for the cells below.
model_path = './model/svm_model_origin.joblib'
clf = load_model(model_path)

# Read the image to classify. cv2.imread returns None instead of raising when the
# file is missing or unreadable, so check explicitly.
new_image = cv2.imread(new_image_path)
if new_image is None:
    raise ValueError(f"无法加载图像：{new_image_path}")
new_image.shape, type(new_image)

In [None]:
# Convert the image into the per-pixel feature rows that the model can process.
new_features = preprocess_image(new_image) 

In [None]:
# Scale the pixel features with the same StandardScaler object used for the training data.
# NOTE(review): this reassigns `new_features`, so re-running the cell scales the data a second time.
new_features = scaler.transform(new_features)
new_features.shape, type(new_features)

In [None]:
# Predict a label for every pixel row.
# NOTE(review): the following cell calls clf.predict(new_features) again and stores the
# result as `predictions`; `prediction` is not referenced elsewhere in the visible notebook.
prediction = clf.predict(new_features)

In [None]:
# Predict a label for every pixel (new_features is already scaled, one row per pixel).
predictions = clf.predict(new_features)

# Paint the prediction onto an image of the same size as the input.
original_image = new_image
height, width, _ = original_image.shape

# Pixels were flattened row by row (index = i * width + j), so reshaping to
# (height, width) restores the image layout. A boolean mask replaces the
# per-pixel Python loop and yields the same image.
predicted_image = np.zeros_like(original_image)  # black everywhere by default
foreground = np.asarray(predictions).reshape(height, width) == 1
predicted_image[foreground] = (255, 255, 255)  # white where the model predicts class 1

In [None]:
# Side-by-side view: input image (left) and predicted mask (right).
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
# Convert the colour order from BGR to RGB before handing the image to matplotlib.
plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
plt.title('Original Image')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.imshow(cv2.cvtColor(predicted_image, cv2.COLOR_BGR2RGB))
plt.title('Predicted Image')
plt.axis('off')

plt.show()

# Build the output file name from the input image name and the solver mode.
new_image_name = os.path.splitext(os.path.basename(new_image_path))[0]

# True -> 'dual', False -> 'origin'. Any other value falls back to its string
# form, so `save_is_dual` is always defined.
save_is_dual = {True: 'dual', False: 'origin'}.get(is_dual, str(is_dual))

save_path = f'./result_new/pred_{new_image_name}_{save_is_dual}_.jpg'

# cv2.imwrite does not create directories and reports failure by returning False
# rather than raising, so create the folder and check the result.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
if not cv2.imwrite(save_path, predicted_image):
    raise IOError(f'Failed to write predicted image to {save_path}')
print(f'Predicted image saved to {save_path}')

In [None]:
end_time = time.time()
# Elapsed wall-clock time since `start_time` was recorded in the first cell.
elapsed_time = end_time - start_time
print(f"Elapsed time: {elapsed_time} seconds")

In [None]:
import cv2
import numpy as np
from sklearn.metrics import confusion_matrix

def calculate_performance_metrics(y_true, y_pred):
    """Compute pixel accuracy and IoU for a binary (0/1) segmentation.

    Args:
        y_true: Ground-truth labels, 0 for background and 1 for foreground.
        y_pred: Predicted labels in the same encoding and of the same length.

    Returns:
        Tuple ``(pa, iou)``. ``pa`` is (TP + TN) / (TP + TN + FP + FN) and
        ``iou`` is TP / (TP + FP + FN). Either value is ``nan`` when its
        denominator is zero.
    """
    # Pin the label order so the matrix is always 2x2. Without `labels`, inputs that
    # contain a single class give a 1x1 matrix and the four-way unpack below fails.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = (int(value) for value in cm.ravel())

    # Pixel accuracy (PA).
    total = tp + tn + fp + fn
    pa = (tp + tn) / total if total else float('nan')

    # Intersection over union (IoU); undefined when neither input has a foreground pixel.
    union = tp + fp + fn
    iou = tp / union if union else float('nan')

    return pa, iou

def load_and_mask_binary_image(image_path):
    """Read an image as grayscale and keep only its pure black / pure white pixels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Tuple ``(binary_image, mask)``: ``mask`` is True where the grayscale value
        is exactly 0 or 255, and ``binary_image`` is a copy of the grayscale image
        with every other pixel set to 0.

    Raises:
        ValueError: If ``cv2.imread`` returns None for the given path.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise ValueError(f"无法加载图像：{image_path}")

    # True only for strictly binary pixel values.
    mask = np.isin(gray, (0, 255))

    # Keep masked pixels as they are and zero out everything else.
    binary_image = np.where(mask, gray, gray.dtype.type(0))
    return binary_image, mask

# Paths of the two images to compare.
test_image_path = save_path  # prediction image written earlier in the notebook
annotated_image_path = f'./input_data/{data_name}_label.png'  # annotated label image

# Ground truth: only pixels that are exactly 0 or 255 are kept in `true_mask`.
y_true, true_mask = load_and_mask_binary_image(annotated_image_path)

# Prediction: read the raw grayscale values. The prediction file may be stored in a
# lossy format (the saving cell writes a .jpg), so pixel values are not guaranteed to
# be exactly 0 or 255. Forcing every non-binary pixel to 0 would turn near-white
# pixels into background, so the image is thresholded at mid-grey instead.
y_pred = cv2.imread(test_image_path, cv2.IMREAD_GRAYSCALE)
if y_pred is None:
    raise ValueError(f"无法加载图像：{test_image_path}")
if y_pred.shape != y_true.shape:
    raise ValueError(
        f'Prediction shape {y_pred.shape} does not match annotation shape {y_true.shape}'
    )

# Convert both images to 0/1 arrays.
y_true_binary = (y_true > 0).astype(int)
y_pred_binary = (y_pred >= 128).astype(int)

# Evaluate only where the annotation is strictly binary.
y_true_binary = y_true_binary[true_mask]
y_pred_binary = y_pred_binary[true_mask]

# Compute the metrics.
pa, iou = calculate_performance_metrics(y_true_binary, y_pred_binary)

print(f"Pixel Accuracy (PA): {pa:.4f}")
print(f"Intersection over Union (IoU): {iou:.4f}")