# In [None]:
import os

# Set before the third-party numeric stack is imported so the flag is already
# in place when those libraries load.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import yaml
from matplotlib import rcParams
# NOTE: `simps` was removed in SciPy 1.14; `scipy.integrate.simpson` replaces it.
from scipy.integrate import simps
from scipy.stats import gaussian_kde
from sklearn.preprocessing import PowerTransformer

# Global matplotlib font settings: the same size for body text, axes titles
# and axis labels.
# Optional font family override:
# rcParams['font.family'] = 'Times New Roman'
rcParams.update({
    'font.size': 20,
    'axes.titlesize': 20,
    'axes.labelsize': 20,
})


def rgb_to_normalized(r, g, b):
    """Return ``(r, g, b)`` with every channel divided by 255.

    For 8-bit channel values (0-255) the result lies in ``[0, 1]``.
    """
    return tuple(channel / 255 for channel in (r, g, b))
# Dataset name; used to build the YAML path below and every data/figure path.
DATASET = 'CWRU'
SRC_COLOR = '#7577A1'
TRG_COLOR = '#CEA86A'

# The dataset description lives at ../dataset/<DATASET>/<DATASET>.yml.
yaml_file = f"{DATASET}/{DATASET}.yml"
yaml_path = os.path.join("../dataset", yaml_file)
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# locale encoding.
with open(yaml_path, 'r', encoding='utf-8') as file:
    classes_set = yaml.safe_load(file)['classes_set']

# all scenarios (the keys of the `classes_set` mapping)
scenarios = list(classes_set)

# Containers for the loaded tensors, filled by the loop below and keyed by the
# source / target id of each scenario.  "DA" and "NoDA" mirror the prefixes of
# the files the tensors are loaded from.
DA_source_feature_dict, DA_source_labels_dict = {}, {}
DA_target_feature_dict, DA_target_labels_dict = {}, {}
NoDA_source_feature_dict, NoDA_source_labels_dict = {}, {}
NoDA_target_feature_dict, NoDA_target_labels_dict = {}, {}

# One row per plotted scenario: (source id, target id, class id, invert flag).
# A truthy invert flag selects the `if invert:` branch of the plotting loop;
# set every flag to 0 to disable it.
_SCENARIO_TABLE = [
    (2, 0, 1, 1),
    (3, 2, 4, 0),
    (3, 0, 1, 1),
    (2, 0, 5, 1),
    (2, 1, 5, 0),
    (2, 1, 1, 1),
    (1, 0, 6, 0),
    (1, 0, 5, 0),
    (1, 0, 1, 1),
    (1, 0, 2, 1),
]
# Unzip the rows into the four parallel lists the loop iterates over.
src_index, trg_index, class_index, invert_index = (
    list(column) for column in zip(*_SCENARIO_TABLE)
)

# Feature column whose 1-D distribution is compared.  The output file names
# below hard-code "feature6", so keep the two in sync.
_FEATURE_COL = 6
# Number of sample points used for every KDE evaluation grid.
_KDE_GRID_POINTS = 500
# Colors shared by every KDE panel: source curve and target curve.
_SRC_KDE_COLOR = rgb_to_normalized(66, 146, 197)
_TRG_KDE_COLOR = rgb_to_normalized(177, 49, 51)


def _load_tensor(prefix, kind, domain_id):
    """Load ./distribution/<DATASET>/<prefix>_<kind>_<DATASET>_<domain_id>.pth and move it to the CPU."""
    return torch.load(
        f'./distribution/{DATASET}/{prefix}_{kind}_{DATASET}_{domain_id}.pth'
    ).cpu()


def _draw_kde_panel(ax, src_values, trg_values, title):
    """Draw filled source/target KDE curves on `ax`, then add a legend and `title`."""
    sns.kdeplot(src_values, label='Src', ax=ax, color=_SRC_KDE_COLOR, fill=True, alpha=0.4)
    sns.kdeplot(trg_values, label='Trg', ax=ax, color=_TRG_KDE_COLOR, fill=True, alpha=0.4)
    ax.legend()
    ax.set_title(title)


# The four lists are consumed in lockstep; fail loudly instead of silently
# truncating if they ever get out of sync.
if not (len(src_index) == len(trg_index) == len(class_index) == len(invert_index)):
    raise ValueError(
        'src_index, trg_index, class_index and invert_index must have the same length'
    )

# Figures are written here; create the directory so savefig cannot fail on a
# missing path.
os.makedirs(f'./distribution_pic/{DATASET}', exist_ok=True)

for src_id, trg_id, cls_id, invert in zip(src_index, trg_index, class_index, invert_index):
    # Load the saved features / labels for the source and target ids.
    # Per the original notes features are presumably (N, 192) and labels (N,)
    # -- TODO confirm.
    DA_source_feature_dict[src_id] = _load_tensor('DA', 'features', src_id)
    DA_source_labels_dict[src_id] = _load_tensor('DA', 'labels', src_id)
    DA_target_feature_dict[trg_id] = _load_tensor('DA', 'features', trg_id)
    DA_target_labels_dict[trg_id] = _load_tensor('DA', 'labels', trg_id)
    NoDA_source_feature_dict[src_id] = _load_tensor('NoDA', 'features', src_id)
    NoDA_source_labels_dict[src_id] = _load_tensor('NoDA', 'labels', src_id)
    NoDA_target_feature_dict[trg_id] = _load_tensor('NoDA', 'features', trg_id)
    NoDA_target_labels_dict[trg_id] = _load_tensor('NoDA', 'labels', trg_id)

    # NOTE(review): the transformer is re-fitted on each array separately, so
    # source and target are each normalized with their own parameters.  This
    # can hide a shift between the two domains -- confirm this is intended.
    scaler = PowerTransformer(method='yeo-johnson')

    # NoDA: samples of class `cls_id` in source and target.
    NoDA_src = NoDA_source_feature_dict[src_id][NoDA_source_labels_dict[src_id] == cls_id]
    NoDA_trg = NoDA_target_feature_dict[trg_id][NoDA_target_labels_dict[trg_id] == cls_id]
    gaussian_NoDA_src = scaler.fit_transform(NoDA_src.numpy())
    gaussian_NoDA_trg = scaler.fit_transform(NoDA_trg.numpy())
    # DA: samples of class `cls_id` in source and target.
    DA_src = DA_source_feature_dict[src_id][DA_source_labels_dict[src_id] == cls_id]
    DA_trg = DA_target_feature_dict[trg_id][DA_target_labels_dict[trg_id] == cls_id]
    gaussian_DA_src = scaler.fit_transform(DA_src.numpy())
    gaussian_DA_trg = scaler.fit_transform(DA_trg.numpy())

    # The single feature column that is analysed and plotted.
    col_NoDA_src = gaussian_NoDA_src[:, _FEATURE_COL]
    col_NoDA_trg = gaussian_NoDA_trg[:, _FEATURE_COL]
    col_DA_src = gaussian_DA_src[:, _FEATURE_COL]
    col_DA_trg = gaussian_DA_trg[:, _FEATURE_COL]

    # Kernel density estimates.
    kde_NoDA_src = gaussian_kde(col_NoDA_src)
    kde_NoDA_trg = gaussian_kde(col_NoDA_trg)
    kde_DA_src = gaussian_kde(col_DA_src)
    kde_DA_trg = gaussian_kde(col_DA_trg)

    # NoDA: each density sampled on its own [min, max] grid (used for the
    # individual areas).
    x_NoDA_src = np.linspace(col_NoDA_src.min(), col_NoDA_src.max(), _KDE_GRID_POINTS)
    y_NoDA_src = kde_NoDA_src.evaluate(x_NoDA_src)
    x_NoDA_trg = np.linspace(col_NoDA_trg.min(), col_NoDA_trg.max(), _KDE_GRID_POINTS)
    y_NoDA_trg = kde_NoDA_trg.evaluate(x_NoDA_trg)
    # Common grid spanning both domains, used for the overlap.
    x_NoDA_min = min(x_NoDA_src.min(), x_NoDA_trg.min())
    x_NoDA_max = max(x_NoDA_src.max(), x_NoDA_trg.max())
    x_NoDA_vals = np.linspace(x_NoDA_min, x_NoDA_max, _KDE_GRID_POINTS)
    # Overlap area: both densities must be evaluated on the SAME grid before
    # taking the pointwise minimum, otherwise the y-values do not correspond
    # to the x-values they are integrated against.
    y_jiaocha_NoDA_src = kde_NoDA_src.evaluate(x_NoDA_vals)
    y_jiaocha_NoDA_trg = kde_NoDA_trg.evaluate(x_NoDA_vals)
    overlap_NoDA = simps(np.minimum(y_jiaocha_NoDA_src, y_jiaocha_NoDA_trg), x_NoDA_vals)

    # DA: same procedure.
    x_DA_src = np.linspace(col_DA_src.min(), col_DA_src.max(), _KDE_GRID_POINTS)
    y_DA_src = kde_DA_src.evaluate(x_DA_src)
    x_DA_trg = np.linspace(col_DA_trg.min(), col_DA_trg.max(), _KDE_GRID_POINTS)
    y_DA_trg = kde_DA_trg.evaluate(x_DA_trg)
    x_DA_min = min(x_DA_src.min(), x_DA_trg.min())
    x_DA_max = max(x_DA_src.max(), x_DA_trg.max())
    x_DA_vals = np.linspace(x_DA_min, x_DA_max, _KDE_GRID_POINTS)
    y_jiaocha_DA_src = kde_DA_src.evaluate(x_DA_vals)
    y_jiaocha_DA_trg = kde_DA_trg.evaluate(x_DA_vals)
    overlap_DA = simps(np.minimum(y_jiaocha_DA_src, y_jiaocha_DA_trg), x_DA_vals)

    # Area under each individual density on its own grid.
    NoDA_src_area = simps(y_NoDA_src, x_NoDA_src)
    NoDA_trg_area = simps(y_NoDA_trg, x_NoDA_trg)
    DA_src_area = simps(y_DA_src, x_DA_src)
    DA_trg_area = simps(y_DA_trg, x_DA_trg)

    # Overlap expressed as a fraction of each density's own area.
    a = overlap_DA / DA_src_area
    b = overlap_DA / DA_trg_area
    c = overlap_NoDA / NoDA_src_area
    d = overlap_NoDA / NoDA_trg_area

    out_stem = (
        f'./distribution_pic/{DATASET}/'
        f'NoDA_vs_DA_{DATASET}_feature6_src{src_id}_trg{trg_id}_class{cls_id}'
    )

    if invert:
        # NOTE(review): in this branch the DA features are drawn in the left
        # panel titled 'Baseline', the NoDA features in the right panel titled
        # 'TSFA', and the printed NoDA_/DA_ labels carry the other run's
        # numbers.  Confirm this relabelling is intended for these scenarios.
        fig, axes = plt.subplots(1, 2, figsize=(12, 8))
        _draw_kde_panel(axes[0], col_DA_src, col_DA_trg, 'Baseline')
        _draw_kde_panel(axes[1], col_NoDA_src, col_NoDA_trg, 'TSFA')
        plt.show()

        print(f'NoDA_src_area:{DA_src_area}, NoDA_trg_area:{DA_trg_area},overlap_NoDA:{overlap_DA},src:{a},trg:{b}')
        print(f'DA_src_area:{NoDA_src_area}, DA_trg_area:{NoDA_trg_area},overlap_DA:{overlap_NoDA},src:{c},trg:{d}')

        fig.savefig(f'{out_stem}.png', dpi=1200)
        # Close this specific figure so figures do not accumulate across iterations.
        plt.close(fig)
    else:
        fig, axes = plt.subplots(1, 2, figsize=(16, 8))
        _draw_kde_panel(axes[0], col_NoDA_src, col_NoDA_trg, 'Before Domain Adaptation')
        _draw_kde_panel(axes[1], col_DA_src, col_DA_trg, 'After Domain Adaptation')
        plt.show()

        print(f'NoDA_src_area:{NoDA_src_area}, NoDA_trg_area:{NoDA_trg_area},overlap_NoDA:{overlap_NoDA},src:{c},trg:{d}')
        print(f'DA_src_area:{DA_src_area}, DA_trg_area:{DA_trg_area},overlap_DA:{overlap_DA},src:{a},trg:{b}')
        fig.savefig(f'{out_stem}.pdf',
                    dpi=300,
                    bbox_inches='tight',
                    format='pdf')
        plt.close(fig)