## 计算FID分数

In [1]:
import os
import re
import shutil
import random
import time
from datetime import datetime
import subprocess
import pandas as pd
from PIL import Image
from IPython.display import display

In [2]:
# Count the images in the training dataset and force every one to 512x512.
dataset_folder = 'Train_Dataset'  # replace with the path to your folder

# File extensions (compared case-insensitively) that count as images
supported_formats = ['.jpg', '.jpeg', '.png']

# Running total of image files found in the dataset folder
image_count = 0
target_size = (512, 512)
image_suffixes = tuple(supported_formats)

for entry in os.listdir(dataset_folder):
    # Anything that is not a supported image is ignored entirely.
    if not entry.lower().endswith(image_suffixes):
        continue

    image_count += 1
    entry_path = os.path.join(dataset_folder, entry)

    with Image.open(entry_path) as picture:
        # Images that already have the target size are left untouched.
        if picture.size == target_size:
            continue
        # Overwrite the original file with the resized copy.
        picture.resize(target_size).save(entry_path)

print(f"Processed {image_count} images.")

# Root folder whose sub-folders hold the generated images, and the root folder
# that receives a random sample from each of those sub-folders.
root_source_folder = 'Original_Images'
root_target_folder = 'Selected_Images'

# Seed once from the clock; re-seeding on every loop iteration adds nothing.
random.seed(time.time())

# Walk every sub-folder of the source root
for subfolder in os.listdir(root_source_folder):
    source_folder = os.path.join(root_source_folder, subfolder)

    # Plain files sitting next to the sub-folders would make os.listdir() below
    # raise NotADirectoryError, so only real directories are processed.
    if not os.path.isdir(source_folder):
        continue

    target_folder = os.path.join(root_target_folder, subfolder)

    # Always start from an empty target folder: remove any previous sample,
    # then recreate the directory.
    if os.path.exists(target_folder):
        shutil.rmtree(target_folder)
    os.makedirs(target_folder)

    # Keep only PNG files; the extension check is case-insensitive, matching
    # how the training dataset is scanned.
    images = [file for file in os.listdir(source_folder) if file.lower().endswith('.png')]

    # Sample as many images as the training dataset contains, or every image
    # when the folder holds fewer than that.
    selected_images = random.sample(images, min(len(images), image_count))

    # Copy the sampled images into the target folder
    for image in selected_images:
        shutil.copy(os.path.join(source_folder, image), target_folder)

    # Example command for the FID between one sampled folder and the training set:
    #!python -m pytorch_fid "Train_Dataset" "Selected_Images/loraModel_V1_0.4" --device cuda:0


Processed 43 images.


In [None]:
# Generated-image folders are expected to be named "ArcadeFacadeV<version>_<weight>"
# (for example "ArcadeFacadeV1_0.4"); that is the pattern compiled below.
root_models_folder = 'Selected_Images'  # replace with the folder that contains the model folders

# Regular expression that extracts the model version and the weight
model_pattern = re.compile(r'ArcadeFacadeV(\d+)_(\d+\.\d+)$')

# Unique (model name, weight) pairs, plus the on-disk folder name for each pair.
# The real folder name is remembered because rebuilding it from the float weight
# would turn e.g. "..._0.40" into "..._0.4" and point at a missing folder.
detected_info = set()
detected_folders = {}

for folder_name in sorted(os.listdir(root_models_folder)):
    match = model_pattern.match(folder_name)
    if match:
        model_name = f'ArcadeFacadeV{match.group(1)}'
        weight = float(match.group(2))
        detected_info.add((model_name, weight))
        detected_folders.setdefault((model_name, weight), folder_name)

# Distinct, sorted model names and weights
models = sorted({model for model, weight in detected_info})
weights = sorted({weight for model, weight in detected_info})

print("Detected models:", models)
print("Detected weights:", weights)


# One row per weight, one column per model; cells stay NaN until a score is found.
fid_scores = pd.DataFrame(index=weights, columns=models, dtype=float)

# Run pytorch_fid for every detected model/weight folder and collect the scores
for weight in weights:
    for model in models:
        folder_name = detected_folders.get((model, weight))
        if folder_name is None:
            # Not every model necessarily has a folder for every weight.
            print(f"No generated images folder for model {model} with weight {weight}")
            continue
        generated_images_folder = os.path.join(root_models_folder, folder_name)

        # Argument-list form: no shell is involved, so folder names need no
        # quoting. stderr is merged into stdout so all output is searched.
        completed = subprocess.run(
            ['python', '-m', 'pytorch_fid', 'Train_Dataset', generated_images_folder,
             '--device', 'cuda:0'],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
        result = completed.stdout.splitlines()

        # The score is the last whitespace-separated token of the "FID:" line
        fid_score_line = [line for line in result if 'FID:' in line]
        if fid_score_line:
            fid_score = float(fid_score_line[0].split()[-1])
            fid_scores.at[weight, model] = fid_score
        else:
            print(f"Could not find FID score for model {model} with weight {weight}")
            if completed.returncode != 0:
                print(f"pytorch_fid exited with code {completed.returncode}")

# Show the result table
display(fid_scores)

# Timestamp used to give every run its own output file
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")

# File name carrying the timestamp
filename = f'fid_scores_{current_time}.csv'

# Save the DataFrame as CSV
fid_scores.to_csv(filename)

print(f'FID scores saved to {filename}')

## 绘制FID Scores折线图

In [None]:
import matplotlib.pyplot as plt

# Line colours; reused cyclically when there are more models than colours
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']

plt.figure(figsize=(20, 14))

# Work on a float copy so plotting and the min/max below behave the same
# whatever dtype the score table was stored with.
numeric_scores = fid_scores.astype(float)

# One line per model (column), plotted against the weights (index)
for idx, model in enumerate(numeric_scores.columns):
    plt.plot(numeric_scores.index, numeric_scores[model], label=model,
             marker='o', linestyle='-', linewidth=2.5, color=colors[idx % len(colors)], markersize=8)

# Overall minimum and maximum FID across every model column. The weights live
# in the index, so no column has to be skipped, and .min().min() / .max().max()
# already return scalars.
max_fid = numeric_scores.max().max()
min_fid = numeric_scores.min().min()

# Adding dashed horizontal lines for Minimum and Maximum FID scores
plt.axhline(y=min_fid, color='b', linestyle='--', linewidth=5.5, label="Minimum FID")
plt.axhline(y=max_fid, color='m', linestyle='--', linewidth=5.5, label="Maximum FID")

plt.title('FID Scores of LoRA models', fontsize=32)
plt.xlabel('Weight', fontsize=30)
plt.ylabel('FID Score', fontsize=30)
plt.xticks(fontsize=30)
plt.yticks(fontsize=30)

# Legend sits outside the axes, to the right of the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=20)
plt.grid(True)
plt.tight_layout()
plt.show()

## 绘制小提琴图

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def preprocess_data(fid_scores_file, lora_train_data_file):
    """Join the FID score table with the LoRA training parameters.

    Args:
        fid_scores_file: Path of a CSV file. Its first column is kept as the
            identifier column; every other column is treated as one model.
        lora_train_data_file: Path of an Excel file whose columns are renamed,
            by position, to Model, Batch size, Epoch, Network dimension,
            Repeat, Learning rate and Optimizer type.

    Returns:
        A long-format DataFrame with one row per (identifier, model) pair,
        holding the 'FID Score' plus the training parameters of that model.
        The parameters are left-joined on 'Model'.
    """
    score_table = pd.read_csv(fid_scores_file)
    param_table = pd.read_excel(lora_train_data_file)

    # Give the parameter sheet fixed, readable column names.
    param_table.columns = ['Model', 'Batch size', 'Epoch', 'Network dimension', 'Repeat', 'Learning rate', 'Optimizer type']

    # Wide -> long: one row per model column, keyed by the first CSV column.
    id_column = score_table.columns[0]
    long_scores = pd.melt(score_table, id_vars=id_column, var_name='Model', value_name='FID Score')

    # Attach the training parameters to every score row.
    return pd.merge(long_scores, param_table, on='Model', how='left')

def plot_combined_violin_graphs(combined_data_with_params):
    """Show a 2x2 grid of violin plots of 'FID Score', one per training parameter.

    Args:
        combined_data_with_params: DataFrame containing a 'FID Score' column
            and the columns 'Batch size', 'Repeat', 'Learning rate' and
            'Optimizer type'.
    """
    sns.set_style("whitegrid")
    pastel = sns.color_palette("pastel")

    fig, grid = plt.subplots(nrows=2, ncols=2, figsize=(20, 18), facecolor='white')
    parameters = ['Batch size', 'Repeat', 'Learning rate', 'Optimizer type']

    # Pair each flattened axis with the parameter it displays.
    for ax, param in zip(grid.ravel(), parameters):
        sns.violinplot(data=combined_data_with_params, x=param, y='FID Score', split=False,
                       inner="quartile", palette=pastel, ax=ax)
        ax.set_title(f"Distribution of FID Scores by {param}", fontsize=26)
        ax.set_ylabel("FID Score", fontsize=26)
        ax.set_xlabel(param, fontsize=26)
        ax.tick_params(labelsize=20)

        # Light grey background with a visible black frame around the subplot
        ax.set_facecolor('#f5f5f5')
        for spine in ax.spines.values():
            spine.set_visible(True)
            spine.set_color('black')

    plt.tight_layout()
    plt.show()

def plot_individual_violin_graphs(combined_data_with_params):
    """Show one separate violin plot of 'FID Score' per training parameter.

    Args:
        combined_data_with_params: DataFrame containing a 'FID Score' column
            and the columns 'Batch size', 'Repeat', 'Learning rate' and
            'Optimizer type'.
    """
    # Setting style and palette
    sns.set_style("whitegrid")
    palette_pastel = sns.color_palette("pastel")

    # Plot separate violin graphs for each training parameter
    parameters = ['Batch size', 'Repeat', 'Learning rate', 'Optimizer type']
    for param in parameters:
        plt.figure(figsize=(10, 8))
        # No hue variable is used here, so the violins must not be split;
        # split=False matches plot_combined_violin_graphs and draws full
        # violins regardless of the installed seaborn version.
        sns.violinplot(data=combined_data_with_params, x=param, y='FID Score', split=False,
                       inner="quartile", palette=palette_pastel)
        plt.title(f"Distribution of FID Scores by {param}", fontsize=26)
        plt.ylabel("FID Score", fontsize=26)
        plt.xlabel(param, fontsize=26)
        plt.tick_params(labelsize=20)
        plt.show()

# Generate the visualisation.
# Replace both paths: the first is the FID score table (CSV), the second is
# the LoRA training-parameter table (Excel).
fid_scores_csv = 'fid_scores_20240101_183753.csv'
lora_train_xlsx = 'Lora_Train_Data.xlsx'
combined_data_with_params = preprocess_data(fid_scores_csv, lora_train_xlsx)
plot_combined_violin_graphs(combined_data_with_params)
#plot_individual_violin_graphs(combined_data_with_params)


### 绘制小提琴图（手动输入训练数据版）

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


def preprocess_data(file, *more_files):
    """Load FID score sheets and attach the hand-entered training parameters.

    Each Excel sheet is expected to carry its real header in the first data
    row, including a 'Weight' column; every other column is one model. All
    sheets are reshaped to long format, tagged with an 'Experiment' label and
    stacked, then left-joined with the training-parameter table defined below.

    Args:
        file: Path of the first FID score Excel file.
        *more_files: Optional further Excel files of the same layout. The
            plotting helpers in this cell pass two files in total.

    Returns:
        DataFrame with the columns 'Weight', 'Model', 'FID Score',
        'Experiment' ('Experiment 1', 'Experiment 2', ... in argument order)
        and the training-parameter columns.

    Raises:
        ValueError: If a score cell cannot be converted to a number.
    """
    frames = []
    for number, path in enumerate((file, *more_files), start=1):
        # Load data
        fid_scores = pd.read_excel(path)

        # Promote the first data row to the header, then drop that row
        fid_scores.columns = fid_scores.iloc[0]
        fid_scores = fid_scores.drop(0)
        fid_scores_melted = fid_scores.melt(id_vars=['Weight'], value_name='FID Score', var_name='Model')

        # After the header promotion the cells are generic objects; the violin
        # plots need real numbers on the y axis.
        fid_scores_melted['FID Score'] = pd.to_numeric(fid_scores_melted['FID Score'])

        # Label the rows so the plots can use hue='Experiment'
        fid_scores_melted['Experiment'] = f'Experiment {number}'
        frames.append(fid_scores_melted)

    combined_data = pd.concat(frames, ignore_index=True)

    # Merge with training parameters
    training_params = {
        'LoRA model': ['loraModel_V1', 'loraModel_V2', 'loraModel_V3', 'loraModel_V4','loraModel_V5', 'loraModel_V6'],
        'Batch size': [3, 3, 2, 3, 2, 2],
        'Epoch': [20, 20, 20, 20, 20, 20],
        'Repeat': [6, 6, 6, 8, 8, 10],
        'Learning rate': [0.0001, 0.0001, 0.0002, 0.0001, 0.0002, 0.0002],
        'Optimizer type': ['AdamW8bit', 'Lion', 'Lion', 'AdamW8bit', 'Lion', 'Lion']
    }
    params_df = pd.DataFrame(training_params)
    # NOTE(review): none of the names above contain "ArcadeFacade", so this
    # replacement currently changes nothing; confirm which spelling the Excel
    # column headers really use before relying on it.
    params_df['LoRA model'] = params_df['LoRA model'].str.replace("ArcadeFacade", "AcadeFacde")
    combined_data_with_params = combined_data.merge(params_df, left_on='Model', right_on='LoRA model', how='left').drop(
        columns='LoRA model')

    return combined_data_with_params


def plot_combined_violin_graphs(file1, file2):
    """Show a 2x2 grid of split violin plots of 'FID Score' by training parameter.

    The data comes from preprocess_data(file1, file2); within each subplot the
    violins are split by the 'Experiment' column.

    Args:
        file1: First path handed to preprocess_data.
        file2: Second path handed to preprocess_data.
    """
    merged = preprocess_data(file1, file2)

    sns.set_style("whitegrid")
    pastel = sns.color_palette("pastel")

    fig, grid = plt.subplots(nrows=2, ncols=2, figsize=(20, 18), facecolor='white')
    parameters = ['Batch size', 'Repeat', 'Learning rate', 'Optimizer type']

    # Pair each flattened axis with the parameter it displays.
    for ax, param in zip(grid.ravel(), parameters):
        sns.violinplot(data=merged, x=param, y='FID Score', hue='Experiment', split=True,
                       inner="quartile", palette=pastel, ax=ax)
        ax.set_title(f"Distribution of FID Scores by {param}", fontsize=26)
        ax.set_ylabel("FID Score", fontsize=26)
        ax.set_xlabel(param, fontsize=26)
        ax.legend(title='Experiment', fontsize=18, title_fontsize=18)
        ax.tick_params(labelsize=26)

        # Light grey (#f5f5f5) background with a black frame around the subplot
        ax.set_facecolor('#f5f5f5')
        for spine in ax.spines.values():
            spine.set_visible(True)
            spine.set_color('black')

    plt.tight_layout()
    plt.show()


def plot_individual_violin_graphs(file1, file2):
    """Show one split violin plot of 'FID Score' per training parameter.

    The data comes from preprocess_data(file1, file2); the violins are split
    by the 'Experiment' column.

    Args:
        file1: First path handed to preprocess_data.
        file2: Second path handed to preprocess_data.
    """
    combined_data_with_params = preprocess_data(file1, file2)

    # Setting style and palette
    sns.set_style("whitegrid")
    palette_pastel = sns.color_palette("pastel")

    # Plot separate violin graphs for each training parameter
    parameters = ['Batch size', 'Repeat', 'Learning rate', 'Optimizer type']
    for param in parameters:
        plt.figure(figsize=(10, 8))
        sns.violinplot(data=combined_data_with_params, x=param, y='FID Score', hue='Experiment', split=True,
                       inner="quartile", palette=palette_pastel)
        plt.title(f"Distribution of FID Scores by {param}", fontsize=26)
        plt.ylabel("FID Score", fontsize=26)
        plt.xlabel(param, fontsize=26)
        # Render each figure as soon as it is complete, like the combined
        # plot does; without this nothing is displayed outside inline backends.
        plt.show()

# To run the functions, just provide the paths to the two files:
# plot_combined_violin_graphs("path_to_file1.xlsx", "path_to_file2.xlsx")
# plot_individual_violin_graphs("path_to_file1.xlsx", "path_to_file2.xlsx")


## 制作FID分数的折线图(文件夹版)

In [6]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the saved FID score table. The first CSV column becomes the index,
# which is then named 'Weight'.
file_path = 'fid_scores.csv'   # replace with the path of your FID scores CSV
fid_scores_data = pd.read_csv(file_path, index_col=0).rename_axis('Weight')
print(fid_scores_data.head())

In [None]:
# Softer color palette for better distinction; reused cyclically when there
# are more models than colours
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']

# Setting the figure size for the plot
plt.figure(figsize=(20, 14))

# One line per model (column), plotted against the weights (index)
for idx, model in enumerate(fid_scores_data.columns):
    plt.plot(fid_scores_data.index, fid_scores_data[model], label=model,
             marker='o', linestyle='-', linewidth=2.5, color=colors[idx % len(colors)], markersize=8)

# Overall minimum and maximum FID across every model column. The weights were
# loaded as the index, so no column has to be skipped, and .min().min() /
# .max().max() already return scalars.
max_fid = fid_scores_data.max().max()
min_fid = fid_scores_data.min().min()

# Adding dashed horizontal lines for Minimum and Maximum FID scores
plt.axhline(y=min_fid, color='b', linestyle='--', linewidth=5.5, label="Minimum FID")
plt.axhline(y=max_fid, color='m', linestyle='--', linewidth=5.5, label="Maximum FID")

# Setting titles and labels with font sizes
plt.title('FID Scores of LoRA models', fontsize=32)
plt.xlabel('Weight', fontsize=30)
plt.ylabel('FID Score', fontsize=30)
plt.xticks(fontsize=30)
plt.yticks(fontsize=30)

# Adjusting the legend with font size and moving it outside the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=20)

# Displaying a grid
plt.grid(True)

# Adjusting the layout to ensure no clipping
plt.tight_layout()

# Displaying the plot
plt.show()