## 计算CLIP分数

In [34]:
import os
import re
import random
import time
import pandas as pd
import numpy as np
import subprocess
from io import StringIO
from PIL import Image
from IPython.display import display
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Path of the dataset folder; it is expected to contain one sub-folder of
# generated images per model/weight combination.
dataset_folder = 'CLIP_Dataset'  # replace with your own folder path

# Suffix appended to an image folder's name to form its prompt folder.
prompt_suffix = "_prompt"

# Caption written into every generated .txt file.
# NOTE(review): "baclcony" looks like a typo for "balcony". It is left
# unchanged because the text may have to match the prompt that was used to
# generate the images - confirm before correcting it.
prompt_text = "arcade facade, scenery, building, outdoors, road, shop, city, window, day, street, house, shops in hallway, Roman orders, undulating parapet, convex balcony, concave baclcony, blue sky, clear, Corinthian Column, bright, tiny, east asian architecture"

# Visit every entry of dataset_folder.
for folder_name in os.listdir(dataset_folder):
    # Full path of the original (image) sub-folder.
    original_folder_path = os.path.join(dataset_folder, folder_name)

    # Skip entries that are not directories.
    if not os.path.isdir(original_folder_path):
        continue

    # Skip prompt folders created by an earlier run of this cell. Without this
    # guard a re-run would treat "<name>_prompt" as an image folder and create
    # "<name>_prompt_prompt" next to it.
    if folder_name.endswith(prompt_suffix):
        continue

    # New folder name: original folder name + "_prompt".
    new_folder_name = f"{folder_name}{prompt_suffix}"
    new_folder_path = os.path.join(dataset_folder, new_folder_name)

    # Create the prompt folder; exist_ok makes a repeated run a no-op here.
    os.makedirs(new_folder_path, exist_ok=True)

    # For every file in the original folder, write a .txt file with the same
    # base name into the prompt folder.
    for file_name in os.listdir(original_folder_path):
        new_file_path = os.path.join(new_folder_path, f"{os.path.splitext(file_name)[0]}.txt")
        # Explicit encoding so the output does not depend on the platform default.
        with open(new_file_path, 'w', encoding='utf-8') as f:
            f.write(prompt_text)

print("Done processing.")

In [None]:


import sys  # cell-local import: used to launch clip_score with this kernel's interpreter

# Generated-image folders are expected to be named "ArcadeFacadeV<version>_<weight>".
root_models_folder = 'CLIP_Dataset'  # replace with the folder that contains the model folders

# Regex that captures the model version and the weight from a folder name.
model_pattern = re.compile(r'ArcadeFacadeV(\d+)_(\d+\.\d+)$')

# Regex that captures the per-image score from the clip_score output.
score_pattern = re.compile(r"CLIP Score for image \d+: ([\d\.]+)")

# detected_models: model name -> list of weights found on disk.
# model_folders:   (model name, weight) -> the folder name exactly as it is on
#                  disk. The path must not be rebuilt from the float weight,
#                  because float formatting is lossy ("0.50" -> 0.5 -> "0.5").
detected_models = {}
model_folders = {}
for folder_name in os.listdir(root_models_folder):
    match = model_pattern.match(folder_name)
    if not match:
        continue
    model_name = f'ArcadeFacadeV{match.group(1)}'
    weight = float(match.group(2))
    if (model_name, weight) in model_folders:
        # Two folders that parse to the same weight (e.g. "_0.5" and "_0.50")
        # would produce duplicate columns; keep the first one and say so.
        print(f"Warning: {folder_name} duplicates {model_folders[(model_name, weight)]}; skipped")
        continue
    model_folders[(model_name, weight)] = folder_name
    detected_models.setdefault(model_name, []).append(weight)

if not detected_models:
    # pd.MultiIndex.from_tuples([]) would fail with an unrelated-looking
    # TypeError, so report the real problem instead.
    raise ValueError(
        f"No folders matching {model_pattern.pattern!r} found in {root_models_folder!r}"
    )

# Sort the weights of every model.
for model in detected_models:
    detected_models[model] = sorted(detected_models[model])

# Build two-level columns from model name and weight.
column_tuples = [(model, weight) for model in detected_models for weight in detected_models[model]]
multi_index = pd.MultiIndex.from_tuples(column_tuples, names=['ArcadeFacade', 'Weight'])

# Run the evaluation script for every model/weight combination and collect
# the parsed scores per column.
scores_by_column = {}
for model, weights in detected_models.items():
    for weight in weights:
        folder_name = model_folders[(model, weight)]
        generated_images_folder = os.path.join(root_models_folder, folder_name)
        generated_prompt_folder = f"{generated_images_folder}_prompt"

        # Argument list instead of a shell string: no quoting problems with
        # unusual folder names, and sys.executable runs clip_score in the same
        # environment as this notebook kernel.
        cmd = [sys.executable, '-m', 'clip_score', generated_images_folder, generated_prompt_folder]
        proc = subprocess.run(cmd, capture_output=True)

        # Decode leniently so non-UTF-8 console output cannot abort the loop.
        stdout_output = proc.stdout.decode('utf-8', errors='replace')
        stderr_output = proc.stderr.decode('utf-8', errors='replace')

        # Look for the scores on stdout first and fall back to stderr.
        clip_scores = score_pattern.findall(stdout_output) or score_pattern.findall(stderr_output)
        if not clip_scores:
            # Leave the column empty (NaN) but make the failure visible.
            print(f"Warning: no CLIP scores parsed for {folder_name} (exit code {proc.returncode})")
            if stderr_output.strip():
                print(stderr_output.strip()[-500:])
            continue

        scores_by_column[(model, weight)] = [float(score) for score in clip_scores]

# One row per image position ("CLIPScore1", "CLIPScore2", ...); columns with
# fewer scores than the longest one are padded with NaN.
max_rows = max((len(scores) for scores in scores_by_column.values()), default=0)
row_labels = [f'CLIPScore{i}' for i in range(1, max_rows + 1)]
df = pd.DataFrame(index=row_labels, columns=multi_index, dtype='float64')
for column, scores in scores_by_column.items():
    df[column] = pd.Series(scores, index=row_labels[:len(scores)], dtype='float64')

# Show the resulting DataFrame.
print(df)


In [None]:
# Write the score table to a CSV file whose name carries the current
# timestamp, so the results of earlier runs are not overwritten.
now = datetime.now()
current_time = now.strftime("%Y%m%d_%H%M%S")
filename = f'clip_scores_{current_time}.csv'
df.to_csv(path_or_buf=filename)
print(f'CLIP scores saved to {filename}')

## 绘制箱型图

In [None]:
# Read the saved score table; the two header rows hold the (model, weight)
# column levels and the first column holds the row labels.
file_path = "clip_scores_20240103_124724.csv"  # replace with the path of the file generated above
df = pd.read_csv(file_path, header=[0, 1], index_col=0)

# Reshape to long format (one row per score) and turn the row labels into a
# regular column.
df_long = df.melt(
    ignore_index=False,
    var_name=['ArcadeFacade', 'Weight'],
    value_name='Score',
).reset_index()

# Weight and Score must be numeric; values that cannot be parsed become NaN.
for numeric_column in ('Weight', 'Score'):
    df_long[numeric_column] = pd.to_numeric(df_long[numeric_column], errors='coerce')

# Custom box colours, one per weight.
colors = ['#E2EFDA', '#B4C6E7', '#F8CBAD', '#FFE699', '#BDD7EE']

# Draw the grouped box plot: one group per model, one box per weight.
plt.figure(figsize=(4380/300, 3179/300))
sns.boxplot(
    data=df_long,
    x='ArcadeFacade',
    y='Score',
    hue='Weight',
    palette=colors,
    linewidth=1.2,
)

# Titles and axis labels.
plt.title('CLIP Scores Across Different LoraModel Versions and Weights', fontsize=22)
plt.xlabel('ArcadeFacade Version', fontsize=20)
plt.ylabel('CLIP Score', fontsize=20)

# Tick label sizes for both axes.
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)

plt.legend(title='Weight', title_fontsize='18', fontsize='16',  loc='lower right')
# Optional: plt.xticks(rotation=45) rotates the model names for readability.
plt.tight_layout()
plt.show()

## 绘制小提琴图

In [None]:
#请自行更换lora训练数据

def generate_full_plots(file_path_1, file_path_2):
    """Show split violin plots of CLIP scores grouped by training parameter.

    Both Excel files are read with ``pd.read_excel`` and tagged as
    experiment ``'1.0'`` and ``'2.0'`` respectively. For each model in the
    hard-coded list, the column named after the model and the columns
    directly to its right (one per weight, five in total) are taken as that
    model's scores. The scores are then plotted in a 2x2 grid, one violin
    plot per training parameter (batch size, repeat, learning rate,
    optimizer type), split by experiment.

    Args:
        file_path_1: Path of the Excel file for experiment 1.0.
        file_path_2: Path of the Excel file for experiment 2.0.

    Raises:
        KeyError: If a model name is not a column of the combined data
            (raised by ``Index.get_loc``).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Load the data
    data1 = pd.read_excel(file_path_1)
    data2 = pd.read_excel(file_path_2)

    # Combine the data from the two experiments. ignore_index gives every row
    # a unique label, so the label-aligned 'Experiment' assignment below does
    # not depend on how pandas handles duplicate row labels.
    data1['Experiment'] = '1.0'
    data2['Experiment'] = '2.0'
    combined_data_all = pd.concat([data1, data2], ignore_index=True)

    # Models, weight labels and the training parameters of every model.
    models = ['ArcadeFacadeV2.1', 'ArcadeFacadeV2.2', 'ArcadeFacadeV2.3', 'ArcadeFacadeV2.4', 'ArcadeFacadeV2.5',
              'ArcadeFacadeV2.6']
    weights = ['Weight 1', 'Weight 2', 'Weight 3', 'Weight 4', 'Weight 5']
    parameters = ['Batch size', 'Repeat', 'Learning rate', 'Optimizer type']
    param_mapping = {
        'ArcadeFacadeV2.1': {'Batch size': 3, 'Learning rate': 0.0001, 'Optimizer type': 'AdamW8bit', 'Repeat': 6},
        'ArcadeFacadeV2.2': {'Batch size': 3, 'Learning rate': 0.0001, 'Optimizer type': 'AdamW8bit', 'Repeat': 8},
        'ArcadeFacadeV2.3': {'Batch size': 3, 'Learning rate': 0.0001, 'Optimizer type': 'Lion', 'Repeat': 6},
        'ArcadeFacadeV2.4': {'Batch size': 2, 'Learning rate': 0.0002, 'Optimizer type': 'Lion', 'Repeat': 6},
        'ArcadeFacadeV2.5': {'Batch size': 2, 'Learning rate': 0.0002, 'Optimizer type': 'Lion', 'Repeat': 10},
        'ArcadeFacadeV2.6': {'Batch size': 2, 'Learning rate': 0.0002, 'Optimizer type': 'Lion', 'Repeat': 8}
    }

    # Cut out each model's block of weight columns and relabel it.
    all_data = []
    for model in models:
        start_col = combined_data_all.columns.get_loc(model)
        # The block is as wide as the list of weight labels.
        end_col = start_col + len(weights)
        # Copy the slice: the assignments below must modify an independent
        # frame, not a view of combined_data_all (avoids SettingWithCopyWarning).
        model_data = combined_data_all.iloc[:, start_col:end_col].copy()
        model_data.columns = weights
        model_data['Model'] = model
        model_data['Experiment'] = combined_data_all['Experiment']
        all_data.append(model_data)
    formatted_data = pd.concat(all_data)

    # Long format: one row per (model, experiment, weight) score.
    melted_data = formatted_data.melt(id_vars=['Model', 'Experiment'], value_vars=weights, var_name='Weight',
                                      value_name='CLIPScore')

    # Attach every training parameter of the row's model as its own column.
    for param in parameters:
        melted_data[param] = melted_data['Model'].map(lambda x: param_mapping[x][param])

    # Pastel colour palette for the two experiments.
    palette_comfortable = sns.color_palette("pastel")

    # One subplot per training parameter in a 2x2 grid.
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(22, 20), facecolor='white')
    axes = axes.ravel()

    for i, param in enumerate(parameters):
        sns.violinplot(x=param, y='CLIPScore', hue='Experiment', data=melted_data, split=True, inner="quartile",
                       palette=palette_comfortable, ax=axes[i])
        axes[i].set_title(f"Distribution of CLIPScores by {param}", fontsize=28)
        axes[i].set_ylabel("CLIPScore", fontsize=28)
        axes[i].set_xlabel(param, fontsize=28)
        axes[i].legend(title='Experiment', fontsize=18, title_fontsize=18)
        axes[i].tick_params(labelsize=28)

        # White background and a visible black frame around each subplot.
        axes[i].set_facecolor('white')
        for spine in axes[i].spines.values():
            spine.set_visible(True)
            spine.set_color('black')

    plt.tight_layout()
    plt.show()

