In [8]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
import os
import json
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import csv
# THIS IS FOR TRAIN IDIOM DATASET
# 加载模型和处理器
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# 指定主文件夹路径和 JSON 文件路径
main_folder_path = "/content/drive/MyDrive/train"  # 替换为你的实际路径
json_file_path = "/content/drive/MyDrive/train_idiom_mean.json"   # 替换为你的 JSON 文件路径


# 加载 JSON 文件
with open(json_file_path, "r", encoding="utf-8") as json_file:
    compound_definitions = json.load(json_file)

# 初始化结果存储
results = []

# 遍历 JSON 中的 compound 定义
for compound_name, compound_text in compound_definitions.items():
    compound_path = os.path.join(main_folder_path, compound_name)

    # 检查文件夹是否存在
    if not os.path.isdir(compound_path):
        print(f"Skipping compound: {compound_name} (folder does not exist)")
        continue

    print(f"Processing compound: {compound_name}")

    # 将 compound 和解释拼接为一个完整文本
    combined_text = f"Compound: {compound_name}. Explanation: {compound_text}"

    # 用于存储当前 compound 的图片及其 logits
    image_logits = []

    # 遍历子文件夹中的图片
    for filename in os.listdir(compound_path):
        if filename.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif")):  # 仅处理图片文件
            image_path = os.path.join(compound_path, filename)

            # 打开图片
            try:
                image = Image.open(image_path)
            except Exception as e:
                print(f"Error opening image {filename}: {e}")
                continue

            # 使用 compound 和解释拼接后的文本作为输入
            inputs = processor(
                text=[combined_text],  # 拼接后的文本输入
                images=image,
                return_tensors="pt",
                padding=True
            )

            # 获取模型输出
            outputs = model(**inputs)
            logits_per_image = outputs.logits_per_image  # 获取 logits 值

            # 打印日志信息
            print(f"Compound: {compound_name}")
            print(f"Image ID: {filename}")  # 保留文件名及后缀
            print(f"Input Text: {combined_text}")
            print(f"Logits: {logits_per_image}")
            print("-" * 50)

            # 保存图片文件名（包含后缀）和 logits
            image_logits.append((filename, logits_per_image.item()))

    # 按 logits 从大到小排序
    sorted_images = sorted(image_logits, key=lambda x: x[1], reverse=True)

    # 提取排序后的图片文件名列表
    expected_order = [img[0] for img in sorted_images]

    # 保存 compound 和排序列表
    results.append([compound_name, expected_order])

# 将结果保存为 TSV 文件
output_file = "train_idiom.tsv"
with open(output_file, "w", newline="", encoding="utf-8") as tsvfile:
    writer = csv.writer(tsvfile, delimiter="\t")
    writer.writerow(["compound", "expected_order"])  # 写入表头
    for compound, order in results:
        writer.writerow([compound, order])

print(f"Results saved to {output_file}")

Processing compound: elbow grease
Compound: elbow grease
Image ID: 74852536462.png
Input Text: Compound: elbow grease. Explanation: Hard physical effort or manual labor required to complete a task.
Logits: tensor([[25.1573]], grad_fn=<TBackward0>)
--------------------------------------------------
Compound: elbow grease
Image ID: 53378381715.png
Input Text: Compound: elbow grease. Explanation: Hard physical effort or manual labor required to complete a task.
Logits: tensor([[30.1589]], grad_fn=<TBackward0>)
--------------------------------------------------
Compound: elbow grease
Image ID: 39938261459.png
Input Text: Compound: elbow grease. Explanation: Hard physical effort or manual labor required to complete a task.
Logits: tensor([[31.1292]], grad_fn=<TBackward0>)
--------------------------------------------------
Compound: elbow grease
Image ID: 54879908369.png
Input Text: Compound: elbow grease. Explanation: Hard physical effort or manual labor required to complete a task.
Logits:

In [7]:
import os
import json
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import csv

# 加载模型和处理器
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# 指定主文件夹路径和 JSON 文件路径
main_folder_path = "/content/drive/MyDrive/train"  # 替换为你的实际路径
json_file_path = "/content/drive/MyDrive/train_literal_mean.json"   # 替换为你的 JSON 文件路径


# 加载 JSON 文件
with open(json_file_path, "r", encoding="utf-8") as json_file:
    compound_definitions = json.load(json_file)

# 初始化结果存储
results = []

# 遍历 JSON 中的 compound 定义
for compound_name, compound_text in compound_definitions.items():
    compound_path = os.path.join(main_folder_path, compound_name)

    # 检查文件夹是否存在
    if not os.path.isdir(compound_path):
        print(f"Skipping compound: {compound_name} (folder does not exist)")
        continue

    print(f"Processing compound: {compound_name}")

    # 将 compound 和解释拼接为一个完整文本
    combined_text = f"Compound: {compound_name}. Explanation: {compound_text}"

    # 用于存储当前 compound 的图片及其 logits
    image_logits = []

    # 遍历子文件夹中的图片
    for filename in os.listdir(compound_path):
        if filename.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif")):  # 仅处理图片文件
            image_path = os.path.join(compound_path, filename)

            # 打开图片
            try:
                image = Image.open(image_path)
            except Exception as e:
                print(f"Error opening image {filename}: {e}")
                continue

            # 使用 compound 和解释拼接后的文本作为输入
            inputs = processor(
                text=[combined_text],  # 拼接后的文本输入
                images=image,
                return_tensors="pt",
                padding=True
            )

            # 获取模型输出
            outputs = model(**inputs)
            logits_per_image = outputs.logits_per_image  # 获取 logits 值

            # 打印日志信息
            print(f"Compound: {compound_name}")
            print(f"Image ID: {filename}")  # 保留文件名及后缀
            print(f"Input Text: {combined_text}")
            print(f"Logits: {logits_per_image}")
            print("-" * 50)

            # 保存图片文件名（包含后缀）和 logits
            image_logits.append((filename, logits_per_image.item()))

    # 按 logits 从大到小排序
    sorted_images = sorted(image_logits, key=lambda x: x[1], reverse=True)

    # 提取排序后的图片文件名列表
    expected_order = [img[0] for img in sorted_images]

    # 保存 compound 和排序列表
    results.append([compound_name, expected_order])

# 将结果保存为 TSV 文件
output_file = "train_literal.tsv"
with open(output_file, "w", newline="", encoding="utf-8") as tsvfile:
    writer = csv.writer(tsvfile, delimiter="\t")
    writer.writerow(["compound", "expected_order"])  # 写入表头
    for compound, order in results:
        writer.writerow([compound, order])

print(f"Results saved to {output_file}")


Processing compound: green fingers
Compound: green fingers
Image ID: 88406435457.png
Input Text: Compound: green fingers. Explanation: Fingers that are colored green, either due to paint, dye, or other substances.
Logits: tensor([[27.0648]], grad_fn=<TBackward0>)
--------------------------------------------------
Compound: green fingers
Image ID: 88683770392.png
Input Text: Compound: green fingers. Explanation: Fingers that are colored green, either due to paint, dye, or other substances.
Logits: tensor([[21.0476]], grad_fn=<TBackward0>)
--------------------------------------------------
Compound: green fingers
Image ID: 10027562830.png
Input Text: Compound: green fingers. Explanation: Fingers that are colored green, either due to paint, dye, or other substances.
Logits: tensor([[25.2555]], grad_fn=<TBackward0>)
--------------------------------------------------
Compound: green fingers
Image ID: 92628846416.png
Input Text: Compound: green fingers. Explanation: Fingers that are colored 

In [3]:
import os
import json
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import csv

# 加载模型和处理器
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# 指定主文件夹路径和 JSON 文件路径
main_folder_path = "/content/drive/MyDrive/AdMIRe Subtask A Dev/dev"  # 替换为你的实际路径
json_file_path = "/content/drive/MyDrive/dev_mean.json"   # 替换为你的 JSON 文件路径


# 加载 JSON 文件
with open(json_file_path, "r", encoding="utf-8") as json_file:
    compound_definitions = json.load(json_file)

# 初始化结果存储
results = []

# 遍历 JSON 中的 compound 定义
for compound_name, compound_text in compound_definitions.items():
    compound_path = os.path.join(main_folder_path, compound_name)

    # 检查文件夹是否存在
    if not os.path.isdir(compound_path):
        print(f"Skipping compound: {compound_name} (folder does not exist)")
        continue

    print(f"Processing compound: {compound_name}")

    # 将 compound 和解释拼接为一个完整文本
    combined_text = f"Compound: {compound_name}. Explanation: {compound_text}"

    # 用于存储当前 compound 的图片及其 logits
    image_logits = []

    # 遍历子文件夹中的图片
    for filename in os.listdir(compound_path):
        if filename.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif")):  # 仅处理图片文件
            image_path = os.path.join(compound_path, filename)

            # 打开图片
            try:
                image = Image.open(image_path)
            except Exception as e:
                print(f"Error opening image {filename}: {e}")
                continue

            # 使用 compound 和解释拼接后的文本作为输入
            inputs = processor(
                text=[combined_text],  # 拼接后的文本输入
                images=image,
                return_tensors="pt",
                padding=True
            )

            # 获取模型输出
            outputs = model(**inputs)
            logits_per_image = outputs.logits_per_image  # 获取 logits 值

            # 打印日志信息
            print(f"Compound: {compound_name}")
            print(f"Image ID: {filename}")  # 保留文件名及后缀
            print(f"Input Text: {combined_text}")
            print(f"Logits: {logits_per_image}")
            print("-" * 50)

            # 保存图片文件名（包含后缀）和 logits
            image_logits.append((filename, logits_per_image.item()))

    # 按 logits 从大到小排序
    sorted_images = sorted(image_logits, key=lambda x: x[1], reverse=True)

    # 提取排序后的图片文件名列表
    expected_order = [img[0] for img in sorted_images]

    # 保存 compound 和排序列表
    results.append([compound_name, expected_order])

# 将结果保存为 TSV 文件
output_file = "dev_with_description.tsv"
with open(output_file, "w", newline="", encoding="utf-8") as tsvfile:
    writer = csv.writer(tsvfile, delimiter="\t")
    writer.writerow(["compound", "expected_order"])  # 写入表头
    for compound, order in results:
        writer.writerow([compound, order])

print(f"Results saved to {output_file}")


Processing compound: monkey business
Compound: monkey business
Image ID: 61570020623
Definition: Dishonest, mischievous, or deceitful behavior; foolish or silly actions.
Logits: tensor([[22.0505]], grad_fn=<TBackward0>)
--------------------------------------------------
Compound: monkey business
Image ID: 94990180734
Definition: Dishonest, mischievous, or deceitful behavior; foolish or silly actions.
Logits: tensor([[22.8387]], grad_fn=<TBackward0>)
--------------------------------------------------
Compound: monkey business
Image ID: 33778559524
Definition: Dishonest, mischievous, or deceitful behavior; foolish or silly actions.
Logits: tensor([[23.0462]], grad_fn=<TBackward0>)
--------------------------------------------------
Compound: monkey business
Image ID: 04129294826
Definition: Dishonest, mischievous, or deceitful behavior; foolish or silly actions.
Logits: tensor([[22.3907]], grad_fn=<TBackward0>)
--------------------------------------------------
Compound: monkey business
I