In [9]:
import pandas as pd
import glob
from PIL import Image
import json

In [10]:
excel_filename = "画像300枚.xlsx"

image_top_dir = "/autofs/diamond2/share/users/tuchiya/300image"
sheet2dir = {"ポジティブ": "pogtive", "ネガティブ": "negative", "その他": "sonnota"}
sheet2cname = {"ポジティブ": "positive", "ネガティブ": "negative", "その他": "other"}

image_output_dir = "./images"
output_json_filename = "./captions.json"

In [11]:
# 画像のフルパスを取得しておく
sheet2image_paths = {}
for sheet_name, sheet_dir in sheet2dir.items():
    image_dir = f"{image_top_dir}/{sheet_dir}"
    image_paths = glob.glob(f"{image_dir}/*")
    sheet2image_paths[sheet_name] = image_paths

In [12]:
new_image_id = 0
new_caption_id = 0

# 元画像と新画像の対応表
image_info_orig2new = {}

# シートごとの処理
results = []

# シート名
for sheet_name in sheet2dir.keys():
    # シートの読み込み
    df = pd.read_excel(excel_filename, sheet_name=sheet_name)
    for i, row in df.iterrows():
        # 画像ファイルのパスを取得
        image_filename = row.iloc[1]
        # ファイル名が数字だけだと文字列では無いのでタイプを変更する        
        if type(image_filename) != str:
            image_filename = str(image_filename)
        # 拡張子がない場合はjpgを付与する
        if '.' not in image_filename:
            image_filename = f"{image_filename}.jpg"
        # 実際の画像のフルパス
        image_path = f"{image_top_dir}/{sheet2dir[sheet_name]}/{image_filename}"
        # ファイルが見つからなければスキップ
        if image_path not in sheet2image_paths[sheet_name]:
            print(f"image_path: {image_path} not found")
            continue 
        # すでに読み込み済みの画像だった場合は，書き出した画像の情報を取得する
        if image_path in image_info_orig2new:
            print(f"image_path: {image_path} already exists")
            new_image_info = image_info_orig2new[image_path]["image_path"]
            new_image_path = new_image_info["image_path"]
            new_image_width = new_image_info["width"]
            new_image_height = new_image_info["height"]
        else:
            new_image_path = f"{image_output_dir}/{new_image_id:04d}.png"
            image = Image.open(image_path)
            # 画像の幅，高さの大きい方を300にする
            if image.size[0] > image.size[1]:
                new_image_width = 300
                new_image_height = int(image.size[1] * 300 / image.size[0])
            else:
                new_image_width = int(image.size[0] * 300 / image.size[1])
                new_image_height = 300
            image = image.resize((new_image_width, new_image_height))
            image.save(new_image_path, "PNG")
            image_info_orig2new[image_path] = {
                "image_path": new_image_path, 
                "width": image.size[0], 
                "height": image.size[1]}
            new_image_id = new_image_id + 1
        # キャプションを取得
        texts = row.iloc[2:5].to_list()
        texts = [text for text in texts if not pd.isnull(text)]
        # キャプションの結果を書き出す
        for text in texts:
            results.append({
                "id": new_caption_id,
                "image_original_path": image_path, 
                "image_path": new_image_path, 
                "caption": text, 
                "category": sheet2cname[sheet_name],
                "width": image_info_orig2new[image_path]["width"],
                "height": image_info_orig2new[image_path]["height"],
            })
            new_caption_id += 1

image_path: /autofs/diamond2/share/users/tuchiya/300image/pogtive/MV5BMTk1NGFhNDctMWQ4MC00ZmUxLWEwZWQtNGQ3MzI4ZjJmYzI1XkEyXkFqcGdeQXVyODI2MDA4NQ@@._V1_ not found
image_path: /autofs/diamond2/share/users/tuchiya/300image/pogtive/nan.jpg not found
image_path: /autofs/diamond2/share/users/tuchiya/300image/pogtive/nan.jpg not found
image_path: /autofs/diamond2/share/users/tuchiya/300image/pogtive/nan.jpg not found
image_path: /autofs/diamond2/share/users/tuchiya/300image/pogtive/nan.jpg not found
image_path: /autofs/diamond2/share/users/tuchiya/300image/pogtive/nan.jpg not found
image_path: /autofs/diamond2/share/users/tuchiya/300image/pogtive/nan.jpg not found
image_path: /autofs/diamond2/share/users/tuchiya/300image/pogtive/nan.jpg not found
image_path: /autofs/diamond2/share/users/tuchiya/300image/negative/11jgXoKcFgsj6Xh_gikoA_61.jpg not found
image_path: /autofs/diamond2/share/users/tuchiya/300image/negative/cutebabyfb.blogspot.com_ not found
image_path: /autofs/diamond2/share/users/t

In [13]:
json.dump(results, open(output_json_filename, "w"), indent=4, ensure_ascii=False)