In [1]:
import datasets

dataset = datasets.load_dataset("wanghaofan/pokemon-wiki-captions")

In [2]:
dataset['train'].features

{'image': Image(mode=None, decode=True, id=None),
 'name_en': Value(dtype='string', id=None),
 'name_zh': Value(dtype='string', id=None),
 'text_en': Value(dtype='string', id=None),
 'text_zh': Value(dtype='string', id=None)}

In [3]:
import os
from datasets import Dataset
import PIL.Image as Image

def save_image_and_get_path(example, save_dir):
    # Create a unique filename
    filename = f"{example['name_en']}_{hash(example['text_en'])}.png"
    filepath = os.path.join(save_dir, filename)
    
    # Save the image
    example['image'].save(filepath)
    
    # Add the filepath to the example
    example['image_filepath'] = filepath
    
    return example

def process_dataset(dataset, save_dir):
    # Ensure the save directory exists
    os.makedirs(save_dir, exist_ok=True)
    
    # Apply the save_image_and_get_path function to each example
    return dataset.map(lambda example: save_image_and_get_path(example, save_dir))

# Specify the directory where you want to save the images
save_directory = "./saved_images"

# Process the dataset
processed_dataset = process_dataset(dataset, save_directory)

In [4]:
processed_dataset['train'][0]

{'image': <PIL.PngImagePlugin.PngImageFile image mode=RGBA size=475x475>,
 'name_en': 'abomasnow',
 'name_zh': '暴雪王',
 'text_en': 'Grass attributes,Blizzard King standing on two feet, with white fluff all over, lavender eyes, and a few long strips of fur covering its mouth',
 'text_zh': '草属性，双脚站立的暴雪王，全身白色的绒毛，淡紫色的眼睛，几缕长条装的毛皮盖着它的嘴巴',
 'image_filepath': './saved_images/abomasnow_2058735963577945329.png'}

In [5]:
df = processed_dataset['train'].to_pandas()
df = df.drop(columns=['image'])
df

Unnamed: 0,name_en,name_zh,text_en,text_zh,image_filepath
0,abomasnow,暴雪王,"Grass attributes,Blizzard King standing on two...",草属性，双脚站立的暴雪王，全身白色的绒毛，淡紫色的眼睛，几缕长条装的毛皮盖着它的嘴巴,./saved_images/abomasnow_2058735963577945329.png
1,abra,凯西,"Super power attributes, the whole body is yell...",超能力属性，通体黄色，头部外形类似狐狸，尖尖鼻子，手和脚上都有三个指头，长尾巴末端带着一个褐色圆环,./saved_images/abra_5265223410023725368.png
2,absol,阿勃梭鲁,"Evil attribute, with white hair, blue-gray par...",恶属性，有白色毛发，没毛发的部分是蓝灰色，头右边类似弓的角，红色眼睛,./saved_images/absol_-8493509098281981613.png
3,accelgor,敏捷虫,"Insect attributes,upright agile insects, the w...",虫属性，直立型的敏捷虫，全身被粘膜包裹着，脖子下是灰色，头部有一个四角星形状，背后有眼神出去的带子,./saved_images/accelgor_-4254867256317794903.png
4,aegislash-shield,坚盾剑怪,"Steel attribute, huge sword body, hilt, sword ...",钢属性，巨大的剑身，剑柄，剑镡，和金黄色剑脊，深紫色眼睛和手掌，黑色手臂，锯齿状剑锋,./saved_images/aegislash-shield_-9192879603048...
...,...,...,...,...,...
893,zoroark,索罗亚克,"Evil attribute, looks like a gray fox, with a ...",恶属性，外形似灰色狐狸，头上深红色的鬃毛覆盖了后背，爪子、耳朵为红色，耳朵顶端为红色,./saved_images/zoroark_8540914901580778615.png
894,zorua,索罗亚,"Pokémon of the evil attribute, similar in appe...",恶属性，外形类似狐狸崽的宝可梦，身体呈灰黑色，瞳孔呈淡蓝色，脖子上的毛为黑色,./saved_images/zorua_-6649027328925904559.png
895,zubat,超音蝠,It is poisonous and looks like a young bat. It...,毒属性，外形似幼年蝙蝠，通体深蓝色，耳蜗和翼膜是紫色的，没有双眼，四颗白色的牙，尾巴，呈V字形,./saved_images/zubat_6458686427071631846.png
896,zweilous,双首暴龙,"Evil attribute, two heads, blue body, black fu...",恶属性，两个脑袋，身体为蓝色，前身有黑色的毛皮覆盖，背上有一对翅，头发覆盖了双眼,./saved_images/zweilous_-7595618527035249739.png


In [6]:
from sqlalchemy import create_engine

engine = create_engine('postgresql://pokemon_user@localhost:5432/pokemon_db')

In [7]:
df.to_sql('pokemons', engine, if_exists='replace', index=False)

898