In [45]:
import concurrent.futures
import json
import os
import re
from urllib.request import Request, urlopen

import replicate
import requests
from bs4 import BeautifulSoup
from PIL import Image

In [26]:
# Scrape images from wikimedia

# https://commons.wikimedia.org/wiki/Codex_Borgia
# file pages are of the form:
# https://commons.wikimedia.org/wiki/File:Codex_Borgia_page_{i}.jpg
# for pages 1 to 76
# then we need to access the image file itself from the HTML links
# the final link should look like this:
# https://upload.wikimedia.org/wikipedia/commons/thumb/7/73/Codex_Borgia_page_1.jpg/512px-Codex_Borgia_page_1.jpg

# Identification sent with every HTTP request below (file page and image).
# NOTE(review): the address reads "gamil.com", which looks like a typo for
# "gmail.com"; the value is left unchanged here -- please confirm and correct.
headers = {
    'User-Agent': 'etiennefd@gamil.com'
}

# Make sure the output folder exists so the cell also runs on a fresh checkout.
os.makedirs("data/img", exist_ok=True)

for i in range(1, 77):
    img_name = f"image{i:03d}.jpg"
    img_page_url = f"https://commons.wikimedia.org/wiki/File:Codex_Borgia_page_{i}.jpg"

    # Fetch the file description page with the same headers as the image
    # download; the context manager closes the HTTP response once parsed.
    request = Request(img_page_url, headers=headers)
    with urlopen(request, timeout=30) as img_page:
        soup = BeautifulSoup(img_page, "lxml")

    # Keep only anchors that actually carry an href attribute.
    links = [link.get('href') for link in soup.find_all('a', href=True)]

    # The file page links to a 600px thumbnail; fail with a readable message
    # (instead of a bare StopIteration) if the page layout ever changes.
    img_url = next((x for x in links if "thumb" in x and "600px-Codex" in x), None)
    if img_url is None:
        raise RuntimeError(f"No 600px thumbnail link found on {img_page_url}")

    # Swap only the size prefix to get the 512 pixels thumbnail, so that a
    # "600" occurring anywhere else in the URL is left alone.
    img_url = img_url.replace("600px-", "512px-")

    # download the image
    print("Downloading", img_url)
    response = requests.get(img_url, headers=headers, timeout=30)
    # Abort on HTTP errors rather than saving an error page as a .jpg file.
    response.raise_for_status()
    img_data = response.content
    with open("data/img/" + img_name, "wb") as f:
        f.write(img_data)


Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/7/73/Codex_Borgia_page_1.jpg/512px-Codex_Borgia_page_1.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/4/41/Codex_Borgia_page_2.jpg/512px-Codex_Borgia_page_2.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/7/75/Codex_Borgia_page_3.jpg/512px-Codex_Borgia_page_3.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/4/40/Codex_Borgia_page_4.jpg/512px-Codex_Borgia_page_4.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/b/b6/Codex_Borgia_page_5.jpg/512px-Codex_Borgia_page_5.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/7/70/Codex_Borgia_page_6.jpg/512px-Codex_Borgia_page_6.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/9/95/Codex_Borgia_page_7.jpg/512px-Codex_Borgia_page_7.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/3/3c/Codex_Borgia_page_8.jpg/512px-Codex_Borgia_page_8.jpg
Download

Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/5/59/Codex_Borgia_page_67.jpg/512px-Codex_Borgia_page_67.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/a/a5/Codex_Borgia_page_68.jpg/512px-Codex_Borgia_page_68.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/b/bc/Codex_Borgia_page_69.jpg/512px-Codex_Borgia_page_69.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/d/db/Codex_Borgia_page_70.jpg/512px-Codex_Borgia_page_70.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/b/bf/Codex_Borgia_page_71.jpg/512px-Codex_Borgia_page_71.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/Codex_Borgia_page_72.jpg/512px-Codex_Borgia_page_72.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/f/fb/Codex_Borgia_page_73.jpg/512px-Codex_Borgia_page_73.jpg
Downloading https://upload.wikimedia.org/wikipedia/commons/thumb/b/bb/Codex_Borgia_page_74.jpg/512px-Codex_Borgia_page

In [32]:
# Hand-written caption for every page of the codex, keyed by page number
# (1 to 76). The caption cell further down looks entries up by page number.
manual_descriptions = {
    1: "page 1 of the mesoamerican codex borgia, showing a list of day signs of the Tonalpohualli, the Central Mexican divinatory calendar; the page is damaged",
    2: "page 2 of the mesoamerican codex borgia, showing a list of day signs of the Tonalpohualli, the Central Mexican divinatory calendar",
    3: "page 3 of the mesoamerican codex borgia, showing a list of day signs of the Tonalpohualli, the Central Mexican divinatory calendar",
    4: "page 4 of the mesoamerican codex borgia, showing a list of day signs of the Tonalpohualli, the Central Mexican divinatory calendar",
    5: "page 5 of the mesoamerican codex borgia, showing a list of day signs of the Tonalpohualli, the Central Mexican divinatory calendar",
    6: "page 6 of the mesoamerican codex borgia, showing a list of day signs of the Tonalpohualli, the Central Mexican divinatory calendar",
    7: "page 7 of the mesoamerican codex borgia, showing a list of day signs of the Tonalpohualli, the Central Mexican divinatory calendar",
    8: "page 8 of the mesoamerican codex borgia, showing a list of day signs of the Tonalpohualli, the Central Mexican divinatory calendar",
    9: "page 9 of the mesoamerican codex borgia, showing the day signs of the Caiman and its patron deity Tonacatecuhtli, the Wind and its patron deity Ehecatl, the House and its patron deity Tepeyollotl, the Lizard and its patron deity Huehuecoyotl, and associated mantic symbols, presumably as prognostications for individuals born in each of those day signs",
    10: "page 10 of the mesoamerican codex borgia, showing the day signs of Death and its patron deity Metztli, the Snake and its patron deity Chalchiuhtlicue, the Rabbit and its patron deity Chalchiuhtotolin, the Water and its patron deity Tlaloc, and associated mantic symbols, presumably as prognostications for individuals born in each of those day signs",
    11: "page 11 of the mesoamerican codex borgia, showing the day signs of the Deer and its patron deity Xiuhtecuhtli, the Dog and its patron deity Mictlantecuhtli, the Monkey and its patron deity Xochipilli, the Grass and its patron deity Patecatl, and associated mantic symbols, presumably as prognostications for individuals born in each of those day signs",
    12: "page 12 of the mesoamerican codex borgia, showing the day signs of the Reed and its patron deity Itztlacoliuhqui, the Jaguar and its patron deity Tlazolteotl, the Eagle and its patron deity Xipe Totec, the Vulture and its patron deity Itzpapalotl, and associated mantic symbols, presumably as prognostications for individuals born in each of those day signs",
    13: "page 13 of the mesoamerican codex borgia, showing the day signs of the Movement and its patron deity Xolotl, the Flint and its patron deity Chalchiuhtotolin, Rain and its patron deity Tonatiuh, the Flower and its patron deity Xochiquetzal, and associated mantic symbols, presumably as prognostications for individuals born in each of those day signs",
    14: "page 14 of the mesoamerican codex borgia, showing the nine Lords of the Night, pre-Hispanic deities which ruled nighttime: Xiuhtecuhtli, Tezcatlipoca, Pilzintecuhtli, Chalchiuhtlicue, Mictlantecuhtli, Centeotl, Tlazolteotl, Tepeyollotl, and Tlaloc",
    15: "page 15 of the mesoamerican codex borgia, showing deities associated with childbirth, and day signs",
    16: "page 16 of the mesoamerican codex borgia, showing deities associated with childbirth, and day signs",
    17: "page 17 of the mesoamerican codex borgia, showing deities associated with childbirth, and day signs, as well as a large depiction of Tezcatlipoca",
    18: "page 18 of the mesoamerican codex borgia, showing religious activities performed by the gods Tonatiuh and Ehecatl",
    19: "page 19 of the mesoamerican codex borgia, showing activities performed by gods: woodcutting by Tlahuizcalpantecuhtli",
    20: "page 20 of the mesoamerican codex borgia, showing activities performed by gods: agriculture by Tlaloc, and crossing a river by Chalchiuhtlicue",
    21: "page 21 of the mesoamerican codex borgia, showing activities performed by gods: travelling by red Tezcatlipoca, and the ball-game by black Tezcatlipoca",
    22: "page 22 of the mesoamerican codex borgia, showing two deer, one white, with closed eyes and surrounded by precious regalia, and the other being pierced by a dart or arrow; as well as the ritual qualities of two of the twenty day-signs",
    23: "page 23 of the mesoamerican codex borgia, showing the ritual qualities of nine of the twenty day-signs",
    24: "page 24 of the mesoamerican codex borgia, showing the ritual qualities of nine of the twenty day-signs",
    25: "page 25 of the mesoamerican codex borgia, showing a directional almanac depicting the four deities Tlaloc, Xipe Totec, an unidentified Mixtec god, and Mixcoatl",
    26: "page 26 of the mesoamerican codex borgia, showing a directional almanac related to death, associated with four deities",
    27: "page 27 of the mesoamerican codex borgia, showing five depictions of the Postclassical period central Mexican rain god Tlaloc, as well as the qualities of the rains that he will bring, some destructive, some beneficial",
    28: "page 28 of the mesoamerican codex borgia, showing five depictions of the Postclassical period central Mexican rain god Tlaloc, as well as the qualities of the rains that he will bring, some destructive, some beneficial",
    29: "page 29 of the mesoamerican codex borgia, showing the cult of the wind gods",
    30: "page 30 of the mesoamerican codex borgia, showing the cult of the rain gods",
    31: "page 31 of the mesoamerican codex borgia, showing the cult of the maguey and the cult of the corn",
    32: "page 32 of the mesoamerican codex borgia, showing the cult of Tezcatlipoca",
    33: "page 33 of the mesoamerican codex borgia, showing the black temple and the opening of a ritual bundle",
    34: "page 34 of the mesoamerican codex borgia, showing a red temple",
    35: "page 35 of the mesoamerican codex borgia, showing some rituals",
    36: "page 36 of the mesoamerican codex borgia, showing some rituals",
    37: "page 37 of the mesoamerican codex borgia, showing some rituals",
    38: "page 38 of the mesoamerican codex borgia, showing some rituals",
    39: "page 39 of the mesoamerican codex borgia, showing a sacrifice to the sun",
    40: "page 40 of the mesoamerican codex borgia, showing a sacrifice to the sun",
    41: "page 41 of the mesoamerican codex borgia, showing a sacrifice to the Cihuapipiltin",
    42: "page 42 of the mesoamerican codex borgia, showing a sacrifice to the Cihuapipiltin",
    43: "page 43 of the mesoamerican codex borgia, showing a corn festival",
    44: "page 44 of the mesoamerican codex borgia, showing the enthronement of a prince",
    45: "page 45 of the mesoamerican codex borgia, showing the cult of the morning star Tlahuizcalpantecuhtli",
    46: "page 46 of the mesoamerican codex borgia, showing fire-drilling",
    47: "page 47 of the mesoamerican codex borgia, showing the Cihuateteo, the divinized spirits of the women that died in child-birth, and the Macuiltonaleque, minor spirits of excess, pleasure and violence",
    48: "page 48 of the mesoamerican codex borgia, showing the Cihuateteo, the divinized spirits of the women that died in child-birth, and the Macuiltonaleque, minor spirits of excess, pleasure and violence",
    49: "page 49 of the mesoamerican codex borgia, showing the four quarters of the universe and the centre, and their corresponding day signs, sacred trees, and mantic images",
    50: "page 50 of the mesoamerican codex borgia, showing the four quarters of the universe and the centre, and their corresponding day signs, sacred trees, and mantic images",
    51: "page 51 of the mesoamerican codex borgia, showing the four quarters of the universe and the centre, and their corresponding day signs, sacred trees, and mantic images",
    52: "page 52 of the mesoamerican codex borgia, showing the four quarters of the universe and the centre, and their corresponding day signs, sacred trees, and mantic images",
    53: "page 53 of the mesoamerican codex borgia, showing the four quarters of the universe and the centre, and their corresponding day signs, sacred trees, and mantic images; as well as the god Tlahuizcalpantecuhtli, representing the morning star Venus, piercing a woman",
    54: "page 54 of the mesoamerican codex borgia, showing Tlahuizcalpantecuhtli, representing the morning star Venus, piercing various characters",
    55: "page 55 of the mesoamerican codex borgia, showing day-signs associated to different deities represented as travellers or merchants, and their associated prognostications",
    56: "page 56 of the mesoamerican codex borgia, showing Mictlantecuhtli and Quetzalcoatl back to back",
    57: "page 57 of the mesoamerican codex borgia, showing prognostications for marriages",
    58: "page 58 of the mesoamerican codex borgia, showing prognostications for marriages",
    59: "page 59 of the mesoamerican codex borgia, showing prognostications for marriages",
    60: "page 60 of the mesoamerican codex borgia, showing prognostications for marriages",
    61: "page 61 of the mesoamerican codex borgia, showing two trecenas or 13-day weeks with their associated day signs and patron deities: Caiman, Tonacatecuhtli and Jaguar, Ehecatl",
    62: "page 62 of the mesoamerican codex borgia, showing two trecenas or 13-day weeks with their associated day signs and patron deities: Deer, Tepeyollotl and Flower, Huehuecoyotl",
    63: "page 63 of the mesoamerican codex borgia, showing two trecenas or 13-day weeks with their associated day signs and patron deities: Reed, Chalchiuhtlicue and Death, Tonatiuh",
    64: "page 64 of the mesoamerican codex borgia, showing two trecenas or 13-day weeks with their associated day signs and patron deities: Rain, Tlaloc and Grass, Mayahuel",
    65: "page 65 of the mesoamerican codex borgia, showing two trecenas or 13-day weeks with their associated day signs and patron deities: Snake, Xiuhtecuhtli and Flint, Mictlantecuhtli",
    66: "page 66 of the mesoamerican codex borgia, showing two trecenas or 13-day weeks with their associated day signs and patron deities: Monkey, Patecatl and Lizard, Itztlacoliuhqui",
    67: "page 67 of the mesoamerican codex borgia, showing two trecenas or 13-day weeks with their associated day signs and patron deities: Movement, Tlazolteotl and Dog, Xipe Totec",
    68: "page 68 of the mesoamerican codex borgia, showing two trecenas or 13-day weeks with their associated day signs and patron deities: House, Itzpapalotl and Vulture, Xolotl",
    69: "page 69 of the mesoamerican codex borgia, showing two trecenas or 13-day weeks with their associated day signs and patron deities: Water, Chalchiuhtotolin and Wind, Chantico",
    70: "page 70 of the mesoamerican codex borgia, showing two trecenas or 13-day weeks with their associated day signs and patron deities: Eagle, Xochiquetzal and Rabbit, Xiuhtecuhtli",
    71: "page 71 of the mesoamerican codex borgia, showing the sun god, Tonatiuh, receiving offerings, and stating the sacred flying animals associated to each day",
    72: "page 72 of the mesoamerican codex borgia, showing 20 day-signs into quarters associated with deities and snakes forming a xicalcoliuhqui or meandering pattern",
    73: "page 73 of the mesoamerican codex borgia, showing Mictlantecuhtli and Quetzalcoatl back to back",
    74: "page 74 of the mesoamerican codex borgia, showing a Cihuapilli and a Macuiltonaleque, each associated with day-signs",
    75: "page 75 of the mesoamerican codex borgia, showing the ruling deities of half-trecena periods, enthroned, receiving cult and with associated mantic images; the page is damaged",
    76: "page 76 of the mesoamerican codex borgia, showing the ruling deities of half-trecena periods, enthroned, receiving cult and with associated mantic images; the page is damaged",
}

In [47]:
# create 4 kinds of captions
#
# For every page this cell builds the caption variants below and, when
# enabled, writes each one to data/txt/<kind>/imageNNN.txt:
#   auto1  - "page N of the codex borgia"
#   auto2  - auto1 plus a one-line description of the manuscript
#   blip   - caption produced by the BLIP model for the page image
#   manual - hand-written description from manual_descriptions
#
# Both switches default to False, so running the cell as-is writes nothing
# and never calls blip.predict (same as when the writes were commented out).
WRITE_CAPTIONS = False  # set to True to (re)write the caption text files
USE_BLIP = False        # set to True to also request a BLIP caption per image

blip = replicate.models.get("salesforce/blip")

for i in range(1, 77):
    txt_file_name = f"image{i:03d}.txt"

    auto1_name = f"page {i} of the codex borgia"
    auto2_name = f"page {i} of the codex borgia, a pre-columbian mesoamerican pictorial manuscript from 16th century central mexico"
    manual_name = manual_descriptions[i]

    captions = {"auto1": auto1_name, "auto2": auto2_name, "manual": manual_name}

    if USE_BLIP:
        # NOTE(review): the previously commented-out line called
        # `model.predict`, but the only object created in this cell is `blip`;
        # presumably that is what was meant -- confirm against the installed
        # replicate client before enabling.
        with open(f"data/img/image{i:03d}.jpg", "rb") as img_file:
            blip_name = blip.predict(image=img_file).replace("Caption: ", "")
        captions["blip"] = blip_name

    if WRITE_CAPTIONS:
        for kind, caption in captions.items():
            # One sub-folder per caption kind; create it on first use.
            os.makedirs("data/txt/" + kind, exist_ok=True)
            with open("data/txt/" + kind + "/" + txt_file_name, "w") as f:
                f.write(caption)

    