In [1]:
## Install PyMuPDF (imported as `fitz`) for this notebook.
## `%pip` installs into the environment of the running kernel; `!pip` runs whichever pip is first on PATH.
## Both `fitz` and `PyMuPDF` are removed first -- presumably because a separate PyPI package
## named `fitz` would clash with PyMuPDF's `fitz` module (TODO confirm).
%pip uninstall --yes fitz PyMuPDF
## Never pick up 1.19.0: wrong x0/y0 values for bboxes in get_text("words"), see: https://github.com/pymupdf/PyMuPDF/issues/1328
%pip install "PyMuPDF!=1.19.0"
#%pip install PyMuPDF==1.19.1  ## install a release later than 1.19.0
#%pip install PyMuPDF==1.18.19  ## last release before 1.19.0
# ## Releases of PyMuPDF as of Oct 18, 2021: 1.11.2, 1.12.5, 1.13.20, 1.14.19.post2, 1.14.20, 1.14.21, 1.16.0, 1.16.1, 1.16.2, 1.16.3, 1.16.4, 1.16.5, 1.16.6, 1.16.7, 1.16.8, 1.16.9, 1.16.10, 1.16.11, 1.16.12, 1.16.13, 1.16.14, 1.16.15, 1.16.16, 1.16.17, 1.16.18, 1.17.0, 1.17.1, 1.17.2, 1.17.3, 1.17.4, 1.17.5, 1.17.6, 1.17.7, 1.18.0, 1.18.1, 1.18.2, 1.18.3, 1.18.4, 1.18.5, 1.18.6, 1.18.7, 1.18.8, 1.18.9, 1.18.10, 1.18.11, 1.18.12, 1.18.13, 1.18.14, 1.18.15, 1.18.16, 1.18.17, 1.18.18, 1.18.19, 1.19.0


[0mFound existing installation: PyMuPDF 1.19.6
Uninstalling PyMuPDF-1.19.6:
  Successfully uninstalled PyMuPDF-1.19.6
Collecting PyMuPDF
  Using cached PyMuPDF-1.19.6-cp38-cp38-macosx_10_9_x86_64.whl (7.6 MB)
Installing collected packages: PyMuPDF
Successfully installed PyMuPDF-1.19.6
You should consider upgrading via the '/usr/local/anaconda3/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [10]:
!open -a Preview book-1.pdf book-2.pdf book-3.pdf

In [19]:
# Per-book settings for the three source PDFs.
#   "filename"   : file name of the original PDF
#   "name"       : short alias; used for the symlink (<name>.pdf) and as the prefix
#                  of every excerpt file (<name>-<section>.pdf)
#   "page_count" : page count noted for the book (not read by the code visible in this notebook)
#   "sections"   : section name -> [start, end]; excerpt() keeps pages start..end,
#                  counted from 1, with both ends included
config = [
   {"filename" : "NOUVELLE FLORE DU LIBAN ET DE LA SYRIE 1.pdf",
    "name" : "book-1",
    "page_count" : 642,
    "sections" : {
      "preface" : [1, 78],        # spans the "toponymique" and "abbrev" ranges as well
      "toponymique" : [49, 76],
      "abbrev" : [77, 78],
      "content" : [79, 607],
      "sample" : [79, 79+10],     # both ends are kept, so this is the first 11 pages of "content"
      "index" : [617, 639]},
    },
   {"filename" : "NOUVELLE FLORE DU LIBAN ET DE LA SYRIE 2.pdf",
    "name" : "book-2",
    "page_count" : 725,
    # NOTE(review): "preface" [1, 79] overlaps "content" [8, 700]; book-3 has the same
    # "abbrev"/"content" ranges and uses [1, 7] -- possibly a typo for [1, 7], confirm.
    "sections" : {
      "preface" : [1, 79],
      "abbrev" : [6, 7],
      "content" : [8, 700],
      "sample" : [8, 8+10],       # first 11 pages of "content"
      "index" : [704, 725]},
    },
   {"filename" : "NOUVELLE FLORE DU LIBAN ET DE LA SYRIE 3.pdf",
    "name" : "book-3",
    "page_count" : 588,
    "sections" : {
      "preface" : [1, 7],
      "abbrev" : [6, 7],
      "content" : [8, 554],
      "sample" : [8, 8+10],       # first 11 pages of "content"
      "index" : [556, 583],
      "familyidx" : [584, 585]},
    },
]


In [None]:
import sys, fitz, re
from pprint import pprint

## Purpose: this cell is a pre-processing step that will create a symlink (e.g. book-1.pdf)
##          that points to the original file, and then will create new PDF documents
##          that are excerpts of subsets of the whole book
##          as named sections such as book-1-index.pdf or book-1-abbrev.pdf
##          The file book-X-sample.pdf contains the first 11 pages of book-X-content.pdf
##          (pages start through start+10, inclusive); the sample is used to speed up development/testing later on

## https://pymupdf.readthedocs.io/en/latest/document.html#Document.delete_pages
##    In general, the performance of this method is dependent on the number of remaining pages
##    NOT on the number of deleted pages
## So, keeping fewer pages is faster, and deleting more pages at once is also faster 
def excerpt(infile, outfile, start, end):
  """Write the pages `start`..`end` of a PDF to a new PDF file.

  Args:
    infile: path of the PDF to read.
    outfile: path of the PDF to write.
    start: first page to keep, counted from 1.
    end: last page to keep, counted from 1 (inclusive).

  All pages outside the range are removed with a single delete_pages() call
  (see the performance note above), the remainder is saved with
  garbage=4 / clean=True, and the source document is closed afterwards.
  """
  # The context manager closes the document even when delete/save raises;
  # this function is called once per section, so unclosed handles add up.
  with fitz.open(infile) as doc:
    last_page = doc.page_count
    print(start, end, last_page, outfile)
    # 0-based indices of the pages in front of and behind the kept range;
    # either range() is simply empty when there is nothing to cut on that side.
    deleteme = [*range(0, start - 1), *range(end, last_page)]
    if deleteme:  # the requested range may already cover the whole document
      doc.delete_pages(deleteme)
    doc.save(outfile, garbage=4, clean=True)

import os, os.path
# For every configured book: make sure the short alias (e.g. book-1.pdf) exists as a
# symlink to the original file, then write one excerpt PDF per configured section.
for book in config:
   fname = book["filename"]
   symlink = book["name"] + ".pdf"
   # lexists() is also True for a dangling link left over from an earlier run;
   # exists() follows the link, reports False, and os.symlink() then fails with FileExistsError.
   if os.path.exists(fname) and not os.path.lexists(symlink):
      print("Creating symlink: ", symlink, " ->", fname)
      os.symlink(fname, symlink)
   for section, (start, end) in book["sections"].items():
      out_fname = book["name"] + "-" + section + ".pdf"
      excerpt(fname, out_fname, start, end)

!open -a Preview book-[1,2,3]-*.pdf

## Runtime of this cell is ~11m38s

In [15]:
# !open -a Preview book-[1,2,3]-*.pdf


In [16]:
!open -a Preview book-[1,2,3]-index.pdf


In [44]:
# https://pymupdf.readthedocs.io/en/latest/faq/#how-to-analyze-font-characteristics

import fitz
import pprint, re

def flags_decomposer(flags):
    """Describe a font-flag bit field as a comma-separated list of words.

    Bits 0, 1 and 4 add "superscript", "italic" and "bold" when set;
    bit 2 chooses "serifed" over "sans" and bit 3 chooses "monospaced"
    over "proportional", so those two always contribute a word.
    """
    words = (
        "superscript" if flags & 1 else None,
        "italic" if flags & 2 else None,
        "serifed" if flags & 4 else "sans",
        "monospaced" if flags & 8 else "proportional",
        "bold" if flags & 16 else None,
    )
    return ", ".join(word for word in words if word is not None)

class Face:
    """Font name, size, color and flags of a text span, printable as one line."""

    def __init__(self, font, size, color, flags=0):
        self.font, self.size = font, size
        self.color, self.flags = color, flags
        # keep the readable form of the flag bits next to the raw value
        self.style = flags_decomposer(flags)

    def __str__(self):
        # color is rendered as a six-digit hex value, size without trailing zeros
        color_hex = "#%06x" % self.color
        return "Font: '{}' ({}), size {:g}, color {}".format(
            self.font, self.style, self.size, color_hex
        )
print(Face("Times", 10.929, 0, 4))


Font: 'Times' (serifed, proportional), size 10.929, color #000000


In [45]:
import sys, fitz, re
from pprint import pprint
# fname = "book-1-sample.pdf"
# Purpose: walk through the pages of `fname` and collect every text line whose first
#          span is set in a font whose description contains "Times-Bold" at a size above 10.
#          Each hit is highlighted on the page, printed, and appended to `items`.
fname = "book-1.pdf"
doc = fitz.open(fname)  # open document

from fitz.utils import getColor
#blue = getColor("aliceblue")

# txtpgoffset = 77
txtpgoffset = 0
# start_page = 1 + txtpgoffset
start_page = 1
#end_page = start_page + 11
#end_page = start_page + 150
#end_page = start_page + 530
end_page = doc.page_count
items = []
mode = "searching"
print(mode)
# NOTE(review): `i` is used directly as the index into `doc`; if that index is 0-based
# (as it presumably is), start_page = 1 means the first page is never scanned -- confirm.
for i in range(start_page, end_page):
    page = doc[i]
    # read page text as a dictionary, suppressing extra spaces in CJK fonts
    flags = 0 | fitz.TEXT_INHIBIT_SPACES | fitz.TEXT_DEHYPHENATE
    blocks = page.get_text("dict", flags=flags)["blocks"]
    for x,b in enumerate(blocks):  # iterate through the text blocks
        br = fitz.Rect(b["bbox"])
        #annot = page.add_rect_annot(br)
        for y,l in enumerate(b["lines"]):  # iterate through the text lines
            lr = fitz.Rect(l["bbox"])
            for z,s in enumerate(l["spans"]):  # iterate through the text spans
                # "found" only lasts until the first span of the next line, where it becomes "spanning".
                # NOTE(review): because this runs before the test below, `mode != "found"` is
                # always true there when z == 0, so `mode` does not suppress any hit -- confirm intent.
                if z==0 and mode == "found":
                    mode = "spanning"
                #s["style"] = flags_decomposer(s["flags"])  # readable font flags
                face = Face(s["font"], s["size"], s["color"], s["flags"])
                t = s["text"]
                ## FIXME: [83, 'N  Cystopteris  filix-fragilis  (L.) Borb. —  Polypodiumfilix-fragile  L.,  Cyst,  fragilis']
                ## FIXMECLASS: OCR Error; a spurious mark in the text preceding the genus is interpreted as a letter 'N'
                # Only the first span of a line can start a hit; the font test runs against str(face).
                if z==0 and mode != "found" and re.search(r"Times-Bold\b", str(face)) and s["size"] > 10.0:
                        r_rect = fitz.Rect(lr)         # copy of the whole line's rectangle
                        r_high = fitz.Rect(s["bbox"])  # rectangle of the first span only
                        joined = " ".join(list(map(lambda x: x["text"].strip(), l["spans"])))  ## FIXMEDESC: ideally this simplistic code would work if not for OCR Errors
                        item = joined
                        # found = re.match(r"(?P<genus>([A-Z]\w+) (?P<species>\w+)", joined)
                        # d = found.groupdict()
                        # A capitalised word followed by one space and a second word counts as a clean heading;
                        # anything else is reported as a possible OCR error but still recorded.
                        matches = re.match(r"([A-Z]\w+) (\w+)", joined)
                        warning = "Possible OCR Error"
                        if matches and len(matches.groups()) == 2:
                            found = matches.groups()
                            warning = "Normal"
                            mode = "found"
                            # item = f"""${d['genus']} ${d['species']}"""
                            item = " ".join(found)
                        else:
                            warning = "Possible OCR Error"

                        print([i, x, y, z, item, warning])
                        annot_high = page.add_highlight_annot(r_high)
                        # Record layout: [first-span text, line rect, span rect, page index,
                        #                 span index, span, line index, line, block index, block, warning]
                        # NOTE(review): the stored name is `t` (raw text of the first span), not the
                        # cleaned-up `item` that is printed above -- confirm which one is wanted.
                        items.append([t, r_rect, r_high, i, z,s, y,l, x,b, warning])
print(len(items))
#print(i, items[0][:3])


searching
[34, 0, 0, 0, 'r', 'Possible OCR Error']
[59, 9, 0, 0, 'J', 'Possible OCR Error']
[77, 8, 5, 0, 'JL', 'Possible OCR Error']
[78, 8, 0, 0, 'Lycopodium cernuum', 'Normal']
[79, 1, 0, 0, 'Selaginella denticulata', 'Normal']
[79, 16, 0, 0, 'Isoetes hystrix', 'Normal']
[79, 24, 1, 0, 'Isoetes olympica', 'Normal']
[80, 13, 0, 0, 'Equisetum maximum', 'Normal']
[80, 18, 0, 0, 'Equisetum palustre', 'Normal']
[80, 25, 0, 0, 'Equisetum ramosissimum', 'Normal']
[81, 9, 0, 0, 'Ophioglossum vulgatum', 'Normal']
[81, 11, 3, 0, 'Ophioglossum lusitanicum', 'Normal']
[81, 21, 0, 0, 'Osmunda regalis', 'Normal']
[82, 9, 0, 0, 'Gymnogramma leptophylla', 'Normal']
[82, 17, 0, 0, 'Cheilanthes pteridioides', 'Normal']
[83, 28, 0, 0, 'Âdiantum capillus-veneris I (PL III, n. 4). — 2|. Rhizome rampant à écailles', 'Possible OCR Error']
[84, 10, 0, 0, 'Pteris longifolia', 'Normal']
[84, 16, 0, 0, 'Pteridium aquilmum', 'Normal']
[85, 1, 0, 0, 'Athyrium filix', 'Normal']
[85, 12, 0, 0, 'Dryopterîs aculeat

In [46]:
## This cell requires ~20 mins to calculate results from the first book

import math
# Purpose: for every heading found in `items`, grow a rectangle that starts at the heading
#          line and takes in the lines that follow it on the same page, then draw that
#          rectangle, highlight the heading, and record [name, page index, rectangle, item]
#          in `boxes`. The annotated document is saved as "marked-pages-" + doc.name.
doc = fitz.open(fname)  # open document
#subset = items[:6]
subset = items
boxes = []
for it, item in enumerate(subset):
    mode = "searching"
    # The last item has no successor to compare against, so the loop stops before it
    # and no box is recorded for it.
    if (it >= len(subset) - 1):
        break
    next_item = subset[it + 1]
    [t0, rr0, rh0, i0, z0,s0, y0,l0, x0,b0, warning0] = item
    # Same values under the un-suffixed names; x, b, y, l and s are overwritten by the loops below.
    [t, rr, rh, i, z,s, y,l, x,b, warning] = [t0, rr0, rh0, i0, z0,s0, y0,l0, x0,b0, warning0]
    [t1, rr1, rh1, i1, z1,s1, y1,l1, x1,b1, warning1] = next_item
    page = doc[i0]
    flags = 0 | fitz.TEXT_INHIBIT_SPACES | fitz.TEXT_DEHYPHENATE
    blocks = page.get_text("dict", flags=flags)["blocks"]
    blocks = blocks[x:]  # drop the blocks in front of the heading's own block
    # r_rect is the same object as rr0 (and item[1]), not a copy.
    # NOTE(review): if include_rect() enlarges its receiver in place (presumably it does),
    # rr0 grows together with r_rect -- verify, since later code reads rr0 again.
    r_rect = rr0
    
    #print(t0)
    # if t0 == "Isoetes olympica":
    #print([int(it), len(subset), rr0, t0, "\n", t1])
    
    # Dashed outline of the rectangle as it is at this point (the heading line).
    annot_rect = page.add_rect_annot(rr0)
    annot_rect.set_border(width=1, dashes=[1,2])
    annot_rect.update()

    
    for x,b in enumerate(blocks):  # iterate through the text blocks
        br = fitz.Rect(b["bbox"])
        #annot = page.add_rect_annot(br)
        # NOTE(review): y0 is the heading's line index inside its own block, but the slice is
        # applied to every remaining block, so later blocks lose their first y0 lines -- confirm.
        for y,l in enumerate(b["lines"][y0:]):  # iterate through the text lines
            lr = fitz.Rect(l["bbox"])
            s = l["spans"][0]
            #for z,s in enumerate(l["spans"]):  # iterate through the text spans
            this_line = " ".join(list(map(lambda x: x["text"], l["spans"])))
            # `mode` is "searching", "found", or -- once a line containing "Aire g" was seen --
            # the string "<font> <size>" of that line's first span.
            if mode != "found":
                if re.search("Aire g", this_line):
                    mode = s["font"] + " " + str(s["size"])
                    r_rect = r_rect.include_rect(lr)
                    #print(str((x, y, s["size"], s["font"])) + "FOUND: " + this_line)
                # NOTE(review): rr1 belongs to the next heading, whose page index i1 is not
                # compared with i0 here -- confirm the test is meaningful across pages.
                elif lr.intersects(rr1):                    
                    mode = "found"
                    #print(str((x, y, s["size"], s["font"])) + "FOUND: " + this_line)
                else:
                    if mode != "searching":
                        # After the "Aire g" line: stop at the first line whose first span is at
                        # least as large as that line's size rounded up to a whole number.
                        # The two-value unpacking assumes the font name contains no space.
                        (font, size) = mode.split(" ")
                        isize = round(math.ceil(float(size)))
                        if s["size"] >= float(isize):
                            mode = "found"
                    if mode != "found":
                        r_rect = r_rect.include_rect(lr)
                        # if y >= y0:
                        #     r_rect = r_rect.include_rect(lr)
                    
    r_high = rh0
    annot_rect = page.add_rect_annot(r_rect)
    annot_high = page.add_highlight_annot(r_high)
    # boxes.append([t0, start_page + it, r_rect, item]) . ## FIXME: thinking it should be i0 not start_page + it
    boxes.append([t0, i0, r_rect, item])


# print(boxes[0])
print(len(boxes))

#doc.delete_pages(end_page, doc.page_count - 1)
#doc.delete_pages(0, start_page - 1)
marked_epithet_fname = "marked-pages-" + doc.name
doc.save(marked_epithet_fname, garbage=4, clean=True)
#!open -a Preview marked-pages-book-1-sample.pdf
!open -a Preview marked-pages-book-1.pdf



1064


In [50]:
import pprint
pp = pprint.PrettyPrinter(compact=True)

import math
# Purpose: for every box recorded in `boxes`, re-read the text inside the clip rectangle and
#          sort its lines into labelled sections. For each heading text, `results` receives
#          the section texts ("Description", "L.", "S.", "Aire", "Extra") and, under
#          "<section>_rects", the rectangles of the lines that went into each section.
doc = fitz.open(fname)  # open document
# First words that open a named section; any other first word is filed under "Extra".
section_labels = ["Description", "L.", "S.", "Aire"]
results = {}
for it, thing in enumerate(boxes):
    mode = "searching"  # only echoed in the ERROR diagnostic below
    [name, pageno, rrr, item] = thing
    page = doc[pageno]
    [t0, rr0, rh0, i0, z0,s0, y0,l0, x0,b0, warning0] = item
    flags = 0 | fitz.TEXT_INHIBIT_SPACES | fitz.TEXT_DEHYPHENATE
    # NOTE(review): the clip is rr0 (item[1]) rather than the box `rrr`; presumably both are the
    # same rectangle object by the time this cell runs -- confirm.
    blocks = page.get_text("dict", clip=rr0, flags=flags)["blocks"]

    print(pageno - txtpgoffset, i0, t0)
    # NOTE(review): entries are keyed by the heading text, so a heading text that occurs more
    # than once replaces the entry stored earlier -- confirm this is acceptable.
    results[t0] = {
        "name": t0,
        "name_rects": [rr0],
        "pdf_page": pageno,
        "book_page": pageno - txtpgoffset,
    }
    key = "Extra"

    # (left edge rounded up, first word) of every line that has at least one span
    margins = [
        (math.ceil(l["bbox"][0]), l["spans"][0]["text"].split(" ")[0])
        for b in blocks
        for l in b["lines"]
        if l.get("spans")
    ]
    if not margins:
        # Nothing readable inside the clip: keep the name-only entry instead of failing
        # on blocks[0] / min() of an empty list.
        print()
        continue
    # Left edge at which labelled lines start: taken from the lines that begin with a known
    # label, falling back to the left edge of the very first line.
    targets = [edge for edge, word in margins if word in ["L.", "S.", "Aire"]] or [blocks[0]["lines"][0]["bbox"][0]]
    margins_max = max(targets)
    margins_min = min(edge for edge, _ in margins)

    # The heading text comes from OCR and may contain regex metacharacters ('.', '(', '[' ...);
    # it is escaped so that it is matched literally and can never raise re.error.
    label_re = re.compile(r"^(?P<label>" + re.escape(t0) + r"|.*?)(\s|$)")

    for x,b in enumerate(blocks):  # iterate through the text blocks
        # NOTE(review): y0 is the heading's line index in the unclipped page, but it is used
        # here to slice the lines of every clipped block -- confirm this is intended.
        for y,l in enumerate(b["lines"][y0:]):  # iterate through the text lines
            lr = fitz.Rect(l["bbox"])
            xloc = min(margins_max, math.ceil(lr.x0))
            this_line = re.sub(r"\s+", " ", " ".join(span["text"].strip() for span in l["spans"])).strip()
            #### FIXME: Phyllitis scolopendrium (L.) Newm. —Asplenium scolopendrium L., Scolopendrium
            #### FIXME: Juniper
            matches = label_re.match(this_line)
            if matches:
                # Only a line that starts within 5 units of the label margin can open a new
                # section; every other line continues the current one.
                if margins_max - xloc < 5:
                    key = matches.group("label")
                    if " " in key:
                        # a label with a space can only be the heading text itself
                        key = "Description"
                    elif key not in section_labels:
                        key = "Extra"
                frag = results[t0].get(key) or ""
                results[t0][key] = " ".join([frag.strip(), this_line.strip()]).strip()
                results[t0].setdefault(key + "_rects", []).append(lr)
                #print([x, y, xloc, mode, this_line])
            else:
                print(["ERROR:", x, y, xloc, mode, this_line])
    # pp.pprint(results[t0])
    print()
    #break
                    
    # r_high = rh0
    # annot_rect = page.add_rect_annot(r_rect)
    # annot_high = page.add_highlight_annot(r_high)
    # boxes.append([t0, r_rect, item])




34 34 r

59 59 J

77 77 JL

78 78 Lycopodium cernuum

79 79 Selaginella denticulata

79 79 Isoetes hystrix

79 79 Isoetes olympica

80 80 Equisetum maximum

80 80 Equisetum palustre

80 80 Equisetum ramosissimum

81 81 Ophioglossum vulgatum

81 81 Ophioglossum lusitanicum

81 81 Osmunda regalis

82 82 Gymnogramma leptophylla

82 82 Cheilanthes

83 83 Âdiantum capillus-veneris

84 84 Pteris longifolia

84 84 Pteridium aquilmum

85 85 Athyrium

85 85 Dryopterîs aculeata

85 85 Dryopteris

86 86 Blechnum

87 87 Ceterack officinarum

88 88 Asplenium bourgaei

88 88 Asplenium

88 88 Asplenium ruta-muraria

88 88 Asplenium

88 88 Asplenium

89 89 Phyllitis scolopendrium

89 89 Phyllitis hemionitis

90 90 Polypodium vulgare L.

90 90 Salvinia natans

92 92 Abies cilicica

92 92 Cedrus libani

93 93 Pinus pinea

93 93 Pina

93 93 Pinus halepensis

94 94 Cupressus sempervirens

95 95 Juniperus oxycedrus

96 96 Juniper

96 96 Juniperus

96 96 Arceuthos drupacea

97 97 Ephedra campylopoda

97 97 

In [51]:
from tqdm import tqdm
import math
# marked_fname = "marked-pages-book-1-sample.pdf"
marked_fname = "marked-pages-book-1.pdf"
#doc = fitz.open(fname)  # open document
doc = fitz.open(marked_fname)  # open document
print(len(results))

from fitz.utils import getColorList
cl = getColorList()
from fitz.utils import getColor

pink = getColor("lightpink")
green = getColor("aquamarine")
blue = getColor("lightskyblue")
gray = getColor("whitesmoke")
yellowish = getColor("antiquewhite")
fills= [pink, green, blue, gray, yellowish]

fc = {"L.":pink, "S.":blue, "Aire":green, "Description":gray, "Extra":yellowish}
# Purpose: colour every recorded section line of every entry in `results` with a highlight
#          annotation, using the per-section colours in `fc`.
for it, name in enumerate(tqdm(results.keys())):
    item = results[name]
    book_page = item["book_page"]
    pdf_page = item["pdf_page"]
    print([it, name, book_page, pdf_page])
    # The rectangles were measured on page index `pdf_page` of the source document, and the
    # marked copy opened here has the same pages (nothing is deleted before it is saved),
    # so the very same index must be used. `book_page - 1` put every highlight one page early.
    page = doc[pdf_page]
    # Keys ending in "_rects" / containing "_page" are bookkeeping; what remains are the
    # section names (plus "name"), each with a matching "<key>_rects" list.
    for k in [key for key in item if not re.search(r"_page|_rect", key)]:
        for r in item[k + "_rects"]:
            if not page.rect.intersects(r):
                print(["ERROR: ", k, r])
            elif k in fc:  # "name" has rectangles but no colour, so it is not highlighted
                print(["HIGHL: ", fc[k], k, r])
                annot = page.add_highlight_annot(fitz.Rect(r))
                annot.set_colors(stroke=fc[k])
                annot.update()


# doc.delete_pages(end_page, doc.page_count - 1)
# doc.delete_pages(0, start_page - 1)
doc_fname = "results-" + doc.name
doc.save(doc_fname, garbage=4, clean=True)
print(doc_fname)
# !open -a Preview results-marked-pages-book-1-sample.pdf
!open -a Preview results-marked-pages-book-1.pdf



  2%|▏         | 14/862 [00:00<00:06, 138.46it/s]

862
[0, 'r', 34, 34]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(19.200000762939453, -15.452678680419922, 30.185924530029297, 19.162628173828125)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(259.9200134277344, 48.01697540283203, 346.7517395019531, 62.690975189208984)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(515.729736328125, 48.01697540283203, 555.8638305664062, 62.690975189208984)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(76.08000183105469, 76.53803253173828, 556.5704956054688, 95.21402740478516)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(48.720001220703125, 91.89801788330078, 256.58343505859375, 110.57401275634766)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(76.08000183105469, 111.81800079345703, 556.623046875, 130

  6%|▌         | 51/862 [00:00<00:04, 173.60it/s]


['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(38.15999984741211, 545.8829956054688, 228.19778442382812, 557.8889770507812)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(38.15999984741211, 579.0029907226562, 282.1980285644531, 591.0089721679688)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(38.15999984741211, 557.6430053710938, 405.2191467285156, 569.6489868164062)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(20.8799991607666, 566.7229614257812, 99.93690490722656, 579.489013671875)]
['HIGHL: ', (0.4980392156862745, 1.0, 0.8313725490196079), 'Aire', Rect(37.68000030517578, 590.7630004882812, 313.6315002441406, 602.7689819335938)]
[30, 'Abies cilicica', 92, 92]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(42.0, 182.57699584960938, 405.0628967285156, 197.9110107421875)]
['HIGHL: 

 11%|█         | 95/862 [00:00<00:04, 188.93it/s]


['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(46.08000183105469, 500.697021484375, 430.4622802734375, 515.3710327148438)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(46.08000183105469, 512.4569702148438, 430.4931640625, 527.1309814453125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(46.31999969482422, 524.2169799804688, 430.1470947265625, 538.8909912109375)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(46.31999969482422, 535.9769897460938, 430.6980285644531, 562.4110107421875)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(46.08000183105469, 559.4970092773438, 430.3951721191406, 574.1710205078125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(45.84000015258789, 571.2570190429688, 430.299621582031

 16%|█▌        | 136/862 [00:00<00:04, 171.22it/s]

['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(30.959999084472656, 576.136962890625, 414.7733154296875, 590.8109741210938)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(31.200000762939453, 587.89697265625, 414.86187744140625, 602.5709838867188)]
[107, 'Stipa parviflora', 130, 130]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(34.79999923706055, 198.45700073242188, 397.3428649902344, 213.791015625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(15.119999885559082, 210.21697998046875, 397.2687683105469, 224.8909912109375)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(14.880000114440918, 221.97698974609375, 397.2644958496094, 236.6510009765625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(15.11999988555

 20%|██        | 175/862 [00:01<00:03, 172.05it/s]


['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(32.15999984741211, 568.0570068359375, 415.7304382324219, 582.7310180664062)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(32.15999984741211, 579.5769653320312, 415.7554016113281, 594.2509765625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(32.15999984741211, 591.0969848632812, 171.892578125, 605.77099609375)]
[142, 'Eragrostis tatarica', 148, 148]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(31.68000030517578, 286.7769775390625, 394.45892333984375, 302.1109924316406)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(12.0, 298.5369873046875, 394.3845520019531, 324.97100830078125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(12.0, 322.0570068359375, 394.38

 22%|██▏       | 193/862 [00:01<00:04, 148.74it/s]

['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(20.399999618530273, 462.177001953125, 403.9853820800781, 476.85101318359375)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(20.639999389648438, 473.6969909667969, 107.33330535888672, 488.3710021972656)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(37.91999816894531, 491.30999755859375, 268.302978515625, 504.6499938964844)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(37.91999816894531, 505.2300109863281, 403.9042053222656, 518.5700073242188)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(20.399999618530273, 514.8299560546875, 273.1014404296875, 528.1699829101562)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(38.15999984741211, 528.75, 403.72796630859375, 542.0900268554688)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.98

 25%|██▍       | 212/862 [00:01<00:04, 157.69it/s]


['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(20.399999618530273, 153.4169921875, 405.8908386230469, 168.09100341796875)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(20.639999389648438, 165.177001953125, 405.65478515625, 179.85101318359375)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(20.639999389648438, 176.93698120117188, 405.7402038574219, 191.61099243164062)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(20.15999984741211, 188.69699096679688, 405.9154052734375, 238.6510009765625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(20.399999618530273, 235.73699951171875, 405.9129638671875, 250.4110107421875)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(20.639999389648438, 247.49697875976562, 127.15

 29%|██▊       | 247/862 [00:01<00:04, 134.21it/s]


[226, 'Vulpia dertonensis', 191, 191]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(50.15999984741211, 354.2989807128906, 412.84039306640625, 367.781005859375)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(28.31999969482422, 366.0589904785156, 412.24554443359375, 379.541015625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(28.559999465942383, 377.8189697265625, 412.75994873046875, 391.3009948730469)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(28.079999923706055, 389.5789794921875, 412.91119384765625, 414.8210144042969)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.91999816894531, 413.1305236816406, 197.89796447753906, 426.5810241699219)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(46.5600013732910

 32%|███▏      | 278/862 [00:01<00:04, 127.00it/s]


['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(36.0, 156.49697875976562, 421.8199462890625, 171.17098999023438)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(35.52000045776367, 168.25698852539062, 421.5022277832031, 206.45098876953125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(35.52000045776367, 203.5369873046875, 421.26470947265625, 218.21099853515625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(36.0, 215.2969970703125, 169.8778076171875, 229.97100830078125)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(54.0, 229.4029998779297, 262.9912109375, 241.4090118408203)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(54.0, 253.1630096435547, 421.5682373046875, 265.16900634765625)]
['HIGHL: ', (0.9803921568627451,

 34%|███▍      | 295/862 [00:01<00:04, 135.43it/s]


['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(39.84000015258789, 584.4569702148438, 422.9459533691406, 599.1309814453125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(38.15999984741211, 596.2169799804688, 423.5005798339844, 610.8909912109375)]
[282, 'Aegilops biuncialis', 224, 224]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(41.040000915527344, 387.6969909667969, 403.73052978515625, 403.031005859375)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(20.399999618530273, 399.45697021484375, 404.2349853515625, 414.1309814453125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(19.200000762939453, 411.21697998046875, 404.214111328125, 437.6510009765625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(19.2000

 39%|███▊      | 333/862 [00:02<00:03, 151.35it/s]


['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(25.920000076293945, 324.4969787597656, 204.2965087890625, 339.1709899902344)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(47.52000045776367, 341.7769775390625, 411.8638916015625, 357.1109924316406)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(25.920000076293945, 353.2969970703125, 244.13282775878906, 367.97100830078125)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(47.52000045776367, 370.5769958496094, 407.3299560546875, 385.9110107421875)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(47.52000045776367, 387.61700439453125, 412.1451721191406, 402.9510192871094)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(25.920000076293945, 399.37701416015625, 252.29339599609375, 414.051025390625)]
['

 44%|████▍     | 382/862 [00:02<00:02, 191.34it/s]


['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(40.560001373291016, 489.9630126953125, 270.6667785644531, 501.968994140625)]
['HIGHL: ', (0.4980392156862745, 1.0, 0.8313725490196079), 'Aire', Rect(58.560001373291016, 501.7230224609375, 167.22914123535156, 513.72900390625)]
[355, 'Lemna minor', 272, 272]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(38.15999984741211, 252.61700439453125, 401.3000793457031, 267.9510192871094)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(17.280000686645508, 265.3370056152344, 298.3743896484375, 280.0110168457031)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(34.560001373291016, 279.1100158691406, 132.2229766845703, 292.45001220703125)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(35.040000915527344, 291.8299865722656, 272.6253662109375, 305.16998291015625)

 49%|████▉     | 421/862 [00:02<00:02, 163.70it/s]


['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(39.84000015258789, 411.5099792480469, 405.3753662109375, 424.8499755859375)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(22.559999465942383, 421.3500061035156, 116.38229370117188, 434.69000244140625)]
['HIGHL: ', (0.4980392156862745, 1.0, 0.8313725490196079), 'Aire', Rect(39.599998474121094, 432.8699951171875, 227.50596618652344, 446.2099914550781)]
[393, 'Eremurus inderiensis', 294, 294]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(22.799999237060547, 471.177001953125, 406.1449279785156, 485.85101318359375)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(23.040000915527344, 482.6969909667969, 406.1462097167969, 497.3710021972656)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(22.799999237060547, 494.21697998046875, 406.37255859375, 508.89

 51%|█████     | 441/862 [00:02<00:02, 172.82it/s]


['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(36.47999954223633, 175.1263885498047, 401.7102355957031, 186.6932830810547)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(19.920000076293945, 184.7263946533203, 402.0281982421875, 196.2932891845703)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(18.959999084472656, 194.32640075683594, 401.63043212890625, 205.89329528808594)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(19.68000030517578, 203.92640686035156, 346.82818603515625, 215.49330139160156)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(36.720001220703125, 215.92640686035156, 402.0703430175781, 227.49330139160156)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(19.440000534057617, 225.28639221191406, 401.7567443847656, 236.85328674316406)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S

 53%|█████▎    | 459/862 [00:03<00:03, 126.22it/s]


['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(17.760000228881836, 569.2389526367188, 390.42724609375, 580.8447265625)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(36.0, 582.1989135742188, 402.6043701171875, 593.8046875)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(17.520000457763672, 591.5588989257812, 402.7489929199219, 603.1646728515625)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(18.479999542236328, 600.9189453125, 402.8089294433594, 612.5247192382812)]
[447, 'Muscari neglectum', 333, 333]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(58.79999923706055, 87.29700469970703, 420.6848449707031, 102.6310043334961)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.20000076293945, 99.05701446533203, 421.51715087890625, 125.49099731445

 57%|█████▋    | 490/862 [00:03<00:02, 128.53it/s]


['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.439998626708984, 505.73699951171875, 423.662841796875, 520.4110107421875)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.68000030517578, 517.2569580078125, 423.5741882324219, 531.9309692382812)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.68000030517578, 528.7769775390625, 423.5572204589844, 543.4509887695312)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.439998626708984, 540.2969970703125, 423.1840515136719, 554.9710083007812)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.91999816894531, 551.8170166015625, 423.0417175292969, 566.4910278320312)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.439998626708984, 563.3369750976562, 413.57476

 60%|██████    | 519/862 [00:03<00:02, 125.08it/s]


['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(17.280000686645508, 434.4169921875, 403.339599609375, 449.09100341796875)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(17.040000915527344, 445.93701171875, 402.7787170410156, 460.61102294921875)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(16.799999237060547, 457.45697021484375, 402.53375244140625, 472.1309814453125)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(34.79999923706055, 494.75, 401.7801208496094, 508.0899963378906)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(17.520000457763672, 504.3500061035156, 402.34600830078125, 517.6900024414062)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(17.040000915527344, 513.949951171875, 401.9833068847656, 527.2899780273438)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980

 62%|██████▏   | 533/862 [00:03<00:03, 103.28it/s]


['HIGHL: ', (0.4980392156862745, 1.0, 0.8313725490196079), 'Aire', Rect(40.08000183105469, 460.7900085449219, 148.30384826660156, 474.1300048828125)]
[521, 'Iris barnumae', 392, 392]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(43.68000030517578, 477.0169982910156, 407.90386962890625, 492.35101318359375)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(23.760000228881836, 489.5899963378906, 408.45220947265625, 502.92999267578125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(21.84000015258789, 502.1300048828125, 408.2025451660156, 514.3400268554688)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(22.799999237060547, 513.6500244140625, 408.4039001464844, 537.3800048828125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(21.84000015258789, 536.690002441

 63%|██████▎   | 545/862 [00:03<00:03, 104.61it/s]


['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(15.600000381469727, 295.3500061035156, 274.78118896484375, 308.69000244140625)]
['HIGHL: ', (0.4980392156862745, 1.0, 0.8313725490196079), 'Aire', Rect(33.36000061035156, 308.07000732421875, 293.9825134277344, 321.4100036621094)]
[535, 'Limodorum', 400, 400]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(36.959999084472656, 415.73699951171875, 400.22021484375, 431.0710144042969)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(17.280000686645508, 427.49700927734375, 400.7015380859375, 442.1710205078125)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(16.31999969482422, 439.2569885253906, 400.6231994628906, 477.45098876953125)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(16.559999465942383, 474.5369873046875, 401.09088134765625, 489.21099853515625)]
['HI

 66%|██████▌   | 568/862 [00:04<00:03, 96.12it/s] 


['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(42.0, 197.33700561523438, 426.1795349121094, 235.05099487304688)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(42.47999954223633, 231.89700317382812, 426.3478088378906, 246.57101440429688)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(43.20000076293945, 243.4169921875, 425.82733154296875, 258.7510070800781)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(42.2400016784668, 254.93698120117188, 426.0095520019531, 269.6109924316406)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(42.2400016784668, 266.4570007324219, 426.3818054199219, 281.1310119628906)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(42.47999954223633, 277.97698974609375, 237.16534423828125, 292

 70%|███████   | 606/862 [00:04<00:01, 136.73it/s]


[577, 'Salix libani', 427, 427]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(50.15999984741211, 526.5770263671875, 415.01312255859375, 541.9110107421875)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(29.040000915527344, 538.0969848632812, 414.62237548828125, 552.77099609375)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(27.84000015258789, 549.6170043945312, 414.49127197265625, 564.291015625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(28.31999969482422, 561.136962890625, 414.697265625, 575.8109741210938)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(27.84000015258789, 572.656982421875, 415.0360412597656, 587.3309936523438)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(28.31999969482422, 584.177

 73%|███████▎  | 632/862 [00:04<00:03, 66.55it/s] 


['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(32.400001525878906, 353.4700012207031, 142.54244995117188, 366.80999755859375)]
['HIGHL: ', (0.4980392156862745, 1.0, 0.8313725490196079), 'Aire', Rect(49.68000030517578, 366.19000244140625, 416.4168701171875, 379.5299987792969)]
['HIGHL: ', (0.4980392156862745, 1.0, 0.8313725490196079), 'Aire', Rect(31.920000076293945, 376.0299987792969, 194.1441192626953, 389.3699951171875)]
[620, 'Thesium bergeri', 457, 457]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(53.279998779296875, 390.09698486328125, 416.95703125, 405.4309997558594)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(31.200000762939453, 401.61700439453125, 417.2634582519531, 416.291015625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(31.440000534057617, 413.1369934082031, 416.9429626464844, 427.8110046

 74%|███████▍  | 642/862 [00:05<00:03, 70.52it/s]

['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(24.719999313354492, 67.45697784423828, 408.8589782714844, 82.1309814453125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(24.959999084472656, 78.97699737548828, 408.6236267089844, 93.6510009765625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(24.719999313354492, 90.49701690673828, 408.6202697753906, 105.1710205078125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(24.719999313354492, 102.01697540283203, 408.6845703125, 116.69097900390625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(24.479999542236328, 113.53699493408203, 408.2931213378906, 128.21099853515625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(24.239999771118164, 125.05701446533203, 408.066

 78%|███████▊  | 669/862 [00:05<00:02, 93.14it/s]


['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(68.12745666503906, 365.177001953125, 406.3906555175781, 379.85101318359375)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(22.079999923706055, 376.6969909667969, 262.60540771484375, 391.3710021972656)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(274.08050537109375, 376.6969909667969, 406.88507080078125, 391.3710021972656)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(22.079999923706055, 388.21697998046875, 406.6366271972656, 402.8909912109375)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(21.600000381469727, 399.73699951171875, 406.4649658203125, 414.4110107421875)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(21.84000015258789, 411.2569885253906, 406.4609375, 425.9309997558594)]
['HIGHL

 81%|████████▏ | 701/862 [00:05<00:01, 117.71it/s]


[682, 'Kochia scoparia', 495, 495]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(61.20000076293945, 448.656982421875, 424.61627197265625, 463.9909973144531)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(39.36000061035156, 460.4169921875, 424.616943359375, 475.09100341796875)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(39.84000015258789, 472.177001953125, 424.4610595703125, 486.85101318359375)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(39.84000015258789, 483.93701171875, 424.08905029296875, 498.61102294921875)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(39.84000015258789, 495.6969909667969, 424.38873291015625, 510.3710021972656)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(39.84000015258789

 84%|████████▍ | 723/862 [00:05<00:00, 143.18it/s]


['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.68000030517578, 294.2969970703125, 422.6041259765625, 308.97100830078125)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.68000030517578, 305.8169860839844, 422.7027587890625, 320.4909973144531)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.68000030517578, 317.3370056152344, 422.66033935546875, 332.0110168457031)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.68000030517578, 328.85699462890625, 423.0186767578125, 343.531005859375)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(37.68000030517578, 340.3769836425781, 422.69708251953125, 378.09100341796875)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(55.439998626708984, 379.32061767578125, 220.54756164

 87%|████████▋ | 753/862 [00:05<00:00, 121.07it/s]


['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(54.47999954223633, 189.75, 421.4464111328125, 203.08999633789062)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(36.47999954223633, 199.35000610351562, 125.5027847290039, 212.69000244140625)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(54.47999954223633, 211.11001586914062, 421.48980712890625, 224.45001220703125)]
['HIGHL: ', (1.0, 0.7137254901960784, 0.7568627450980392), 'L.', Rect(36.47999954223633, 220.47000122070312, 96.2225341796875, 233.80999755859375)]
['HIGHL: ', (0.4980392156862745, 1.0, 0.8313725490196079), 'Aire', Rect(54.0, 232.23001098632812, 334.3077087402344, 245.57000732421875)]
[738, 'Phytolacca', 523, 523]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(57.84000015258789, 248.21697998046875, 422.10467529296875, 263.5509948730469)]
['HIGHL: ', (0.9803921568627451, 0.921568627

 92%|█████████▏| 790/862 [00:06<00:00, 148.39it/s]


['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(82.80000305175781, 550.32470703125, 431.3382263183594, 563.798095703125)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(82.31999969482422, 560.6447143554688, 203.4068145751953, 574.1181030273438)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(271.20001220703125, 570.9646606445312, 431.02545166015625, 584.4380493164062)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(82.55999755859375, 582.7246704101562, 431.158935546875, 596.1980590820312)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(82.08000183105469, 592.32470703125, 298.5622253417969, 605.798095703125)]
['HIGHL: ', (0.5294117647058824, 0.807843137254902, 0.9803921568627451), 'S.', Rect(372.0, 603.0830078125, 430.95361328125, 615.0889892578125)]
[767, 'Spergularia diandra', 536, 5

 96%|█████████▌| 829/862 [00:06<00:00, 172.64it/s]


['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(14.399999618530273, 484.3630065917969, 399.1833190917969, 496.3689880371094)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(14.399999618530273, 493.7230224609375, 399.03985595703125, 505.72900390625)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(14.399999618530273, 503.0830078125, 399.1338195800781, 515.0889892578125)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(14.15999984741211, 512.4429931640625, 48.59714889526367, 524.448974609375)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(32.400001525878906, 526.60302734375, 399.1040344238281, 538.6090087890625)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(14.15999984741211, 536.2030029296875, 399.1568298339844, 548.208984375)]
['HIGHL: ', (0.9

 98%|█████████▊| 847/862 [00:06<00:00, 166.41it/s]


['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(16.31999969482422, 260.9369812011719, 400.6160888671875, 275.6109924316406)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(15.84000015258789, 272.4570007324219, 401.01507568359375, 287.1310119628906)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(16.079999923706055, 283.97698974609375, 400.8644104003906, 298.6510009765625)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(15.84000015258789, 295.4969787597656, 400.8550720214844, 310.1709899902344)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(16.559999465942383, 307.0169982910156, 400.9403991699219, 321.6910095214844)]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(16.079999923706055, 318.5369873046875, 249.892

100%|██████████| 862/862 [00:06<00:00, 129.32it/s]


['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(39.84000015258789, 560.4463500976562, 424.0772399902344, 572.0132446289062)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(39.599998474121094, 569.806396484375, 431.3534240722656, 581.373291015625)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(39.84000015258789, 579.1663818359375, 424.0456237792969, 590.7332763671875)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(38.400001525878906, 588.5263671875, 424.1494140625, 600.09326171875)]
['HIGHL: ', (0.9803921568627451, 0.9215686274509803, 0.8431372549019608), 'Extra', Rect(39.599998474121094, 597.8864135742188, 424.3891296386719, 609.4533081054688)]
[858, 'Dianthus judaicus', 603, 603]
['HIGHL: ', (0.9607843137254902, 0.9607843137254902, 0.9607843137254902), 'Description', Rect(55.91999816894531, 42.05701446533203, 418.85345458




In [53]:
import pandas as pd

# Turn the per-entry records collected in `results` into a table, export it,
# and show it as the cell output.
#   1. one row per value of `results` (presumably one dict per extracted
#      entry -- verify against the cell that fills `results`);
#   2. missing cells are replaced by empty strings;
#   3. every column whose name matches the regex '_rects' is removed;
#   4. the remaining columns are written to "book-1.csv" without the index.
df = (
    pd.DataFrame(list(results.values()))
    .fillna('')
    .pipe(lambda frame: frame.drop(columns=frame.filter(regex='_rects').columns))
)
df.to_csv("book-1.csv", index=False)
df

Unnamed: 0,name,pdf_page,book_page,Extra,Description,L.,Aire,S.
0,r,34,34,r INTRODUCTION XXXVII Occupant des aires très ...,,,,
1,J,59,59,J Lettre alternant dans la transcription de mo...,,,,
2,JL,77,77,"Nab, N Nahal Nik Np Ol P Pb Pr Reese Russ Sam ...",,,,
3,Lycopodium cernuum,78,78,Fructification de novembre à mars. Sur grès tr...,Lycopodium cernuum L. var. capillaceum Willd (...,"L. Mi. Entre Nahr es Safa et 'Aïn Qa'a, 1942 e...",Aire géogr. —- Régions tropicales et subtropic...,
4,Selaginella denticulata,79,79,Végétation active de novembre à avril. Fructif...,Selaginella denticulata (L.) Link — Lycopodium...,"L. Ct. et ML, Ce. Saïda (Bl), Beyrouth et envi...",Aire géogr. —• Tour de la Méditerranée. Madère...,"S. Non signalée. Présence presque certaine, Ct..."
...,...,...,...,...,...,...,...,...
857,Dianthus strictus,599,599,Floraison: mai-décembre. CC. tous terrains. Le...,Dianthus strictus Banks et Sol. (non Sibth. et...,,,
858,Dianthus judaicus,603,603,Var. auraniticus (Post.) n. comb. — Calice 3 c...,Dianthus judaicus Boiss. — D. pattens Sibth. e...,L. Sy. Baalbeck (Wall). St. Qamou'at Hermel (P...,"Aire géogr. — Turquie sud, Syrie, Liban, Pales...","S. A.L. Ouadi-el-Qarn (Sam, Mt, Pb), Zebdani, ..."
859,Dianthus pachypetalus,603,603,,Dianthus pachypetalus Stapf— D. floribundus Bo...,,,
860,Dianthus crinitus,604,604,Floraison: mai-juin.,"Dianthus crinitus Smith (PI. CLXXXV, n. 4). — ...",,"Aire géogr. — Turquie, Géorgie, Iran, Bélouchi...",S. St. Zélaf (Pb).
