In [23]:
# -*- coding: utf-8 -*-
from sys import argv, exit
from os import path
import csv
from unidecode import unidecode
try:
    from PIL import Image, ImageFont, ImageDraw
except ModuleNotFoundError:
    print("Please install Pillow!\nrun 'pip3 install pillow'")
    exit(1)

# options (these are constants)
csvpath = './litclock_annotated.csv'      # csv file to read quotes from
imgdir = 'images/'                          # save location for images
imgformat = 'jpg'                           # format. jpeg is faster but lossy                 
imgsize = (2560,1600)                       # width/height of image (600,800)
# color_bg = (255,255,255)                              # white. color for the background
color_norm = (125,125,125,255)                            # grey. color for normal text
high_alpha = 0.2
color_high = (0,0,0,int(255*high_alpha))                              # clear. color for highlighted text
color_meta = (0,0,0,255)                              # black. color for the metadata
fntname_norm = 'bookerly.ttf'               # font for normal text
fntname_high = 'bookerlybold.ttf'           # font for highlighted text
fntname_mdata = 'baskervilleboldbt.ttf'     # font for the author/title
fntsize_mdata = 30                          # fontsize for the author/title
rad=50

image_input = './image_input.jpg'
# fnt = create_fnt('bookerly.ttf',40)
rectangle_percentage = 0.5
alpha_pct = 0.85
tint = (255,255,255,int(255*alpha_pct)) # white rounded square

# don't touch
imgnumber = 0
previoustime = ''


def TurnQuoteIntoImage(index:int, time:str, quote:str, timestring:str,
                                               author:str, title:str, base): # base is an image
    global imgnumber, previoustime

    x1, x2 = get_pct_coords(imgsize[0], rectangle_percentage)
    y1, y2 = get_pct_coords(imgsize[1], rectangle_percentage)

    savepath = imgdir
    box_length = imgsize[0]*rectangle_percentage
    box_height = imgsize[1]*rectangle_percentage
    quoteheight = box_height*0.87
    quotelength = box_length*0.95
    quotestart_y = y1+30
    quotestart_x = x1+50
    mdatalength = imgsize[0]*0.75
    mdatastart_y = y2
    mdatastart_x = x2-(((box_length/2)-(quotelength/2)))

    # create the object. mode 'L' restricts to 8bit greyscale
    canvas = Image.new(mode='RGBA', size=(imgsize), color=(0,0,0,0))
    d = ImageDraw.Draw(canvas)

    # d.text((10, 10), "Hello", font=fnt, fill=(255, 255, 255, 128))
    d.rounded_rectangle(((x1,y1),(x2,y2)), fill=tint, radius=rad)

    # draw the title and author name

    font_mdata = create_fnt(fntname_mdata, fntsize_mdata)
    metadata = f'—{title.strip()}, {author.strip()}'
    # wrap lines into a reasonable length and lower the maximum height the
    # quote can occupy according to the number of lines the credits use
    if font_mdata.getlength(metadata) > mdatalength:
        metadata = wrap_lines(metadata, font_mdata, mdatalength - 30)
    for line in metadata.splitlines():
        mdatastart_y -= font_mdata.getbbox("A")[3] + 4
    # quoteheight = mdatastart_y - 105 # normally 35
    mdata_y = mdatastart_y
    for line in metadata.splitlines():
        d.text((mdatastart_x, mdata_y), line, color_meta,
                                                font_mdata, anchor='rm')
        mdata_y += font_mdata.getbbox("A")[3] + 4


    # draw the quote (pretty)
    quote, fntsize = calc_fntsize(quotelength, quoteheight, quote, fntname_high)
    font_norm = create_fnt(fntname_norm, fntsize)
    font_high = create_fnt(fntname_high, fntsize)
    try:
        draw_quote(d, (quotestart_x,quotestart_y), quote,
                                timestring, font_norm, font_high)
    # warn and discard image if timestring is just not there
    except LookupError:
        print(f"WARNING: missing timestring at csv line {index+2}, skipping: {timestring}")
        return

    # increment a number if time is identical to the last one, so
    # images can't be overwritten
    # this assumes lines are actually chronological so inshallah
    if time == previoustime:
        imgnumber += 1
    else:
        imgnumber = 0
        previoustime = time
    time = time.replace(':','')
    savepath += f'quote_{time}_{imgnumber}.{imgformat}'
    savepath = path.normpath(savepath)
    # canvas.save(savepath)
    out = Image.alpha_composite(base, canvas)
    out_jpg = out.convert('RGB')
    #out.show()
    out_jpg.save(savepath)


def draw_quote(drawobj, anchors:tuple, text:str, substr:str,
        font_norm:ImageFont.truetype, font_high:ImageFont.truetype):
    # draws text with substr highlighted. doesn't check if it will fit the
    # image or anything else
    start_x = anchors[0]
    start_y = anchors[1]

    # search for the substring as if text were a single line, and
    # mark its starting and ending position for the upcoming write loop
    flattened = text.replace('\n',' ').replace("<br/>"," ").replace("<br>"," ")
    substr_starts = 0
    try:
        substr_starts = flattened.lower().index(substr.lower())
    except ValueError:
        raise LookupError
    substr_ends = substr_starts + len(substr)
    bookmark = '|'
    lines = text[:substr_starts]
    lines += f'{bookmark}{text[substr_starts:substr_ends]}{bookmark}'
    lines += text[substr_ends:]

    fntstyle_norm = (color_norm, font_norm)
    fntstyle_high = (color_high, font_high)
    current_style = fntstyle_norm
    marks_found = 0
    write = drawobj.text
    textlength = drawobj.textlength
    x = start_x
    y = start_y
    # this would be a LOT simpler if we didn't have to check the edges attached
    # to the substring. it might be easier to implement a char by char loop
    # in the future, using the kerning calculation method in this example:
    # https://pillow.readthedocs.io/en/stable/reference/ImageFont.html#PIL.ImageFont.FreeTypeFont.getlength
    for line in lines.splitlines():
        for word in line.split():
            word += ' '
            # if the entire substr is one contiguous word, split the
            # non-substr bits stuck to it and print the whole thing in 3 parts
            if word.count(bookmark) == 2:
                wordnow = word.split(bookmark)[0]
                write((x,y), wordnow, *fntstyle_norm)
                x += textlength(wordnow, font_norm)
                wordnow = word.split(bookmark)[1]
                write((x,y), wordnow, *fntstyle_high)
                x += textlength(wordnow, font_high)
                wordnow = word.split(bookmark)[2]
                write((x,y), wordnow, *fntstyle_norm)
                x += textlength(wordnow, font_norm)
                word = ''
            # otherwise change the default font, and wait for the next mark
            elif word.count(bookmark) == 1:
                marks_found += 1
                wordnow = word.split(bookmark)[0]
                word = word.split(bookmark)[1]
                write((x,y), wordnow, *current_style)
                x += textlength(wordnow, current_style[1])
                if marks_found == 1:
                    current_style = fntstyle_high
                else: # if marks == 2:
                    current_style = fntstyle_norm
            # this is the bit that actually does most of the writing
            write((x,y), word, *current_style)
            x += textlength(word, current_style[1])
        # the offset calculated by multiline_text (what we're trying to mimic)
        # is based on uppercase letter A plus 4 pixels for whatever fucking
        # reason. see https://github.com/python-pillow/Pillow/discussions/6620
        y += font_norm.getbbox("A")[3] + 4
        x = start_x


def wrap_lines(text:str, font:ImageFont.truetype, line_length:int):
    # wraps lines to maximize the number of words within line_length. note
    # that lines *can* exceed line_length, this is intentional, as text looks
    # better if the font is rescaled afterwards. adapted from Chris Collett
    # https://stackoverflow.com/a/67203353/8225672
        lines = ['']
        for word in text.split():
            line = f'{lines[-1]} {word}'.strip()
            if font.getlength(line) <= line_length:
                lines[-1] = line
            else:
                lines.append(word)
        return '\n'.join(lines)


def calc_fntsize(length:int, height:int, text:str, fntname:str, basesize=110,
                                                              maxsize=800):
    # this will dynamically wrap and scale text with the optimal font size to
    # fill a given textbox, both length and height wise.
    # manually setting basesize to just below the mean of a sample will
    # massively reduce processing time with large batches of text, at the risk
    # of potentially wasting it with strings much larger than the mean

    # these are just for calculating the textbox size, they're discarded
    louvre = Image.new(mode='1', size=(0,0))
    monalisa = ImageDraw.Draw(louvre)

    lines = ''
    fntsize = basesize
    fnt = create_fnt(fntname, fntsize)
    boxheight = 0
    while not boxheight > height and not fntsize > maxsize:
        fntsize += 1
        fnt = fnt.font_variant(size=fntsize)
        lines = wrap_lines(text, fnt, length)
        boxheight = monalisa.multiline_textbbox((0,0), lines, fnt)[3]

    fntsize -= 1
    fnt = fnt.font_variant(size=fntsize)
    lines = wrap_lines(text, fnt, length)
    boxlength = monalisa.multiline_textbbox((0,0), lines, fnt)[2]
    while boxlength > length:
        # note: this is a sanity check. we intentionally don't reformat lines
        # here, as wrap_lines only checks if its output is *longer* than length,
        # which can produce a recursive loop where lines always get wrapped
        # into something longer, leading to overly small and unreadable fonts
        fntsize -= 1
        fnt = fnt.font_variant(size=fntsize)
        boxlength = monalisa.multiline_textbbox((0,0), lines, fnt)[2]
    # recursive call in case original basesize was too low
    boxheight = monalisa.multiline_textbbox((0,0), lines, fnt)[3]
    if boxheight > height:
        return calc_fntsize(length, height, text, fntname, basesize-10)
    return lines, fntsize


def create_fnt(name:str, size:int, layout_engine=ImageFont.Layout.BASIC):
    # Layout.BASIC is orders of magnitude faster than RAQM but will struggle
    # with RTL languages
    # see https://github.com/python-pillow/Pillow/issues/6631
    return ImageFont.truetype(name, size, layout_engine=layout_engine)


def get_pct_coords(d1, pct):
    d2 = (d1*pct)
    start = (d1/2)-(d2/2)
    return (start, start+d2)  # start coord, end coord


def main():
    hardworker = ' /ᐠ - ˕ -マ Ⳋ'
    with open(csvpath, newline='\n', encoding="utf8") as csvfile:
        jobs = len(csvfile.readlines()) - 1
        csvfile.seek(0)
        if len(argv) > 1:
            if argv[1].isdigit() and int(argv[1]) < jobs:
                jobs = int(argv[1])
        quotereader = csv.DictReader(csvfile, delimiter='|', quoting=csv.QUOTE_NONE) # add quote none otherwise you get big issues
        with Image.open(image_input).convert("RGBA") as base_image:
            for i, row in enumerate(quotereader):
                if i >= jobs:
                    print('Breaking',i,jobs)
                    break
                else: # unidecode to get rid of math characters used as italics etc.
                    TurnQuoteIntoImage(i, row['time'],unidecode(row['quote'].replace("<br/>"," ").replace("<br>"," ")),
                    unidecode(row['timestring']), row['author'], row['title'], base_image)
                progressbar = f'{hardworker} working.... {i+1}/{jobs}'
                print(progressbar, end='\r', flush=True)
    print('')

In [24]:
main()

Breaking 3472 3473ng.... 3473/3473


In [22]:
with open(csvpath, newline='\n', encoding="utf8") as csvfile:
    csvfile.seek(0)
    quotereader = csv.DictReader(csvfile, delimiter='|', quoting=csv.QUOTE_NONE)
    with open('test.txt', 'w') as f:       
        for i, row in enumerate(quotereader):
            # print(row['quote'])
            # f.write(unidecode(row['quote'].replace("<br/>"," ").replace("<br>"," ")))
            f.write(row['quote'])
            f.write('\n')