In [None]:
import os
from tqdm import tqdm
from tools.util import read_metadata

# Root of the local Google Fonts checkout; every family path below is
# relative to this directory.
data_dir = '/shared/haofeng/fonts/google-fonts'
# FIXME: now only keep serif and sans
types_keep = ['SERIF', 'SANS_SERIF']

print('reading font family paths...')
# Collect '<type>/<family>' relative paths, skipping hidden entries
# (names starting with '.').
family_paths = []
for font_type in types_keep:
    for entry in os.listdir(os.path.join(data_dir, font_type)):
        if entry.startswith('.'):
            continue
        family_paths.append(os.path.join(font_type, entry))

print('reading font family metadata...')
# Both tables map family path -> {weight: font file name without extension}.
normal_fonts, italic_fonts = {}, {}
style_tables = (('normal', normal_fonts), ('italic', italic_fonts))
for family in tqdm(family_paths):
    # read_metadata is a project helper; the object it returns is only used
    # here through `.fonts`, whose items expose `.style`, `.weight` and
    # `.filename`.
    meta = read_metadata(os.path.join(data_dir, family, 'METADATA.pb'))
    for font in meta.fonts:
        for style, table in style_tables:
            if font.style != style:
                continue
            weights = table.setdefault(family, {})
            # A later font with the same style and weight overwrites the
            # earlier entry for that family.
            weights[font.weight] = os.path.splitext(font.filename)[0]

# NOTE(review): the family count is taken from normal_fonts only, so a family
# that ships italic fonts but no normal ones is not counted -- confirm that
# this is intended.
n_families = len(normal_fonts)
n_normal = sum(len(weights) for weights in normal_fonts.values())
n_italic = sum(len(weights) for weights in italic_fonts.values())

print('Summary:\n')
print('# Fonts:')
print('{} font families, {} normal fonts, {} italic fonts'.format(n_families, n_normal, n_italic))



In [None]:

def _count_weights(fonts):
    """Count how many families provide each weight.

    Args:
        fonts: Mapping of family -> {weight: font name}, in the shape of
            ``normal_fonts`` / ``italic_fonts``.

    Returns:
        dict mapping each weight to the number of families that have an
        entry for it.
    """
    counts = {}
    for weights in fonts.values():
        for w in weights:
            counts[w] = counts.get(w, 0) + 1
    return counts


normal_weight_dict = _count_weights(normal_fonts)
italic_weight_dict = _count_weights(italic_fonts)

print('\nWeight:')
print(' - Normal')
# Each entry already ends with '\n'; sep='' stops print() from inserting its
# default space between entries, which would indent every line after the
# first by one extra column.
print(*['  {}: {}\n'.format(k, v) for k, v in sorted(normal_weight_dict.items())], sep='')

print(' - Italic')
print(*['  {}: {}\n'.format(k, v) for k, v in sorted(italic_weight_dict.items())], sep='')



In [None]:


# Glyph names that each font is checked for: the ten digits (spelled out),
# the 26 lowercase letters, and the 26 uppercase letters.
# Uppercase names carry a '$' prefix.
# NOTE(review): presumably the prefix keeps 'A' and 'a' apart in file names
# on case-insensitive filesystems -- confirm against the glyph exporter.

numbers = 'zero one two three four five six seven eight nine'.split()
lowers = list('abcdefghijklmnopqrstuvwxyz')
uppers = ['$' + letter.upper() for letter in lowers]

# Order matters to downstream reports: digits, then lowercase, then uppercase.
alphanumerics = [*numbers, *lowers, *uppers]



In [None]:

from IPython.display import SVG, HTML, display
import svgutils as sg

def show_svg(path):
    """Render the SVG file at ``path`` inline in the notebook output.

    Args:
        path: Filesystem path of the SVG file to show.

    The file is passed to ``IPython.display.SVG`` and displayed unchanged;
    no scaling or composition is applied.
    """
    # Only the display call produces output. Building svgutils figures here
    # would re-parse the file without affecting what is shown, so none are
    # created.
    display(SVG(filename=path))

def display_lowers(font_path):
    """Show every lowercase glyph SVG that exists for one font.

    Args:
        font_path: Directory holding the font's glyph SVGs. Its last
            '/'-separated component is used as the font name in file names.

    For each entry of the module-level ``lowers`` list, the file
    ``<entry>_<font name>.svg`` inside ``font_path`` is passed to
    ``show_svg`` if it exists; missing glyphs are skipped silently.
    """
    font_name = font_path.rsplit('/', 1)[-1]
    candidates = (
        os.path.join(font_path, '{}_{}.svg'.format(letter, font_name))
        for letter in lowers
    )
    for svg_file in candidates:
        if os.path.exists(svg_file):
            show_svg(svg_file)


def display_uppers(font_path):
    """Show every uppercase glyph SVG that exists for one font.

    Args:
        font_path: Directory holding the font's glyph SVGs. Its last
            '/'-separated component is used as the font name in file names.

    For each entry of the module-level ``uppers`` list, the file
    ``<entry>_<font name>.svg`` inside ``font_path`` is passed to
    ``show_svg`` if it exists; missing glyphs are skipped silently.
    """
    name = font_path.split('/')[-1]
    for a in uppers:
        path = os.path.join(font_path, '{}_{}.svg'.format(a, name))
        # Walk the whole list, as display_lowers and display_numbers do,
        # instead of stopping after the first glyph that is found.
        if os.path.exists(path):
            show_svg(path)
    
    
def display_numbers(font_path):
    """Show every digit glyph SVG that exists for one font.

    Args:
        font_path: Directory holding the font's glyph SVGs. Its last
            '/'-separated component is used as the font name in file names.

    For each entry of the module-level ``numbers`` list, the file
    ``<entry>_<font name>.svg`` inside ``font_path`` is passed to
    ``show_svg`` if it exists; missing glyphs are skipped silently.
    """
    *_, font_name = font_path.split('/')
    for digit_name in numbers:
        svg_file = os.path.join(font_path, '{}_{}.svg'.format(digit_name, font_name))
        if not os.path.exists(svg_file):
            continue
        show_svg(svg_file)
    
    
def display_all(font_name):
    """Show the lowercase, uppercase and digit glyphs of one font, in that order.

    Args:
        font_name: Passed unchanged to ``display_lowers``, ``display_uppers``
            and ``display_numbers``, which treat it as the font's glyph
            directory path.
    """
    display_lowers(font_name)
    display_uppers(font_name)
    display_numbers(font_name)
    
    
def display_character(c, font_paths):
    """Show one glyph across several fonts.

    Args:
        c: Glyph name as used in the SVG file names (e.g. an entry of
            ``alphanumerics``).
        font_paths: Iterable of font glyph directories. The last
            '/'-separated component of each is used as the font name.

    For every directory, ``<c>_<font name>.svg`` is passed to ``show_svg``
    when the file exists; fonts lacking the glyph are skipped silently.
    """
    for font_dir in font_paths:
        font_name = font_dir.split('/')[-1]
        svg_file = os.path.join(font_dir, '{}_{}.svg'.format(c, font_name))
        if not os.path.exists(svg_file):
            continue
        show_svg(svg_file)

            

In [None]:
         
# check missing characters

def _find_missing_glyphs(fonts):
    """List, per glyph name, the fonts whose directory lacks that glyph's SVG.

    Args:
        fonts: Mapping of family path -> {weight: font name}, in the shape of
            ``normal_fonts`` / ``italic_fonts``.

    Returns:
        dict mapping every entry of ``alphanumerics`` to the list of font
        names for which ``<glyph>_<font name>.svg`` is absent from
        ``<data_dir>/<family>/<font name>/``.

    Raises:
        OSError: propagated from ``os.listdir`` if a font directory cannot
            be listed.
    """
    missing = {a: [] for a in alphanumerics}
    for family in tqdm(fonts.keys()):
        family_dir = os.path.join(data_dir, family)
        for name in fonts[family].values():
            # A set makes each of the per-glyph membership tests O(1)
            # instead of a scan over the whole directory listing.
            svgs = set(os.listdir(os.path.join(family_dir, name)))
            for a in alphanumerics:
                if '{}_{}.svg'.format(a, name) not in svgs:
                    missing[a].append(name)
    return missing


print('checking normal fonts...')
missing_dict = _find_missing_glyphs(normal_fonts)

print('checking italic fonts...')
italic_missing_dict = _find_missing_glyphs(italic_fonts)

# Distinct font names that lack at least one glyph.
missing_normal_fonts = {name for names in missing_dict.values() for name in names}
missing_italic_fonts = {name for names in italic_missing_dict.values() for name in names}
len(missing_normal_fonts), len(missing_italic_fonts)
