In [86]:
import os
from tqdm import tqdm
from tools.util import read_metadata

# Directory holding one sub-folder per font category; every category folder
# contains one directory per font family.
data_dir = '/shared/haofeng/fonts/google-fonts'
# FIXME: for now only the serif and sans-serif categories are kept
types_keep = ['SERIF', 'SANS_SERIF']

print('reading font family paths...')
# Family paths are stored relative to data_dir, e.g. '<category>/<family>'.
family_paths = []
for font_type in types_keep:
    for entry in os.listdir(os.path.join(data_dir, font_type)):
        # hidden entries (names starting with '.') are not font families
        if entry.startswith('.'):
            continue
        family_paths.append(os.path.join(font_type, entry))

print('reading font family metadata...')
# Both dicts map family path -> {weight: font file name without extension}.
normal_fonts = {}
italic_fonts = {}
for family in tqdm(family_paths):
    meta = read_metadata(os.path.join(data_dir, family, 'METADATA.pb'))
    for font in meta.fonts:
        if font.style == 'normal':
            by_weight = normal_fonts.setdefault(family, {})
        elif font.style == 'italic':
            by_weight = italic_fonts.setdefault(family, {})
        else:
            # any other style is ignored
            continue
        by_weight[font.weight] = os.path.splitext(font.filename)[0]


  0%|          | 0/503 [00:00<?, ?it/s]

reading font family paths...
reading font family metadata...


100%|██████████| 503/503 [00:36<00:00, 13.66it/s]


In [89]:

# Print how many families / fonts were collected and how the fonts are
# distributed over weights, separately for normal and italic styles.

def _count_weights(fonts):
    """Return ``{weight: count}`` for a ``family -> {weight: name}`` mapping.

    A family holds at most one font per weight (weights are dict keys), so the
    count for a weight equals the number of families providing it.
    """
    counts = {}
    for weights in fonts.values():
        for w in weights:
            counts[w] = counts.get(w, 0) + 1
    return counts


print('Summary:\n')
print('# Fonts:')
# The family count is taken from normal_fonts, i.e. families that have at
# least one normal-style font.
print('{} font families, {} normal fonts, {} italic fonts'.format(
    len(normal_fonts),
    sum(len(weights) for weights in normal_fonts.values()),
    sum(len(weights) for weights in italic_fonts.values())))

normal_weight_dict = _count_weights(normal_fonts)
italic_weight_dict = _count_weights(italic_fonts)

print('\nWeight:')
print(' - Normal')
# sep='' is required: every item already ends with '\n', and print()'s default
# separator (' ') would otherwise indent each line after the first by one
# extra space.
print(*['  {}: {}\n'.format(k, v) for k, v in sorted(normal_weight_dict.items())], sep='')

print(' - Italic')
print(*['  {}: {}\n'.format(k, v) for k, v in sorted(italic_weight_dict.items())], sep='')



Summary:

# Fonts:
503 font families, 1433 normal fonts, 579 italic fonts

Weight:
 - Normal
  100: 53
   200: 81
   300: 141
   400: 496
   500: 127
   600: 130
   700: 259
   800: 76
   900: 70

 - Italic
  100: 31
   200: 39
   300: 58
   400: 151
   500: 58
   600: 56
   700: 115
   800: 35
   900: 36



In [90]:
# Glyph names as they appear in the per-glyph SVG file names: digits are
# spelled out, lower-case letters are used as-is and upper-case letters carry
# a '$' prefix.

numbers = 'zero one two three four five six seven eight nine'.split()
lowers = list('abcdefghijklmnopqrstuvwxyz')
uppers = ['$' + letter.upper() for letter in lowers]

# Full glyph set, in the order digits, lower case, upper case.
alphanumerics = [*numbers, *lowers, *uppers]



In [79]:
from IPython.display import SVG, HTML, display
import svgutils as sg

def show_svg(path):
    """Render the SVG file at ``path`` inline in the notebook output.

    Only ``IPython.display`` is used. No ``svgutils`` figure is composed here:
    nothing of the sort ever reached the displayed output, and building such
    objects would only make the function fail when ``svgutils`` is missing.

    Args:
        path: File system path of the SVG file to display.
    """
    display(SVG(filename=path))

def display_lowers(font_path):
    """Show every lower-case glyph SVG that exists in ``font_path``.

    Files are looked up as ``<letter>_<last component of font_path>.svg``
    inside ``font_path``; letters without a file are skipped silently.
    """
    font_name = font_path.rsplit('/', 1)[-1]
    for letter in lowers:
        svg_path = os.path.join(font_path, '{}_{}.svg'.format(letter, font_name))
        if not os.path.exists(svg_path):
            continue
        show_svg(svg_path)


def display_uppers(font_path):
    """Show the first upper-case glyph SVG found in ``font_path`` and print its path.

    Files are looked up as ``<glyph>_<last component of font_path>.svg`` for
    each name in ``uppers``, in order. Nothing happens if none exists.
    """
    font_name = font_path.rsplit('/', 1)[-1]
    candidates = (os.path.join(font_path, '{}_{}.svg'.format(glyph, font_name))
                  for glyph in uppers)
    # NOTE(review): only the first existing glyph is shown, unlike
    # display_lowers / display_numbers which show all of them -- confirm this
    # is intended and not a debugging leftover.
    first_hit = next((p for p in candidates if os.path.exists(p)), None)
    if first_hit is not None:
        show_svg(first_hit)
        print(first_hit)
    
    
def display_numbers(font_path):
    """Show every digit glyph SVG that exists in ``font_path``.

    Files are looked up as ``<digit name>_<last component of font_path>.svg``
    for each name in ``numbers``; digits without a file are skipped silently.
    """
    font_name = font_path.rsplit('/', 1)[-1]
    for digit in numbers:
        svg_path = os.path.join(font_path, '{}_{}.svg'.format(digit, font_name))
        if not os.path.exists(svg_path):
            continue
        show_svg(svg_path)
    
    
def display_all(font_name):
    """Show the lower-case, upper-case and digit glyphs of one font.

    Args:
        font_name: Passed unchanged to ``display_lowers``, ``display_uppers``
            and ``display_numbers``, which treat it as the path of the font's
            glyph directory.
    """
    # The helpers are named display_lowers / display_uppers; the names
    # display_lower_case / display_upper_case do not exist in this notebook.
    display_lowers(font_name)
    display_uppers(font_name)
    display_numbers(font_name)
    
    
def display_character(c, font_paths):
    """Show glyph ``c`` for every font directory in ``font_paths`` that has it.

    For each directory the file ``<c>_<last component of the directory>.svg``
    is displayed if it exists; directories without that file are skipped.
    """
    for font_dir in font_paths:
        font_name = font_dir.rsplit('/', 1)[-1]
        svg_path = os.path.join(font_dir, '{}_{}.svg'.format(c, font_name))
        if not os.path.exists(svg_path):
            continue
        show_svg(svg_path)

ModuleNotFoundError: No module named 'svgutils'

In [None]:
# Preview the weight-400 normal font of the first family in normal_fonts.
first_family = list(normal_fonts)[0]
first_family_regular = normal_fonts[first_family][400]
display_uppers(os.path.join(data_dir, first_family, first_family_regular))

In [80]:
# check missing characters

def find_missing_glyphs(fonts):
    """Return ``{glyph: [font names lacking that glyph's SVG]}``.

    Args:
        fonts: Mapping ``family -> {weight: font name}``. Each font's glyphs
            are expected in ``<data_dir>/<family>/<font name>/`` as files
            named ``<glyph>_<font name>.svg``.

    Returns:
        A dict with one key per entry of ``alphanumerics``; each value lists
        the font names whose directory has no SVG for that glyph.
    """
    missing = {a: [] for a in alphanumerics}
    for family in tqdm(fonts.keys()):
        family_dir = os.path.join(data_dir, family)
        for name in fonts[family].values():
            # A set makes each of the per-glyph membership tests O(1) instead
            # of a linear scan over the directory listing.
            svgs = set(os.listdir(os.path.join(family_dir, name)))
            for a in alphanumerics:
                if '{}_{}.svg'.format(a, name) not in svgs:
                    missing[a].append(name)
    return missing


print('checking normal fonts...')
missing_dict = find_missing_glyphs(normal_fonts)

print('checking italic fonts...')
italic_missing_dict = find_missing_glyphs(italic_fonts)
            

  0%|          | 0/503 [00:00<?, ?it/s]

checking normal fonts...


100%|██████████| 503/503 [05:33<00:00,  1.51it/s]
  0%|          | 0/153 [00:00<?, ?it/s]

checking italic fonts...


100%|██████████| 153/153 [01:28<00:00,  1.96it/s]


In [81]:
# Distinct font names that lack at least one alphanumeric glyph.
missing_normal_fonts = {name for names in missing_dict.values() for name in names}
missing_italic_fonts = {name for names in italic_missing_dict.values() for name in names}

In [82]:
# (number of incomplete normal fonts, number of incomplete italic fonts)
tuple(len(incomplete) for incomplete in (missing_normal_fonts, missing_italic_fonts))

(129, 46)

In [94]:
def _without_fonts(fonts, excluded):
    """Copy ``fonts`` (``family -> {weight: font name}``) minus excluded names.

    Fonts whose name is in ``excluded`` are dropped; a family left without
    any font does not appear in the result at all.
    """
    kept = {}
    for family, weights in fonts.items():
        surviving = {w: fn for w, fn in weights.items() if fn not in excluded}
        if surviving:
            kept[family] = surviving
    return kept


# Keep only the fonts for which no alphanumeric glyph is missing.
filtered_normal_fonts = _without_fonts(normal_fonts, missing_normal_fonts)
filtered_italic_fonts = _without_fonts(italic_fonts, missing_italic_fonts)

In [100]:
import shutil

def makedirs(dirs):
    """Create every directory in ``dirs``, including missing parents.

    Directories that already exist are left untouched.

    Args:
        dirs: Iterable of directory paths.

    Raises:
        FileExistsError: If a path exists but is not a directory.
    """
    for dr in dirs:
        # exist_ok avoids the check-then-create race of testing
        # os.path.exists() before calling os.makedirs().
        os.makedirs(dr, exist_ok=True)

# copy to new folder
new_data_dir = '/shared/haofeng/fonts/google-fonts-data'
normal_dir = os.path.join(new_data_dir, 'normal')
italic_dir = os.path.join(new_data_dir, 'italic')


def copy_glyphs(fonts, style_dir):
    """Copy the alphanumeric glyph SVGs of ``fonts`` into ``style_dir``.

    Args:
        fonts: Mapping ``family -> {weight: font name}``.
        style_dir: Destination root for this font style.

    Each glyph is read from
    ``<data_dir>/<family>/<font name>/<glyph>_<font name>.svg`` and written to
    ``<style_dir>/<family>/<weight>/svg/<glyph>.svg``.
    """
    for family, weights in tqdm(fonts.items()):
        for weight, font_name in weights.items():
            src_dir = os.path.join(data_dir, family, font_name)
            dst_dir = os.path.join(style_dir, family, str(weight), 'svg')
            # The destination is the same for all glyphs of a font, so it is
            # created once per font rather than once per glyph.
            makedirs([dst_dir])
            for a in alphanumerics:
                shutil.copyfile(
                    os.path.join(src_dir, '{}_{}.svg'.format(a, font_name)),
                    os.path.join(dst_dir, '{}.svg'.format(a)))


print('copying normal fonts...')
copy_glyphs(filtered_normal_fonts, normal_dir)

# Announce the second pass as well, so its progress bar is labelled like the
# first one.
print('copying italic fonts...')
copy_glyphs(filtered_italic_fonts, italic_dir)


  0%|          | 0/432 [00:00<?, ?it/s][A

copying normal fonts...



  0%|          | 1/432 [00:10<1:18:55, 10.99s/it][A
  0%|          | 2/432 [00:12<58:57,  8.23s/it]  [A
  1%|          | 3/432 [00:22<1:02:05,  8.68s/it][A
  1%|          | 4/432 [01:08<2:21:16, 19.81s/it][A
  1%|          | 5/432 [01:12<1:48:26, 15.24s/it][A
  1%|▏         | 6/432 [03:02<5:08:19, 43.43s/it][A
  2%|▏         | 7/432 [04:31<6:45:04, 57.19s/it][A
  2%|▏         | 8/432 [06:01<7:54:54, 67.20s/it][A
  2%|▏         | 9/432 [06:03<5:35:56, 47.65s/it][A
  2%|▏         | 10/432 [06:05<3:57:01, 33.70s/it][A
  3%|▎         | 11/432 [06:07<2:49:35, 24.17s/it][A
  3%|▎         | 12/432 [06:07<2:00:12, 17.17s/it][A
  3%|▎         | 13/432 [06:08<1:26:15, 12.35s/it][A
  3%|▎         | 14/432 [06:10<1:02:51,  9.02s/it][A
  3%|▎         | 15/432 [06:12<47:41,  6.86s/it]  [A
  4%|▎         | 16/432 [06:13<36:08,  5.21s/it][A
  4%|▍         | 17/432 [06:15<30:27,  4.40s/it][A
  4%|▍         | 18/432 [06:17<24:28,  3.55s/it][A
  4%|▍         | 19/432 [06:18<19:29,  2.8

 36%|███▌      | 156/432 [16:59<2:08:02, 27.83s/it][A
 36%|███▋      | 157/432 [17:10<1:45:07, 22.94s/it][A
 37%|███▋      | 158/432 [17:51<2:09:09, 28.28s/it][A
 37%|███▋      | 159/432 [17:54<1:34:11, 20.70s/it][A
 37%|███▋      | 160/432 [17:58<1:11:00, 15.66s/it][A
 37%|███▋      | 161/432 [18:02<55:21, 12.26s/it]  [A
 38%|███▊      | 162/432 [18:09<46:57, 10.44s/it][A
 38%|███▊      | 163/432 [18:16<42:13,  9.42s/it][A
 38%|███▊      | 164/432 [18:22<38:05,  8.53s/it][A
 38%|███▊      | 165/432 [18:37<46:21, 10.42s/it][A
 38%|███▊      | 166/432 [18:43<40:51,  9.22s/it][A
 39%|███▊      | 167/432 [18:57<47:00, 10.65s/it][A
 39%|███▉      | 168/432 [19:03<40:52,  9.29s/it][A
 39%|███▉      | 169/432 [20:47<2:44:06, 37.44s/it][A
 39%|███▉      | 170/432 [21:04<2:17:11, 31.42s/it][A
 40%|███▉      | 171/432 [21:16<1:51:11, 25.56s/it][A
 40%|███▉      | 172/432 [21:27<1:31:30, 21.12s/it][A
 40%|████      | 173/432 [22:25<2:19:43, 32.37s/it][A
 40%|████      | 174/432

 72%|███████▏  | 309/432 [41:49<08:34,  4.19s/it][A
 72%|███████▏  | 310/432 [41:54<09:01,  4.44s/it][A
 72%|███████▏  | 311/432 [41:56<07:28,  3.71s/it][A
 72%|███████▏  | 312/432 [42:00<07:13,  3.61s/it][A
 72%|███████▏  | 313/432 [42:04<07:28,  3.77s/it][A
 73%|███████▎  | 314/432 [42:07<07:04,  3.60s/it][A
 73%|███████▎  | 315/432 [42:08<05:32,  2.84s/it][A
 73%|███████▎  | 316/432 [42:11<05:40,  2.94s/it][A
 73%|███████▎  | 317/432 [42:19<08:32,  4.46s/it][A
 74%|███████▎  | 318/432 [42:40<17:55,  9.44s/it][A
 74%|███████▍  | 319/432 [42:52<19:19, 10.26s/it][A
 74%|███████▍  | 320/432 [42:55<14:46,  7.91s/it][A
 74%|███████▍  | 321/432 [43:04<15:10,  8.20s/it][A
 75%|███████▍  | 322/432 [43:40<30:12, 16.48s/it][A
 75%|███████▍  | 323/432 [43:47<24:48, 13.66s/it][A
 75%|███████▌  | 324/432 [44:40<46:08, 25.64s/it][A
 75%|███████▌  | 325/432 [44:48<35:57, 20.17s/it][A
 75%|███████▌  | 326/432 [45:09<36:04, 20.42s/it][A
 76%|███████▌  | 327/432 [45:13<27:29, 15.71s/

 23%|██▎       | 31/136 [02:54<28:13, 16.13s/it][A
 24%|██▎       | 32/136 [03:21<33:14, 19.18s/it][A
 24%|██▍       | 33/136 [03:29<27:04, 15.77s/it][A
 25%|██▌       | 34/136 [03:34<21:42, 12.77s/it][A
 26%|██▌       | 35/136 [03:36<15:54,  9.45s/it][A
 26%|██▋       | 36/136 [03:40<13:01,  7.82s/it][A
 27%|██▋       | 37/136 [03:42<09:55,  6.01s/it][A
 28%|██▊       | 38/136 [03:54<12:57,  7.94s/it][A
 29%|██▊       | 39/136 [03:59<11:10,  6.91s/it][A
 29%|██▉       | 40/136 [04:11<13:23,  8.37s/it][A
 30%|███       | 41/136 [04:13<10:21,  6.55s/it][A
 31%|███       | 42/136 [04:16<08:50,  5.64s/it][A
 32%|███▏      | 43/136 [04:19<07:18,  4.71s/it][A
 32%|███▏      | 44/136 [04:22<06:34,  4.29s/it][A
 33%|███▎      | 45/136 [04:26<06:05,  4.02s/it][A
 34%|███▍      | 46/136 [04:30<06:18,  4.21s/it][A
 35%|███▍      | 47/136 [05:26<29:15, 19.73s/it][A
 35%|███▌      | 48/136 [05:30<22:05, 15.06s/it][A
 36%|███▌      | 49/136 [05:39<18:55, 13.05s/it][A
 37%|███▋   