In [None]:
import json
from data_collection import get_fonts_info

# Read the project configuration (JSON) from its location relative to this notebook.
config_path = '../../config/config.json'
with open(config_path) as config_file:
    config = json.load(config_file)

# The Google Fonts API key from the config is handed to get_fonts_info,
# whose result is kept as fonts_df for the cells below.
api_key = config['GOOGLE_FONTS_API_KEY']
fonts_df = get_fonts_info(api_key)

In [None]:
from data_collection import select_fonts, download_fonts
import os

# Root folder for font files and the CSV manifest that records what was downloaded.
fonts_path = "../../data/raw/fonts/"
data_file = "download_data.csv"

# Toggle: set to False to skip selecting/downloading and reuse an existing manifest.
Download=True
if Download:
    # Pick the fonts covering the Hebrew and Arabic subsets, then download them
    # into the "GF" sub-folder of fonts_path.
    gf_folder = os.path.join(fonts_path, "GF")
    fonts_to_download = download_fonts(
        select_fonts(fonts_df, subsets=['hebrew', 'arabic']),
        gf_folder,
    )

    # First write creates the file with a header row; later writes append rows
    # without a header. NOTE: re-running this cell appends the rows again.
    manifest_exists = os.path.exists(data_file)
    fonts_to_download.to_csv(
        data_file,
        mode='a' if manifest_exists else 'w',
        index=False,
        header=not manifest_exists,
    )


In [None]:
from tqdm import tqdm
from data_collection import ttf_to_ufo, var_ttf_to_ufo
import pandas as pd
from data_utils import parse_list

# Read the download manifest; the "subsets" and "file_path" columns are passed
# through parse_list while loading so each cell holds a parsed value rather than
# the raw CSV string.
df = pd.read_csv(data_file, converters={"subsets": parse_list, "file_path": parse_list})

# Column layout of the UFO manifest. Kept explicit so that an empty input still
# produces a frame (and CSV header) with the expected columns.
ufo_columns = ['family', 'subsets', 'category', 'master', 'variants']

# One record per font family. Records are collected in a list and turned into a
# DataFrame once after the loop: DataFrame.append was removed in pandas 2.0 and
# copied the whole frame on every call in earlier versions.
ufo_records = []

# TODO: fix kerning error in cases: df = df[df.family == "Amiri Quran"]
for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Converting to UFO..."):
    master = None  # UFO path of the variable-font file, if the family has one
    file_path = row["file_path"]
    family = row["family"]
    print(family)

    # Every family gets its own folder under <fonts_path>/UFO/.
    family_folder = os.path.join(fonts_path, f"UFO/{family}")
    os.makedirs(family_folder, exist_ok=True)

    variants = {}  # variant name -> UFO path, for the non-variable files
    for ttf_file_path in file_path:
        # The variant is whatever follows the last "-" in the file name. Only the
        # base name is inspected so that hyphens or separators in the directory
        # part of the path cannot leak into the variant (and the UFO path).
        ttf_file_name = os.path.splitext(os.path.basename(ttf_file_path))[0]
        variant = ttf_file_name.split("-")[-1]

        ufo_file_path = os.path.join(family_folder, f"{family}-{variant}.ufo")

        # Convert only when the target UFO does not exist yet, so re-running the
        # cell skips work that is already done.
        if "Variable" in variant:
            if not os.path.exists(ufo_file_path):
                var_ttf_to_ufo(ttf_file_path, ufo_file_path)
            master = ufo_file_path
        else:
            if not os.path.exists(ufo_file_path):
                ttf_to_ufo(ttf_file_path, ufo_file_path)
            variants[variant] = ufo_file_path

    ufo_records.append({
        'family': family,
        'subsets': row["subsets"],
        'category': row["category"],
        'master': master,
        'variants': variants,
    })

# Build the manifest in a single step and persist it next to the notebook.
ufo_df = pd.DataFrame(ufo_records, columns=ufo_columns)

ufo_df.to_csv("ufo_data.csv", index=False)