In [None]:
import os
import sys

import pandas as pd
from tqdm import tqdm

sys.path.insert(0,"data_utils")
from data_utils.font_to_ufo import ttf_to_ufo, var_ttf_to_ufo

# Extract: the data is read from a CSV file with pd.read_csv and stored in a dataframe called df.
# Root folder for font data; converted fonts are written to "UFO/<family>" folders below it.
fonts_path = "../../data/processed/fonts/"
# CSV file that the font rows are read from.
data_file = "download_data.csv"

# Read the CSV file, using the parse_list function to parse the values in the "subsets" and "file_path" columns
def parse_list(string):
    """Parse a stringified list such as "['a', 'b']" into a list of strings.

    Used as a ``pd.read_csv`` converter for columns whose cells hold the text
    form of a Python list.

    Args:
        string: The raw cell text, e.g. ``"['latin', 'latin-ext']"``.

    Returns:
        The list items with surrounding brackets and quotes removed. An empty
        cell or an empty list (``"[]"``) yields ``[]`` rather than ``['']``,
        so callers iterating the result never see a bogus empty entry.
    """
    inner = string.strip().strip("[]").strip()
    if not inner:
        return []
    # repr() switches to double quotes for items that contain an apostrophe,
    # so both quote characters are stripped from the ends of each item.
    return [item.strip().strip("'\"") for item in inner.split(", ")]
# Load the CSV; the cells of the list-valued columns are run through
# parse_list while reading so they arrive as Python lists.
df = pd.read_csv(
    data_file,
    converters={column: parse_list for column in ("subsets", "file_path")},
)

# Empty frame that will receive one row of UFO information per font family.
ufo_df = pd.DataFrame(
    columns=["family", "subsets", "category", "master", "variants"],
)

In [None]:
# TODO: fix kerning error in cases: df = df[df.family == "Amiri Quran"] 

# Transform: The data is transformed by iterating through the file paths in the file_path column of the df dataframe
# and converting each TTF file to a UFO file using either the ttf_to_ufo or var_ttf_to_ufo function. The resulting
# UFO file information is stored in a dictionary called variants.
# Rows are collected in a plain list and turned into a DataFrame once after the
# loop: DataFrame.append was removed in pandas 2.0, and before that it copied
# the whole frame on every call.
ufo_records = []
for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Converting to UFO..."):
    master = None
    file_path = row["file_path"]
    family = row["family"]
    # tqdm.write prints the name without breaking the progress bar.
    tqdm.write(str(family))

    # One output folder per family; exist_ok replaces the check-then-create.
    family_folder = os.path.join(fonts_path, f"UFO/{family}")
    os.makedirs(family_folder, exist_ok=True)

    variants = {}
    for ttf_file_path in file_path:
        # An empty entry (e.g. from an empty list cell) has nothing to convert.
        if not ttf_file_path:
            continue

        # The variant is whatever follows the last "-" in the file *name*.
        # Working on the basename keeps hyphens or path separators in parent
        # directories from leaking into the variant and the UFO path.
        ttf_file_name = os.path.splitext(os.path.basename(ttf_file_path))[0]
        variant = ttf_file_name.rsplit("-", 1)[-1]

        ufo_file_path = os.path.join(family_folder, f"{family}-{variant}.ufo")

        # Variable fonts go through their own converter and become the master;
        # every other file is recorded as a named variant.
        is_variable = "Variable" in variant

        # Convert only when the UFO does not exist yet, so re-runs are cheap.
        if not os.path.exists(ufo_file_path):
            convert = var_ttf_to_ufo if is_variable else ttf_to_ufo
            convert(ttf_file_path, ufo_file_path)

        if is_variable:
            master = ufo_file_path
        else:
            variants[variant] = ufo_file_path

    # Remember the converted UFO file information for this family
    ufo_records.append({
        'family': family,
        'subsets': row["subsets"],
        'category': row["category"],
        'master': master,
        'variants': variants,
    })

# Load: The collected rows are added to the ufo_df dataframe in one step.
# The ufo_df dataframe is then saved to a CSV file using the to_csv function.
new_rows = pd.DataFrame(ufo_records, columns=ufo_df.columns)
# Concatenating onto an empty frame is deprecated in recent pandas, so only
# concatenate when ufo_df already holds rows.
ufo_df = new_rows if ufo_df.empty else pd.concat([ufo_df, new_rows], ignore_index=True)
ufo_df.to_csv("ufo_data.csv", index=False)