In [None]:
# Import necessary libraries
import pandas as pd
import requests
import json

# Import necessary libraries
import json

# Load the configuration file (a JSON object that holds the API key)
with open('config.json', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Get the API key from the configuration file
GOOGLE_FONTS_API_KEY = config['GOOGLE_FONTS_API_KEY']

# Set the API endpoint for the Google Fonts API
GOOGLE_FONTS_API_ENDPOINT = 'https://www.googleapis.com/webfonts/v1/webfonts'

# Set the parameters for the API request
params = {
    'key': GOOGLE_FONTS_API_KEY,
    'sort': 'popularity'
}

# Make the API request to get the list of fonts.
# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(GOOGLE_FONTS_API_ENDPOINT, params=params, timeout=30)

# Stop here with the HTTP status if the request was rejected; otherwise the
# failure would only show up below as a KeyError on "items".
response.raise_for_status()

# Parse the response body as JSON
fonts = response.json()

# Create a dictionary for storing the fonts in the database, keyed by family name
database = {}

# Loop through each font in the response
for font in fonts["items"]:
    # Get the font's family name and category
    family = font['family']
    category = font['category']

    # Store only the fields the rest of the notebook works with
    database[family] = {
        'family': family,
        'category': category,
        'variants': font['variants'],
        'subsets': font['subsets'],
        'version': font['version'],
        'lastModified': font['lastModified']
    }

# Function for generating a dataset of fonts
def generate_font_dataset(num_fonts, categories=None, subsets=None, versions=None,
                          font_database=None):
    """Collect up to ``num_fonts`` font records that pass every given filter.

    Args:
        num_fonts: Maximum number of fonts to return. Zero or a negative
            number yields an empty list.
        categories: Optional collection of accepted category names; a font is
            kept only if its ``'category'`` is one of them.
        subsets: Optional collection of subset names the font must support.
            A font is kept only if it supports ALL of them; supporting
            additional subsets does not disqualify it.
        versions: Optional collection of accepted version strings.
        font_database: Optional mapping of family name to font record. When
            omitted, the module-level ``database`` is used.

    Returns:
        A list of font records (the same dict objects stored in the
        database), in the database's iteration order.
    """
    if font_database is None:
        font_database = database

    # Nothing can be selected when no fonts were asked for
    if num_fonts <= 0:
        return []

    # An empty requirement set is a subset of anything, i.e. "no subset filter"
    required_subsets = set(subsets) if subsets else set()

    # Initialize an empty list for storing the fonts in the dataset
    font_dataset = []

    # Loop through the database of fonts
    for font in font_database.values():
        # Check if the font's category matches the specified categories (if provided)
        if categories and font['category'] not in categories:
            continue

        # Check that the font covers every requested subset (if provided)
        if not required_subsets.issubset(font['subsets']):
            continue

        # Check if the font's version matches the specified versions (if provided)
        if versions and font['version'] not in versions:
            continue

        # Add the font to the dataset
        font_dataset.append(font)

        # Stop adding fonts to the dataset if the desired number of fonts has been reached
        if len(font_dataset) >= num_fonts:
            break

    # Return the generated dataset of fonts
    return font_dataset

# Example usage: request at most 10 fonts, filtering on the 'sans-serif'
# category and on the 'latin' and 'cyrillic' subsets
font_dataset = generate_font_dataset(
    10,
    categories=['sans-serif'],
    subsets=['latin', 'cyrillic'],
)

# Print the family name of every selected font, one per line
for family_name in (font['family'] for font in font_dataset):
    print(family_name)

# Turn the font database into a DataFrame with one row per family name
font_database_df = pd.DataFrame.from_dict(database, orient='index')
font_database_df

In [None]:
# Import necessary libraries
import requests
import os
from tqdm import tqdm

# Create the fonts directory (no error if it already exists)
os.makedirs('fonts', exist_ok=True)

# NOTE(review): this assumes the download endpoint returns a single TTF file
# per family/variant -- confirm. The Webfonts API response reportedly also
# carries per-variant file URLs, which may be a more reliable source.

# Loop through the font database, downloading each variant and recording
# where it was saved
for font in tqdm(database.values(), desc="Downloading fonts"):
    # Get the font's family name
    family = font['family']

    # Paths of the variants that were actually saved, keyed by variant name
    file_paths = {}

    # Loop through the font's variants
    for variant in font['variants']:
        # Set the file path for the font file
        file_path = f'fonts/{family}-{variant}.ttf'

        try:
            # Let requests build the query string so family names with spaces
            # or other special characters are encoded correctly; the timeout
            # keeps one stalled download from blocking the whole run.
            response = requests.get(
                'https://fonts.google.com/download',
                params={'family': f'{family}:{variant}'},
                timeout=30,
            )
            # Never save an HTTP error page under a .ttf name
            response.raise_for_status()
        except requests.RequestException as error:
            # Report the failure and carry on with the remaining variants
            tqdm.write(f'Skipping {family} ({variant}): {error}')
            continue

        # Save the font file to a local directory
        with open(file_path, 'wb') as font_file:
            font_file.write(response.content)

        file_paths[variant] = file_path

        # Kept for backward compatibility: as before, this ends up holding the
        # path of the last variant processed (here: the last one saved).
        font['file_path'] = file_path

    # Add every saved variant's path to the font's entry in the database
    font['file_paths'] = file_paths


In [None]:
# Import necessary libraries
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter

# Tally, for every name found in the "subsets" column of the dataframe,
# how many fonts list it
counter = Counter(
    language
    for font_subsets in font_database_df["subsets"]
    for language in font_subsets
)

# Print one "<language>: <count>" line per entry, in first-seen order
for language in counter:
    print(f"{language}: {counter[language]}")


In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Rank (count, language) pairs from the counter in descending order; pairs
# with equal counts fall back to descending language name
ranked = sorted(
    ((count, language) for language, count in counter.items()),
    reverse=True,
)

# Keep only the entries with at least 20 fonts, then split them back into
# the two parallel lists used for plotting
frequent = [(language, count) for count, language in ranked if count >= 20]
languages = [language for language, _ in frequent]
counts = [count for _, count in frequent]

# Set the size of the figure
plt.figure(figsize=(16, 8))

# Create a barplot showing the number of fonts that support each language
ax = sns.barplot(x=languages, y=counts, hue=languages, dodge=False)

# Add a title and axes labels
ax.set_title("Top languages by number of supported fonts")
ax.set_xlabel("Language")
ax.set_ylabel("Number of fonts")

# Show the legend
plt.legend(title="Languages", loc="upper right")


In [None]:
# Import necessary libraries
from functools import reduce

# Count how many subsets each font lists
language_counts = [len(font_subsets) for font_subsets in font_database_df["subsets"]]

# Add the per-font counts together. sum() yields 0 for an empty table, whereas
# reduce() without an initial value raises TypeError on an empty sequence.
total_count = sum(language_counts)

# Print the total number of (font, subset) pairs, i.e. the per-font subset
# counts added up over all fonts
print(total_count)
