In [None]:
pip install PyGithub

## Process multiple files separately

In [11]:
import csv
import json
import os
import re
import time

import requests

# Figma personal access token. It is read from the FIGMA_API_TOKEN environment variable
# so the secret does not have to be stored in the notebook; the placeholder string is
# only used as a fallback when the variable is not set.
API_TOKEN = os.environ.get('FIGMA_API_TOKEN', 'add figma token')
FILE_KEYS = ['add first file key', 'add second file key']  # Ensure MODULE is part of the file key or name

# Figma API URL template; '{}' is filled with the file key
FIGMA_API_URL_TEMPLATE = 'https://api.figma.com/v1/files/{}'

# Headers for the API request
headers = {
    'X-Figma-Token': API_TOKEN
}

#  Function to determine the module (section) number from the file name or URL (e.g., MODULE1 -> 1)
def get_section_number(file_name):
    """Return the module number embedded in ``file_name``, or ``None``.

    The first case-insensitive occurrence of ``MODULE`` immediately followed
    by digits is used (for example ``'.../MODULE1'`` gives ``1``).
    """
    found = re.search(r'MODULE(\d+)', file_name, re.IGNORECASE)
    if found is None:
        return None
    return int(found.group(1))

# Main processing function to handle multiple files
def process_figma_files(file_keys):
    """Fetch each Figma file and export its frames to one CSV and one JSON file.

    Each entry of ``file_keys`` may carry module information after a ``/``
    (for example ``'<key>/MODULE1'``). Only the part before the first ``/`` is
    sent to the API and used in the output filenames. When no module number
    can be read from the entry, section ``0`` is used.

    Non-200 responses and payloads without a ``'document'`` key are reported
    with ``print`` and the loop continues with the next entry.
    """
    for file_key in file_keys:
        # get_section_number yields None when the entry has no "MODULE<n>" marker.
        section_number = get_section_number(file_key)
        if section_number is None:
            section_number = 0
        print(f"Processing file for Module {section_number} :")

        # Only the leading segment is the actual Figma file key.
        figma_key = file_key.split('/')[0]
        response = requests.get(FIGMA_API_URL_TEMPLATE.format(figma_key), headers=headers)

        if response.status_code != 200:
            print(f"Error: Failed to fetch data for file key {file_key}. Status code: {response.status_code}")
            print(f"Response: {response.text}")
            continue

        file_data = response.json()
        if 'document' not in file_data:
            print(f"Warning: 'document' key not found in file with key {file_key}. Skipping this file.")
            continue

        frames = extract_frames(file_data['document']['children'], section_number)

        # Order by canvas position (y first, then x). Records without these keys
        # all get the same (inf, inf) sort key, so their relative order is kept.
        # NOTE(review): the process_frame visible in this notebook does not set
        # 'x'/'y' - confirm the records actually carry coordinates.
        def _canvas_position(frame):
            return (frame.get('y', float('inf')), frame.get('x', float('inf')))

        frames.sort(key=_canvas_position)

        # One pair of output files per Figma file key.
        csv_filename = f'figma_frames_{figma_key}.csv'
        json_filename = f'figma_frames_{figma_key}.json'

        save_to_csv(frames, csv_filename)
        print(f"Frames data has been exported to {csv_filename}")

        save_to_json(frames, json_filename)
        print(f"Frames data has been exported to {json_filename}")

# Function to recursively extract frames from the file data
def extract_frames(nodes, section_number=1, subsection_number=None):
    """Collect processed FRAME nodes from ``nodes`` and all of their descendants.

    Every node whose ``'type'`` is ``'FRAME'`` is passed to ``process_frame``
    (defined elsewhere in the notebook) together with the section and
    subsection numbers; falsy results are dropped. Nodes that have a
    ``'children'`` entry are searched recursively, whatever their own type.

    Returns a new list on every call.
    """
    collected = []
    for node in nodes:
        is_frame = node['type'] == 'FRAME'
        frame_record = process_frame(node, section_number, subsection_number) if is_frame else None
        if frame_record:
            collected.append(frame_record)
        if 'children' in node:
            # Descend so that nested frames are picked up as well.
            collected += extract_frames(node['children'], section_number, subsection_number)
    return collected

# Function to save the frames data to a CSV file
def save_to_csv(frames, filename):
    """Write ``frames`` to ``../01_Results/<filename>`` as a UTF-8 CSV file.

    Parameters
    ----------
    frames : list of dict
        Frame records in their final order. The list items are modified in
        place: ``'Column'`` is set to the 1-based position in the list and the
        ``'x'`` / ``'y'`` keys are removed.
    filename : str
        Name of the CSV file inside the ``01_Results`` folder (created when
        missing).

    Keys of a record that are not listed in the CSV header are left out of the
    file instead of raising ``ValueError``.
    """
    # Assign the column number from the (already sorted) position and drop the
    # coordinates, which are not exported.
    for position, frame in enumerate(frames, start=1):
        frame['Column'] = position
        frame.pop('x', None)
        frame.pop('y', None)

    # CSV column headers
    csv_columns = ['Section', 'Subsection', 'Section Title', 'Template Type', 'Column', 'Text', 'id']

    # Results are written to the "01_Results" folder next to the notebook folder
    results_folder = os.path.join('..', '01_Results')
    os.makedirs(results_folder, exist_ok=True)
    csv_path = os.path.join(results_folder, filename)

    with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:
        # extrasaction='ignore': records may carry keys that are not CSV columns
        # (process_frame in this notebook adds 'image_url'); the default
        # 'raise' would abort the export with ValueError on the first such row.
        writer = csv.DictWriter(csv_file, fieldnames=csv_columns, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(frames)

# Function to save the frames data to JSON files
def save_to_json(frames, filename):
    """Write ``frames`` to ``../00_API/<filename>`` as indented UTF-8 JSON.

    The records are modified in place before writing: ``'Column'`` becomes the
    1-based position in the list and the ``'x'`` / ``'y'`` keys are removed.
    The ``00_API`` folder is created when it does not exist. Non-ASCII
    characters are written unescaped.
    """
    column = 0
    for frame in frames:
        column += 1
        frame['Column'] = column
        # Coordinates are not part of the export.
        for coordinate in ('x', 'y'):
            frame.pop(coordinate, None)

    # Target folder "00_API" lives one level above the working directory.
    api_folder = os.path.join('..', '00_API')
    os.makedirs(api_folder, exist_ok=True)

    with open(os.path.join(api_folder, filename), 'w', encoding='utf-8') as json_file:
        json.dump(frames, json_file, indent=4, ensure_ascii=False)

# Run the export for every entry in FILE_KEYS. This executes as soon as the cell runs:
# process_figma_files issues one Figma API request per entry and reports progress with print.
process_figma_files(FILE_KEYS)


Processing file for Module 1 :
Frames data has been exported to figma_frames_XN57QSSgnI4exbB2OTK1QM.csv
Frames data has been exported to figma_frames_XN57QSSgnI4exbB2OTK1QM.json
Processing file for Module 2 :
Frames data has been exported to figma_frames_5cH2oMyNQPZ1w9fUPM9Mfe.csv
Frames data has been exported to figma_frames_5cH2oMyNQPZ1w9fUPM9Mfe.json


## Extraction of multiple images and graphics

In [5]:
# GitHub raw-content base URLs used to build image and graphic links.
# The helpers in this cell append "image_<n>.png" to IMAGE_BASE_URL and
# "Module_<m>/graphic_<n>.png" to GRAPHIC_BASE_URL.
IMAGE_BASE_URL = 'https://raw.githubusercontent.com/UNDP-Data/dsc-energy-academy-pipeline/main/03_Inputs/images/'
GRAPHIC_BASE_URL = 'https://raw.githubusercontent.com/UNDP-Data/dsc-energy-academy-pipeline/main/03_Inputs/graphics/'

# Headers for the Figma API request.
# API_TOKEN is not assigned in this cell; it must already exist in the kernel
# (it is set in the configuration of the first code cell of the notebook).
headers = {
    'X-Figma-Token': API_TOKEN
}

# Function to determine the appropriate header tag based on font size
def determine_header_tag(font_size):
    """Map a font size to an HTML heading tag name.

    Sizes of 32 and above give ``'h1'``; the lower bounds 24, 20, 16 and 14
    select ``'h2'`` through ``'h5'``; anything smaller gives ``'h6'``.
    """
    # Lower bounds in descending order: the first one reached decides the tag.
    thresholds = ((32, 'h1'), (24, 'h2'), (20, 'h3'), (16, 'h4'), (14, 'h5'))
    for lower_bound, tag in thresholds:
        if font_size >= lower_bound:
            return tag
    return 'h6'

# Function to determine the module (section) number from the file name (e.g., MODULE1 -> 1)
def get_section_number(file_name):
    """Extract the module number from a Figma file name.

    Searches ``file_name`` for ``MODULE`` (any letter case) directly followed
    by digits and returns those digits as an ``int``; returns ``None`` when
    there is no such marker. Only the first marker is considered.
    """
    module_pattern = re.compile(r'MODULE(\d+)', re.IGNORECASE)
    hit = module_pattern.search(file_name)
    return None if hit is None else int(hit.group(1))

# Function to look for image or graphic labels and return the corresponding URLs
def extract_image_or_graphic_url_from_text(text, module_number):
    """Turn ``image_<n>:`` and ``graphic_<n>:`` labels found in ``text`` into URLs.

    Image labels become ``IMAGE_BASE_URL + 'image_<n>.png'``; graphic labels
    become ``GRAPHIC_BASE_URL + 'Module_<module_number>/graphic_<n>.png'``.

    Returns a list with all image URLs first (in order of appearance),
    followed by all graphic URLs; the list is empty when no label is present.
    """
    image_links = [
        f"{IMAGE_BASE_URL}image_{number}.png"
        for number in re.findall(r'image_(\d+):', text)
    ]
    graphic_links = [
        f"{GRAPHIC_BASE_URL}Module_{module_number}/graphic_{number}.png"
        for number in re.findall(r'graphic_(\d+):', text)
    ]
    return image_links + graphic_links

# Function to check for image nodes and return image URLs
def extract_image_from_frame(frame):
    """Return an image URL for ``frame`` when it has a child of type ``'IMAGE'``.

    The URL is ``IMAGE_BASE_URL + 'image_<suffix>.png'`` where ``<suffix>`` is
    the text after the last space in the frame's name (the whole name when it
    contains no space). Returns ``None`` when no child is an ``'IMAGE'`` node.
    """
    child_types = [child['type'] for child in frame['children']]
    if 'IMAGE' not in child_types:
        return None

    # The last space-separated token of the frame name is taken as the image number.
    # NOTE(review): assumes frame names end in the number ("... 7") - confirm the naming convention.
    suffix = frame['name'].rsplit(' ', 1)[-1]
    return f"{IMAGE_BASE_URL}image_{suffix}.png"

# Function to process a frame and extract relevant information, including image URLs and subsections
def process_frame(frame, section_number, subsection_number, file_key=None):
    """Build the export record for one Figma FRAME node.

    Parameters
    ----------
    frame : dict
        FRAME node; must provide ``'name'`` and ``'children'``.
    section_number :
        Module number stored as ``'Section'`` and used in graphic URLs.
    subsection_number :
        Stored as ``'Subsection'``.
    file_key : optional
        Not used by this function. It is optional so that callers passing
        three positional arguments and callers passing four both work.

    Returns
    -------
    dict or None
        ``None`` (after printing a warning) when the frame has no
        ``'absoluteBoundingBox'`` entry. Otherwise a record with the keys
        ``'Section'``, ``'Subsection'``, ``'Section Title'``,
        ``'Template Type'``, ``'Column'``, ``'Text'``, ``'id'`` and
        ``'image_url'`` (a list).

    ``'image_url'`` holds either the URL derived from an IMAGE child, or the
    URLs referenced by ``image_<n>:`` / ``graphic_<n>:`` labels in the frame's
    text nodes. When the frame has text nodes but none references an image,
    the list contains the single placeholder ``"Image URL not found"``.
    """
    if 'absoluteBoundingBox' not in frame:
        print(f"Warning: 'absoluteBoundingBox' missing for frame '{frame.get('name', 'Unnamed')}'. Skipping this frame.")
        return None

    children = frame['children']
    has_image_node = any(node['type'] == 'IMAGE' for node in children)

    # NOTE(review): the callers in this notebook sort records by 'x' / 'y', but no
    # coordinates are stored here - confirm whether they should be copied from
    # 'absoluteBoundingBox'.
    frame_data = {
        'Section': section_number,
        'Subsection': subsection_number,
        'Section Title': frame['name'],
        'Template Type': 'Photo' if has_image_node else 'Text',
        'Column': 0,
        'Text': '',
        'id': '',
        'image_url': []  # Filled below
    }

    # A frame with an IMAGE child gets exactly one URL; its text nodes are not read.
    image_url = extract_image_from_frame(frame)
    if image_url:
        frame_data['image_url'].append(image_url)
        return frame_data

    # No image node: collect the text and look for "image_<n>:" / "graphic_<n>:" labels in it.
    saw_text = False
    for node in children:
        if node['type'] != 'TEXT':
            continue
        saw_text = True

        # Nodes without style information fall back to font size 14.
        font_size = node.get('style', {}).get('fontSize', 14)
        header_tag = determine_header_tag(font_size)
        text = node['characters']

        frame_data['Text'] += f'<{header_tag}>{text}</{header_tag}>'
        frame_data['image_url'].extend(extract_image_or_graphic_url_from_text(text, section_number))

    # Add the placeholder once, and only when no text node yielded a URL, so that
    # real URLs are never mixed with placeholder strings.
    if saw_text and not frame_data['image_url']:
        frame_data['image_url'].append("Image URL not found")

    return frame_data

# Function to recursively extract frames from the file data
def extract_frames(nodes, file_name, section_number, subsection_number=None):
    """Collect processed FRAME nodes from ``nodes`` and their descendants.

    Parameters
    ----------
    nodes : iterable of dict
        Figma nodes; each must provide ``'type'`` (and ``'name'`` for frames).
    file_name :
        Forwarded to ``process_frame`` as its fourth argument.
    section_number :
        Module number forwarded to ``process_frame``.
    subsection_number : optional
        Subsection in effect when the traversal of ``nodes`` starts.

    A FRAME whose name contains ``'Subsection'`` is treated as a marker: the
    first run of digits in its name becomes the subsection number for the
    nodes that follow it in the same list, and the marker itself (including
    its children) is not exported. A marker without any digits leaves the
    current subsection number unchanged instead of raising.

    Returns a list of the truthy ``process_frame`` results.
    """
    frames = []
    for node in nodes:
        if node['type'] == 'FRAME':
            if 'Subsection' in node['name']:
                number_match = re.search(r'\d+', node['name'])
                if number_match:
                    subsection_number = int(number_match.group())
                else:
                    # A marker such as "Subsection intro" has no number to read.
                    print(f"Warning: no subsection number found in frame '{node['name']}'. "
                          f"Keeping subsection {subsection_number}.")
                continue

            processed_frame = process_frame(node, section_number, subsection_number, file_name)
            if processed_frame:
                frames.append(processed_frame)

        # Recurse into children so nested frames are collected too.
        if 'children' in node:
            frames.extend(extract_frames(node['children'], file_name, section_number, subsection_number))
    return frames

# Function to save the frames data to JSON files
def save_to_json(frames, filename):
    """Write ``frames`` to the file at ``filename`` as indented UTF-8 JSON.

    ``filename`` is used as the full path of the output file. Its parent
    folder is created when it does not exist yet, so exporting into a fresh
    checkout does not fail with ``FileNotFoundError``. Non-ASCII characters
    are written unescaped.
    """
    parent_folder = os.path.dirname(filename)
    if parent_folder:  # empty for a bare file name in the working directory
        os.makedirs(parent_folder, exist_ok=True)

    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(frames, json_file, indent=4, ensure_ascii=False)

# Function to process a single Figma file with retry and delay
def process_single_file(file_key, delay=2, max_retries=3, request_timeout=30):
    """Fetch one Figma file and export its frames to ``../00_API`` as JSON.

    Parameters
    ----------
    file_key : str
        Figma file key. Anything after the first ``/`` (for example a
        ``/MODULE1`` suffix) is ignored for the request and the output name.
    delay : int or float
        Seconds to wait before the next attempt after a failed one.
    max_retries : int
        Maximum number of attempts.
    request_timeout : int or float
        Timeout in seconds passed to ``requests.get`` so a stalled connection
        cannot block the notebook indefinitely.

    An attempt counts as failed when the response status is not 200 or when
    any exception is raised while fetching, parsing or saving. Failures are
    reported with ``print``; nothing is raised to the caller. A 200 response
    without a ``'document'`` key is reported and not retried.
    """
    # Only the leading segment is the actual Figma file key.
    figma_key = file_key.split('/')[0]
    url = FIGMA_API_URL_TEMPLATE.format(figma_key)

    retries = 0
    success = False
    while retries < max_retries and not success:
        try:
            response = requests.get(url, headers=headers, timeout=request_timeout)
            if response.status_code == 200:
                file_data = response.json()
                file_name = file_data['name']
                # The module number is read from the file's name as reported by the API.
                section_number = get_section_number(file_name)

                if 'document' in file_data:
                    frames = extract_frames(file_data['document']['children'], file_name, section_number)
                    # Order by canvas position; records without 'x'/'y' keep their order.
                    # NOTE(review): the process_frame visible in this notebook does not
                    # set 'x'/'y' - confirm the records actually carry coordinates.
                    frames.sort(key=lambda f: (f.get('y', float('inf')), f.get('x', float('inf'))))

                    json_filename = os.path.join('../00_API', f'figma_frames_{figma_key}.json')
                    save_to_json(frames, json_filename)
                    print(f"Frames data has been exported to 00_API folder {json_filename}")
                else:
                    print(f"Warning: 'document' key not found in file with key {file_key}. Skipping this file.")
                success = True  # Either exported or nothing to export: no retry needed
            elif response.status_code == 429:  # Rate limiting error
                print(f"Rate limit hit. Retrying after {delay} seconds...")
            else:
                print(f"Error: Failed to fetch data for file key {file_key}. Status code: {response.status_code}")
        except Exception as e:
            # Deliberately broad: one bad file must not stop the remaining files.
            print(f"Error processing file key {file_key}: {e}")

        if success:
            break

        retries += 1
        if retries < max_retries:
            time.sleep(delay)  # wait only when another attempt follows
        else:
            print(f"Max retries reached for file key {file_key}. Skipping this file.")

# Function to process Figma files sequentially with delay
def process_figma_files_sequential(file_keys, delay_between_requests=2):
    """Process the given Figma files one after another, pausing between files.

    Parameters
    ----------
    file_keys : iterable of str
        Figma file keys, handled in order by ``process_single_file``.
    delay_between_requests : int or float
        Seconds to wait between two consecutive files. The same value is
        passed to ``process_single_file`` as its retry delay.
    """
    for index, file_key in enumerate(file_keys):
        # Pause before every file except the first one, so consecutive API
        # requests are spaced out and nothing is waited for after the last file.
        if index > 0:
            time.sleep(delay_between_requests)
        process_single_file(file_key, delay=delay_between_requests)

# Run the image/graphic extraction for every entry in FILE_KEYS, one file at a time.
# FILE_KEYS is not assigned in this cell; it must already exist in the kernel
# (it is set in the configuration of the first code cell of the notebook).
process_figma_files_sequential(FILE_KEYS)


Frames data has been exported to 00_API folder ../00_API\figma_frames_XN57QSSgnI4exbB2OTK1QM.json
Frames data has been exported to 00_API folder ../00_API\figma_frames_5cH2oMyNQPZ1w9fUPM9Mfe.json
