# This code retrieves the templates/frames built in Figma through the Figma API and saves them to a .json file and a .csv file

In [1]:
# Import necessary libraries
import requests
import json
import csv
import os
import re
import time  # For adding delays between requests
from dotenv import load_dotenv # for accessing the api key in .env file

# Load environment variables from the .env file
load_dotenv()

# Access the API key.
# os.getenv returns None when FIGMA_API_KEY is not set, in which case the
# 'X-Figma-Token' header built below carries None.
API_TOKEN = os.getenv('FIGMA_API_KEY')

# Figma API URL template; the '{}' placeholder is filled in by the caller
# (presumably with the Figma file key - confirm against the request code).
FIGMA_API_URL_TEMPLATE = 'https://api.figma.com/v1/files/{}'

# Headers for the API request
headers = {
    'X-Figma-Token': API_TOKEN
}

# Base URL for the GitHub repository to fetch inputs (images and graphics).
# get_image_url() appends 'Images/...' or 'Graphics/Section%20<n>/...' to it.
GITHUB_BASE_URL = 'https://raw.githubusercontent.com/UNDP-Data/dsc-energy-academy-pipeline/main/03_Inputs/'

### Extracting the module number of the file

In [2]:
def extract_module_number(module_name):
    """Return the first run of digits found in ``module_name``.

    Args:
        module_name: Text to scan, e.g. "Module 3".

    Returns:
        The digits as a string (e.g. "3"), or None when the text contains
        no digit at all.
    """
    digits = re.search(r'\d+', module_name)
    return digits.group(0) if digits else None


### Retrieving the font sizes and tags of each text in the frame

In [3]:
# Function to determine the appropriate header tag based on font size
def determine_header_tag(font_size):
    """Map a font size to an HTML heading tag name.

    Args:
        font_size: Numeric font size of a text node.

    Returns:
        'h1' for sizes >= 32, 'h2' for >= 24, 'h3' for >= 20, 'h4' for >= 16,
        'h5' for >= 14 and 'h6' for anything smaller.
    """
    # Lower bounds in descending order: the first one reached decides the tag.
    size_floors = ((32, 'h1'), (24, 'h2'), (20, 'h3'), (16, 'h4'), (14, 'h5'))
    for floor, tag in size_floors:
        if font_size >= floor:
            return tag
    return 'h6'
#Test Case
"""
The input parameter is font size (integer). The function should return the appropriate header tag.
Example input: 32, 24, 20, 16, 14, 12
Expected output: 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'
"""

# # Example test (uncomment to test)
# font_size_1 = 50
# font_size_2 = 24
# font_size_3 = 20
# font_size_4 = 16
# font_size_5 = 14

# print(f"Header tag for font size {font_size_1}: {determine_header_tag(font_size_1)}")  # Expected output: 'h1'
# print(f"Header tag for font size {font_size_2}: {determine_header_tag(font_size_2)}")  # Expected output: 'h2'
# print(f"Header tag for font size {font_size_3}: {determine_header_tag(font_size_3)}")  # Expected output: 'h3'
# print(f"Header tag for font size {font_size_4}: {determine_header_tag(font_size_4)}")  # Expected output: 'h4'
# print(f"Header tag for font size {font_size_5}: {determine_header_tag(font_size_5)}")  # Expected output: 'h5'    

"\nThe input parameter is font size (integer). The function should return the appropriate header tag.\nExample input: 32, 24, 20, 16, 14\nExpected output: 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'\n"

### Differentiate template types either Text, Text-image or Image

In [4]:
# Function to determine the template type based on frame content
def determine_template_type(frame):
    """Classify a frame as 'Photo', 'Text-Image', 'Text' or 'Unknown'.

    The direct children of ``frame`` are inspected:

    * A TEXT child counts as real text when its stripped ``characters`` are
      non-empty and do not contain a label such as ``image_1`` or ``graph_2``.
    * An IMAGE, VECTOR or RECTANGLE child counts as an image unless its name
      contains 'ignore_rect' (case-insensitive).

    Args:
        frame: Dict with a 'children' list of node dicts; every node needs a
            'type' key.

    Returns:
        'Photo' (image without real text), 'Text-Image' (both), 'Text'
        (real text only) or 'Unknown' (neither).
    """
    label_pattern = r'image_\d+|graph_\d+'
    image_kinds = ('IMAGE', 'VECTOR', 'RECTANGLE')

    has_real_text = False
    has_image = False

    for child in frame['children']:
        kind = child['type']

        if kind == 'TEXT':
            content = child.get('characters', '').strip()
            if re.search(label_pattern, content):
                # Placeholder labels are not treated as text content.
                continue
            if content:
                has_real_text = True

        # Nodes explicitly marked as ignorable never count as images.
        if 'ignore_rect' in child.get('name', '').lower():
            continue

        if kind in image_kinds:
            has_image = True

    if has_image:
        return 'Text-Image' if has_real_text else 'Photo'
    return 'Text' if has_real_text else 'Unknown'

# Test Case
"""
The input is a Figma frame (a dictionary containing 'children' nodes), each child may be a TEXT, IMAGE, VECTOR, or RECTANGLE.
The function determines the template type of the frame based on its content.
Example input: Frame with text only, Frame with image only, Frame with both text and image, Frame with vector or rectangle
Expected output: 'Text', 'Photo', 'Text-Image', or 'Unknown'. VECTOR and RECTANGLE nodes count as images unless their name contains 'ignore_rect'.
"""

# # Example test (uncomment to test)
# frame_with_text_only = {
#     'children': [
#         {'type': 'TEXT', 'characters': 'This is a text'}
#     ]
# }

# frame_with_image_only = {
#     'children': [
#         {'type': 'IMAGE', 'name': 'image_1'}
#     ]
# }

# frame_with_text_and_image = {
#     'children': [
#         {'type': 'TEXT', 'characters': 'This is a text'},
#         {'type': 'IMAGE', 'name': 'image_1'}
#     ]
# }

# print(f"Template type for text-only frame: {determine_template_type(frame_with_text_only)}")  # Expected output: 'Text'
# print(f"Template type for image-only frame: {determine_template_type(frame_with_image_only)}")  # Expected output: 'Photo'
# print(f"Template type for text and image frame: {determine_template_type(frame_with_text_and_image)}")  # Expected output: 'Text-Image'


"\nThe input is a Figma frame (a dictionary containing 'children' nodes), each child may be a TEXT, IMAGE, VECTOR, or RECTANGLE.\nThe function determines the template type of the frame based on its content.\nExample input: Frame with text only, Frame with image only, Frame with both text and image, Frame with vector or rectangle\nExpected output: 'Text', 'Photo', 'Text-Image', or 'Unknown' Vectors and rectangles are ignored. \n"

### Fetch the images and photos from Github

In [5]:
# Function to fetch image and graphic URL from GitHub based on image node name
def get_image_url(image_name, module_name, extension='.png'):
    """Build the GitHub raw URL for an image or graphic node.

    Args:
        image_name: Node name. Names starting with 'image_' resolve under
            'Images/'; names starting with 'graph_' resolve under
            'Graphics/Section%20<module number>/'.
        module_name: Text from which the module number is extracted (first
            run of digits). Only consulted for 'graph_' names.
        extension: File extension appended to the name, default '.png'.

    Returns:
        The URL string, or None when the name has neither prefix or when a
        'graph_' name is requested but ``module_name`` contains no number.
    """
    if image_name.startswith('image_'):
        return f'{GITHUB_BASE_URL}Images/{image_name}{extension}'

    if image_name.startswith('graph_'):
        # The section number is only needed for graphics, so parse it here.
        module_number = extract_module_number(module_name)
        if module_number is None:
            # Without a number the path would contain the literal "None".
            return None
        return f'{GITHUB_BASE_URL}Graphics/Section%20{module_number}/{image_name}{extension}'

    return None

### Process certain module intro frames specifically

In [6]:
# Function to extract data for the Module_Title-1440 template
def extract_module_title_1440(frame, column_number):
    """Build the Module Intro record for a 'Module_Title-1440' frame.

    TEXT children are visited top-to-bottom, then left-to-right. Each text is
    wrapped in the header tag matching its font size and filed as follows:

    * digits only -> "Module Number" (a later match replaces an earlier one)
    * font size >= 32 and more than one word -> appended to "Title"
    * any other text that does not contain "MODULE" -> appended to "Subtitle"

    Args:
        frame: Frame dict with 'name' and 'children'.
        column_number: Value stored under "Column".

    Returns:
        Dict with "Section", "Frame Title", "Template Type", "Column", the
        nested "Content" mapping and an empty "image_url" list.
    """
    def reading_order(child):
        box = child.get('absoluteBoundingBox', {})
        return box.get('y', 0), box.get('x', 0)

    title_parts = []
    subtitle_parts = []
    number_markup = ""

    for child in sorted(frame['children'], key=reading_order):
        if child['type'] != 'TEXT':
            continue

        size = child.get('style', {}).get('fontSize', 14)
        tag = determine_header_tag(size)
        content = child['characters'].strip()
        markup = f'<{tag}>{content}</{tag}>'

        if re.match(r'^\d+$', content):
            number_markup = markup
        elif size >= 32 and len(content.split()) > 1:
            title_parts.append(markup)
        elif "MODULE" not in content.upper():
            subtitle_parts.append(markup)

    content_block = {
        "Title": ''.join(title_parts).strip(),
        "Subtitle": ' '.join(subtitle_parts).strip(),
        "Module Number": number_markup.strip()
    }
    return {
        "Section": "Module Intro",
        "Frame Title": frame['name'],
        "Template Type": "Text",
        "Column": column_number,
        "Content": content_block,
        "image_url": []
    }


In [7]:
# Function to extract data for the Module_Overview-1080 template
def extract_module_overview_1080(frame):
    """Extract title, subtitle and body text from a 'Module_Overview-1080' frame.

    TEXT children are visited top-to-bottom, then left-to-right, and wrapped
    in the header tag matching their font size:

    * font size >= 32 containing "MODULE OVERVIEW" -> "Title"
    * the first other text encountered -> "Subtitle"
    * every remaining text -> appended to "Text", separated by spaces

    Args:
        frame: Frame dict with a 'children' list.

    Returns:
        Dict with the keys "Title", "Subtitle" and "Text".
    """
    def reading_order(child):
        box = child.get('absoluteBoundingBox', {})
        return box.get('y', 0), box.get('x', 0)

    title_markup = ""
    subtitle_markup = None  # None until the first non-title text is seen
    body_parts = []

    for child in sorted(frame['children'], key=reading_order):
        if child['type'] != 'TEXT':
            continue

        size = child.get('style', {}).get('fontSize', 14)
        tag = determine_header_tag(size)
        content = child['characters'].strip()
        markup = f'<{tag}>{content}</{tag}>'

        if size >= 32 and "MODULE OVERVIEW" in content.upper():
            title_markup = markup
        elif subtitle_markup is None:
            subtitle_markup = markup
        else:
            body_parts.append(markup)

    return {
        "Title": title_markup.strip(),
        "Subtitle": (subtitle_markup or "").strip(),
        "Text": ' '.join(body_parts).strip()
    }


In [8]:
# Function to extract data for the Learning_Objectives-1440 template
def extract_learning_objectives_1440(frame):
    """Extract the numbered objectives from a 'Learning_Objectives-1440' frame.

    TEXT children are visited top-to-bottom, then left-to-right:

    * font size >= 32 containing "LEARNING OBJECTIVES" -> "Title"
    * the first other text encountered -> "Subtitle"
    * digits only -> opens a new objective keyed by '<h3>N</h3>' (re-using a
      number restarts that objective's text)
    * any other text -> appended to the objective currently open; dropped
      when no number has been seen yet

    Args:
        frame: Frame dict with a 'children' list.

    Returns:
        Dict with "Title", "Subtitle" and "Text", where "Text" maps each
        number markup to the space-joined markup of its description.
    """
    def reading_order(child):
        box = child.get('absoluteBoundingBox', {})
        return box.get('y', 0), box.get('x', 0)

    title_markup = ""
    subtitle_markup = None  # None until the first non-title text is seen
    objective_parts = {}
    open_key = None  # markup of the number currently collecting text

    for child in sorted(frame['children'], key=reading_order):
        if child['type'] != 'TEXT':
            continue

        size = child.get('style', {}).get('fontSize', 14)
        tag = determine_header_tag(size)
        content = child['characters'].strip()
        markup = f'<{tag}>{content}</{tag}>'

        if "LEARNING OBJECTIVES" in content.upper() and size >= 32:
            title_markup = markup
        elif subtitle_markup is None:
            subtitle_markup = markup
        elif re.match(r'^\d+$', content):
            open_key = f'<h3>{content}</h3>'
            objective_parts[open_key] = []
        elif open_key is not None:
            objective_parts[open_key].append(markup)

    return {
        "Title": title_markup.strip(),
        "Subtitle": (subtitle_markup or "").strip(),
        "Text": {key: ' '.join(parts) for key, parts in objective_parts.items()}
    }


In [9]:
# Function to extract data for the Advanced Chapter 540 template
def extract_advance_chapter_540(frame):
    """Extract the "Next up" teaser from an 'Advance_Chapter-540' frame.

    Children are visited top-to-bottom, then left-to-right. TEXT nodes that
    start with "CHAPTER <number>" or equal "Next up" (both case-insensitive)
    are skipped; every other text is wrapped in the header tag matching its
    font size and appended to the teaser text. IMAGE and RECTANGLE nodes are
    looked up through get_image_url().

    Args:
        frame: Frame dict with 'name' and 'children'.

    Returns:
        Dict with "Content" ({"Title": "<h1>Next up</h1>", "Text": ...}) and
        "image_url": the last URL that could be resolved, or "" when none of
        the image nodes resolved.
    """
    nodes_in_reading_order = sorted(
        frame['children'],
        key=lambda n: (
            n.get('absoluteBoundingBox', {}).get('y', 0),
            n.get('absoluteBoundingBox', {}).get('x', 0)
        )
    )

    text_parts = []
    image_url = ""

    for node in nodes_in_reading_order:
        node_type = node['type']

        if node_type == 'TEXT':
            text = node['characters'].strip()

            # Exclude the word "CHAPTER" and its associated number
            if re.match(r'^\s*CHAPTER\s+\d+', text, re.IGNORECASE):
                continue
            # The fixed "Next up" heading is emitted as the title instead
            if text.upper() == "NEXT UP":
                continue

            font_size = node.get('style', {}).get('fontSize', 14)
            header_tag = determine_header_tag(font_size)
            text_parts.append(f'<{header_tag}>{text}</{header_tag}>')

        elif node_type in ('IMAGE', 'RECTANGLE'):
            # NOTE(review): get_image_url() takes the section number from the
            # digits in its second argument; here that is the frame name, so
            # 'graph_*' nodes would resolve under section "540" - confirm.
            resolved = get_image_url(node['name'], frame['name'])
            # Keep a URL found earlier: decorative rectangles resolve to None
            # and used to overwrite it.
            if resolved:
                image_url = resolved

    return {
        "Content": {
            "Title": "<h1>Next up</h1>",  # The title is always "Next up"
            "Text": ' '.join(text_parts)
        },
        "image_url": image_url
    }


### Process certain Chapter intro frames specifically

In [10]:
# Function to extract data for the Chapter title 1200 template
def extract_chapter_title_1200(frame):
    """Extract chapter number, title and image from a 'Chapter_Title-1200' frame.

    Children are visited top-to-bottom, then left-to-right. Each TEXT node is
    wrapped in the header tag matching its font size: a digits-only text
    becomes the chapter number (a later one replaces an earlier one), every
    other text is appended to the title. IMAGE and RECTANGLE nodes are looked
    up through get_image_url().

    Args:
        frame: Frame dict with 'name' and 'children'.

    Returns:
        Dict with "Content" ({"Chapter": ..., "Title": ...}) and "image_url":
        the last URL that could be resolved, or "" when none resolved.
    """
    nodes_in_reading_order = sorted(
        frame['children'],
        key=lambda n: (
            n.get('absoluteBoundingBox', {}).get('y', 0),
            n.get('absoluteBoundingBox', {}).get('x', 0)
        )
    )

    chapter_number = ""
    title_parts = []
    image_url = ""

    for node in nodes_in_reading_order:
        node_type = node['type']

        if node_type == 'TEXT':
            font_size = node.get('style', {}).get('fontSize', 14)
            header_tag = determine_header_tag(font_size)
            text = node['characters'].strip()
            markup = f"<{header_tag}>{text}</{header_tag}>"

            if re.match(r'^\d+$', text):  # Detect chapter number
                chapter_number = markup
            else:  # Assume remaining text is the chapter title
                title_parts.append(markup)

        elif node_type in ('IMAGE', 'RECTANGLE'):
            # NOTE(review): get_image_url() takes the section number from the
            # digits in its second argument; here that is the frame name, so
            # 'graph_*' nodes would resolve under section "1200" - confirm.
            resolved = get_image_url(node['name'], frame['name'])
            # Keep a URL found earlier: decorative rectangles resolve to None
            # and used to overwrite it.
            if resolved:
                image_url = resolved

    return {
        "Content": {
            "Chapter": chapter_number,  # Only the number
            "Title": ' '.join(title_parts)
        },
        "image_url": image_url
    }


In [11]:
# Function to extract data for the Chapter overview 720 template
def extract_chapter_overview_720(frame):
    """Extract title and body text from a 'Chapter_Overview-720' frame.

    TEXT children are visited top-to-bottom, then left-to-right, and wrapped
    in the header tag matching their font size. A text containing "OVERVIEW"
    (case-insensitive) becomes the title, with the last such text winning;
    all other texts are joined with spaces into the body.

    Args:
        frame: Frame dict with a 'children' list.

    Returns:
        Dict of the form {"Content": {"Title": ..., "Text": ...}}.
    """
    def reading_order(child):
        box = child.get('absoluteBoundingBox', {})
        return box.get('y', 0), box.get('x', 0)

    title_markup = ""
    body_parts = []

    for child in sorted(frame['children'], key=reading_order):
        if child['type'] != 'TEXT':
            continue

        size = child.get('style', {}).get('fontSize', 14)
        tag = determine_header_tag(size)
        content = child['characters'].strip()
        markup = f"<{tag}>{content}</{tag}>"

        if "OVERVIEW" in content.upper():
            title_markup = markup
        else:
            body_parts.append(markup)

    return {
        "Content": {
            "Title": title_markup.strip(),
            "Text": ' '.join(body_parts).strip(),
        }
    }


In [12]:
# Function to extract data for the Chapter_ToC-1440 (chapter table of contents) template
def extract_chapter_toc_1440(frame):
    """Extract the lesson list from a 'Chapter_ToC-1440' frame.

    TEXT children are visited top-to-bottom, then left-to-right, and wrapped
    in the header tag matching their font size:

    * font size >= 32 containing "LESSONS" -> "Title"
    * digits only -> opens a new lesson keyed by the number's markup
    * font size >= 18 before any lesson number was seen -> appended to
      "Subtitle"
    * any other text after a lesson number -> appended to that lesson

    Args:
        frame: Frame dict with a 'children' list.

    Returns:
        Dict of the form
        {"Content": {"Title": ..., "Subtitle": ..., "Lessons": {...}}}.
    """
    def reading_order(child):
        box = child.get('absoluteBoundingBox', {})
        return box.get('y', 0), box.get('x', 0)

    title_markup = ""
    subtitle_parts = []
    lesson_parts = {}
    open_lesson = None  # markup of the lesson number currently collecting text

    for child in sorted(frame['children'], key=reading_order):
        if child['type'] != 'TEXT':
            continue

        size = child.get('style', {}).get('fontSize', 14)
        tag = determine_header_tag(size)
        content = child['characters'].strip()
        markup = f"<{tag}>{content}</{tag}>"

        if size >= 32 and "LESSONS" in content.upper():
            title_markup = markup
        elif content.isdigit():
            # A bare number starts the next lesson entry.
            open_lesson = markup
            lesson_parts[open_lesson] = []
        elif size >= 18 and open_lesson is None:
            subtitle_parts.append(markup)
        elif open_lesson is not None:
            lesson_parts[open_lesson].append(markup)

    return {
        "Content": {
            "Title": title_markup,
            "Subtitle": ' '.join(subtitle_parts),
            "Lessons": {number: ' '.join(parts) for number, parts in lesson_parts.items()}
        }
    }


In [13]:
# Function to extract data for the Advance_Lesson-540 template
def extract_advance_lesson_540(frame):
    """Extract the "Next up" lesson teaser from an 'Advance_Lesson-540' frame.

    Children are visited top-to-bottom, then left-to-right. TEXT nodes are
    ignored until one starts with "Lesson <number>" (case-insensitive); that
    text and every text after it are wrapped in the header tag matching their
    font size and joined with spaces. IMAGE and RECTANGLE nodes are looked up
    through get_image_url().

    Args:
        frame: Frame dict with 'name' and 'children'.

    Returns:
        Dict with "Content" ({"Title": "<h1>Next up</h1>", "Text": ...}) and
        "image_url": the last URL that could be resolved, or "" when none
        resolved.
    """
    nodes_in_reading_order = sorted(
        frame['children'],
        key=lambda n: (
            n.get('absoluteBoundingBox', {}).get('y', 0),
            n.get('absoluteBoundingBox', {}).get('x', 0)
        )
    )

    text_parts = []
    image_url = ""
    lesson_detected = False  # Becomes True at the first "Lesson [number]" text

    for node in nodes_in_reading_order:
        node_type = node['type']

        if node_type == 'TEXT':
            text = node['characters'].strip()

            if not lesson_detected:
                if not re.match(r'^Lesson\s+\d+', text, re.IGNORECASE):
                    continue  # Text before the lesson header is not part of the teaser
                lesson_detected = True

            font_size = node.get('style', {}).get('fontSize', 14)
            header_tag = determine_header_tag(font_size)
            text_parts.append(f'<{header_tag}>{text}</{header_tag}>')

        elif node_type in ('IMAGE', 'RECTANGLE'):
            # NOTE(review): get_image_url() takes the section number from the
            # digits in its second argument; here that is the frame name, so
            # 'graph_*' nodes would resolve under section "540" - confirm.
            resolved = get_image_url(node['name'], frame['name'])
            # Keep a URL found earlier: decorative rectangles resolve to None
            # and used to overwrite it.
            if resolved:
                image_url = resolved

    return {
        "Content": {
            "Title": "<h1>Next up</h1>",  # The title is always "Next up"
            "Text": ' '.join(text_parts)
        },
        "image_url": image_url
    }


### Process all the frames 

In [14]:
# Function to process a frame and extract relevant information
def process_frame(frame, module_name, chapter_name, lesson_number, column_number, is_module_intro=False):
    """Convert one Figma frame into a flat record for export.

    Frames whose name matches a known template are delegated to the matching
    ``extract_*`` function; every other frame gets the default treatment
    (all text concatenated as header-tagged markup, plus every resolvable
    image URL).

    Args:
        frame: Frame dict with 'name', 'children' and 'absoluteBoundingBox'.
        module_name: Stored under 'Section'; also used to resolve graphic URLs
            in the default branch.
        chapter_name: Stored under 'Chapter'.
        lesson_number: Stored under 'Lesson' unless ``is_module_intro`` is set.
        column_number: Stored under 'Column'.
        is_module_intro: When True, no 'Lesson' key is added.

    Returns:
        The record dict, or None (after printing a warning) when the frame
        has no 'absoluteBoundingBox'.
    """
    if 'absoluteBoundingBox' not in frame:
        print(f"Warning: 'absoluteBoundingBox' missing for frame '{frame.get('name', 'Unnamed')}'. Skipping this frame.")
        return None

    template_type = determine_template_type(frame)

    # Collects every image URL found by the default branch; the same list
    # object is stored in the record.
    image_urls = []

    frame_data = {
        'Section': module_name,
        'Chapter': chapter_name,
        'Frame Title': frame['name'],
        'Template Type': template_type,
        'Column': column_number,
        'Text': '',
        'x': frame['absoluteBoundingBox']['x'],
        'y': frame['absoluteBoundingBox']['y'],
        'image_url': image_urls
    }

    if not is_module_intro:
        # Only add 'Lesson' if it's not part of Module Intro
        frame_data['Lesson'] = lesson_number

    frame_name = frame['name']

    # Templates whose extractor result is stored under 'Content' and that
    # carry no flat 'Text' field.
    content_extractors = {
        'Module_Overview-1080': extract_module_overview_1080,
        'Learning_Objectives-1440': extract_learning_objectives_1440,
    }
    # Templates whose extractor result (keys such as 'Content' and
    # 'image_url') is merged straight into the record.
    merged_extractors = {
        'Advance_Chapter-540': extract_advance_chapter_540,
        'Chapter_Title-1200': extract_chapter_title_1200,
        'Chapter_Overview-720': extract_chapter_overview_720,
        'Chapter_ToC-1440': extract_chapter_toc_1440,
        'Advance_Lesson-540': extract_advance_lesson_540,
    }

    if frame_name == 'Module_Title-1440':
        # NOTE(review): the whole record returned by the extractor (not just
        # its 'Content' part) is nested under 'Content' - kept as-is, confirm
        # this is intended.
        frame_data['Content'] = extract_module_title_1440(frame, column_number)
        frame_data.pop('Text', None)
    elif frame_name in content_extractors:
        frame_data['Content'] = content_extractors[frame_name](frame)
        frame_data.pop('Text', None)
    elif frame_name in merged_extractors:
        frame_data.update(merged_extractors[frame_name](frame))
    else:
        # Default processing: walk the children in reading order
        # (y-coordinate first, then x-coordinate).
        sorted_nodes = sorted(
            frame['children'],
            key=lambda n: (
                n.get('absoluteBoundingBox', {}).get('y', 0),
                n.get('absoluteBoundingBox', {}).get('x', 0)
            )
        )
        for node in sorted_nodes:
            if node['type'] == 'TEXT':
                font_size = node.get('style', {}).get('fontSize', 14)  # Default to 14 if no fontSize is found
                header_tag = determine_header_tag(font_size)
                text = node['characters']
                frame_data['Text'] += f'<{header_tag}>{text}</{header_tag}>'

            # Consider IMAGE and RECTANGLE nodes for image processing
            if node['type'] in ['IMAGE', 'RECTANGLE']:
                image_url = get_image_url(node['name'], module_name)
                if image_url:
                    image_urls.append(image_url)

    return frame_data


### Extracting lessons 

In [15]:
# Function to extract frames from lessons
def extract_frames(nodes, module_name, chapter_name, lesson_number):
    """Process every FRAME node of a lesson into a record.

    Args:
        nodes: Iterable of node dicts; only those with type 'FRAME' are used.
        module_name: Passed through to process_frame().
        chapter_name: Passed through to process_frame().
        lesson_number: Passed through to process_frame().

    Returns:
        List of the records process_frame() produced, in input order. Column
        numbers start at 1 and only advance for frames that yielded a record.
    """
    collected = []

    for candidate in nodes:
        if candidate['type'] != 'FRAME':
            continue

        # The next column is always one past the number of records kept so far.
        record = process_frame(candidate, module_name, chapter_name, lesson_number, len(collected) + 1)
        if record:
            collected.append(record)

    return collected

# Test Case
"""
The input is a list of nodes (frames) along with the section name, chapter name, and lesson number.
The function extracts relevant frames and returns a list of processed frames.
Example input: A list of frames with text or image content.
Expected output: A list of dictionaries representing each processed frame.
"""

# # Example test (uncomment to test)
# nodes = [
#     {
#         'type': 'FRAME',
#         'name': 'Frame 1',
#         'absoluteBoundingBox': {'x': 100, 'y': 200},
#         'children': [{'type': 'TEXT', 'characters': 'This is frame 1 text'}]
#     },
#     {
#         'type': 'FRAME',
#         'name': 'Frame 2',
#         'absoluteBoundingBox': {'x': 300, 'y': 400},
#         'children': [{'type': 'IMAGE', 'name': 'image_1'}]
#     }
# ]
# module_name = "Module 1"
# chapter_name = "Chapter 1"
# lesson_number = 1

# frames = extract_frames(nodes, module_name, chapter_name, lesson_number)
# print(frames)  # Expected output: A list of processed frame dictionaries


'\nThe input is a list of nodes (frames) along with the section name, chapter name, and lesson number.\nThe function extracts relevant frames and returns a list of processed frames.\nExample input: A list of frames with text or image content.\nExpected output: A list of dictionaries representing each processed frame.\n'

### Assigning some rows as lessons

In [16]:
# Function to dynamically assign lessons based on similar y-values (rows)
def assign_lessons_by_row(frames, y_threshold=500):
    """Number frames by lesson according to the vertical gaps between them.

    The list is sorted in place by (y, x) of each frame's
    'absoluteBoundingBox'. Walking it in that order, the lesson number starts
    at 1 and goes up by one whenever the y distance to the previous frame
    exceeds ``y_threshold``. The number is written to each frame's 'Lesson'
    key.

    Args:
        frames: List of frame dicts, each with 'absoluteBoundingBox'
            containing 'x' and 'y'. Modified in place.
        y_threshold: Minimum vertical gap that separates two lessons; it
            depends on the vertical spacing used between lesson rows.

    Returns:
        The same list object, sorted and annotated (returned unchanged when
        empty).
    """
    if not frames:
        return frames

    def position(frame):
        box = frame['absoluteBoundingBox']
        return box['y'], box['x']

    frames.sort(key=position)

    lesson = 1
    previous_y = frames[0]['absoluteBoundingBox']['y']

    for frame in frames:
        y = frame['absoluteBoundingBox']['y']
        gap = abs(y - previous_y)
        if gap > y_threshold:
            lesson += 1
        frame['Lesson'] = lesson
        previous_y = y

    return frames

# Test Case

"""
The input is a list of frames with y-values and an optional threshold.
The function assigns lesson numbers to frames based on vertical spacing (y-coordinate differences).
Example input: A list of frames with valid 'absoluteBoundingBox' values.
Expected output: The list of frames, each assigned with a lesson number.
"""

# # Example test (uncomment to test)
# frames = [
#     {'absoluteBoundingBox': {'y': 100, 'x': 50}},
#     {'absoluteBoundingBox': {'y': 200, 'x': 60}},
#     {'absoluteBoundingBox': {'y': 800, 'x': 50}},
#     {'absoluteBoundingBox': {'y': 900, 'x': 60}}
# ]
# y_threshold = 300

# assigned_frames = assign_lessons_by_row(frames, y_threshold)

# # Output the results of the test
# for frame in assigned_frames:
#     print(f"Frame at y={frame['absoluteBoundingBox']['y']} assigned to lesson {frame['Lesson']}")


"\nThe input is a list of frames with y-values and an optional threshold.\nThe function assigns lesson numbers to frames based on vertical spacing (y-coordinate differences).\nExample input: A list of frames with valid 'absoluteBoundingBox' values.\nExpected output: The list of frames, each assigned with a lesson number.\n"

### Creating a Module Intro section

In [17]:
# Function to extract frames from the Module Intro section
def extract_module_intro(document_children):
    """Collect the records of every frame inside a 'Module Intro' section.

    Every CANVAS node is searched for child sections whose name contains
    'Module Intro'. The FRAME children of such a section are processed in
    (y, x) order: 'Module_Title-1440' goes through its dedicated extractor,
    all other frames through process_frame() with ``is_module_intro=True``.

    Args:
        document_children: Top-level nodes of the Figma document.

    Returns:
        List of frame records. Column numbers run from 1 across everything
        collected and only advance for frames that yielded a record.
    """
    collected = []

    for canvas in document_children:
        if canvas['type'] != 'CANVAS':
            continue

        for section in canvas['children']:
            section_title = section['name']
            if 'Module Intro' not in section_title:
                continue

            # Frames only, in reading order (top to bottom, then left to right)
            intro_frames = sorted(
                (child for child in section['children'] if child['type'] == 'FRAME'),
                key=lambda f: (f['absoluteBoundingBox']['y'], f['absoluteBoundingBox']['x']),
            )

            for intro_frame in intro_frames:
                position = len(collected) + 1
                if intro_frame['name'] == 'Module_Title-1440':
                    record = extract_module_title_1440(intro_frame, position)
                else:
                    record = process_frame(intro_frame, section_title, "", 0, position, is_module_intro=True)

                if record:
                    collected.append(record)

    return collected


### Extract Chapters

In [18]:
# Function to extract only chapters and lessons dynamically based on vertical spacing
def extract_chapters_and_lessons(document_children):
    """Group the frames of every chapter into lessons by vertical spacing.

    For each CANVAS node, children whose name contains 'Chapter' are treated
    as chapters. Their FRAME children (except those whose name starts with
    'Vector') are numbered by assign_lessons_by_row() and grouped into one
    list per lesson.

    Args:
        document_children: Top-level nodes of the Figma document.

    Returns:
        List of {'chapter_name': str, 'lessons': [[frame, ...], ...]} dicts.
        A chapter without usable frames is returned with an empty 'lessons'
        list.
    """
    chapters = []

    for canvas_node in document_children:
        if canvas_node['type'] != 'CANVAS':
            continue

        for chapter_node in canvas_node['children']:
            # Only nodes with 'Chapter' in their name; skips templates or images
            if 'Chapter' not in chapter_node['name']:
                continue

            all_frames = [
                frame for frame in chapter_node['children']
                if frame['type'] == 'FRAME' and not frame['name'].startswith('Vector')
            ]

            lessons = []
            if all_frames:  # An empty chapter has nothing to number or group
                # Assign lessons based on vertical spacing
                frames_with_lessons = assign_lessons_by_row(all_frames, y_threshold=500)

                # Lesson numbers are non-decreasing in the returned order, so
                # a change of number marks the start of the next group.
                previous_number = object()  # sentinel that equals no lesson number
                for frame in frames_with_lessons:
                    if frame['Lesson'] != previous_number:
                        lessons.append([])
                        previous_number = frame['Lesson']
                    lessons[-1].append(frame)

            chapters.append({
                'chapter_name': chapter_node['name'],  # e.g., "Chapter 1"
                'lessons': lessons
            })

    return chapters


# Test Case
"""
The input is a list of document children (Figma nodes). 
The function extracts chapters and lessons dynamically based on the structure of nodes and their vertical spacing.
Example input: A list of chapters with valid 'children' nodes and their associated frames, each with 'absoluteBoundingBox' values containing both 'x' and 'y'.
Expected output: A list of chapters, each containing a list of lessons with associated frames.
"""

# # Example test (uncomment to test)
# document_children = [
#     {
#         'type': 'CANVAS',
#         'children': [
#             {
#                 'type': 'FRAME',
#                 'name': 'Chapter 1',
#                 'children': [
#                     {'type': 'FRAME', 'absoluteBoundingBox': {'y': 100, 'x': 50}, 'name': 'Frame 1'},
#                     {'type': 'FRAME', 'absoluteBoundingBox': {'y': 300, 'x': 60}, 'name': 'Frame 2'},
#                     {'type': 'FRAME', 'absoluteBoundingBox': {'y': 900, 'x': 50}, 'name': 'Frame 3'}
#                 ]
#             },
#             {
#                 'type': 'FRAME',
#                 'name': 'Chapter 2',
#                 'children': [
#                     {'type': 'FRAME', 'absoluteBoundingBox': {'y': 100, 'x': 40}, 'name': 'Frame 1'},
#                     {'type': 'FRAME', 'absoluteBoundingBox': {'y': 400, 'x': 70}, 'name': 'Frame 2'}
#                 ]
#             }
#         ]
#     }
# ]

# # Run the extract_chapters_and_lessons function with the test case
# chapters = extract_chapters_and_lessons(document_children)

# # Output the results of the test
# for chapter in chapters:
#     print(f"Chapter: {chapter['chapter_name']}")
#     for i, lesson in enumerate(chapter['lessons'], start=1):
#         print(f"  Lesson {i}: {[frame['name'] for frame in lesson]}")


"\nThe input is a list of document children (Figma nodes). \nThe function extracts chapters and lessons dynamically based on the structure of nodes and their vertical spacing.\nExample input: A list of chapters with valid 'children' nodes and their associated frames, each with 'absoluteBoundingBox' values containing both 'x' and 'y'.\nExpected output: A list of chapters, each containing a list of lessons with associated frames.\n"

### Save result in a .CSV file

In [19]:
def save_to_csv(frames, module_name, chapter_name, filename, is_module_intro=False, is_intro=False):
    """Write frames to a CSV file below ``../01_Results/<module_name>/``.

    Frames that carry a 'y' key are ordered by it (ascending); frames without
    one keep their relative order and are placed last. Every row receives a
    1-based 'Column' number reflecting that order, and the 'x' / 'y' keys are
    left out of the file.

    The input dicts are not modified: rows are built from shallow copies, so
    the same ``frames`` list can be handed to another writer afterwards and
    still be sorted by 'y' there.

    Args:
        frames: List of dicts, one per frame.
        module_name: Name of the module folder.
        chapter_name: Name of the chapter folder (unused for the module intro).
        filename: Name of the CSV file to create inside the target folder.
        is_module_intro: If true, write into ``Module_Intro`` and omit the
            'Lesson' and 'Chapter' columns.
        is_intro: If true (and ``is_module_intro`` is false), write into
            ``<chapter_name>/Chapter_Intro`` and omit the 'Lesson' column.
    """
    # sorted() is stable, so frames sharing the same 'y' keep their input order.
    positioned = sorted(
        (frame for frame in frames if 'y' in frame),
        key=lambda frame: frame['y'],
    )
    unpositioned = [frame for frame in frames if 'y' not in frame]

    # Keys that must not appear in the output for this kind of file.
    if is_module_intro:
        dropped_keys = ('x', 'y', 'Lesson', 'Chapter')
    elif is_intro:
        dropped_keys = ('x', 'y', 'Lesson')
    else:
        dropped_keys = ('x', 'y')

    rows = []
    for column, frame in enumerate(positioned + unpositioned, start=1):
        row = dict(frame)  # shallow copy: leave the caller's dict untouched
        row['Column'] = column  # column number follows the sorted sequence
        for key in dropped_keys:
            row.pop(key, None)
        rows.append(row)

    # Collect the header in first-seen order. A set would make the column
    # order change from run to run because of string hash randomisation.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))

    # Create the folder path dynamically
    if is_module_intro:
        folder = os.path.join('..', '01_Results', module_name, 'Module_Intro')
    elif is_intro:
        folder = os.path.join('..', '01_Results', module_name, chapter_name, 'Chapter_Intro')
    else:
        folder = os.path.join('..', '01_Results', module_name, chapter_name)

    os.makedirs(folder, exist_ok=True)
    csv_path = os.path.join(folder, filename)

    with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)  # keys missing from a row are written as ''


### Save results in a Json file

In [20]:
# Function to save the frames data to JSON files
def save_to_json(frames, module_name, chapter_name, filename, is_module_intro=False):
    """Write frames to a JSON file below ``../00_API/<module_name>/``.

    Frames that carry a 'y' key are ordered by it (ascending); frames without
    one keep their relative order and are placed last. Every entry receives a
    1-based 'Column' number reflecting that order, and the 'x' / 'y' keys are
    left out of the file.

    The input dicts are not modified: the output is built from shallow copies
    so the caller's ``frames`` keep their position data and original keys.

    Args:
        frames: List of dicts, one per frame.
        module_name: Name of the module folder.
        chapter_name: Name of the chapter folder (unused for the module intro).
        filename: Name of the JSON file to create inside the target folder.
        is_module_intro: If true, write into ``Module_Intro`` and omit the
            'Lesson' and 'Chapter' keys.
    """
    # sorted() is stable, so frames sharing the same 'y' keep their input order.
    positioned = sorted(
        (frame for frame in frames if 'y' in frame),
        key=lambda frame: frame['y'],
    )
    unpositioned = [frame for frame in frames if 'y' not in frame]

    dropped_keys = ('x', 'y', 'Lesson', 'Chapter') if is_module_intro else ('x', 'y')

    records = []
    for column, frame in enumerate(positioned + unpositioned, start=1):
        record = dict(frame)  # shallow copy: leave the caller's dict untouched
        record['Column'] = column
        for key in dropped_keys:
            record.pop(key, None)
        records.append(record)

    if is_module_intro:
        folder = os.path.join('..', '00_API', module_name, 'Module_Intro')
    else:
        folder = os.path.join('..', '00_API', module_name, chapter_name)

    os.makedirs(folder, exist_ok=True)
    json_path = os.path.join(folder, filename)

    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(records, json_file, indent=4, ensure_ascii=False)
        
        
# Test Case
"""
The input is a list of frames with data, along with section name, chapter name, lesson number, and a filename.
The function saves the data to a JSON file.
Example input: List of frames with 'x' and 'y' positions and other relevant data.
Expected output: The frames are sorted and written to a JSON file.
"""

# # Example test (uncomment to test)
# filename_json = "lesson_1.json"
# save_to_json(frames, module_name, chapter_name, filename_json)
# print(f"JSON file for {module_name}, {chapter_name} is saved as '{filename_json}' in API folder")

"\nThe input is a list of frames with data, along with section name, chapter name, lesson number, and a filename.\nThe function saves the data to a JSON file.\nExample input: List of frames with 'x' and 'y' positions and other relevant data.\nExpected output: The frames are sorted and written to a JSON file.\n"

### Process Multiple files sequentially

In [21]:
# Function to fetch data, with delay and retry logic for handling rate limits and avoiding timeouts
def fetch_file_data(file_key):
    """Fetch a Figma file's JSON, retrying on transient failures.

    Up to five attempts are made. A 429 response waits for the number of
    seconds given in its ``Retry-After`` header (falling back to the default
    delay when the header is missing or not an integer). Network errors,
    undecodable bodies and other retryable statuses wait the default delay.
    Client errors other than 408/429 are treated as permanent and stop
    immediately.

    Args:
        file_key: Key of the Figma file, inserted into FIGMA_API_URL_TEMPLATE.

    Returns:
        The decoded JSON response on success, or None if the file could not
        be fetched.
    """
    max_retries = 5
    delay_between_retries = 10  # Delay of 10 seconds between retries
    request_timeout = 30  # Seconds; without it a stalled connection blocks forever

    url = FIGMA_API_URL_TEMPLATE.format(file_key)

    for attempt in range(1, max_retries + 1):
        wait = delay_between_retries
        try:
            response = requests.get(url, headers=headers, timeout=request_timeout)
        except requests.RequestException as error:
            # Connection problems and timeouts are transient: report and retry.
            print(f"Error: Request for file key {file_key} failed: {error}")
        else:
            status = response.status_code
            if status == 200:
                try:
                    return response.json()
                except ValueError:
                    print(f"Error: Response for file key {file_key} is not valid JSON.")
            elif status == 429:  # Rate limit hit
                print("Rate limit hit. Waiting for rate limit reset...")
                try:
                    wait = max(0, int(response.headers.get('Retry-After', delay_between_retries)))
                except (TypeError, ValueError):
                    # Retry-After may also be an HTTP-date; use the default delay then.
                    wait = delay_between_retries
            else:
                print(f"Error: Failed to fetch data for file key {file_key}. Status code: {status}")
                if 400 <= status < 500 and status != 408:
                    # Bad token, missing file, etc. will not succeed on a retry.
                    return None

        # No point in sleeping once the last attempt has been used up.
        if attempt < max_retries:
            time.sleep(wait)

    print(f"Max retries reached for file {file_key}. Skipping...")
    return None


# Test Case 

from unittest.mock import patch
"""
The function fetches data from the Figma API using the provided file key and handles retries in case of failure or rate limit hits.
Example input: A Figma file key.
Expected output: The function returns the file data when successful, retries on rate limit hit, or handles failure gracefully.
"""

# Example test  (uncomment to test)

# # add real file key and token to test
# file_key = "example_file_key"
# FIGMA_API_URL_TEMPLATE = 'https://api.figma.com/v1/files/{}'
# headers = {'X-Figma-Token': 'add_token'}

# # Mocking requests.get to simulate different responses - Uncomment to test it 
# @patch('requests.get')
# def test_fetch_file_data(mock_get):
#     # Simulate a successful API response
#     mock_get.return_value.status_code = 200
#     mock_get.return_value.json.return_value = {'name': 'Test Figma File'}

#     data = fetch_file_data(file_key)
#     assert data == {'name': 'Test Figma File'}, "Test failed for successful fetch."
#     print("Test passed for successful fetch.")

#     # Simulate rate limit hit (status 429)
#     mock_get.return_value.status_code = 429
#     mock_get.return_value.headers = {'Retry-After': '1'}  # 1 second delay

#     data = fetch_file_data(file_key)
#     assert data is None, "Test failed for rate limit handling."
#     print("Test passed for rate limit handling.")

#     # Simulate a general failure (status 500)
#     mock_get.return_value.status_code = 500

#     data = fetch_file_data(file_key)
#     assert data is None, "Test failed for general failure."
#     print("Test passed for general failure.")

# # Run the test
# test_fetch_file_data()


'\nThe function fetches data from the Figma API using the provided file key and handles retries in case of failure or rate limit hits.\nExample input: A Figma file key.\nExpected output: The function returns the file data when successful, retries on rate limit hit, or handles failure gracefully.\n'

<h2> Main Processing Function </h2>

In [22]:
# Keys of the Figma files to process; each key is passed to process_figma_files in turn
file_keys = ['XN57QSSgnI4exbB2OTK1QM']   # you can add multiple files

In [23]:
def process_figma_files(file_keys):
    """Fetch every Figma file in ``file_keys`` and save its content as CSV and JSON.

    For each file the module intro (if any) is saved first, then every chapter
    is walked: the first lesson group of a chapter is saved as ``intro`` and
    the following ones as ``Lesson_1``, ``Lesson_2``, ... Files that cannot be
    fetched, or that lack a 'name' or 'document' entry, are reported and
    skipped.

    Args:
        file_keys: Iterable of Figma file keys.
    """
    for file_key in file_keys:
        file_data = fetch_file_data(file_key)

        # Guard: nothing usable came back for this key.
        if not (file_data and 'name' in file_data):
            print(f"Error: Could not retrieve file name for file key: {file_key}")
            continue

        file_name = file_data['name']  # e.g. "SEA Test Module 1"
        print(f"Processing Module {file_name}")

        # Guard: the payload has no document tree to walk.
        if 'document' not in file_data:
            print(f"Error: 'document' key not found in file {file_key}.")
            continue

        document_children = file_data['document']['children']

        # The module intro is extracted and stored separately from the chapters.
        module_intro_frames = extract_module_intro(document_children)
        if module_intro_frames:
            csv_filename = 'Module_Intro.csv'
            json_filename = 'Module_Intro.json'
            save_to_csv(module_intro_frames, file_name, "", csv_filename, is_module_intro=True)
            save_to_json(module_intro_frames, file_name, "", json_filename, is_module_intro=True)
            print(f"Module Intro saved as {csv_filename} and {json_filename}")

        # Chapters and their lessons are derived from the vertical spacing of frames.
        for chapter in extract_chapters_and_lessons(document_children):
            chapter_name = chapter['chapter_name']
            lessons = chapter['lessons']

            for lesson_number, lesson_frames in enumerate(lessons):
                frames = extract_frames(lesson_frames, file_name, chapter_name, lesson_number)

                # Index 0 is the chapter intro; later groups are Lesson_1, Lesson_2, ...
                stem = 'intro' if lesson_number == 0 else f'Lesson_{lesson_number}'
                csv_filename = f'{stem}.csv'
                json_filename = f'{stem}.json'

                save_to_csv(frames, file_name, chapter_name, csv_filename)
                save_to_json(frames, file_name, chapter_name, json_filename)
                print(f"Lesson {lesson_number} saved as {csv_filename} and {json_filename}")


In [24]:
# Run the main function with the list of file keys: fetches each file and saves its CSV/JSON outputs
process_figma_files(file_keys)

Processing Module SEA Test Module 1
Module Intro saved as Module_Intro.csv and Module_Intro.json
Lesson 0 saved as intro.csv and intro.json
Lesson 1 saved as Lesson_1.csv and Lesson_1.json
Lesson 2 saved as Lesson_2.csv and Lesson_2.json
Lesson 0 saved as intro.csv and intro.json
Lesson 1 saved as Lesson_1.csv and Lesson_1.json
Lesson 2 saved as Lesson_2.csv and Lesson_2.json
Lesson 0 saved as intro.csv and intro.json
Lesson 1 saved as Lesson_1.csv and Lesson_1.json
Lesson 2 saved as Lesson_2.csv and Lesson_2.json
