### Test: extract image URLs from frames that contain both an image and text

In [17]:
import requests
import json
import csv
import os
import time  # For introducing delay between requests to prevent API timeouts
import re  # For regex operations to find image references

API_TOKEN = 'add figma token'
FILE_KEYS = [
   'add file key'
    # Add more file keys as needed
]

# Figma API URL template
FIGMA_API_URL_TEMPLATE = 'https://api.figma.com/v1/files/{}'

# GitHub base URL for images
GITHUB_BASE_URL = 'add github url of the image'

# Headers for the API request
headers = {
    'X-Figma-Token': API_TOKEN
}

# Function to determine the appropriate header tag based on font size
def determine_header_tag(font_size):
    if font_size >= 32:
        return 'h1'
    elif 24 <= font_size < 32:
        return 'h2'
    elif 20 <= font_size < 24:
        return 'h3'
    elif 16 <= font_size < 20:
        return 'h4'
    elif 14 <= font_size < 16:
        return 'h5'
    else:
        return 'h6'

# Function to determine the template type based on frame content
def determine_template_type(frame):
    contains_text = any(node['type'] == 'TEXT' for node in frame['children'])
    contains_image = any(node['type'] == 'IMAGE' for node in frame['children'])

    if contains_text and not contains_image:
        return 'Text'
    elif contains_image and not contains_text:
        return 'Photo'
    else:
        return 'Photo'

# Function to find image references and create URLs dynamically
def extract_image_url(text):
    # Use a regex to find image references in the format image_[number]:
    image_references = re.findall(r'image_(\d+):', text)
    # Create a list to store the generated image URLs
    image_urls = []
    
    for image_number in image_references:
        # Construct the GitHub image URL dynamically based on the extracted number
        image_url = f"{GITHUB_BASE_URL}image_{image_number}.png"
        image_urls.append(image_url)  # Add the image URL to the list
    
    return image_urls

# Function to process a frame and extract relevant information, including image URLs
def process_frame(frame, section_number, subsection_number=None):
    if 'absoluteBoundingBox' not in frame:
        print(f"Warning: 'absoluteBoundingBox' missing for frame '{frame.get('name', 'Unnamed')}'. Skipping this frame.")
        return None
    
    template_type = determine_template_type(frame)
    image_id = ', '.join([node['id'] for node in frame['children'] if node['type'] == 'IMAGE'])
    
    frame_data = {
        'Section': section_number,
        'Subsection': subsection_number,
        'Section Title': frame['name'],
        'Template Type': template_type,
        'Column': 0,
        'Text': '',
        'id': image_id,
        'x': frame['absoluteBoundingBox']['x'],
        'y': frame['absoluteBoundingBox']['y'],
        'image_url': []  # Placeholder for image URLs
    }

    for node in frame['children']:
        if node['type'] == 'TEXT':
            font_size = node.get('style', {}).get('fontSize', 14)
            header_tag = determine_header_tag(font_size)
            text = node['characters']
            
            # Add text to the frame
            frame_data['Text'] += f'<{header_tag}>{text}</{header_tag}>'
            
            # Extract image URLs from the text if there are any image references
            image_urls = extract_image_url(text)
            if image_urls:
                # Append URLs to the frame's image_url field
                frame_data['image_url'].extend(image_urls)

    return frame_data

# Function to recursively extract frames from the file data
def extract_frames(nodes, section_number=1, subsection_number=None):
    frames = []
    for node in nodes:
        if node['type'] == 'FRAME':
            if subsection_number is None:
                processed_frame = process_frame(node, section_number)
                if processed_frame:
                    frames.append(processed_frame)
                    section_number += 1
            else:
                processed_frame = process_frame(node, section_number, subsection_number)
                if processed_frame:
                    frames.append(processed_frame)
                    subsection_number += 1
        if 'children' in node:
            frames.extend(extract_frames(node['children'], section_number, subsection_number))
    return frames

# Function to save the frames data to a CSV file
def save_to_csv(frames, filename):
    for i, frame in enumerate(frames):
        frame['Column'] = i + 1
        frame.pop('x', None)
        frame.pop('y', None)

    csv_columns = ['Section', 'Subsection', 'Section Title', 'Template Type', 'Column', 'Text', 'id', 'image_url']
    
    results_folder = os.path.join('..', '01_Results')
    os.makedirs(results_folder, exist_ok=True)

    csv_path = os.path.join(results_folder, filename)
    
    with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=csv_columns)
        writer.writeheader()
        for frame in frames:
            writer.writerow(frame)

# Function to save the frames data to JSON files
def save_to_json(frames, filename):
    for i, frame in enumerate(frames):
        frame['Column'] = i + 1
        frame.pop('x', None)
        frame.pop('y', None)
    
    api_folder = os.path.join('..', '00_API')
    os.makedirs(api_folder, exist_ok=True)

    json_path = os.path.join(api_folder, filename)

    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(frames, json_file, indent=4, ensure_ascii=False)

# Function to process a single Figma file with retry and delay
def process_single_file(file_key, delay=2, max_retries=3):
    retries = 0
    success = False
    while retries < max_retries and not success:
        try:
            response = requests.get(FIGMA_API_URL_TEMPLATE.format(file_key), headers=headers)
            if response.status_code == 200:
                file_data = response.json()
                if 'document' in file_data:
                    frames = extract_frames(file_data['document']['children'])
                    frames.sort(key=lambda f: (f.get('y', float('inf')), f.get('x', float('inf'))))

                    csv_filename = f'figma_frames_{file_key}.csv'
                    json_filename = f'figma_frames_{file_key}.json'
                    
                    save_to_csv(frames, csv_filename)
                    print(f"Frames data has been exported to {csv_filename}")

                    save_to_json(frames, json_filename)
                    print(f"Frames data has been exported to {json_filename}")
                    success = True  # Mark success to exit the retry loop
                else:
                    print(f"Warning: 'document' key not found in file with key {file_key}. Skipping this file.")
                    success = True  # No need to retry
            elif response.status_code == 429:  # Rate limiting error
                print(f"Rate limit hit. Retrying after {delay} seconds...")
                retries += 1
                time.sleep(delay)
            else:
                print(f"Error: Failed to fetch data for file key {file_key}. Status code: {response.status_code}")
                print(f"Response: {response.text}")
                retries += 1
                time.sleep(delay)
        except Exception as e:
            print(f"Error processing file key {file_key}: {e}")
            retries += 1
            time.sleep(delay)
        if retries == max_retries:
            print(f"Max retries reached for file key {file_key}. Skipping this file.")

# Function to process Figma files sequentially with delay
def process_figma_files_sequential(file_keys, delay_between_requests=2):
    for file_key in file_keys:
        process_single_file(file_key, delay=delay_between_requests)

# Execute the main function with the list of file keys sequentially
process_figma_files_sequential(FILE_KEYS)


Frames data has been exported to figma_frames_iLkuiE3rkUk1ph4k0ENtdF.csv
Frames data has been exported to figma_frames_iLkuiE3rkUk1ph4k0ENtdF.json
