In [10]:
pip install PyGithub

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


## Process multiple files separately

In [12]:
import requests
import json
import csv
import os

# Figma file keys to export; each key is fetched and exported independently.
FILE_KEYS = ['XN57QSSgnI4exbB2OTK1QM', '5cH2oMyNQPZ1w9fUPM9Mfe']  # Add more file keys as needed

# Figma API URL template; the placeholder is filled with a file key
FIGMA_API_URL_TEMPLATE = 'https://api.figma.com/v1/files/{}'

# Figma access token. It was never assigned anywhere in this notebook, so a fresh
# kernel failed with NameError when building the headers below. Keep a value that
# is already set in the session; otherwise read the FIGMA_API_TOKEN environment
# variable (empty string when unset, so the failure shows up as an HTTP error
# instead of a crash at definition time). Never hardcode the token here.
API_TOKEN = globals().get('API_TOKEN') or os.environ.get('FIGMA_API_TOKEN', '')

# Headers for the API request
headers = {
    'X-Figma-Token': API_TOKEN
}

# Function to determine the appropriate header tag based on font size
def determine_header_tag(font_size):
    """Map a font size to the name of an HTML heading tag.

    Args:
        font_size: Numeric font size of a text node.

    Returns:
        'h1' for sizes of 32 and above, 'h2' for [24, 32), 'h3' for [20, 24),
        'h4' for [16, 20), 'h5' for [14, 16) and 'h6' for everything else.
    """
    # Lower bounds listed from largest to smallest: the first one reached wins.
    lower_bounds = (
        (32, 'h1'),
        (24, 'h2'),
        (20, 'h3'),
        (16, 'h4'),
        (14, 'h5'),
    )
    for minimum, tag in lower_bounds:
        if font_size >= minimum:
            return tag
    return 'h6'

# Function to determine the template type based on frame content
def determine_template_type(frame):
    """Classify a frame as 'Text' or 'Photo' from its direct children.

    Args:
        frame: Figma node dict; only its 'children' list is inspected. A missing
            or empty list is treated as "no text and no image".

    Returns:
        'Text' when the frame has at least one TEXT child and no image child;
        'Photo' in every other case (image only, text plus image, or neither).
    """
    children = frame.get('children') or []
    contains_text = any(node.get('type') == 'TEXT' for node in children)
    contains_image = any(_node_has_image(node) for node in children)

    # Every combination other than "text without image" maps to 'Photo'.
    if contains_text and not contains_image:
        return 'Text'
    return 'Photo'


def _node_has_image(node):
    """Return True when a node is an image or carries a visible image fill.

    The Figma REST API exposes pictures as paints of type 'IMAGE' inside the
    'fills' list of shape nodes rather than as nodes of type 'IMAGE'. The legacy
    node-type check is kept so that existing inputs behave as before.
    """
    if node.get('type') == 'IMAGE':
        return True
    return any(
        fill.get('type') == 'IMAGE' and fill.get('visible', True)
        for fill in node.get('fills') or []
    )

# Function to process a frame and extract relevant information
def process_frame(frame, section_number, subsection_number=None):
    """Build the export record for a single frame.

    Args:
        frame: Figma frame node dict.
        section_number: Value stored in the record's 'Section' field.
        subsection_number: Value stored in the 'Subsection' field (None by default).

    Returns:
        A dict with the keys 'Section', 'Subsection', 'Section Title',
        'Template Type', 'Column', 'Text', 'id', 'x' and 'y', or None when the
        frame has no usable 'absoluteBoundingBox' (a warning is printed).
    """
    # The key can be absent or null; either way there is no position to sort by.
    bounding_box = frame.get('absoluteBoundingBox')
    if not bounding_box:
        print(f"Warning: 'absoluteBoundingBox' missing for frame '{frame.get('name', 'Unnamed')}'. Skipping this frame.")
        return None  # Return None to indicate skipping this frame

    children = frame.get('children') or []

    def has_image(node):
        # Figma reports pictures as visible 'IMAGE' paints in a node's 'fills';
        # the original node-type check is kept for backward compatibility.
        if node.get('type') == 'IMAGE':
            return True
        return any(
            fill.get('type') == 'IMAGE' and fill.get('visible', True)
            for fill in node.get('fills') or []
        )

    template_type = determine_template_type(frame)
    image_id = ', '.join(node['id'] for node in children if has_image(node))

    # Wrap each text child in the heading tag that matches its font size.
    text_parts = []
    for node in children:
        if node.get('type') != 'TEXT':
            continue
        font_size = node.get('style', {}).get('fontSize', 14)  # Default to 14 if no fontSize is found
        header_tag = determine_header_tag(font_size)
        text = node.get('characters', '')
        text_parts.append(f'<{header_tag}>{text}</{header_tag}>')

    return {
        'Section': section_number,
        'Subsection': subsection_number,
        'Section Title': frame.get('name', 'Unnamed'),
        'Template Type': template_type,
        'Column': 0,  # Placeholder, assigned from the sorted position when saving
        'Text': ''.join(text_parts),
        'id': image_id,  # Comma-separated ids of the image children
        'x': bounding_box['x'],  # Canvas position, used only for sorting
        'y': bounding_box['y'],
    }

# Function to recursively extract frames from the file data
def _collect_frames(nodes, counters, enclosing_section, frames):
    """Walk `nodes` depth-first and append one record per FRAME to `frames`.

    `counters` is a shared dict ('section', 'subsection') so that numbers keep
    advancing across siblings and recursion levels. `enclosing_section` is the
    number of the section the walk is currently inside, or None at section level.
    """
    for node in nodes:
        child_section = enclosing_section
        if node.get('type') == 'FRAME':
            if enclosing_section is None:
                record = process_frame(node, counters['section'])
                if record:  # Skipped frames do not consume a number
                    frames.append(record)
                    child_section = counters['section']
                    counters['section'] += 1
                    counters['subsection'] = 1  # Subsections restart per section
            else:
                record = process_frame(node, enclosing_section, counters['subsection'])
                if record:
                    frames.append(record)
                    counters['subsection'] += 1
        children = node.get('children')
        if children:
            _collect_frames(children, counters, child_section, frames)


def extract_frames(nodes, section_number=1, subsection_number=None):
    """Collect an export record for every FRAME found under `nodes`.

    Previously the counters were passed by value into the recursion, so frames
    nested inside a section reused the numbers of the sections that followed it,
    and the subsection branch could never run. Now:

    * frames that are not inside another exported frame are sections, numbered
      consecutively from `section_number` in traversal order;
    * frames nested inside a section carry that section's number and a
      'Subsection' number that starts at 1 for each section.

    Args:
        nodes: Iterable of Figma node dicts (for example the document's children).
        section_number: Number given to the first section.
        subsection_number: When not None, the frames found are numbered as
            subsections of `section_number`, starting from this value.

    Returns:
        A new list of records produced by process_frame, in depth-first order.
    """
    frames = []
    counters = {
        'section': section_number,
        'subsection': 1 if subsection_number is None else subsection_number,
    }
    enclosing_section = None if subsection_number is None else section_number
    _collect_frames(nodes, counters, enclosing_section, frames)
    return frames

# Function to save the frames data to a CSV file
def save_to_csv(frames, filename):
    """Write the frame records to ../01_Results/<filename> as CSV.

    Each row gets a 1-based 'Column' number equal to its position in `frames`,
    and the sorting helpers 'x' and 'y' are left out. The rows are built as
    copies: `frames` itself is no longer modified, so the caller can still sort
    or export it again afterwards.

    Args:
        frames: List of record dicts, already in the desired order.
        filename: Name of the CSV file to create inside the results folder.

    Raises:
        ValueError: If a record has a key that is neither a CSV column nor
            'x'/'y' (csv.DictWriter rejects unknown fields).
    """
    # Specify the CSV column headers
    csv_columns = ['Section', 'Subsection', 'Section Title', 'Template Type', 'Column', 'Text', 'id']

    # Save into the "01_Results" folder next to the working directory
    results_folder = os.path.join('..', '01_Results')
    os.makedirs(results_folder, exist_ok=True)  # Ensure the folder exists
    csv_path = os.path.join(results_folder, filename)

    with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=csv_columns)
        writer.writeheader()
        for position, frame in enumerate(frames, start=1):
            row = {key: value for key, value in frame.items() if key not in ('x', 'y')}
            row['Column'] = position
            writer.writerow(row)

# Function to save the frames data to JSON files
def save_to_json(frames, filename):
    """Write the frame records to ../00_API/<filename> as a JSON array.

    Each exported record gets a 1-based 'Column' number equal to its position in
    `frames`, and the sorting helpers 'x' and 'y' are left out. The records are
    built as copies: `frames` itself is no longer modified.

    Args:
        frames: List of record dicts, already in the desired order.
        filename: Name of the JSON file to create inside the API folder.
    """
    records = []
    for position, frame in enumerate(frames, start=1):
        record = {key: value for key, value in frame.items() if key not in ('x', 'y')}
        record['Column'] = position
        records.append(record)

    # Save into the "00_API" folder next to the working directory
    api_folder = os.path.join('..', '00_API')
    os.makedirs(api_folder, exist_ok=True)  # Ensure the folder exists
    json_path = os.path.join(api_folder, filename)

    # ensure_ascii=False keeps non-ASCII text readable in the file
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(records, json_file, indent=4, ensure_ascii=False)

# Main processing function to handle multiple files
def process_figma_files(file_keys, timeout=60):
    """Fetch each Figma file and export its frames to CSV and JSON.

    For every key the file is downloaded from the Figma API, its frames are
    extracted, sorted by canvas position (y first, then x) and written to
    figma_frames_<key>.csv and figma_frames_<key>.json. A file that cannot be
    fetched or parsed is reported and skipped, so one bad key does not stop the
    remaining files.

    Args:
        file_keys: Iterable of Figma file keys.
        timeout: Seconds to wait for the Figma API before giving up on a file.
            Without a timeout a stalled connection would block the run forever.
    """
    for file_key in file_keys:
        try:
            response = requests.get(
                FIGMA_API_URL_TEMPLATE.format(file_key),
                headers=headers,
                timeout=timeout,
            )
        except requests.RequestException as error:
            print(f"Error: Failed to fetch data for file key {file_key}. {error}")
            continue

        if response.status_code != 200:
            print(f"Error: Failed to fetch data for file key {file_key}. Status code: {response.status_code}")
            print(f"Response: {response.text}")
            continue

        try:
            file_data = response.json()
        except ValueError as error:
            print(f"Error: Response for file key {file_key} is not valid JSON. {error}")
            continue

        if 'document' not in file_data:
            print(f"Warning: 'document' key not found in file with key {file_key}. Skipping this file.")
            continue

        # Extract frames for the current file
        frames = extract_frames(file_data['document'].get('children') or [])

        # Sort frames by their position on the canvas (first by y, then by x)
        frames.sort(key=lambda f: (f.get('y', float('inf')), f.get('x', float('inf'))))

        # Generate unique filenames for each file
        csv_filename = f'figma_frames_{file_key}.csv'
        json_filename = f'figma_frames_{file_key}.json'

        # Export the frames data to a CSV file
        save_to_csv(frames, csv_filename)
        print(f"Frames data has been exported to {csv_filename}")

        # Export the frames data to a JSON file
        save_to_json(frames, json_filename)
        print(f"Frames data has been exported to {json_filename}")

# Run the export for every key in FILE_KEYS: one CSV (in ../01_Results) and one
# JSON (in ../00_API) is written per Figma file.
process_figma_files(FILE_KEYS)


Frames data has been exported to figma_frames_XN57QSSgnI4exbB2OTK1QM.csv
Frames data has been exported to figma_frames_XN57QSSgnI4exbB2OTK1QM.json
Frames data has been exported to figma_frames_5cH2oMyNQPZ1w9fUPM9Mfe.csv
Frames data has been exported to figma_frames_5cH2oMyNQPZ1w9fUPM9Mfe.json


In [None]:
### Making the code run in sequence, not in parallel

### Run the pipeline

In [8]:
# Step 1: Initialize GitHub Connection
import os
from github import Github

# GitHub repository information

GITHUB_REPO = 'UNDP-Data/dsc-energy-academy-pipeline'  # Replace with your GitHub repository in the form 'username/repository'

# Access token: keep a value already set in the session, otherwise read the
# GITHUB_TOKEN environment variable. Never hardcode the token in the notebook.
GITHUB_TOKEN = globals().get('GITHUB_TOKEN') or os.environ.get('GITHUB_TOKEN')

# Initialize GitHub object.
# The connection lines were commented out, so `repo` only existed as leftover
# kernel state and the cell failed after a restart. `repo` stays None when the
# connection cannot be established.
repo = None
try:
    if not GITHUB_TOKEN:
        raise RuntimeError("GITHUB_TOKEN is not set (define it or export the environment variable)")
    g = Github(GITHUB_TOKEN)
    repo = g.get_repo(GITHUB_REPO)
    print(f"Successfully accessed repository: {repo.full_name}")
except Exception as e:
    print(f"Error accessing repository: {e}")

# Function to upload file to GitHub in the specified folder
def upload_to_github(local_file_path, repo_folder_path, commit_message):
    """Create or update a file in the GitHub repository from a local file.

    The file is stored as <repo_folder_path>/<basename of local_file_path> in
    the repository held by the module-level `repo` object.

    Args:
        local_file_path: Path of the local file to upload.
        repo_folder_path: Target folder inside the repository.
        commit_message: Commit message used for the create/update commit.

    Raises:
        OSError: If the local file cannot be read.
        github.GithubException: If GitHub rejects the request. Only a "not
            found" answer to the existence check is treated as "create the
            file"; previously every error (authentication, conflict, network)
            was swallowed and followed by a create attempt that hid the cause.
    """
    # Local import: only `Github` is imported at the top of the cell.
    from github import UnknownObjectException

    with open(local_file_path, 'rb') as file:
        content = file.read()
    file_name = os.path.basename(local_file_path)
    github_file_path = f"{repo_folder_path}/{file_name}"

    try:
        # Check if the file already exists in the repository
        existing_file = repo.get_contents(github_file_path)
    except UnknownObjectException:
        # The file doesn't exist yet, create a new one.
        # NOTE(review): a 404 raised by this call presumably means the token
        # cannot write to the repository - confirm the token's permissions.
        repo.create_file(github_file_path, commit_message, content)
        print(f"Created new file: {github_file_path}")
        return

    # The file exists: update it using the blob sha GitHub returned.
    repo.update_file(existing_file.path, commit_message, content, existing_file.sha)
    print(f"Updated file: {github_file_path}")

# Function to run the full pipeline
def run_pipeline(file_keys):
    """Export every Figma file, then push the generated files to GitHub.

    Args:
        file_keys: Figma file keys; each one yields a CSV in ../01_Results and
            a JSON in ../00_API, uploaded to the repository folders of the
            same names.
    """
    # Stage 1: regenerate the CSV/JSON exports for all keys.
    process_figma_files(file_keys)

    # Stage 2: upload both exports of each key, CSV first and JSON second.
    for file_key in file_keys:
        uploads = (
            (
                os.path.join('..', '01_Results', f'figma_frames_{file_key}.csv'),
                '01_Results',
                f"Add CSV file for Figma file {file_key}",
            ),
            (
                os.path.join('..', '00_API', f'figma_frames_{file_key}.json'),
                '00_API',
                f"Add JSON file for Figma file {file_key}",
            ),
        )
        for relative_path, repo_folder, message in uploads:
            upload_to_github(os.path.abspath(relative_path), repo_folder, message)

    print("Pipeline complete. Files uploaded to GitHub.")

# List of Figma file keys to export and upload.
# NOTE(review): this repeats the FILE_KEYS list assigned in the Figma export
# cell; keep the two in sync or define the list in one place only.
FILE_KEYS = ['XN57QSSgnI4exbB2OTK1QM', '5cH2oMyNQPZ1w9fUPM9Mfe']  # Replace with your list of file keys

# Execute the full pipeline: run_pipeline fetches and exports the Figma files
# again before uploading the results to GitHub.
run_pipeline(FILE_KEYS)


Successfully accessed repository: UNDP-Data/dsc-energy-academy-pipeline
Frames data has been exported to figma_frames_XN57QSSgnI4exbB2OTK1QM.csv
Frames data has been exported to figma_frames_XN57QSSgnI4exbB2OTK1QM.json
Frames data has been exported to figma_frames_5cH2oMyNQPZ1w9fUPM9Mfe.csv
Frames data has been exported to figma_frames_5cH2oMyNQPZ1w9fUPM9Mfe.json


UnknownObjectException: 404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest/repos/contents#create-or-update-file-contents", "status": "404"}