<a href="https://colab.research.google.com/github/dineshuthayakumarkissflow/kf-custom-components/blob/main/Attachment_Folder_Organisation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install requests requests-aws4auth boto3

# **PR Processing Logic**


In [145]:
from re import split
import csv
import boto3
import requests
from google.colab import userdata

# Notebook configuration, read once from the Colab secrets store.
# 'list-of-fields' is a comma-separated list of attachment field names.
# Names are trimmed and blank entries dropped so that "a, b," parses the
# same as "a,b"; an untrimmed " b" would never match a key in a process item.
attachment_columns = [
    column.strip()
    for column in split(',', userdata.get('list-of-fields'))
    if column.strip()
]
aws_access_key = userdata.get("Aws-Access-Key")
aws_secret_key = userdata.get("Aws-Secret-Key")
aws_bucket_name = userdata.get("Aws-Bucket-Name")
aws_region = userdata.get('Aws-Region')

# ---------- Data Classes ----------

class AttachmentFile:
    """A single stored file: its attachment id, file name and storage location."""

    def __init__(self, id, filename, location):
        self.id, self.filename, self.location = id, filename, location

    def __repr__(self):
        # id is rendered bare; filename and location are wrapped in single quotes.
        details = f"id={self.id}, filename='{self.filename}', location='{self.location}'"
        return f"AttachmentFile({details})"

class ProcessField:
    """An attachment field of a process together with the files found for it."""

    def __init__(self, field_schema_name, attachments):
        # attachments is stored as given (a list of AttachmentFile), not copied.
        self.attachments = attachments
        self.field_schema_name = field_schema_name

    def __repr__(self):
        name_part = f"field_schema_name='{self.field_schema_name}'"
        files_part = f"attachments={self.attachments}"
        return f"ProcessField({name_part}, {files_part})"

class Process:
    """A process item plus the attachment fields collected for it.

    ``fields`` starts out as an empty list and is filled with ProcessField
    objects after construction.
    """

    def __init__(self, id, name, request_number, requester_name):
        self.id, self.name = id, name
        self.request_number, self.requester_name = request_number, requester_name
        self.fields = []  # ProcessField entries, appended later

    def __repr__(self):
        # request_number and fields use str-style formatting; the rest use repr.
        head = f"Process(id={self.id!r}, name={self.name!r}, request_number={self.request_number}, "
        tail = f"requester_name={self.requester_name!r}, fields={self.fields})"
        return head + tail

In [100]:
# ---------- Kissflow Service Client ----------
# Connection settings for the Kissflow process API, read from Colab secrets.
kf_sub_domain = userdata.get('kf-sub-domain')
kf_api_version = userdata.get('kf-api-version')
kf_accountid = userdata.get('kf-accountid')
kf_process_name = userdata.get('kf-process-name')

# Item-list endpoint of the configured process.
kissflow_api_url = (
    f"https://{kf_sub_domain}.kissflow.com/process/{kf_api_version}/"
    f"{kf_accountid}/admin/{kf_process_name}/item"
)

# API version "2" authenticates with an access key id/secret pair;
# any other version uses a single API key. Only the secrets of the
# selected scheme are read.
kissflow_headers = (
    {
        "X-Access-Key-Id": userdata.get("X-Access-Key-Id"),
        "X-Access-Key-Secret": userdata.get("X-Access-Key-Secret"),
    }
    if kf_api_version == "2"
    else {"X-Api-Key": userdata.get("X-Api-Key")}
)

# Query parameters sent with every request: which page to fetch and its size.
kissflow_params = dict(
    page_number=int(userdata.get('kf-page-no')),
    page_size=int(userdata.get('kf-page-size')),
    apply_preference=False,
)

In [101]:
# ---------- Fetch all process items from Kissflow API ----------

def fetch_kissflow_data(timeout=30):
    """Fetch process items from the Kissflow API.

    Sends a GET to ``kissflow_api_url`` using the module-level
    ``kissflow_headers`` and ``kissflow_params`` (page number and page size).

    Args:
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block the notebook indefinitely.

    Returns:
        The list stored under the response's "Data" key. An empty list is
        returned (after printing the reason) when the request fails or the
        status is not 200, and also when "Data" is missing or null.
    """
    try:
        response = requests.get(
            kissflow_api_url,
            headers=kissflow_headers,
            params=kissflow_params,
            timeout=timeout,
        )
    except requests.RequestException as error:
        # Network-level failures are reported the same way as HTTP errors.
        print("Error fetching Kissflow data:", error)
        return []

    if response.status_code != 200:
        print("Error fetching Kissflow data:", response.text)
        return []

    # "or []" guards against an explicit null, which callers would iterate.
    return response.json().get("Data") or []

In [40]:
# ---------- Fetch specific process item from Kissflow API ----------

def fetch_specific_process_item_from_kissflow_data(item_id="PknLKUFlWiFql", timeout=30):
    """Fetch a single process item from the Kissflow API.

    The item URL is built locally instead of being assigned to the
    module-level ``kissflow_api_url``: that name holds the item-list endpoint
    read by ``fetch_kissflow_data``, and overwriting it here made the list
    fetch hit the single-item URL when the notebook is run top to bottom.

    Args:
        item_id: Id of the process item to fetch. Defaults to the id that was
            previously hard-coded in the URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (after printing
        the reason).
    """
    item_url = (
        f"https://{kf_sub_domain}.kissflow.com/process/{kf_api_version}/"
        f"{kf_accountid}/admin/{kf_process_name}/{item_id}"
    )
    try:
        response = requests.get(
            item_url,
            headers=kissflow_headers,
            params=kissflow_params,
            timeout=timeout,
        )
    except requests.RequestException as error:
        print("Error fetching Kissflow data:", error)
        return None

    if response.status_code != 200:
        print("Error fetching Kissflow data:", response.text)
        return None
    return response.json()

In [22]:
# ---------- Amazon S3 Bucket Service Client ----------
def print_to_file(attachments):
    """Dump attachments to ``attachments.csv`` in the working directory.

    The file is overwritten on each call. It starts with an
    ID/Filename/Location header row, followed by one row per attachment
    built from its ``id``, ``filename`` and ``location`` attributes.
    """
    header = ["ID", "Filename", "Location"]
    with open("attachments.csv", mode='w', newline='', encoding='utf-8') as out:
        rows = csv.writer(out)
        rows.writerow(header)
        rows.writerows(
            [entry.id, entry.filename, entry.location] for entry in attachments
        )

def collect_attachment_files():
    """List the configured S3 bucket and describe each stored attachment.

    Credentials, region and bucket name are read from the Colab secrets
    store. The whole bucket is listed (empty prefix) in pages of 500 keys.
    Keys ending in '/' or having fewer than five '/'-separated segments are
    skipped; for the rest, the fourth segment becomes the attachment id and
    the fifth the filename.

    Returns:
        A list of AttachmentFile objects whose ``location`` is the full key.
    """
    client = boto3.client(
        's3',
        aws_access_key_id=userdata.get("Aws-Access-Key"),
        aws_secret_access_key=userdata.get("Aws-Secret-Key"),
        region_name=userdata.get('Aws-Region'),
    )
    listing = client.get_paginator('list_objects_v2').paginate(
        Bucket=userdata.get("Aws-Bucket-Name"),
        Prefix="",
        PaginationConfig={'PageSize': 500},
    )

    found = []
    for page in listing:
        # A page without 'Contents' contributes nothing.
        for entry in page.get('Contents', []):
            object_key = entry['Key']
            segments = object_key.split('/')
            if object_key.endswith('/') or len(segments) < 5:
                continue  # folder marker or key too shallow to hold id + filename
            found.append(
                AttachmentFile(
                    id=segments[3],
                    filename=segments[4],
                    location=object_key,
                )
            )
    return found

# Scan the bucket when this cell runs and keep the result in the
# notebook-global `attachments`, which the mapping cell further down reads.
attachments = collect_attachment_files()

In [102]:
# ---------- Map S3 Attachment to Kissflow Process Items ----------
def map_attachments_to_processes(attachment_files, kissflow_process_items):
    """Build a Process for every Kissflow item and attach its S3 files.

    For each field named in the module-level ``attachment_columns``, every
    attachment reference in the item is matched by ``id`` against
    ``attachment_files``. Each matching file is appended to the process as its
    own ProcessField holding a one-element attachment list.

    Args:
        attachment_files: AttachmentFile objects; their ``id`` values must be
            hashable.
        kissflow_process_items: Item dicts from the Kissflow API. "_id" and
            "Name" are required; "Purchase_Request_Number" and "_created_by"
            fall back to "NA" when absent.

    Returns:
        One Process per input item, in input order. Items without any
        matching attachment are still included, with an empty ``fields``.
    """
    # Index the files once so each reference is a dict lookup instead of a
    # scan of the whole bucket listing. Lists keep input order and duplicates.
    files_by_id = {}
    for file in attachment_files:
        files_by_id.setdefault(file.id, []).append(file)

    matched_processes = []
    for item in kissflow_process_items:
        # "_created_by" is expected to be a dict; chaining .get() on the "NA"
        # string default raised AttributeError whenever the key was missing.
        created_by = item.get("_created_by")
        requester_name = created_by.get("Name") if isinstance(created_by, dict) else "NA"

        process = Process(
            id=item["_id"],
            name=item["Name"],
            request_number=item.get("Purchase_Request_Number", "NA"),
            requester_name=requester_name,
        )
        matched_processes.append(process)

        for attachment_column in attachment_columns:
            # A missing or empty field contributes nothing.
            for attachment_column_details in item.get(attachment_column) or []:
                for file in files_by_id.get(attachment_column_details['id'], []):
                    process.fields.append(ProcessField(
                        field_schema_name=attachment_column,
                        attachments=[file],
                    ))
    return matched_processes

In [None]:
# Fetch the Kissflow items, print the raw payload for inspection, then match
# the S3 files already collected in `attachments` against them.
kissflow_process_items = fetch_kissflow_data()
print(kissflow_process_items)
mapped_processes = map_attachments_to_processes(attachments, kissflow_process_items)

In [None]:
# Print one line per matched attachment: process id, field name and filename,
# with a tab between them (print also inserts its default space separators).
for mapped_process in mapped_processes:
    for mapped_field in mapped_process.fields:
        for mapped_attachment in mapped_field.attachments:
            print(mapped_process.id, '\t', mapped_field.field_schema_name, '\t', mapped_attachment.filename)

# Bare expression: displayed as the cell's output when run in a notebook.
kissflow_process_items

In [147]:
# ---------- Copy files to Amazon S3 Bucket Destination Folder ----------
def copy_files_to_destination(processes, s3_client=None, bucket_name=None,
                              destination_root="RBL___PR_Flow"):
    """Copy every mapped attachment into a per-request folder in the bucket.

    The destination key is
    ``<root>/<request_number> - <requester_name> - <process id>/<field>/Attachment - <attachment id>/<filename>``.
    Source and destination are in the same bucket.

    Args:
        processes: Process objects whose ``fields`` hold the attachments to copy.
        s3_client: Object exposing ``copy_object``. When omitted, a boto3 S3
            client is created from the module-level AWS settings, so the
            function no longer depends on a global ``s3`` that another cell
            must have created first.
        bucket_name: Bucket to copy within; defaults to ``aws_bucket_name``.
        destination_root: Top-level folder for the organised copies.
    """
    if bucket_name is None:
        bucket_name = aws_bucket_name
    if s3_client is None:
        s3_client = boto3.client(
            's3',
            aws_access_key_id=aws_access_key,
            aws_secret_access_key=aws_secret_key,
            region_name=aws_region,
        )

    for proc in processes:
        # Folder shared by all attachments of this request.
        request_folder = (
            f"{destination_root}/{proc.request_number} - {proc.requester_name} - {proc.id}/"
        )
        for field in proc.fields:
            for attachment in field.attachments:
                destination_key = (
                    f"{request_folder}"
                    f"{field.field_schema_name}/"
                    f"Attachment - {attachment.id}/"
                    f"{attachment.filename}"
                )
                print(f"Copying {attachment.location} → {destination_key}")
                s3_client.copy_object(
                    Bucket=bucket_name,
                    CopySource={"Bucket": bucket_name, "Key": attachment.location},
                    Key=destination_key,
                )

In [None]:
# ---------- MAIN EXECUTION ----------
# End-to-end run: list the bucket, fetch the Kissflow items (printing the raw
# payload), match files to items, then copy the matches to their destination.
aws_attachment_files = collect_attachment_files()
kissflow_process_items = fetch_kissflow_data()
print(kissflow_process_items)
mapped_processes = map_attachments_to_processes(aws_attachment_files, kissflow_process_items)
copy_files_to_destination(mapped_processes)

In [150]:
# Listing of bucket keys that start with "B": prints every key that is not a
# folder marker and has more than four '/'-separated segments.
s3 = boto3.client(
    's3',
    region_name=aws_region,
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_key,
)
paginator = s3.get_paginator('list_objects_v2')
pages = paginator.paginate(
    Bucket=aws_bucket_name,
    Prefix="B",
    PaginationConfig={'PageSize': 500},
)

for page in pages:
    # Pages without 'Contents' have nothing to show.
    for obj in page.get('Contents', []):
        key = obj['Key']
        parts = key.split('/')
        if not key.endswith('/') and len(parts) > 4:
            print(key)
