diff --git a/parse-maintainer.py b/parse-maintainer.py new file mode 100644 index 0000000..f813769 --- /dev/null +++ b/parse-maintainer.py @@ -0,0 +1,168 @@ +""" +Script to parse maintainers data for Forklore PR +Intended for both Maintainer who want to self-contribute or simple anyone to help out! +""" + +import re +import json +from html import escape + +def is_image(url): + return any(url.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp']) + +def format_response(value: str) -> str: + value = value.strip() + + # Convert links and image URLs into HTML tags + def convert_url(match): + url = match.group(0) + if is_image(url): + return f'image' + return f'{url}' + + value = re.sub(r'https?://\S+', convert_url, value) + + # Convert newlines into
+ value = value.replace('\n', '
') + return value + +def parse_multiline_field(lines, start_index): + """Extract multiline field starting at start_index + 1 until next field or section.""" + value_lines = [] + i = start_index + 1 + while i < len(lines): + line = lines[i] + if line.startswith("**") and line.endswith("**"): # Next field label + break + if line.startswith("### ") or line.startswith("## "): # Next section heading + break + value_lines.append(line) + i += 1 + return " ".join(value_lines).strip(), i + + +def parse_issue(markdown: str): + # Remove all comments () + markdown = re.sub(r'', '', markdown, flags=re.DOTALL) + + lines = [line.strip() for line in markdown.strip().splitlines()] + lines = [line for line in lines if line != ''] # remove empty lines + + data = { + "username": "", + "full_name": "", + "photo": "", + "designation": "", + "socials": [], + "projects": [], + "form": [] + } + + # State machine parsing + current_section = None + current_project = {} + project_fields = ["Name", "Project Link", "Website Link", "Logo URL", "Short Description", "Full Description"] + form_questions = [] + + i = 0 + while i < len(lines): + line = lines[i] + + # ==== USER DETAILS ==== + if line.startswith("**Username:**"): + data["username"] = lines[i + 1].strip() + i += 2 + continue + + if line.startswith("**Full Name:**"): + data["full_name"] = lines[i + 1].strip() + i += 2 + continue + + if line.startswith("**Photo URL:**"): + data["photo"] = lines[i + 1].strip() + i += 2 + continue + + if line.startswith("**Designation / Role:**"): + data["designation"] = lines[i + 1].strip() + i += 2 + continue + + if line.startswith("**Social Profiles:**"): + i += 1 + while i < len(lines) and ':' in lines[i]: + label_link = lines[i].split(":", 1) + if len(label_link) == 2: + label, link = label_link[0].strip(), label_link[1].strip() + data["socials"].append({"label": label, "link": link}) + i += 1 + continue + + # ==== PROJECTS ==== + if line.startswith("### Project"): + if current_project: + data["projects"].append(current_project) + current_project = {} + i += 1 + continue + + if any(line.startswith(f"**{field}:**") for field in project_fields): + field = re.match(r"\*\*(.*?):\*\*", line).group(1).strip() + value, i = parse_multiline_field(lines, i) + current_project[field] = value + continue + + # ==== FORM QUESTIONS ==== + question_match = re.match(r"\*\*(\d+\..*?)\*\*", line) + if question_match: + question = question_match.group(1).strip() + # Capture all lines under this until we hit another bold or end + i += 1 + response_lines = [] + while i < len(lines) and not lines[i].startswith("**"): + response_lines.append(lines[i]) + i += 1 + response_text = "\n".join(response_lines).strip() + formatted_response = format_response(response_text) + form_questions.append({ + "question": re.sub(r"^\d+\.\s*", "", question), + "response": formatted_response + }) + continue + + i += 1 + + # Append last project + if current_project: + # Rename keys to match JSON spec + project_json = { + "name": current_project.get("Name", ""), + "project_link": current_project.get("Project Link", ""), + "website_link": current_project.get("Website Link", ""), + "logo": current_project.get("Logo URL", ""), + "description": current_project.get("Full Description", ""), + "short_description": current_project.get("Short Description", "") + } + data["projects"].append(project_json) + + # Append form Q&A + data["form"] = form_questions + + return data + + +if __name__ == "__main__": + import sys + + if len(sys.argv) < 2: + print("Usage: python parse_issue.py ") + sys.exit(1) + + input_file = sys.argv[1] + + with open(input_file, "r", encoding="utf-8") as f: + md = f.read() + result = parse_issue(md) + print(json.dumps(result, indent=2, ensure_ascii=False)) +