# Helper Functions

In [1]:
from difflib import SequenceMatcher
import requests
import re

def similar(a, b):
    """Return the difflib similarity ratio between sequences *a* and *b*.

    The result is the value of ``SequenceMatcher.ratio()`` computed with no
    junk filter; identical inputs score 1.0.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

def find_authors(paper_id, authors):
    """Collect the author rows that belong to one submission.

    Args:
        paper_id: Submission number to look for.
        authors: DataFrame with a 'submission #' column.

    Returns:
        A list of row Series, in table order, whose 'submission #' equals
        ``paper_id``. Empty when nothing matches.
    """
    return [
        record
        for _, record in authors.iterrows()
        if record['submission #'] == paper_id
    ]

def get_paper_type(track_id, tracks):
    """Look up the paper type configured for a track.

    Args:
        track_id: Track number to look for.
        tracks: DataFrame with '#' and 'paper type' columns.

    Returns:
        The 'paper type' value of the first row whose '#' equals
        ``track_id``, or '' when no row matches.
    """
    matching_types = (
        entry['paper type']
        for _, entry in tracks.iterrows()
        if entry['#'] == track_id
    )
    return next(matching_types, '')

def split_authors(authorString):
    """Split a human-readable author list into individual names.

    Accepts both "A, B and C" and the Oxford-comma form "A, B, and C".

    Args:
        authorString: Author names separated by commas and/or " and ".

    Returns:
        The names in their original order, stripped of surrounding
        whitespace, with empty fragments removed.
    """
    # Treat the " and " connective as just another comma separator.
    normalized = authorString.replace(' and ', ', ')
    fragments = (fragment.strip() for fragment in normalized.split(','))
    # Only drop empty fragments (e.g. from ", and"). Names must never be
    # filtered by content: "Alexander" or "Sandra" contain the letters "and".
    return [name for name in fragments if name]

def get_order(author, authorString):
    """Return the zero-based position of *author* within an author list string.

    The author's "<first name> <last name>" is compared, using ``similar``,
    with every name produced by ``split_authors(authorString)``.

    Args:
        author: Mapping with 'first name' and 'last name' entries.
        authorString: The paper's author list as a single string.

    Returns:
        Index of the most similar name; the earliest index wins ties, and 0
        is returned when the list yields no names.
    """
    full_name = author['first name'] + " " + author['last name']
    scores = [similar(full_name, candidate) for candidate in split_authors(authorString)]
    # max() keeps the first of several equal maxima, matching a strict ">" scan.
    return max(range(len(scores)), key=scores.__getitem__, default=0)

def check_location(location, check_country, timeout=10):
    """Check whether a GeoNames search for *location* points at *check_country*.

    The GeoNames search page is fetched and the first "/countries….html"
    link found in the response is taken as the location's country.

    Args:
        location: Free-text place name to search for.
        check_country: Country name to compare against (case-insensitive,
            surrounding whitespace ignored).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True when the last path segment of the first country link (without
        its ".html" extension) equals ``check_country``; False when it
        differs or no country link is present.

    Raises:
        requests.exceptions.RequestException: On network errors or timeout.
    """
    # Pass the query through `params` so requests URL-encodes it; characters
    # such as "&", "#" or spaces in the location would otherwise corrupt the URL.
    response = requests.get(
        "https://www.geonames.org/search.html",
        params={"q": location, "country": ""},
        timeout=timeout,
    )
    # Raw string: "\." is an invalid escape in a normal string literal.
    match = re.search(r"/countries.*\.html", response.text)
    if match is None:
        return False
    country = match.group(0).split("/")[-1].split(".")[0]
    return check_country.strip().lower() == country.strip().lower()

# CSV Files about authors, accepted papers, and different tracks
Before running this cell, add a column called "paper type" to the tracks CSV file. For each track it should contain "full paper", "short paper", or be left empty. Papers from tracks with an empty "paper type" are excluded; all others are included.

In [2]:
import pandas

# Input files.
author_file = 'authors.csv'
submission_file = 'accepted.csv'
tracks_file = 'tracks.csv'
# Approval date written into every exported paper record.
approval_date = "16-January-2025"

# Load the tables; missing cells become '' so string operations below are safe.
authors = pandas.read_csv(author_file)
authors.fillna('', inplace=True)
tracks = pandas.read_csv(tracks_file)
tracks.fillna('', inplace=True)
accepted = pandas.read_csv(submission_file)
accepted.fillna('', inplace=True)
# Strip whitespace from every string cell. DataFrame.apply hands the function
# a whole column, so the per-cell work has to go through Series.map.
accepted = accepted.apply(
    lambda column: column.map(
        lambda cell: cell.strip() if isinstance(cell, str) else cell
    )
)
accepted = accepted[accepted['decision'] != 'reject']
accepted = accepted.sort_values('#', ascending=True)

# Build one record per non-rejected submission whose track has a paper type.
papers = []
for _, submission in accepted.iterrows():
    paper_id = submission['#']
    paper_type = get_paper_type(submission['track #'], tracks)
    if len(paper_type) == 0:
        # Tracks without a paper type are excluded from the export.
        continue
    paper_authors = find_authors(paper_id, authors)
    paper = {
        "paper_type": paper_type,
        "art_submission_date": submission["submitted"],
        "art_approval_date": approval_date,
        "paper_title": submission["title"],
        "event_tracking_number": str(submission["#"]),
        "published_article_number": None,
        "start_page": None,
        "end_page": None,
        "authors": [],
    }
    papers.append(paper)
    for author in paper_authors:
        paper["authors"].append({
            "prefix": None,
            "first_name": author["first name"],
            "middle_name": None,
            "last_name": author["last name"],
            "suffix": None,
            "affiliations": {
                "department": None,
                "institution": author["affiliation"],
                "state_province": None,
                "city": None,
                "country": author["country"],
                "sequence_no": str(1),
            },
            "email_address": author["email"],
            # Position (1-based) of this author in the submission's author string.
            "sequence_no": str(get_order(author, submission["authors"]) + 1),
            # Any non-blank value in the "corresponding?" column marks a contact author.
            "contact_author": "Y" if len(author["corresponding?"].strip()) > 0 else "N",
            "ACM_profile_id": None,
            "ACM_client_no": None,
            "ORCID": None,
        })
    paper["authors"].sort(key=lambda x: int(x["sequence_no"]))
    # Keep only the first flagged contact author; every other author becomes "N".
    contact_found = False
    for author in paper["authors"]:
        if not contact_found and author["contact_author"] == "Y":
            contact_found = True
        else:
            author["contact_author"] = "N"
    

# Save to XML

In [3]:
import xml.etree.ElementTree as ET

proceeding_id = "14792"
output_file = "fdg.xml"


def _set_text(element, value):
    """Set *element*'s text to the stripped string form of *value* (None leaves it empty)."""
    if value is not None:
        # Convert first, then strip, so non-string values do not raise AttributeError.
        element.text = str(value).strip()


def _add_affiliation(affiliations_element, affiliation):
    """Append one <affiliation> with a child element per field of *affiliation*."""
    affiliation_element = ET.SubElement(affiliations_element, "affiliation")
    for af_key, af_value in affiliation.items():
        _set_text(ET.SubElement(affiliation_element, af_key), af_value)


def _add_author(authors_element, author):
    """Append one <author> with a child element per field of *author*."""
    author_element = ET.SubElement(authors_element, 'author')
    for a_key, a_value in author.items():
        a_key_element = ET.SubElement(author_element, a_key)
        if a_key == "affiliations" and a_value is not None:
            _add_affiliation(a_key_element, a_value)
        else:
            _set_text(a_key_element, a_value)


# Root element and the fixed proceeding-level header.
erights_element = ET.Element('erights_record')
parent_element = ET.SubElement(erights_element, 'parent_data')
ET.SubElement(parent_element, 'proceeding').text = proceeding_id
ET.SubElement(parent_element, 'volume')
ET.SubElement(parent_element, 'issue')
ET.SubElement(parent_element, 'issue_date')
ET.SubElement(parent_element, 'source').text = "Easychair"

# One <paper> per record; each dictionary key becomes a child element, and
# None values produce empty elements.
for paper in papers:
    paper_element = ET.SubElement(erights_element, 'paper')
    for key, value in paper.items():
        key_element = ET.SubElement(paper_element, key)
        if key == "authors" and value is not None:
            for author in value:
                _add_author(key_element, author)
        else:
            _set_text(key_element, value)

text_xml = ET.tostring(erights_element)

with open(output_file, "wb") as f:
    f.write(text_xml)

# Create Contact CSV File for a specific type of papers

In [11]:
import csv

csv_file = "contacts.csv"
# Paper types (lower-case) whose contact authors are exported.
paper_types = ["full paper", "short paper"]
# Fixed column order, so the header can be written even with no rows.
contact_fields = ["title", "corresponding_author", "email"]

contacts = []
for paper in papers:
    if paper["paper_type"].lower() not in paper_types:
        continue
    # Pick the author flagged as contact author, if any.
    author = next(
        (candidate for candidate in paper["authors"] if candidate["contact_author"] == "Y"),
        None,
    )
    if author is None:
        if not paper["authors"]:
            print(f"Skipping paper without authors: {paper['paper_title']}")
            continue
        # No author is flagged: fall back to the first entry in the author
        # list instead of silently reusing a stale loop variable.
        author = paper["authors"][0]
        print(f"No contact author flagged, using first author: {paper['paper_title']}")
    contacts.append({
        "title": paper["paper_title"],
        "corresponding_author": author["first_name"] + " " + author["last_name"],
        "email": author["email_address"],
    })

# newline="" is what the csv module requires to avoid blank rows on Windows;
# UTF-8 keeps non-ASCII author names writable on any platform.
with open(csv_file, "w", newline="", encoding="utf-8") as f:
    wr = csv.DictWriter(f, delimiter=",", fieldnames=contact_fields)
    wr.writeheader()
    wr.writerows(contacts)