# Organising Committee Identifier

This is a first prototype of an identifier for conference organisers.

Steps:
1. Extracting information from the Call for Papers
2. Identifying organisers in OpenAlex
3. Locating the conference or event in the main databases (such as DBLP, AIDA Dashboard, or [ConfIDent](https://www.confident-conference.org/index.php/Category:Event_Series))

In [1]:
from IPython.core.display import JSON
import json

In [2]:
import configparser
# Read the API key from a local config.ini so the secret is not written in the notebook.
CONFIG_FILE = 'config.ini'
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files it
# actually parsed; check it so a missing file is reported as such instead of
# surfacing later as an opaque KeyError.
if not config.read(CONFIG_FILE):
    raise FileNotFoundError(
        f"Could not read '{CONFIG_FILE}'; it must define an 'api' entry under [DEFAULT]."
    )
if not config.has_option('DEFAULT', 'api'):
    raise KeyError(f"'{CONFIG_FILE}' has no 'api' entry under [DEFAULT].")
topsecret = config['DEFAULT']['api']

In [3]:
from openai import OpenAI
# The OpenAI client is pointed at the OpenRouter endpoint and authenticated
# with the key read from the configuration file.
client = OpenAI(api_key=topsecret, base_url="https://openrouter.ai/api/v1")

# Reading CfP and creating prompt

In [4]:
# Folder holding the Call-for-Papers text files, and the file to process.
SOURCE = 'cfps'
CFP = 'iswc2025.txt'
# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform default, which can garble non-ASCII characters (e.g. accented
# organiser names) on systems whose default is not UTF-8.
with open(f'{SOURCE}/{CFP}', 'r', encoding='utf-8') as fr:
    call_for_papers = fr.read()

In [5]:
# Fixed instruction header listing the elements the model has to extract.
_prompt_instructions = (
    "In this prompt, you will receive a Call for Papers of a scientific event. "
    "Your task is to parse it, and identify some crucial elements:\n"
    "\n"
    "- the event name and its acronym;\n"
    "- the location of the event\n"
    "- the organisers of the event\n"
)
# Final prompt: the instructions followed by the raw CfP wrapped in
# <call_for_papers> tags.
text_prompt = (
    f"{_prompt_instructions}\n"
    f"<call_for_papers>\n{call_for_papers}\n</call_for_papers>"
)

In [7]:
# Request parameters for the chat-completion call.
# The schema below uses Python's own True/False literals rather than aliasing
# JSON-style `true`/`false` names into the notebook namespace.
extra_headers = {
    "HTTP-Referer": "https://skm.kmi.open.ac.uk",  # Optional. Site URL for rankings on openrouter.ai.
    "X-Title": "SKM Team at OU",  # Optional. Site title for rankings on openrouter.ai.
}
model = "openai/gpt-4o"
messages = [
    {"role": "user", "content": text_prompt}
]
# JSON schema the reply must follow: event-level fields plus a list of
# organisers, each with name, affiliation, country and track.
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "organising_committe_of_conference",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "event_name": {
                    "type": "string",
                    "description": "Name of the workshop or conference. This identifies the extended name of the event."
                },
                "conference_series": {
                    "type": "string",
                    "description": "This refers to the name of a conference series, which is a collection of events that happen on a regular basis. It's usually similar to the event's name, but without the edition number or the year."
                },
                "event_acronym": {
                    "type": "string",
                    "description": "Acronym of the workshop or conference. This identifies the acronym name of the event."
                },
                "colocated_with": {
                    "type": "string",
                    "description": "If the name of the event is co-located with another big event. Otherwise if empty."
                },
                "location": {
                    "type": "string",
                    "description": "City or location name"
                },
                "organisers": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "organiser_name": {
                                "type": "string",
                                "description": "The organiser name."
                            },
                            "organiser_affiliation": {
                                "type": "string",
                                "description": "The institution (affiliation) of the organiser. This can be either a university or a company."
                            },
                            "organiser_country": {
                                "type": "string",
                                "description": "The institution country of the organiser. This information is not always available."
                            },
                            "track_name": {
                                "type": "string",
                                "description": "This identifies the main track in which the organiser is involved. A conference may have several tracks, whereas a workshop may have one single track. As default you shall use 'main'."
                            }
                        },
                        "required": ["organiser_name", "organiser_affiliation", "organiser_country", "track_name"],
                        "additionalProperties": False
                    },
                    "description": "Identifies the name, affiliation (ideally including country) of the conference organisers and the name of the track they organise."
                }
            },
            "required": ["event_name", "event_acronym", "conference_series", "colocated_with", "location", "organisers"],
            "additionalProperties": False
        }
    }
}

# Running the extraction

In [8]:
# Send the prompt and ask for a reply that follows the JSON schema in
# `response_format`.
completion = client.chat.completions.create(
    model=model,
    messages=messages,
    response_format=response_format,
    extra_headers=extra_headers,
)

In [9]:
# The reply text of the first choice is a JSON document; decode it into a dict
# and display it.
raw_reply = completion.choices[0].message.content
result = json.loads(raw_reply)
JSON(result)

<IPython.core.display.JSON object>

# Find Organisers on OpenAlex

In [10]:
from pyalex import config
from pyalex import Authors, Institutions
import logging

# Settings on the pyalex `config` object imported above.
# Retry-related settings: HTTP status codes, back-off factor and retry count
# (0 here; presumably this disables automatic retries, confirm in the pyalex docs).
config.retry_http_codes = [429, 500, 503]
config.retry_backoff_factor = 0.1
config.max_retries = 0
# Contact e-mail (presumably attached to OpenAlex requests; confirm in the pyalex docs).
config.email = "angelo.salatino@open.ac.uk"

In [11]:
def get_authors_info_from_openalex(organisers: list, debug: bool = False) -> list:
    """Enrich organiser records with identifiers looked up on OpenAlex.

    For every organiser the function first searches for the institution named
    in ``organiser_affiliation`` and, using the first hit, searches for the
    author restricted to that institution. If the affiliation is blank, no
    institution is found, or no author is found inside it, it falls back to
    a search by name only and keeps the candidate with the highest
    ``works_count``.

    Args:
        organisers: list of dicts with at least ``organiser_name`` and,
            optionally, ``organiser_affiliation``. The dicts are modified
            in place.
        debug: when True, print a trace of the lookup decisions.

    Returns:
        The same ``organisers`` list. Where a match was found, entries gain
        ``openalex_name``, ``openalex_page`` and ``orcid``; ``affiliation_ror``
        is added when the matched institution exposes a ROR id.
    """
    for organiser in organisers:
        name = organiser["organiser_name"]
        # The affiliation may be empty; searching OpenAlex with a blank term
        # could attach an unrelated institution, so skip that step in that case.
        affiliation = (organiser.get("organiser_affiliation") or "").strip()
        author = {}

        institutions = Institutions().search(affiliation).get() if affiliation else []
        if institutions:
            institution = institutions[0]
            # OpenAlex ids are full URLs; the filter below needs the bare id.
            inst_id = institution["id"].replace("https://openalex.org/", "")

            if "ror" in institution["ids"]:
                organiser["affiliation_ror"] = institution["ids"]["ror"]

            # Search for the author within the institution
            candidates = (
                Authors()
                .search(name)
                .filter(affiliations={"institution": {"id": inst_id}})
                .get()
            )
            if candidates:
                if debug:
                    print(f"{len(candidates)} search results found for the author")
                author = candidates[0]
            elif debug:
                print(f"For {name} I could not find a record")
        elif debug:
            print(f"For {name} I could not find a record of her institution")

        if not author:
            # Fallback: search by name alone and prefer the most prolific profile.
            candidates = Authors().search(name).get()
            if candidates:
                if debug and len(candidates) > 1:
                    print(f"Found multiple records for {name}")
                # max() keeps the first candidate among ties, like a stable
                # descending sort followed by [0].
                author = max(candidates, key=lambda item: item.get("works_count") or 0)
            elif debug:
                print(f"For {name} I could not find a record, AGAIN")

        if author:
            organiser["openalex_name"] = author["display_name"]
            organiser["openalex_page"] = author["id"]
            organiser["orcid"] = author["orcid"]

    return organisers

# Look the extracted organisers up on OpenAlex and store the enriched list
# back under the same key.
result.update(organisers=get_authors_info_from_openalex(result["organisers"]))

In [12]:
# Display the result again, now with any OpenAlex fields added to the organisers.
JSON(result)

<IPython.core.display.JSON object>

# Find Event on major databases

In [18]:
from sentence_transformers import SentenceTransformer
import faiss  
import pickle
import urllib.parse

# import re
# regex = r'(\d+st)|(\d+nd)|(\d+rd)|(\d+th)'
# result = re.sub(regex, "", result["event_name"], 0, re.MULTILINE | re.IGNORECASE)

# Load a pretrained Sentence Transformer model.
# It gets its own name because `model` already holds the LLM identifier string
# passed to client.chat.completions.create(); rebinding `model` here would
# break re-running that cell.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# Encode the extracted conference-series name; the result is used to query the
# conference indexes.
embeddings = embedding_model.encode([result["conference_series"]])

In [19]:
# NOTE(security): pickle.load can execute arbitrary code while loading; only
# use a DBLP.pickle that comes from a trusted source.
with open('DBLP.pickle', 'rb') as handle:
    dblp_confs = pickle.load(handle)

# Largest nearest-neighbour score still accepted as a match. The score is
# presumably a distance (smaller = closer) given the `<=` test; TODO confirm
# against how the index was built.
DBLP_MAX_DISTANCE = 0.4

# Query the index for the single closest conference-series name.
distances, indices = dblp_confs["index"].search(embeddings, k=1)
if distances[0][0] <= DBLP_MAX_DISTANCE:
    this_conf = dblp_confs["sentences"][indices[0][0]]
    this_acronym = dblp_confs["confs"][this_conf]
    print(this_conf)
    print(this_acronym)
    print(f"https://dblp.org/streams/conf/{urllib.parse.quote(this_acronym, safe='')}")
else:
    # Reset the names so a match left over from an earlier run cannot be
    # mistaken for the current one, and say so instead of printing nothing.
    this_conf = this_acronym = None
    print(f"No sufficiently close DBLP match found (best score: {distances[0][0]})")

International Semantic Web Conference (ISWC)
semweb
https://dblp.org/streams/conf/semweb


In [21]:
# NOTE(security): pickle.load can execute arbitrary code while loading; only
# use an AIDA.pickle that comes from a trusted source.
with open('AIDA.pickle', 'rb') as handle:
    aida_confs = pickle.load(handle)

# Largest nearest-neighbour score still accepted as a match. The score is
# presumably a distance (smaller = closer) given the `<=` test; TODO confirm
# against how the index was built.
AIDA_MAX_DISTANCE = 0.4

# Query the index for the single closest conference-series name.
distances, indices = aida_confs["index"].search(embeddings, k=1)
if distances[0][0] <= AIDA_MAX_DISTANCE:
    this_conf_aida = aida_confs["sentences"][indices[0][0]]
    this_acronym_aida = aida_confs["confs"][this_conf_aida]
    print(this_conf_aida)
    print(this_acronym_aida)
    # The dashboard URL is built from the conference name, not the acronym.
    print(f"http://aida-dashboard.unica.it:8080/cs/conference/{urllib.parse.quote(this_conf_aida, safe='')}")
else:
    # Reset the names so a match left over from an earlier run cannot be
    # mistaken for the current one, and say so instead of printing nothing.
    this_conf_aida = this_acronym_aida = None
    print(f"No sufficiently close AIDA match found (best score: {distances[0][0]})")

International Semantic Web Conference (ISWC)
semweb
http://aida-dashboard.unica.it:8080/cs/conference/International%20Semantic%20Web%20Conference%20%28ISWC%29
