# SPARQL Query



```
PREFIX rdf: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dblp: <https://dblp.org/rdf/schema#>
SELECT *
WHERE{
?s ?p <https://dblp.org/rdf/schema#Conference> .
?s rdf:label ?label .
?s dblp:primaryStreamTitle ?label2 .
?s dblp:streamTitle ?label3 .
}
```

Eventually, I downloaded the file from [https://sparql.dblp.org/1RSnO4#csv](https://sparql.dblp.org/1RSnO4#csv)

In [4]:
import pandas as pd
import re

In [12]:
df = pd.read_csv("1RSnO4.csv")

In [13]:
ids_confs = {}
unique_confs = {}
for ix, row in df.iterrows():
    conf_id = row["s"].replace("https://dblp.org/streams/conf/","")
    ids_confs[conf_id] = row["label"]
    
    unique_confs[row["label"]] = conf_id
    unique_confs[row["label2"]] = conf_id
    unique_confs[row["label3"]] = conf_id
    cleaned_name = re.sub(r'\([^)]*\)', '', row["label"]).strip()
    if cleaned_name not in unique_confs:
        unique_confs[cleaned_name] = conf_id
    cleaned_name = re.sub(r'\([^)]*\)', '', row["label2"]).strip()
    if cleaned_name not in unique_confs:
        unique_confs[cleaned_name] = conf_id
    cleaned_name = re.sub(r'\([^)]*\)', '', row["label3"]).strip()
    if cleaned_name not in unique_confs:
        unique_confs[cleaned_name] = conf_id

In [14]:
unique_confs

{'International Workshop on Collaborative Virtual Environments (3DCVE)': '3dcve-ws',
 'International Workshop on Collaborative Virtual Environments': '3dcve-ws',
 'International Workshop on 3D Geoinformation Systems (3D-GIS)': '3dgis',
 'International Workshop on 3D Geoinformation Systems': '3dgis',
 'IEEE International Conference on 3D System Integration (3DIC)': '3dic',
 'IEEE International Conference on 3D System Integration': '3dic',
 '3D Imaging and Applications (3DIA)': '3dica',
 '3D Imaging and Applications': '3dica',
 'International Conference on 3D Vision (3DV)': '3dim',
 'International Conference on 3D Vision': '3dim',
 'Eurographics Workshop on 3D Object Retrieval (3DOR)': '3dor',
 'Eurographics Workshop on 3D Object Retrieval': '3dor',
 '3D Physiological Human Workshop (3DPH)': '3dph',
 '3D Physiological Human Workshop': '3dph',
 '3D Data Processing Visualization and Transmission (3DPVT)': '3dpvt',
 '3D Data Processing Visualization and Transmission': '3dpvt',
 '3DTV-Confer

In [15]:
from sentence_transformers import SentenceTransformer

# 1. Load a pretrained Sentence Transformer model
model = SentenceTransformer("all-MiniLM-L6-v2")

# The sentences to encode
sentences = list(unique_confs.keys())

# 2. Calculate embeddings by calling model.encode()
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

vector_size = embeddings.shape[1]

# # 3. Calculate the embedding similarities
# similarities = model.similarity(embeddings, embeddings)
# print(similarities)
# # tensor([[1.0000, 0.6660, 0.1046],
# #         [0.6660, 1.0000, 0.1411],
# #         [0.1046, 0.1411, 1.0000]])

(13554, 384)


In [16]:
import faiss                   # make faiss available
index = faiss.IndexFlatL2(vector_size)   # build the index
print(index.is_trained)

True


In [17]:
index.add(embeddings)                  # add vectors to the index
print(index.ntotal)

13554


# Save index

In [18]:
to_save = dict()
to_save["sentences"] = sentences
to_save["index"] = index
to_save["confs"] = unique_confs
to_save["idsconfs"] = ids_confs
import pickle
with open('DBLP.pickle', 'wb') as handle:
    pickle.dump(to_save, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Test Index

In [34]:
sentence = "International Semantic Web Conference"
sentence = "5th International Workshop on Scientific Knowledge: Representation, Discovery, and Assessment"
embedding = model.encode([sentence])

In [35]:
D, I = index.search(embedding, k=3)

In [36]:
D

array([[0.08745116, 0.7501303 , 0.7509186 ]], dtype=float32)

In [37]:
for i in I[0]:
    print(sentences[i])

International Workshop on Scientific Knowledge: Representation, Discovery, and Assessment (Sci-K)
International Conference on Discovery Science (DS)
International Workshop on Natural Scientific Language Processing and Research Knowledge Graphs (NSLP)


In [32]:
i

array([5955, 2172, 2869])