In [None]:
import requests

# Query one page of publications from the OpenAIRE Graph API.
url = "https://api-beta.openaire.eu/graph/researchProducts"
params = {
    "page": 10,
    "pageSize": 100,
    "sortBy": "relevance DESC",
    "type": "publication"
}
headers = {
    "accept": "application/json"
}

# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(url, headers=headers, params=params, timeout=30)

if response.status_code == 200:
    data = response.json()
else:
    # Fail loudly: later cells read `data`, so continuing after a failed
    # request would only surface as a confusing NameError further down.
    raise RuntimeError(f"Failed {response.status_code}")

In [31]:
!python3.11 -m pip install neo4j



In [None]:
import json
import csv

def flatten_main(record):
    """Project one research-product record onto the flat columns of main.csv.

    Args:
        record: dict describing a single result of the API response.

    Returns:
        dict with the keys id, title, description, type, language,
        publicationDate and publisher. Absent keys fall back to ''.
    """
    # `description` may be absent, null, a bare string or a list of strings.
    # Normalise it to a list so the join below never iterates over None or
    # over the characters of a single string.
    description = record.get('description') or []
    if isinstance(description, str):
        description = [description]

    # `language` may be absent or null; treat both as "no label".
    language = record.get('language') or {}

    return {
        'id': record.get('id', ''),
        'title': record.get('mainTitle', ''),
        'description': ' '.join(part for part in description if part is not None),
        'type': record.get('type', ''),
        'language': language.get('label', ''),
        'publicationDate': record.get('publicationDate', ''),
        'publisher': record.get('publisher', '')
    }

def flatten_authors(record):
    """Build one authors.csv row per author of a record.

    Args:
        record: dict describing a single result of the API response.

    Returns:
        list of dicts with the keys id (the record's id), fullName and rank.
        The list is empty when the record has no authors.
    """
    record_id = record.get('id', '')
    # `or []` covers both an absent key and an explicit null value.
    return [
        {
            'id': record_id,
            'fullName': author.get('fullName', ''),
            'rank': author.get('rank', ''),
        }
        for author in (record.get('author') or [])
    ]

def flatten_keywords(record):
    """Build one keywords.csv row per subject of a record.

    Args:
        record: dict describing a single result of the API response.

    Returns:
        list of dicts with the keys id (the record's id) and keyword.
        Subjects without a usable value are skipped, so the list may be
        shorter than the record's subject list.
    """
    # Same fallback as flatten_main / flatten_authors for a missing id.
    record_id = record.get('id', '')
    keywords = []
    for subject in record.get('subjects') or []:
        # Walk subject -> 'subject' -> 'value' defensively: any level may be
        # absent or null, and an empty keyword row carries no information.
        value = ((subject or {}).get('subject') or {}).get('value')
        if value:
            keywords.append({'id': record_id, 'keyword': value})
    return keywords

# Accumulators for the three output tables (one CSV each).
main_records, authors_records, keywords_records = [], [], []

# Each API result contributes exactly one main row plus zero or more
# author rows and keyword rows.
for result in data['results']:
    main_records += [flatten_main(result)]
    authors_records += flatten_authors(result)
    keywords_records += flatten_keywords(result)

def write_csv(filename, fieldnames, records):
    """Write dict records to a UTF-8 CSV file with a header row.

    Args:
        filename: path of the file to create or overwrite.
        fieldnames: column names, in output order.
        records: iterable of dicts keyed by those column names.
    """
    with open(filename, mode='w', encoding='utf-8', newline='') as handle:
        out = csv.DictWriter(handle, fieldnames=fieldnames)
        out.writeheader()
        for row in records:
            out.writerow(row)

# (file name, column order, rows) for each table to export.
csv_exports = (
    ('main.csv', ['id', 'title', 'description', 'type', 'language', 'publicationDate', 'publisher'], main_records),
    ('authors.csv', ['id', 'fullName', 'rank'], authors_records),
    ('keywords.csv', ['id', 'keyword'], keywords_records),
)
for target, columns, rows in csv_exports:
    write_csv(target, columns, rows)


CSV files generated: main.csv, authors.csv, keywords.csv


In [40]:
import pandas as pd

# Read authors.csv from the working directory (where write_csv puts it)
# instead of a user-specific absolute path, and keep the result of dropna():
# it returns a new frame, so calling it without assignment has no effect.
authors_df = pd.read_csv("authors.csv").dropna()
authors_df.head(6)

Unnamed: 0,id,fullName,rank
0,doi_________::4b7f84c788e28f843e3ac21a9b63c562,T. J. HODGKINSON,1
1,doi_________::4b7f84c788e28f843e3ac21a9b63c562,L. R. KELLAND,2
2,doi_________::4b7f84c788e28f843e3ac21a9b63c562,M. SHIPMAN,3
3,doi_________::4b7f84c788e28f843e3ac21a9b63c562,J. VILE,4
4,doi_________::4cfb3f1d8dd3382051c28e771df44af2,Yasuhiro Tezuka,1
5,doi_________::4cfb3f1d8dd3382051c28e771df44af2,Mohammad S. Ali,2


In [None]:
# Clean the keywords: the raw values contain a lot of noise (bracketed tags, bare numbers, stray whitespace).
import re
# Read keywords.csv from the working directory: that is where this notebook
# writes it and reads it back for the upload, and a user-specific absolute
# path only resolves on one machine.
df = pd.read_csv("keywords.csv")

def clean_keyword(keyword):
    """Strip noise from a single keyword string.

    Removes bracketed fragments such as "[MATH]", removes standalone runs of
    digits, then collapses whitespace and trims the ends.

    Args:
        keyword: raw keyword value. Non-string input (e.g. the NaN that
            pandas produces for an empty CSV cell) is treated as empty.

    Returns:
        The cleaned keyword, or '' when nothing usable remains.
    """
    if not isinstance(keyword, str):
        # re.sub raises TypeError on NaN/None; an empty result lets the
        # caller filter the row out like any other blank keyword.
        return ""
    keyword = re.sub(r"\[.*?\]", "", keyword)   # bracketed fragments
    keyword = re.sub(r"\b\d+\b", "", keyword)   # standalone numbers
    keyword = re.sub(r"\s+", " ", keyword).strip()
    return keyword

# Run every keyword through clean_keyword, element by element.
df = df.assign(keyword=df["keyword"].map(clean_keyword))

In [None]:
import numpy as np
# Cells that are empty or consist of a lone "." carry no information: mark
# both as missing in one pass, then drop every row with a missing value.
df = df.replace(['', '.'], np.nan).dropna()

In [57]:
# Overwrite keywords.csv with the cleaned rows; the frame index is not written.
df.to_csv(path_or_buf="keywords.csv", index=False)

In [None]:
import csv
import os

from neo4j import GraphDatabase

# Connection settings come from the environment so that credentials are not
# stored in the notebook. The fallbacks are the values that were previously
# hard-coded here.
uri = os.environ.get("NEO4J_URI", "")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "")
driver = GraphDatabase.driver(uri, auth=(username, password))

def upload_data_from_csv(file_path, query):
    """Run a Cypher query once per row of a CSV file.

    Each row is read as a dict keyed by the CSV header and passed to the
    query as its parameters. Uses the module-level `driver`.

    Args:
        file_path: path of the UTF-8 CSV file to read.
        query: Cypher statement whose $parameters match the CSV columns.
    """
    # newline='' leaves line-ending handling to the csv module, so quoted
    # fields containing embedded newlines are read back intact. Opening the
    # file first also means a missing file fails before a session is opened.
    with open(file_path, 'r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        with driver.session() as session:
            for row in reader:
                session.run(query, row)

# Cypher for one main.csv row: MERGE the Record node identified by $id, then
# set its remaining properties from the query parameters.
query_main = """
MERGE (r:Record {id: $id})
SET r.title = $title, 
    r.description = $description, 
    r.type = $type, 
    r.language = $language, 
    r.publicationDate = $publicationDate, 
    r.publisher = $publisher
"""

# Cypher for one authors.csv row: MERGE the Author node by full name and link
# it to the Record identified by $id.
# The rank describes the author's position on one particular record, so it is
# stored on the HAS_AUTHOR relationship. `a.rank` is still written for
# backward compatibility, but it is overwritten by every row for that author
# and therefore only reflects the last one loaded.
query_authors = """
MERGE (a:Author {fullName: $fullName})
SET a.rank = $rank
WITH a
MATCH (r:Record {id: $id})
MERGE (r)-[rel:HAS_AUTHOR]->(a)
SET rel.rank = $rank
"""

# Cypher for one keywords.csv row: MERGE the Keyword node by its text, then
# link it to the Record identified by $id with a HAS_KEYWORD relationship.
# The MATCH means no relationship is created when that Record does not exist.
query_keywords = """
MERGE (k:Keyword {keyword: $keyword})
WITH k
MATCH (r:Record {id: $id})
MERGE (r)-[:HAS_KEYWORD]->(k)
"""

# Records go first: the author and keyword queries MATCH an existing Record.
# try/finally guarantees the driver is closed even if an upload fails.
try:
    upload_data_from_csv('main.csv', query_main)
    upload_data_from_csv('authors.csv', query_authors)
    upload_data_from_csv('keywords.csv', query_keywords)
finally:
    driver.close()


Data uploaded successfully!
