# Autonomous Vehicle Literature Review Knowledge Graphs
Investigation into how knowledge graphs can assist literature reviews.  
Uses the Semantic Scholar API (https://www.semanticscholar.org/product/api) via the `semanticscholar` Python module.  
Note for future reference: AI2 also has an open-source PyTorch NLP research library:  
https://github.com/allenai/allennlp

In [1]:
# Some modified boilerplate code. Previously we worked with a subset
# of the dictionary keys; here we use all of them to start with.

# helper functions for ML/AI applied to literature review
def getInfo(suffix, start, end, key):
    """
    Get AI2 Semantic Scholar article info for a run of consecutively numbered DOIs.

    Each DOI is built as suffix + str(i) for every i in start..end (inclusive),
    looked up with semanticscholar.paper(), and the value stored under key is
    collected.

    Inputs
        suffix: string, leading part of the DOI that the paper index is appended to
        start: integer, first paper index
        end: integer, last paper index (inclusive)
        key: dictionary key
    Output
        text: string, every value found, each followed by a single space
              (empty string when nothing was found)
    Notes
        A value of None is reported in the same way as a missing key, and
        non-string values are converted with str(), so that keys whose values
        are not plain strings no longer raise TypeError.
    Examples:
    # SAFECOMP 2020 get all 31 abstracts
    text = getInfo('10.1007/978-3-030-55583-2_', 1, 31, 'abstract')
    all keys:
    'abstract', 'arxivId', 'authors', 'citationVelocity', 'citations', 'corpusId', 'doi', 'fieldsOfStudy',
    'influentialCitationCount', 'isOpenAccess', 'isPublisherLicensed', 'is_open_access', 'is_publisher_licensed',
    'numCitedBy', 'numCiting', 'paperId', 'references', 'title', 'topics', 'url', 'venue', 'year'
    """
    import time
    import semanticscholar as sch
    pieces = []
    for i in range(start, end + 1):
        doi = suffix + str(i)
        paper = sch.paper(doi, timeout=2)
        # Treat "key absent" and "key present but None" alike: there is no text to add.
        value = paper[key] if key in paper else None
        if value is None:
            print('No {} for paper index {}'.format(key, i))
            print("DOI entry: ", doi)
        else:
            pieces.append(str(value))
        # pause 3 seconds between requests so Semantic Scholar does not block
        # automated searches
        time.sleep(3)
    # Every value keeps its trailing space, matching the original output format.
    return ''.join(piece + ' ' for piece in pieces)

def getAllInfo(suffix, start, end):
    """
    Get the complete AI2 Semantic Scholar record for a run of numbered DOIs.

    Each DOI is built as suffix + str(index) for every index in start..end
    (inclusive) and looked up with semanticscholar.paper(). A record is kept
    only when it contains an 'abstract' key.

    Inputs
        suffix: string, leading part of the DOI that the paper index is appended to
        start: integer, first paper index
        end: integer, last paper index (inclusive)
    Output
        papers: list of dictionaries, one per record that was kept
    Examples:
    # SAFECOMP 2020 get all 31 papers
    papers = getAllInfo('10.1007/978-3-030-55583-2_', 1, 31)
    all keys:
    'abstract', 'arxivId', 'authors', 'citationVelocity', 'citations', 'corpusId', 'doi', 'fieldsOfStudy',
    'influentialCitationCount', 'isOpenAccess', 'isPublisherLicensed', 'is_open_access', 'is_publisher_licensed',
    'numCitedBy', 'numCiting', 'paperId', 'references', 'title', 'topics', 'url', 'venue', 'year'
    """
    import semanticscholar as sch
    import time
    collected = []
    for index in range(start, end + 1):
        doi = suffix + str(index)
        record = sch.paper(doi, timeout=2)
        if 'abstract' not in record:
            print('No abstract for paper index {}'.format(index))
            print("DOI entry: ", doi)
        else:
            print('Found paper DOI:', doi)
            collected.append(record)
        # Wait 3 seconds after every lookup: Semantic Scholar blocks automated
        # searches that repeat at shorter intervals.
        time.sleep(3)
    return collected

In [3]:
# Does a free-text "title" search work? sch.paper() is given the article title
# here instead of an identifier.
import semanticscholar as sch
title = "An Auto-tuning Framework for Autonomous Vehicles"
results = sch.paper(title, timeout=2)
len(results) # 0 -> nothing came back for the title string
# Use the corpus ID instead (bare number); `title` is reused to hold the ID
title = "52013502"
results = sch.paper(title, timeout=2)
len(results) # 0 -> nothing came back for the bare corpus ID either

0

In [5]:
# cannot run wget from notebook
!wget https://api.semanticscholar.org/v1/paper/CorpusID:52013502 -o baidu.txt

/bin/sh: 1: wget: not found


In [15]:
# AirSim paper
# https://www.semanticscholar.org/paper/a20f3dfc9142b48b924e68ee22ba259a0d621bb2
# S2 (Semantic Scholar) Paper ID - this works
# NOTE: `title` holds the paper ID here (the hash at the end of the URL above),
# not an article title.
title = "a20f3dfc9142b48b924e68ee22ba259a0d621bb2"
results = sch.paper(title, timeout=2)
print(len(results)) # 22
# list every key of the returned record
for key in results :
    print(key)

22
abstract
arxivId
authors
citationVelocity
citations
corpusId
doi
fieldsOfStudy
influentialCitationCount
isOpenAccess
isPublisherLicensed
is_open_access
is_publisher_licensed
numCitedBy
numCiting
paperId
references
title
topics
url
venue
year


## Using S2 (Semantic Scholar) Paper ID
We use Paper ID to retrieve information for our 4 papers

In [16]:
# Paper IDs - extracted from the URL of each article's page, found by searching
# for the article title at
# https://www.semanticscholar.org/
pids = ['a20f3dfc9142b48b924e68ee22ba259a0d621bb2', # airsim
        '4592a123502a9e7bbd76d21c976a4ed17503df8c', # baidu apollo
        '1b7ef1a7df32d8d38efbb7aca9a9b2468d0da224', # carla
        '65b58efdd4c04bb501244a8f825b28dcba788b60'] # LGSVL

In [18]:
# Let's use a dictionary instead: each paper ID is stored under a short name
# for the simulator it describes, so later results can be labelled by that name.
pids = {'airsim' : 'a20f3dfc9142b48b924e68ee22ba259a0d621bb2', # airsim
        'baidu' : '4592a123502a9e7bbd76d21c976a4ed17503df8c', # baidu apollo
        'carla' : '1b7ef1a7df32d8d38efbb7aca9a9b2468d0da224', # carla
        'LGSVL' : '65b58efdd4c04bb501244a8f825b28dcba788b60'} # LGSVL

In [19]:
# sanity check: print the paper ID stored under each key of pids
for key in pids :
    print(pids[key])

a20f3dfc9142b48b924e68ee22ba259a0d621bb2
4592a123502a9e7bbd76d21c976a4ed17503df8c
1b7ef1a7df32d8d38efbb7aca9a9b2468d0da224
65b58efdd4c04bb501244a8f825b28dcba788b60


In [29]:
# retrieve data
def AI2PIDsearch(pid):
    """
    Look up a single paper on AI2 Semantic Scholar by its paper ID.

    The lookup is delegated to semanticscholar.paper() with a 2 second
    timeout; the function then waits 3 seconds before returning so that
    back-to-back calls do not hammer the AI2 service.

    Input:
        pid: string, AI2 paper ID
    Output
        paper: dictionary, whatever semanticscholar.paper() returned for pid
    Example:
    paper = AI2PIDsearch('a20f3dfc9142b48b924e68ee22ba259a0d621bb2')
    print(paper['title'])
    AirSim: High-Fidelity Visual and Physical Simulation for Autonomous Vehicles
    """
    import semanticscholar as sch
    import time
    paper = sch.paper(pid, timeout=2)
    # do not hammer AI2: pause after every request
    time.sleep(3)
    return paper

AirSim: High-Fidelity Visual and Physical Simulation for Autonomous Vehicles


In [38]:
# init results dictionary: for every key of pids, store the record that
# AI2PIDsearch returns for that paper ID under the same key
results = {}
for key in pids :
    result = AI2PIDsearch(pids[key])
    results[key] = result 

In [40]:
# sanity check: print the number of entries in each retrieved record
for key in results :
    print(len(results[key]))

22
22
22
22


In [42]:
# build graph:
# we are looking at
# https://www.stardog.com/tutorials/sparql/
# https://github.com/stardog-union/stardog-tutorials/blob/master/music/beatles.ttl
# https://github.com/city-knowledge-graphs/python/blob/main/lab4/lab4.ttl
#
# Target Turtle layout (template only): each <...> placeholder stands for the
# corresponding field of a retrieved paper record.
"""
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix : <http://city.ac.uk/kg/lab4/> .

:airsim       rdf:type  :Article .
:airsim       :title    "<title>" .
:airsim       :topic    "<topics>" .
:airsim       :citation "<citations>" .
"""

'AirSim: High-Fidelity Visual and Physical Simulation for Autonomous Vehicles'

In [70]:
# Print the retrieved papers as Turtle (.ttl) triples.
# print header
header = """
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix : <http://city.ac.uk/kg/lab4/> .

"""
print(header)

# Characters that may not appear raw inside a double-quoted Turtle string
# literal (backslash, double quote, line breaks), plus tab for readability.
TTL_ESCAPES = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
})


def ttl_escape(text):
    """Return text escaped for use inside a double-quoted Turtle string literal.

    The value is converted with str() first, so non-string values are
    rendered the same way str.format() would have rendered them.
    """
    return str(text).translate(TTL_ESCAPES)


# iterate through dictionary: one block of triples per paper
for key, paper in results.items():
    # rdf:type
    print(':{}\t\trdf:type\t:Article .'.format(key))
    print(':{}\t\t:title\t\t"{}".'.format(key, ttl_escape(paper['title'])))
    # one :topic triple per topic entry of the record
    for topic in paper['topics']:
        print(':{}\t\t:topic\t\t"{}".'.format(key, ttl_escape(topic['topic'])))
    # one :citation triple per entry of the record's reference list
    for reference in paper['references']:
        print(':{}\t\t:citation\t"{}".'.format(key, ttl_escape(reference['title'])))


@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix : <http://city.ac.uk/kg/lab4/> .


:airsim		rdf:type	:Article .
:airsim		:title		"AirSim: High-Fidelity Visual and Physical Simulation for Autonomous Vehicles".
:airsim		:topic		"Autonomous robot".
:airsim		:topic		"Physics engine".
:airsim		:topic		"Unreal Development Kit".
:airsim		:topic		"PhysX".
:airsim		:topic		"Machine learning".
:airsim		:topic		"Reinforcement learning".
:airsim		:topic		"Ray tracing (graphics)".
:airsim		:topic		"Deep learning".
:airsim		:topic		"Extensibility".
:airsim		:topic		"Artificial intelligence".
:airsim		:topic		"Modular design".
:airsim		:topic		"Algorithm".
:airsim		:topic		"Augmented reality".
:airsim		:topic		"Hardware-in-the-loop simulation".
:airsim		:topic		"Component-based software engineering".
:airsim		:topic		"Experiment".
:airsim		:topic		"Sensor".
:airsim		:topic		"Real-time clock".
:airsim		:topic		"Application p

In [71]:
# Build the Turtle (.ttl) document for the retrieved papers as one string, `ttl`.
# print header
header = """
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix : <http://city.ac.uk/kg/lab4/> .

"""

# Characters that may not appear raw inside a double-quoted Turtle string
# literal (backslash, double quote, line breaks), plus tab for readability.
TTL_ESCAPES = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
})


def ttl_escape(text):
    """Return text escaped for use inside a double-quoted Turtle string literal.

    The value is converted with str() first, so non-string values are
    rendered the same way str.format() would have rendered them.
    """
    return str(text).translate(TTL_ESCAPES)


# Collect the triples in a list and join once, rather than growing a string
# with += on every triple.
lines = []

# iterate through dictionary: one block of triples per paper
for key, paper in results.items():
    # rdf:type
    lines.append(':{}\t\trdf:type\t:Article .\n'.format(key))
    lines.append(':{}\t\t:title\t\t"{}".\n'.format(key, ttl_escape(paper['title'])))
    # one :topic triple per topic entry of the record
    for topic in paper['topics']:
        lines.append(':{}\t\t:topic\t\t"{}".\n'.format(key, ttl_escape(topic['topic'])))
    # one :citation triple per entry of the record's reference list
    for reference in paper['references']:
        lines.append(':{}\t\t:citation\t"{}".\n'.format(key, ttl_escape(reference['title'])))

ttl = header + "\n" + "".join(lines)
print(ttl)


@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix : <http://city.ac.uk/kg/lab4/> .


:airsim		rdf:type	:Article .
:airsim		:title		"AirSim: High-Fidelity Visual and Physical Simulation for Autonomous Vehicles".
:airsim		:topic		"Autonomous robot".
:airsim		:topic		"Physics engine".
:airsim		:topic		"Unreal Development Kit".
:airsim		:topic		"PhysX".
:airsim		:topic		"Machine learning".
:airsim		:topic		"Reinforcement learning".
:airsim		:topic		"Ray tracing (graphics)".
:airsim		:topic		"Deep learning".
:airsim		:topic		"Extensibility".
:airsim		:topic		"Artificial intelligence".
:airsim		:topic		"Modular design".
:airsim		:topic		"Algorithm".
:airsim		:topic		"Augmented reality".
:airsim		:topic		"Hardware-in-the-loop simulation".
:airsim		:topic		"Component-based software engineering".
:airsim		:topic		"Experiment".
:airsim		:topic		"Sensor".
:airsim		:topic		"Real-time clock".
:airsim		:topic		"Application p

In [None]:
# TODO - write queries
# 1. Common topics
# 2. Common references
# 2.1 Appearing in at least two articles
# 2.2 Appearing in all articles
# We would like insights similar to:
# https://www.citationtree.org/tree?id=10.1007/978-3-319-67361-5_40
# https://github.com/city-knowledge-graphs/python/blob/main/lab4/lab4.ttl