In [1]:
import json
import os
import rdflib
import urllib.parse
from rdflib import URIRef, Literal
from rdflib.namespace import RDF
from rdflib.namespace import Namespace
from decimal import Decimal

In [2]:
def read_json(file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Pin the encoding: without it, open() falls back to the platform's locale
    # default, which can mis-decode non-ASCII text on some systems.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [3]:
# Root namespace: the resource namespaces and make_predicate() all build their
# URIs underneath this prefix.
base = Namespace("http://codereuse.org/")

In [4]:
# Resource namespaces: one URI prefix per kind of node, each rooted at `base`.
repo = Namespace(base + "repository/")
person = Namespace(base + "person/")
issue = Namespace(base + "issue/")
pull_request = Namespace(base + "pull_request/")
topic = Namespace(base + "topic/")
language_ = Namespace(base + "language/")  # trailing underscore keeps the name distinct from plain `language`
entity = Namespace(base + "entity/")
status = Namespace(base + "status/")

# Prefixes of the external vocabularies used for predicates.
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
DCTERMS = Namespace("http://purl.org/dc/terms/")

In [5]:
# Lookup table used by make_uri(): short type label -> namespace that prefixes
# URIs of that type.
TYPE_MAP = dict(
    repo=repo,
    person=person,
    issue=issue,
    pull_request=pull_request,
    topic=topic,
    language=language_,
    entity=entity,
    status=status,
)

In [6]:
def make_uri(type_, name):
    """Build the URI for `name` inside the namespace registered for `type_`.

    Args:
        type_: A key of TYPE_MAP (for example 'repo' or 'person').
        name: Local part of the URI; it is handed to the namespace as-is, so
            callers that need escaping apply it beforehand.

    Returns:
        The namespaced URI when `type_` is registered in TYPE_MAP; otherwise
        `name` itself wrapped in a URIRef.
    """
    prefix = TYPE_MAP.get(type_)
    if not prefix:
        # No namespace for this type: treat `name` as a complete URI.
        return URIRef(name)
    return prefix[name]

In [7]:
def make_predicate(name):
    """Return the predicate URI formed by appending `name` to the `base` namespace."""
    predicate_uri = base.term(name)
    return predicate_uri

In [8]:
def parse_string(string_to_encode):
    """Turn arbitrary text into a token usable as the local part of a URI.

    The text is percent-encoded with ``urllib.parse.quote`` (default settings,
    so '/' is left untouched) and every '%' of the result is then replaced by
    '-'. For example ``"C++"`` becomes ``"C-2B-2B"``.

    Args:
        string_to_encode: The text to encode.

    Returns:
        The encoded string.
    """
    percent_encoded = urllib.parse.quote(string_to_encode)
    # Splitting on '%' and re-joining with '-' swaps every percent sign.
    return "-".join(percent_encoded.split("%"))

In [9]:
def combine_string(string1, string2):
    """Join two strings with a single '/' between them."""
    return "/".join((string1, string2))

In [10]:
def make_profile_url(username):
    """Return the GitHub profile URL for `username`."""
    github_root = "https://github.com/"
    return github_root + username

In [11]:
def make_issue_url(repo_author, repo_name, issue_no):
    """Return the GitHub URL of an issue.

    Args:
        repo_author: Account name that owns the repository.
        repo_name: Name of the repository.
        issue_no: Issue number, as a string or an integer.

    Returns:
        ``https://github.com/<repo_author>/<repo_name>/issues/<issue_no>``.
    """
    # str() accepts a numeric issue number; previously an int raised TypeError
    # during concatenation. String inputs produce the same URL as before.
    return "https://github.com/" + repo_author + "/" + repo_name + "/issues/" + str(issue_no)

In [12]:
def make_repo_url(username, reponame):
    """Return the GitHub URL of the repository `reponame` owned by `username`."""
    return "/".join(("https://github.com", username, reponame))

In [13]:
def count_issue_ratio(data):
    """Count open and closed issues.

    Args:
        data: Iterable of issue records, each with an 'issue_status' key.

    Returns:
        A tuple ``(open_count, closed_count)``. Records whose status is
        neither 'open' nor 'closed' are not counted.
    """
    statuses = [record['issue_status'] for record in data]
    open_total = statuses.count('open')
    closed_total = statuses.count('closed')
    return open_total, closed_total

In [14]:
def count_forks_ratio(data):
    """Count active and inactive forks.

    Args:
        data: Iterable of fork records, each with a 'repo_status' key.

    Returns:
        A tuple ``(active_count, inactive_count)``. Records with any other
        status value are not counted.
    """
    statuses = [record['repo_status'] for record in data]
    # NOTE(review): the match is case-sensitive and the two labels are cased
    # differently ('active' vs 'Inactive') -- confirm this mirrors the input data.
    return statuses.count('active'), statuses.count('Inactive')

In [15]:
def make_author_rdf(graph, data):
    """Add the triples describing the repository author to `graph`.

    The author node gets an entity type, an account name and a profile URL.
    Note that the parameter order here is (graph, data), the reverse of the
    other make_*_rdf functions.

    Args:
        graph: Graph that receives the triples.
        data: Repository record; only its 'author' key is read.
    """
    username = data['author']
    author_uri = make_uri('person', username)

    graph.add((author_uri, DCTERMS.type, make_uri('entity', 'author')))
    graph.add((author_uri, FOAF.accountName, Literal(username)))
    graph.add((author_uri, FOAF.hasUrl, URIRef(make_profile_url(username))))

In [16]:
def make_repo_rdf(data, graph):
    """Add the core triples of a repository (type, URL, author, description).

    Args:
        data: Repository record; reads 'repositoryName', 'url', 'author' and
            'about'.
        graph: Graph that receives the triples.
    """
    # The repository node is keyed by the repository name alone.
    repo_uri = make_uri('repo', data['repositoryName'])

    graph.add((repo_uri, DCTERMS.type, make_uri('entity', 'repository')))
    graph.add((repo_uri, FOAF.hasUrl, URIRef(data['url'])))
    graph.add((repo_uri, make_predicate('hasAuthor'), make_uri('person', data['author'])))
    make_author_rdf(graph, data)

    # The description is optional: an empty or missing value adds no triple.
    description = data['about']
    if description:
        graph.add((repo_uri, DCTERMS.description, Literal(description)))

In [17]:
def make_topic_rdf(data, graph):
    """Link a repository to each of its topics via DCTERMS.subject.

    Args:
        data: Repository record; reads 'topics' and 'repositoryName'.
        graph: Graph that receives the triples.
    """
    topics = data['topics']
    if not topics:
        return

    repo_uri = make_uri('repo', data['repositoryName'])
    for topic_name in topics:
        # Topic names are encoded before being used as the URI's local part.
        topic_uri = make_uri('topic', parse_string(topic_name))
        graph.add((repo_uri, DCTERMS.subject, topic_uri))

In [18]:
def make_language_rdf(data, graph):
    """Add one language-usage node per programming language of a repository.

    Each language gets an intermediate node
    ``<repositoryName>/languageInfo/<n>`` (n counts from 1) that carries the
    usage figure and points at the language itself.

    Args:
        data: Repository record; reads 'languages' and 'repositoryName'.
            Each value of 'languages' is stripped of '%' and parsed as a Decimal.
        graph: Graph that receives the triples.
    """
    languages = data['languages']
    if not languages:
        return

    repo_name = data['repositoryName']
    repo_uri = make_uri('repo', repo_name)

    for position, language_name in enumerate(languages, start=1):
        info_path = combine_string(combine_string(repo_name, "languageInfo"), str(position))
        info_uri = make_uri('repo', info_path)

        graph.add((repo_uri, DCTERMS.language, info_uri))

        # Drop the percent sign(s) around the figure before converting it.
        usage = Decimal(languages[language_name].strip('%'))
        graph.add((info_uri, make_predicate('hasLanguageUsage'), Literal(usage)))

        language_uri = make_uri('language', parse_string(language_name))
        graph.add((info_uri, make_predicate('hasLanguageName'), language_uri))
        graph.add((language_uri, DCTERMS.type, make_uri('entity', 'language')))

In [19]:
def make_star_rdf(data, graph):
    """Add the star count and one set of triples per stargazer.

    Args:
        data: Repository record; reads 'stars' and 'repositoryName'. Each
            stargazer entry supplies 'user_name' and 'user_about'.
        graph: Graph that receives the triples.
    """
    stargazers = data['stars']
    if not stargazers:
        return

    repo_uri = make_uri('repo', data['repositoryName'])
    graph.add((repo_uri, make_predicate('hasStarCount'), Literal(len(stargazers))))

    for stargazer in stargazers:
        username = stargazer['user_name']
        user_uri = make_uri('person', username)

        graph.add((user_uri, DCTERMS.type, make_uri('entity', 'stargazer')))
        # The relation is stored in both directions.
        graph.add((repo_uri, make_predicate('hasStargazer'), user_uri))
        graph.add((user_uri, make_predicate('StarsRepo'), repo_uri))
        # NOTE(review): unlike the repository's 'about', user_about is not
        # checked for emptiness before becoming a Literal -- confirm it is
        # always populated.
        graph.add((user_uri, FOAF.about, Literal(stargazer['user_about'])))
        graph.add((user_uri, FOAF.accountName, Literal(username)))
        graph.add((user_uri, FOAF.hasUrl, URIRef(make_profile_url(username))))

In [20]:
def make_watcher_rdf(data, graph):
    """Add the watcher count and one set of triples per watcher.

    Args:
        data: Repository record; reads 'watchers' and 'repositoryName'. Each
            watcher entry supplies 'user_name' and 'user_about'.
        graph: Graph that receives the triples.
    """
    watchers = data['watchers']
    if not watchers:
        return

    repo_uri = make_uri('repo', data['repositoryName'])
    graph.add((repo_uri, make_predicate('hasWatcherCount'), Literal(len(watchers))))

    for watcher in watchers:
        username = watcher['user_name']
        user_uri = make_uri('person', username)

        graph.add((user_uri, DCTERMS.type, make_uri('entity', 'watcher')))
        # The relation is stored in both directions. The predicate spelling
        # 'haswatcher' (lower-case w) is part of the emitted vocabulary.
        graph.add((repo_uri, make_predicate('haswatcher'), user_uri))
        graph.add((user_uri, make_predicate('watchesRepo'), repo_uri))
        graph.add((user_uri, FOAF.about, Literal(watcher['user_about'])))
        graph.add((user_uri, FOAF.accountName, Literal(username)))
        graph.add((user_uri, FOAF.hasUrl, URIRef(make_profile_url(username))))

In [21]:
def make_forked_rdf(data, graph):
    """Add fork statistics plus one person node and one fork node per fork.

    Each fork is represented by a node ``<repositoryName>/forkedBy/<user>``
    that links back to the source repository and to the forking user.

    Args:
        data: Repository record; reads 'forks' and 'repositoryName'. Each fork
            entry supplies 'user_name', 'repo_status' and 'repo_forked_as'.
        graph: Graph that receives the triples.
    """
    forks = data['forks']
    if not forks:
        return

    active_total, inactive_total = count_forks_ratio(forks)
    repo_name = data['repositoryName']
    repo_uri = make_uri('repo', repo_name)

    graph.add((repo_uri, make_predicate('hasActiveForkCount'), Literal(active_total)))
    graph.add((repo_uri, make_predicate('hasInActiveForkCount'), Literal(inactive_total)))
    graph.add((repo_uri, make_predicate('hasTotalForkCount'), Literal(len(forks))))

    for fork in forks:
        username = fork['user_name']
        forker_uri = make_uri('person', username)

        # The person who created the fork.
        graph.add((forker_uri, DCTERMS.type, make_uri('entity', 'forker')))
        graph.add((forker_uri, FOAF.accountName, Literal(username)))
        graph.add((forker_uri, FOAF.hasUrl, URIRef(make_profile_url(username))))

        # The forked repository itself.
        fork_path = combine_string(combine_string(repo_name, "forkedBy"), username)
        fork_uri = make_uri('repo', fork_path)

        graph.add((fork_uri, make_predicate('repoForkedFrom'), repo_uri))
        graph.add((fork_uri, make_predicate('forkedBy'), forker_uri))
        graph.add((fork_uri, make_predicate('hasStatus'), make_uri('status', fork['repo_status'])))
        graph.add((fork_uri, FOAF.hasUrl, URIRef(make_repo_url(username, fork['repo_forked_as']))))
        graph.add((fork_uri, DCTERMS.type, make_uri('entity', 'forkedRepository')))

In [22]:
def make_contributes_rdf(data, graph):
    """Add the contributor count and one set of triples per contributor.

    Args:
        data: Repository record; reads 'contributors' and 'repositoryName'.
            Each contributor entry supplies 'user_name' and 'no_of_commits'.
        graph: Graph that receives the triples.
    """
    contributors = data['contributors']
    if not contributors:
        return

    repo_uri = make_uri('repo', data['repositoryName'])
    graph.add((repo_uri, make_predicate('hasContributorCount'), Literal(len(contributors))))

    for contributor in contributors:
        username = contributor['user_name']
        user_uri = make_uri('person', username)

        graph.add((user_uri, DCTERMS.type, make_uri('entity', 'contributor')))
        # NOTE(review): 'hasContributer' and 'ContibutesTo' look misspelled, but
        # they are the predicate names written to the graph; renaming them would
        # change the output vocabulary.
        graph.add((repo_uri, make_predicate('hasContributer'), user_uri))
        graph.add((user_uri, make_predicate('ContibutesTo'), repo_uri))
        # The commit count hangs off the person node, with no reference to
        # this repository in the triple.
        graph.add((user_uri, make_predicate('hasContributedCommits'), Literal(contributor['no_of_commits'])))
        graph.add((user_uri, FOAF.accountName, Literal(username)))
        graph.add((user_uri, FOAF.hasUrl, URIRef(make_profile_url(username))))
            

In [23]:
def make_issues_rdf(data, graph):
    """Add issue statistics plus one issue node and one author node per issue.

    Each issue is represented by a node ``<repositoryName>/issue/<issue_id>``.

    Args:
        data: Repository record; reads 'issues', 'repositoryName' and
            'author'. Each issue entry supplies 'issue_id', 'issue_title',
            'issue_status' and 'issue_author'.
        graph: Graph that receives the triples.
    """
    issues = data['issues']
    if not issues:
        return

    open_total, closed_total = count_issue_ratio(issues)
    repo_name = data['repositoryName']
    repo_uri = make_uri('repo', repo_name)

    graph.add((repo_uri, make_predicate('hasOpenIssueCount'), Literal(open_total)))
    graph.add((repo_uri, make_predicate('hasClosedIssueCount'), Literal(closed_total)))
    graph.add((repo_uri, make_predicate('hasTotalIssueCount'), Literal(len(issues))))

    for entry in issues:
        issue_id = entry['issue_id']
        issue_path = combine_string(repo_name, combine_string('issue', issue_id))
        issue_uri = make_uri('repo', issue_path)

        graph.add((repo_uri, make_predicate('hasIssue'), issue_uri))

        # The issue itself.
        graph.add((issue_uri, DCTERMS.title, Literal(entry['issue_title'])))
        graph.add((issue_uri, make_predicate('hasStatus'), make_uri('status', entry['issue_status'])))

        reporter = entry['issue_author']
        reporter_uri = make_uri('person', reporter)
        graph.add((issue_uri, make_predicate('createdBy'), reporter_uri))
        graph.add((issue_uri, FOAF.hasUrl, URIRef(make_issue_url(data['author'], repo_name, issue_id))))

        # The person who opened the issue.
        graph.add((reporter_uri, DCTERMS.type, make_uri('entity', 'issueAuthor')))
        graph.add((reporter_uri, FOAF.accountName, Literal(reporter)))
        graph.add((reporter_uri, FOAF.hasUrl, URIRef(make_profile_url(reporter))))


In [24]:
# Directory whose entries are read as repository JSON documents by the
# processing loop. A hard-coded absolute path ties the notebook to one machine,
# so the RDF_JSON_DIR environment variable may override it; the original
# location stays as the default.
json_files_path = os.environ.get('RDF_JSON_DIR', '/Users/abdulrafay/Desktop/RP/RDF_Graph/Jsons')
files = os.listdir(json_files_path)

In [25]:
# Single in-memory graph that the processing loop fills with the triples of
# every input file.
graph = rdflib.Graph()

In [26]:
# Convert every JSON document in the input directory into triples on `graph`.
for file in files:
    path = os.path.join(json_files_path, file)

    # Skip hidden entries (.DS_Store, .ipynb_checkpoints, ...) and anything
    # that is not a regular file; filtering out '.DS_Store' alone let other
    # stray entries reach read_json() and abort the run.
    if file.startswith('.') or not os.path.isfile(path):
        continue

    json_data = read_json(path)

    make_repo_rdf(json_data, graph)
    make_topic_rdf(json_data, graph)
    make_language_rdf(json_data, graph)
    make_star_rdf(json_data, graph)
    make_watcher_rdf(json_data, graph)
    make_forked_rdf(json_data, graph)
    make_contributes_rdf(json_data, graph)
    make_issues_rdf(json_data, graph)

In [27]:
# Write the accumulated graph to output.nt (in the current working directory)
# as UTF-8 encoded N-Triples. The call is the cell's last expression, so its
# return value is echoed as the cell output.
graph.serialize(destination="output.nt", format="nt", encoding='UTF-8')



<Graph identifier=N02860d885397469ca4a361d805606f8f (<class 'rdflib.graph.Graph'>)>