In [262]:
import json
import os
import rdflib
import urllib.parse
from rdflib import URIRef, Literal
from rdflib.namespace import RDF
from rdflib.namespace import Namespace
from decimal import Decimal
import urllib.parse

In [263]:
def read_json(file_path):
    """Load and return the parsed JSON document stored at ``file_path``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which would garble non-ASCII identifiers on some systems.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [264]:
# Root namespace: every predicate (see make_predicate) and every derived
# namespace below is minted underneath this prefix.
base = Namespace("http://codereuse.org/")

In [265]:
# Sub-namespaces of `base`, one per kind of node emitted into the graph.
repo = Namespace(f"{base}repository/")
entity = Namespace(f"{base}entity/")
package = Namespace(f"{base}package/")
java_class = Namespace(f"{base}java_class/")
# NOTE(review): the path segment is spelled "modifer" (not "modifier"). It is
# left untouched because correcting it would change every modifier URI that
# this notebook emits; confirm with downstream consumers before renaming.
modifier = Namespace(f"{base}modifer/")
# NOTE(review): `implement` is not registered in TYPE_MAP and does not appear
# to be referenced by any other visible cell.
implement = Namespace(f"{base}implement/")
variable_type = Namespace(f"{base}variable_type/")
parameter_type = Namespace(f"{base}parameter_type/")

In [266]:
# Dublin Core Terms namespace; DCTERMS.type is the predicate used to tag
# repository nodes in setup_repo_name().
DCTERMS = Namespace("http://purl.org/dc/terms/")

In [267]:
# Lookup table from the short type tag accepted by make_uri() to the
# namespace in which that kind of node is minted.
TYPE_MAP = dict(
    repo=repo,
    entity=entity,
    base=base,
    package=package,
    java_class=java_class,
    modifier=modifier,
    variable_type=variable_type,
    parameter_type=parameter_type,
)

In [268]:
def make_uri(type_, name):
    """Build the URI for ``name`` inside the namespace registered for ``type_``.

    Args:
        type_: Key into ``TYPE_MAP`` selecting the namespace.
        name: Local name appended to the namespace.

    Returns:
        The namespaced URI when ``type_`` is registered; otherwise ``name``
        itself wrapped in a ``URIRef``.
    """
    prefix = TYPE_MAP.get(type_)
    if not prefix:
        # Unregistered type tag: use ``name`` as the complete URI.
        return URIRef(name)
    return prefix[name]

In [269]:
def make_predicate(name):
    """Return the predicate URI for ``name`` under the ``base`` namespace."""
    predicate_uri = base[name]
    return predicate_uri

In [270]:
def encode_string(uri):
    """Percent-encode ``uri`` while keeping ``:`` and ``/`` literal.

    Args:
        uri: Text to encode (e.g. a Java type such as ``List<String>``).

    Returns:
        The encoded string as produced by ``urllib.parse.quote``.
    """
    return urllib.parse.quote(uri, safe=":/")

In [271]:
def combine_string(string1, string2, string3, string4):
    """Join the four given strings into one ``/``-separated path."""
    segments = (string1, string2, string3, string4)
    return "/".join(segments)

In [272]:
def setup_repo_name(repo_name, graph):
    """Register a repository node in ``graph`` and return its clean name.

    Args:
        repo_name: File name of the repository's JSON dump; a trailing
            ``.json`` extension is removed if present.
        graph: Graph that receives the ``DCTERMS.type`` triple marking the
            repository node as an ``entity/repository``.

    Returns:
        ``repo_name`` without the ``.json`` extension.
    """
    suffix = '.json'
    # str.strip('.json') treats its argument as a character SET and trims any
    # of '.', 'j', 's', 'o', 'n' from both ends (e.g. 'jackson.json' -> 'ack').
    # Remove only the literal extension instead.
    if repo_name.endswith(suffix):
        repo_name = repo_name[:-len(suffix)]

    graph.add((make_uri('repo', repo_name), DCTERMS.type, make_uri('entity', 'repository')))

    return repo_name

In [273]:
def make_file_rdf(filename, graph, repo_name):
    """Link the repository ``repo_name`` to the file node ``filename`` via ``hasFile``."""
    repo_node = make_uri('repo', repo_name)
    has_file = make_predicate('hasFile')
    file_node = make_uri('base', filename)
    graph.add((repo_node, has_file, file_node))

In [274]:
def make_package_rdf(filename, graph, data):
    """Attach ``data`` to the file node ``filename`` as a ``hasPackage`` literal."""
    file_node = make_uri('base', filename)
    package_literal = Literal(data)
    graph.add((file_node, make_predicate('hasPackage'), package_literal))

In [275]:
def make_import_rdf(filename, graph, data):
    """Add one ``hasImport`` literal triple per entry of ``data`` to the file node."""
    file_node = make_uri('base', filename)
    has_import = make_predicate('hasImport')
    for imported_name in data:
        graph.add((file_node, has_import, Literal(imported_name)))

In [276]:
def make_class_modifier_rdf(j_class, graph, data):
    """Add one ``hasClassModifier`` triple per modifier in ``data`` to the class node."""
    class_node = make_uri('base', j_class)
    has_modifier = make_predicate('hasClassModifier')
    for class_modifier in data:
        graph.add((class_node, has_modifier, make_uri('modifier', class_modifier)))

In [277]:
def make_class_extends_rdf(j_class, graph, data):
    """Link the class node ``j_class`` to its superclass via ``classExtendsTo``.

    Args:
        j_class: Local path of the class node (resolved under ``base``).
        graph: Graph that receives the triple.
        data: Name of the superclass; ``None`` or an empty value means the
            class declares no superclass.
    """
    # Without this guard a class that extends nothing would either crash or be
    # linked to the bare ``java_class/`` namespace, which is not a real class.
    if not data:
        return

    graph.add((make_uri('base', j_class), make_predicate('classExtendsTo'), make_uri('java_class', data)))

In [278]:
def make_class_implements_rdf(j_class, graph, data):
    """Add one ``classImplementsTo`` triple per interface name in ``data``."""
    class_node = make_uri('base', j_class)
    implements_to = make_predicate('classImplementsTo')
    for interface_name in data:
        graph.add((class_node, implements_to, make_uri('java_class', interface_name)))

In [279]:
def make_class_variables_rdf(j_variable, graph, data):
    """Describe one class variable: its type and its modifiers.

    Args:
        j_variable: Local path of the variable node (resolved under ``base``).
        graph: Graph that receives the triples.
        data: Mapping providing ``'type'`` and ``'modifiers'``.
    """
    variable_node = make_uri('base', j_variable)

    # The type is percent-encoded before it becomes part of a URI.
    type_node = make_uri('variable_type', encode_string(data['type']))
    graph.add((variable_node, make_predicate('hasType'), type_node))

    has_modifier = make_predicate('hasVariableModifier')
    for variable_modifier in data['modifiers']:
        graph.add((variable_node, has_modifier, make_uri('modifier', variable_modifier)))

In [280]:
def make_class_constructor_rdf(j_constructor, graph, data):
    """Describe one constructor: modifiers, parameters, used imports and throws.

    Args:
        j_constructor: Local path of the constructor node (resolved under
            ``base``).
        graph: Graph that receives the triples.
        data: Mapping providing ``'name'``, ``'modifiers'``, ``'parameters'``
            (each with ``'name'``, ``'type'`` and ``'modifiers'``),
            ``'used_imports'`` and ``'throws'``.
    """
    constructor_node = make_uri('base', j_constructor)

    for con_modifer in data['modifiers']:
        graph.add((constructor_node, make_predicate('hasConstructorModifier'), make_uri('modifier', con_modifer)))

    for parameter in data['parameters']:
        # NOTE(review): the parameter path is built from the constructor name
        # only, not from ``j_constructor``, so same-named constructors of
        # different classes share parameter nodes. Left as is to keep URIs stable.
        cons_paramter = combine_string('java_constructor', data['name'], 'parameter', parameter['name'])
        parameter_node = make_uri('base', cons_paramter)
        graph.add((constructor_node, make_predicate('hasConstructorParameter'), parameter_node))

        # Percent-encode the type exactly like make_class_variables_rdf does:
        # generic or array types (``List<String>``, ``int[]``) otherwise yield
        # URIs that cannot be serialized.
        parameter_type_node = make_uri('parameter_type', encode_string(parameter['type']))
        graph.add((parameter_node, make_predicate('hasType'), parameter_type_node))

        for param_cons_modifer in parameter['modifiers']:
            graph.add((parameter_node, make_predicate('hasParameterModifier'), make_uri('modifier', param_cons_modifer)))

    for used_import in data['used_imports']:
        graph.add((constructor_node, make_predicate('hasUsedImport'), Literal(used_import)))

    for throw in data['throws']:
        graph.add((constructor_node, make_predicate('throws'), Literal(throw)))


In [281]:
def make_class_method_rdf(j_method, graph, data):
    """Describe one method: modifiers, parameters, used imports and throws.

    Args:
        j_method: Local path of the method node (resolved under ``base``).
        graph: Graph that receives the triples.
        data: Mapping providing ``'name'``, ``'modifiers'``, ``'parameters'``
            (each with ``'name'``, ``'type'`` and ``'modifiers'``),
            ``'used_imports'`` and ``'throws'``.
    """
    method_node = make_uri('base', j_method)

    for met_modifer in data['modifiers']:
        graph.add((method_node, make_predicate('hasMethodModifier'), make_uri('modifier', met_modifer)))

    for parameter in data['parameters']:
        # NOTE(review): the parameter path is built from the method name only,
        # not from ``j_method``, so same-named methods of different classes
        # share parameter nodes. Left as is to keep URIs stable.
        meth_paramter = combine_string('java_method', data['name'], 'parameter', parameter['name'])
        parameter_node = make_uri('base', meth_paramter)
        graph.add((method_node, make_predicate('hasMethodParameter'), parameter_node))

        # Percent-encode the type exactly like make_class_variables_rdf does:
        # generic or array types (``List<String>``, ``int[]``) otherwise yield
        # URIs that cannot be serialized.
        parameter_type_node = make_uri('parameter_type', encode_string(parameter['type']))
        graph.add((parameter_node, make_predicate('hasType'), parameter_type_node))

        for meth_param_modifer in parameter['modifiers']:
            graph.add((parameter_node, make_predicate('hasParameterModifier'), make_uri('modifier', meth_param_modifer)))

    for used_import in data['used_imports']:
        graph.add((method_node, make_predicate('hasUsedImport'), Literal(used_import)))

    for throw in data['throws']:
        graph.add((method_node, make_predicate('throws'), Literal(throw)))


In [282]:
def make_class_rdf(filename, graph, data, key):
    """Add the triples for every class declared in one Java file.

    Args:
        filename: Local path of the file node (resolved under ``base``).
        graph: Graph that receives the triples.
        data: Iterable of class mappings, each providing ``'name'``,
            ``'modifiers'``, ``'extends'``, ``'implements'``, ``'variables'``,
            ``'constructors'`` and ``'methods'``.
        key: The file's key in the source JSON, used inside the class path.
    """
    for _class in data:
        j_class = combine_string('file', key, 'java_class', _class['name'])
        graph.add((make_uri('base', filename), make_predicate('hasClass'), make_uri('base', j_class)))

        make_class_modifier_rdf(j_class, graph, _class['modifiers'])
        make_class_extends_rdf(j_class, graph, _class['extends'])
        make_class_implements_rdf(j_class, graph, _class['implements'])

        for variable in _class['variables']:
            j_variable = combine_string('java_class', _class['name'], 'java_variable', variable['name'])
            graph.add((make_uri('base', j_class), make_predicate('hasVariable'), make_uri('base', j_variable)))

            make_class_variables_rdf(j_variable, graph, variable)

        for constructor in _class['constructors']:
            j_constructor = combine_string('java_class', _class['name'], 'java_constructor', constructor['name'])
            graph.add((make_uri('base', j_class), make_predicate('hasConstructor'), make_uri('base', j_constructor)))

            make_class_constructor_rdf(j_constructor, graph, constructor)

        for method in _class['methods']:
            j_method = combine_string('java_class', _class['name'], 'java_method', method['name'])
            graph.add((make_uri('base', j_class), make_predicate('hasMethod'), make_uri('base', j_method)))

            # Methods go through the method emitter. The constructor emitter
            # would tag them with hasConstructorModifier/hasConstructorParameter
            # and put their parameters under ``java_constructor/``.
            make_class_method_rdf(j_method, graph, method)


In [283]:
def process_java_file(key, value, graph, repo_name):
    """Emit the triples for one ``.java`` entry of a repository dump.

    Args:
        key: The file's key in the source JSON.
        value: Mapping for that file; ``'package'`` and ``'imports'`` are
            always read, ``'classes'`` / ``'interfaces'`` only when present.
        graph: Graph that receives the triples.
        repo_name: Name of the repository the file belongs to.
    """
    filename = combine_string('repository', repo_name, 'file', key)

    make_file_rdf(filename, graph, repo_name)
    make_package_rdf(filename, graph, value['package'])
    make_import_rdf(filename, graph, value['imports'])

    # Classes take precedence; interfaces are only acknowledged on stdout.
    if 'classes' in value and value['classes']:
        make_class_rdf(filename, graph, value['classes'], key)
        return

    if 'interfaces' in value and value['interfaces']:
        print("Interface ok")
        return

    # Neither classes nor interfaces: dump the entry for inspection.
    print(value)


In [284]:
def process_json(data, graph, repo_name):
    """Recursively walk ``data`` and process every ``.java`` entry found.

    Dict keys ending in ``.java`` are handed to ``process_java_file``; every
    dict value and list item is then visited recursively. Any other value is
    ignored.
    """
    if isinstance(data, list):
        for element in data:
            process_json(element, graph, repo_name)
        return

    if not isinstance(data, dict):
        return

    for name, child in data.items():
        if name.endswith('.java'):
            process_java_file(name, child, graph, repo_name)

        # The value is descended into regardless of whether it was a Java file.
        process_json(child, graph, repo_name)


In [285]:
# Directory holding the JSON dumps; each file in it becomes one repository
# node. The location can be overridden through the CODEDATA_JSON_DIR
# environment variable so the notebook also runs on other machines; the
# original path remains the default.
json_files_path = os.environ.get(
    'CODEDATA_JSON_DIR',
    '/Users/abdulrafay/Desktop/RP/RDF_Graph/codedata_jsons',
)
# os.listdir returns entries in arbitrary order; sort them so that runs are
# reproducible.
files = sorted(os.listdir(json_files_path))

In [286]:
# Single graph that accumulates the triples of every processed repository.
graph = rdflib.Graph()

In [None]:
# Load every repository dump found in the input directory into the graph.
for file in files:
    path = os.path.join(json_files_path, file)

    # Skip hidden entries (such as .DS_Store) and anything that is not a
    # regular file; passing those to read_json would raise.
    if file.startswith('.') or not os.path.isfile(path):
        continue

    json_data = read_json(path)

    repo_name = setup_repo_name(file, graph)
    process_json(json_data, graph, repo_name)


In [None]:
# Write all collected triples to codedata.nt in N-Triples format, UTF-8 encoded.
graph.serialize(destination="codedata.nt", format="nt", encoding='UTF-8')