In [15]:
# !pip install owlrl
import os

# Specify the desired directory
directory_path = r"C:\backup\narrateastory"

# Change the current working directory
os.chdir(directory_path)

# Verify the change
current_directory = os.getcwd()
print("Current working directory:", current_directory)


from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal
from rdflib.plugins import stores
import csv



Current working directory: C:\backup\narrateastory


In [None]:
# set swrl in protege
hasAncientTradeRoutes(?a, ?b) ^ hasAncientTradeRoutes(?b, ?c) -> hasAncientTradeRoutes(?a, ?c)


In [18]:
# Ontology Development

import csv
from rdflib import Graph, Literal, Namespace, RDF, RDFS, OWL, URIRef

# Create the RDF graph and define the namespaces
rdf_graph = Graph()
ex = Namespace("http://narrateastory.com/heritageontology#")

rdf_graph.bind("ex", ex)
rdf_graph.bind("owl", OWL)
rdf_graph.bind("rdfs", RDFS)

# Define inferred_triples at a higher scope
inferred_triples = set()

# Function to infer transitive closure
def infer_transitive_closure(graph, property_uri, visited_triples=None):
    if visited_triples is None:
        visited_triples = set()

    global inferred_triples  # Reference the global variable
    for s1, p, o1 in graph.triples((None, property_uri, None)):
        for s2, _, o2 in graph.triples((None, property_uri, s1)):
            inferred_triple = (s2, property_uri, o1)
            if inferred_triple not in visited_triples:
                graph.add(inferred_triple)
                inferred_triples.add(inferred_triple)
                print(f"Inferred: {inferred_triple[0]} {inferred_triple[1]} {inferred_triple[2]}")
                visited_triples.add(inferred_triple)
                # Recursively check for more transitive closure
                infer_transitive_closure(graph, property_uri, visited_triples)

# Read the CSV file and build the ontology
with open("markers_data.csv", "r", encoding="utf-8-sig") as csvfile:
    reader = csv.DictReader(csvfile)

    for row in reader:
        subject = row["subject"].replace(" ", "_")
        relation = row["relation"]
        obj = row["obj"].replace(" ", "_")

        subject_uri = ex[subject]
        obj_uri = ex[obj]

        # Check if the relation is "isa" to determine superclass-subclass relationship
        if relation.lower() == "isa":
            # Check if the subclass relation already exists to avoid conflicts
            if (subject_uri, RDFS.subClassOf, obj_uri) not in rdf_graph and (obj_uri, RDFS.subClassOf, subject_uri) not in rdf_graph:
                rdf_graph.add((subject_uri, RDF.type, OWL.Class))
                rdf_graph.add((obj_uri, RDF.type, OWL.Class))
                rdf_graph.add((obj_uri, RDFS.subClassOf, subject_uri))
                inferred_triples.add((subject_uri, RDFS.subClassOf, obj_uri))
            else:
                print(f"Conflict detected: {subject} {relation} {obj}. Skipping.")

        else:
            # Check if the reverse relation exists to avoid conflicts
            if (obj_uri, ex[relation], subject_uri) not in rdf_graph:
                # Add subject and obj classes
                rdf_graph.add((subject_uri, RDF.type, OWL.Class))
                rdf_graph.add((obj_uri, RDF.type, OWL.Class))

                # Add relation property
                relation_uri = ex[relation]
                rdf_graph.add((relation_uri, RDF.type, OWL.ObjectProperty))

                # Add individual instances
                rdf_graph.add((subject_uri, RDF.type, subject_uri))
                rdf_graph.add((obj_uri, RDF.type, obj_uri))

                # Add relation between subject and obj classes
                rdf_graph.add((subject_uri, relation_uri, obj_uri))

                # Add relation between subject and obj instances
                rdf_graph.add((subject_uri, relation_uri, obj_uri))
                inferred_triples.add((subject_uri, relation_uri, obj_uri))

                # Add additional properties as RDFS:seeAlso
                if "timePeriod" in row:
                    rdf_graph.add((subject_uri, RDFS.seeAlso, Literal(row["timePeriod"])))
                if "lat" in row and "long" in row:
                    rdf_graph.add((subject_uri, RDFS.seeAlso, Literal(f"Lat: {row['lat']}, Long: {row['long']}")))
                if "utube_link" in row:
                    rdf_graph.add((subject_uri, RDFS.seeAlso, URIRef(row["utube_link"])))
                if "further_reading" in row:
                    rdf_graph.add((subject_uri, RDFS.seeAlso, URIRef(row["further_reading"])))
                if "current_name_of_place" in row:
                    rdf_graph.add((subject_uri, RDFS.seeAlso, Literal(row["current_name_of_place"])))

            else:
                print(f"Conflict detected: {subject} {relation} {obj}. Skipping.")

# Infer transitive closure for "hasAncientTradeRoute" (not "hasAncientTradeRoutes")
infer_transitive_closure(rdf_graph, ex.hasAncientTradeRoute)

# Save the entire inferred graph to an RDF file
rdf_graph.serialize(destination="inferredtriples.owl", format="xml")

# Save the inferred triples to a CSV file with "|" delimiter
with open("inferredtriples.csv", "a", encoding="utf-8-sig", newline="") as csvfile:
    csv_writer = csv.writer(csvfile, delimiter="|")
    for inferred_triple in inferred_triples:
        if not inferred_triple[0].endswith("_instance") and not inferred_triple[2].endswith("_instance"):
            csv_writer.writerow([inferred_triple[0].split("#")[1], inferred_triple[1].split("#")[1], inferred_triple[2].split("#")[1]])


Inferred: http://narrateastory.com/heritageontology#nathula http://narrateastory.com/heritageontology#hasAncientTradeRoute http://narrateastory.com/heritageontology#Tamralipta
Inferred: http://narrateastory.com/heritageontology#nathula http://narrateastory.com/heritageontology#hasAncientTradeRoute http://narrateastory.com/heritageontology#Saptagram
Inferred: http://narrateastory.com/heritageontology#chandraketugarh http://narrateastory.com/heritageontology#hasAncientTradeRoute http://narrateastory.com/heritageontology#Rome
Inferred: http://narrateastory.com/heritageontology#nathula http://narrateastory.com/heritageontology#hasAncientTradeRoute http://narrateastory.com/heritageontology#Rome


In [29]:
# SUBJECTIVE DISTANCE BETWEEN PLACES

import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
import heapq

# Function to parse the OWL file and extract triples with a specific object property
def parse_owl_file(file_path, target_object_property):
    tree = ET.parse(file_path)
    root = tree.getroot()

    # Extract namespaces
    namespaces = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'ex': 'http://narrateastory.com/heritageontology#'
    }

    triples = []

    # Loop through RDF descriptions in the XML file
    for description in root.findall(".//rdf:Description", namespaces=namespaces):
        about = description.get("{" + namespaces['rdf'] + "}about").split("#")[-1].lower()

        # Find object properties matching the target property
        for prop in description.findall(f".//ex:{target_object_property}", namespaces=namespaces):
            related_resource = prop.get("{" + namespaces['rdf'] + "}resource").split("#")[-1].lower()
            triples.append((about, target_object_property, related_resource))

    return triples

# Function to create a distance matrix from triples
def create_distance_matrix(triples):
    locations = list(set([triple[0] for triple in triples] + [triple[2] for triple in triples]))
    num_locations = len(locations)
    distance_matrix = np.inf * np.ones((num_locations, num_locations))

    location_index = {location: index for index, location in enumerate(locations)}

    # Assigning a distance of 1 if the relationship is hasAncientTradeRoute
    for start, relationship, end in triples:
        start_index = location_index[start]
        end_index = location_index[end]
        if relationship == 'hasAncientTradeRoute':
            distance_matrix[start_index, end_index] = 1
            distance_matrix[end_index, start_index] = 1  # assuming symmetric distance

    np.fill_diagonal(distance_matrix, 0)  # Diagonal elements should be zero

    return distance_matrix, locations

# Function for Dijkstra's algorithm to find the shortest path
def dijkstra(graph, start, end, locations):
    heap = [(0, start, [])]
    visited = set()

    while heap:
        (cost, current, path) = heapq.heappop(heap)

        if current in visited:
            continue

        visited.add(current)
        path = path + [locations[current]]

        if current == end:
            return cost, path

        for next_loc, next_cost in enumerate(graph[current]):
            if next_cost != np.inf and next_loc not in visited:
                heapq.heappush(heap, (cost + next_cost, next_loc, path))

    return np.inf, []

# Load and process the OWL file for hasAncientTradeRoute
owl_file_path = "inferredtriples.owl"
target_object_property = "hasAncientTradeRoute"
triples = parse_owl_file(owl_file_path, target_object_property)

# Create distance matrix for hasAncientTradeRoute
distance_matrix, all_locations = create_distance_matrix(triples)

# Save distance matrix to a csv file 
header = " ".join(all_locations)
df = pd.DataFrame(distance_matrix, columns=all_locations, index=all_locations)
#df.to_csv('distance_matrix.csv', sep=',', float_format='%.2f')

# Display the distance matrix for hasAncientTradeRoute
print("\nDistance Matrix:")
print(df)






# Load and process the OWL file for hasTravelObstacle
target_object_property = "hasTravelObstacle"
triples = parse_owl_file(owl_file_path, target_object_property)

# Create distance matrix for hasTravelObstacle
distance_matrix_hasTravelObstacle, all_locations_hasTravelObstacle = create_distance_matrix(triples)

# Display the distance matrix for hasTravelObstacle
print("\nDistance Matrix for hasTravelObstacle:")
print(pd.DataFrame(distance_matrix_hasTravelObstacle, columns=all_locations_hasTravelObstacle, index=all_locations_hasTravelObstacle))

# Extract subjects and objects dynamically
subjects_objects = set()
for subject, _, obj in triples:
    subjects_objects.add(subject)
    subjects_objects.add(obj)

# Subtract 0.5 from rows and columns with headers that have the relation "hasTravelObstacle"
headers_to_subtract = list(subjects_objects)

adjusted_cells = set()

for header in headers_to_subtract:
    if header in all_locations:
        index = all_locations.index(header)
        for i in range(len(distance_matrix)):
            if (index, i) not in adjusted_cells:
                distance_matrix[i, index] -= 0.5
                distance_matrix[index, i] -= 0.5
                if distance_matrix[i, index] < 0:
                    distance_matrix[i, index] = 0
                if distance_matrix[index, i] < 0:
                    distance_matrix[index, i] = 0
                adjusted_cells.add((index, i))
                adjusted_cells.add((i, index))

# Display the modified distance matrix
print("\nModified Distance Matrix:")
print(pd.DataFrame(distance_matrix, columns=all_locations, index=all_locations))





# Example: Find the Dijkstra shortest distance and path between two places
start_place = "chandraketugarh"
end_place = "saptagram"

start_index = all_locations.index(start_place)
end_index = all_locations.index(end_place)

shortest_distance, shortest_path = dijkstra(df.values, start_index, end_index, all_locations)

print(f"\nShortest Distance from {start_place} to {end_place}: {shortest_distance}")
print(f"Shortest Path: {' -> '.join(shortest_path)}")


# Create a list to store the CSV data
csv_data = []

# Add header row
csv_data.append(','.join(["Location1", "Location2", "Distance"]))

# Iterate over the distance matrix and create CSV data
for i in range(len(df)):
    for j in range(len(df.columns)):
        location1 = df.index[i]
        location2 = df.columns[j]
        distance = df.iloc[i, j]
        combined_places = f"{location1},{location2}"
        csv_data.append(','.join([combined_places, str(distance)]))

# Save the CSV data to a file
with open('distance_matrix_csv_format.csv', 'w') as csv_file:
    csv_file.write('\n'.join(csv_data))

# Display the created CSV data
print("\nCSV Data:")
for line in csv_data:
    print(line)

# Display message indicating successful CSV creation
print("\nCSV file 'distance_matrix_csv_format.csv' created successfully.")



Distance Matrix:
                 tamralipta  saptagram  nathula  chandraketugarh  rome
tamralipta              0.0        inf      1.0              1.0   1.0
saptagram               inf        0.0      1.0              1.0   inf
nathula                 1.0        1.0      0.0              1.0   1.0
chandraketugarh         1.0        1.0      1.0              0.0   1.0
rome                    1.0        inf      1.0              1.0   0.0

Distance Matrix for hasTravelObstacle:
                 chandraketugarh  airport  saptagram
chandraketugarh              0.0      inf        inf
airport                      inf      0.0        inf
saptagram                    inf      inf        0.0

Modified Distance Matrix:
                 tamralipta  saptagram  nathula  chandraketugarh  rome
tamralipta              0.0        inf      1.0              0.5   1.0
saptagram               inf        0.0      0.5              0.5   inf
nathula                 1.0        0.5      0.0              0.5