# Converting a relational data model into RDF suitable for knowledge-graph (KG) ingestion

In [3]:
import pandas as pd
import sqlite3
from rdflib import Graph, Literal, Namespace, URIRef

def convert_table_to_rdf(table_name, conn, graph, namespace, limit=10):
    """Add the rows of one SQL table to ``graph`` as RDF triples.

    Every row becomes one subject. Each column after the first becomes a
    predicate ``namespace[<column name>]`` whose object is a literal
    holding the cell value.

    Args:
        table_name: Name of the table to read.
        conn: Open database connection, passed to ``pd.read_sql_query``.
        graph: Target graph; triples are added with ``graph.add``.
        namespace: Object indexed with ``[]`` to mint subject and
            predicate terms.
        limit: Maximum number of rows to read. Defaults to 10, the value
            that used to be hard-coded; pass ``None`` to read every row.

    Returns:
        None. ``graph`` is mutated in place.
    """
    # Identifiers cannot be bound as SQL parameters, so quote the table
    # name (doubling embedded quotes) instead of interpolating it raw.
    quoted_table = '"' + str(table_name).replace('"', '""') + '"'
    query = f"SELECT * FROM {quoted_table}"
    if limit is not None:
        query += f" LIMIT {int(limit)}"
    df = pd.read_sql_query(query, conn)

    # DataFrame.info() prints its report itself and returns None, so
    # wrapping it in print() only appended a stray "None" line.
    df.info()

    columns = list(df.columns)
    # itertuples keeps each column's own dtype; iterrows builds one Series
    # per row and may upcast values (e.g. int keys to float).
    for record in df.itertuples(index=False, name=None):
        # Assuming the first column is the primary key. The subject is
        # scoped by table name so that equal keys from different tables
        # do not collapse into the same resource.
        subject = namespace[f"{table_name}/{record[0]}"]
        for col, value in zip(columns[1:], record[1:]):
            if pd.isna(value):
                # A NULL cell carries no statement; emit no triple for it.
                continue
            if hasattr(value, "item"):
                # Unwrap NumPy scalars into native Python values;
                # presumably lets the literal pick up a proper datatype
                # -- confirm against rdflib's Literal documentation.
                value = value.item()
            graph.add((subject, namespace[col], Literal(value)))

In [6]:
# One graph and one base namespace are shared by every converted table.
g = Graph()
n = Namespace("http://kg.org/data/")

# Open the SQLite database that holds the relational model.
conn = sqlite3.connect('datasets/data_model.db')

# Catalogue of the tables present in the database. It is not consulted by
# the loop below, which works from an explicit list of table names.
tables_query = "SELECT name FROM sqlite_master WHERE type='table';"
tables = pd.read_sql_query(tables_query, conn)

# Convert only the tables of interest into triples on the shared graph.
for table in ('usage', 'species'):
    convert_table_to_rdf(table, conn, g, n)

# Write the populated graph to disk as RDF/XML.
g.serialize(destination='./datasets/usage_species.rdf', format='xml')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   use_id              10 non-null     int64 
 1   species_id          10 non-null     int64 
 2   tribe_id            10 non-null     int64 
 3   source_id           10 non-null     int64 
 4   use_subcategory_id  10 non-null     int64 
 5   name                10 non-null     object
dtypes: int64(5), object(1)
memory usage: 612.0+ bytes
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   species_id    10 non-null     int64 
 1   name          10 non-null     object
 2   common_names  10 non-null     object
 3   family        10 non-null     object
 4   family_apg    10 non-null     object
dtypes: int64(1), object(4)
memory usage: 532.0+ bytes
None


<Graph identifier=N00f3165ad4184dcfab85cf0560ba87fb (<class 'rdflib.graph.Graph'>)>