In [None]:
# Download only the Triplex-Q4_K_M.gguf file from the bartowski/Triplex-GGUF repo into the current directory.
!huggingface-cli download bartowski/Triplex-GGUF --include "Triplex-Q4_K_M.gguf" --local-dir ./


In [None]:
# !CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
# !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
!pip install llama-cpp-python

In [None]:
import json
from llama_cpp import Llama

In [None]:
# Load the GGUF weights from the directory the download cell wrote them to
# (`--local-dir ./`) rather than from a hard-coded absolute path, so the notebook
# also works when the working directory is not /content.
# NOTE(review): n_gpu_layers=-1 presumably requests full GPU offload — confirm the
# installed llama-cpp-python build actually has GPU support.
model = Llama(model_path="./Triplex-Q4_K_M.gguf", n_ctx=2048, n_gpu_layers=-1)

In [None]:
def triplextract(model, text, entity_types, predicates):
    """Ask the model for entities and triples restricted to a given schema.

    The entity types and predicates are serialised with ``json.dumps`` and
    substituted, together with ``text``, into a fixed instruction prompt that
    is sent to ``model.create_completion``.

    Args:
        model: Object exposing ``create_completion`` (the ``Llama`` instance
            in this notebook).
        text: Passage to extract from.
        entity_types: Entity type labels offered to the model.
        predicates: Relation labels offered to the model.

    Returns:
        The text of the first completion choice with surrounding whitespace
        removed.
    """
    prompt_template = """Perform Named Entity Recognition (NER) and extract knowledge graph triplets from the text. NER identifies named entities of given entity types, and triple extraction identifies relationships between entities using specified predicates. return only the triples.

        **Entity Types:**
        {entity_types}

        **Predicates:**
        {predicates}

        **Text:**
        {text}
        """

    # Serialise the schema first, in the same order the placeholders appear.
    prompt_fields = {
        "entity_types": json.dumps(entity_types),
        "predicates": json.dumps(predicates),
        "text": text,
    }
    prompt = prompt_template.format(**prompt_fields)

    generation_options = {
        "max_tokens": 2048,
        "stop": ["</s>", "Human:", "H:"],  # cut generation at end-of-sequence / turn markers
        "echo": False,
    }
    completion = model.create_completion(prompt, **generation_options)

    first_choice = completion['choices'][0]
    return first_choice['text'].strip()

In [None]:
# Schema handed to triplextract: the entity type labels and the relation labels
# that are serialised into the prompt.
entity_types = [ "LOCATION", "POSITION", "DATE", "CITY", "COUNTRY", "NUMBER" ]
predicates = [ "POPULATION", "AREA" ]

In [None]:
# Sample passage used as the extraction input.
text = """
San Francisco,[24] officially the City and County of San Francisco, is a commercial, financial, and cultural center in Northern California.

With a population of 808,437 residents as of 2022, San Francisco is the fourth most populous city in the U.S. state of California behind Los Angeles, San Diego, and San Jose.
"""

In [None]:
prediction = triplextract(model, text, entity_types, predicates)
print(prediction)

Llama.generate: 200 prefix-match hit, remaining 1 prompt tokens to eval

llama_print_timings:        load time =   55321.61 ms
llama_print_timings:      sample time =      12.22 ms /   218 runs   (    0.06 ms per token, 17843.99 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     0 tokens (    -nan ms per token,     -nan tokens per second)
llama_print_timings:        eval time =   93709.31 ms /   218 runs   (  429.86 ms per token,     2.33 tokens per second)
llama_print_timings:       total time =   93949.09 ms /   218 tokens


```json
{
    "entities_and_triples": [
        "[1], CITY:San Francisco",
        "[2], LOCATION:San Francisco, City and County",
        "[3], LOCATION:San Francisco",
        "[4], LOCATION:Northern California",
        "[5], NUMBER:808,437",
        "[1] POPULATION [5]",
        "[6], DATE:2022",
        "[7], COUNTRY:U.S.",
        "[8], LOCATION:California",
        "[9], CITY:Los Angeles",
        "[10], CITY:San Diego",
        "[11], CITY:San Jose",
        "[12], LOCATION:San Francisco, County of San Francisco",
        "[13], LOCATION:San Francisco, City and County of San Francisco"
    ]
}
```


In [None]:
import pandas as pd
import re

In [None]:
# Drop the backticks of the Markdown code fence that surrounds the model output.
prediction = prediction.strip('`')

# One element per output line; entity and triple strings each sit on their own line.
lines = prediction.split('\n')

# entity id (as a string) -> entity name
entity_dict = {}
# raw triple strings, resolved against entity_dict in a later cell
triples = []

In [None]:
# Start from empty containers every time this cell runs. Appending to lists that were
# created in another cell made each re-run of this cell add the same triples again,
# which is how one extracted triple ended up as several identical rows.
entity_dict = {}
triples = []

for line in lines:
    line = line.strip()
    if not line.startswith('"['):
        # Fence tag, braces and the "entities_and_triples" key carry no data.
        continue
    if ':' in line:
        # Entity line, e.g. "[1], CITY:San Francisco". The type stops at the first
        # colon so that a name which itself contains ':' is kept intact.
        match = re.match(r'\"\[(\d+)\], ([^:]+):(.+)\"', line)
        if match:
            entity_id, entity_type, entity_name = match.groups()
            entity_dict[entity_id] = entity_name.strip()
    elif ']' in line:
        # Triple line, e.g. "[1] POPULATION [5]"; ids are resolved in the next cell.
        triples.append(line.strip('"'))

In [None]:
# Resolve the numeric ids of every raw triple to entity names; an id with no
# matching entity is kept as-is.
processed_triples = []

for triple in triples:
    match = re.match(r'\[(\d+)\] (.+) \[(\d+)\]', triple)
    if not match:
        continue  # not in "[id] PREDICATE [id]" form

    subject_id, predicate, object_id = match.groups()
    subject = entity_dict.get(subject_id, subject_id)
    # Named `obj` rather than `object` so the builtin is not shadowed for the
    # rest of the notebook session.
    obj = entity_dict.get(object_id, object_id)

    processed_triples.append((subject, predicate, obj))

In [None]:
df = pd.DataFrame(processed_triples, columns=['Subject', 'Predicate', 'Object'])

print(df)

         Subject   Predicate   Object
0  San Francisco  POPULATION  808,437
1  San Francisco  POPULATION  808,437
2  San Francisco  POPULATION  808,437
3  San Francisco  POPULATION  808,437


### **Without Input Entity Types and Predicates**

In [None]:
def triplextract(model, text):
    """Extract entities and triples without a caller-supplied schema.

    The prompt asks the model to work out the entity types and predicates
    from the passage itself before extracting triplets.

    Args:
        model: Object exposing ``create_completion``.
        text: Passage to extract from.

    Returns:
        The text of the first completion choice with surrounding whitespace
        removed.

    Raises:
        Exception: Any error from the completion call or from reading its
            result is printed and then re-raised unchanged.
    """
    prompt_template = """Perform Named Entity Recognition (NER) and extract knowledge graph triplets from the text. Identify relevant entity types and predicates based on the content of the text.

        **Instructions:**
        1. Identify relevant entity types present in the text.
        2. Identify relevant predicates (relationships) between entities in the text.
        3. Extract entities and their types.
        4. Extract triplets (subject-predicate-object) based on the identified entities and predicates.

        **Text:**
        {text}

        """

    prompt = prompt_template.format(text=text)
    stop_sequences = ["</s>", "Human:", "H:"]

    try:
        completion = model.create_completion(
            prompt, max_tokens=2048, stop=stop_sequences, echo=False
        )
        first_choice = completion['choices'][0]
        return first_choice['text'].strip()
    except Exception as err:
        # Surface the failure in the cell output, then let it propagate.
        print(f"An error occurred: {err}")
        raise


In [None]:
prediction = triplextract(model, text)
print(prediction)

Llama.generate: 20 prefix-match hit, remaining 241 prompt tokens to eval

llama_print_timings:        load time =   51169.01 ms
llama_print_timings:      sample time =       1.23 ms /    21 runs   (    0.06 ms per token, 17031.63 tokens per second)
llama_print_timings: prompt eval time =   65211.42 ms /   241 tokens (  270.59 ms per token,     3.70 tokens per second)
llama_print_timings:        eval time =    8094.32 ms /    20 runs   (  404.72 ms per token,     2.47 tokens per second)
llama_print_timings:       total time =   73325.97 ms /   261 tokens


```json
{
    "entities_and_triples": []
}
```


In [None]:
def triplextract(model, text):
    """Extract entities and triples, hinting at a minimum schema in the prompt.

    Same flow as the schema-free variant, but the instructions name entity
    types (LOCATION, CITY, COUNTRY, NUMBER, DATE) and predicates (POPULATION,
    LOCATION_IN) the model should include if applicable.

    Args:
        model: Object exposing ``create_completion``.
        text: Passage to extract from.

    Returns:
        The text of the first completion choice with surrounding whitespace
        removed.

    Raises:
        Exception: Any error from the completion call or from reading its
            result is printed and then re-raised unchanged.
    """
    prompt_template = """Perform Named Entity Recognition (NER) and extract knowledge graph triplets from the text. Identify relevant entity types and predicates based on the content of the text.

        **Instructions:**
        1. Identify relevant entity types present in the text. Include at least LOCATION, CITY, COUNTRY, NUMBER, and DATE if applicable.
        2. Identify relevant predicates (relationships) between entities in the text. Include at least POPULATION and LOCATION_IN if applicable.
        3. Extract entities and their types.
        4. Extract triplets (subject-predicate-object) based on the identified entities and predicates.

        **Text:**
        {text}

        """

    prompt = prompt_template.format(text=text)
    completion_kwargs = dict(
        max_tokens=2048,
        stop=["</s>", "Human:", "H:"],
        echo=False,
    )

    try:
        response = model.create_completion(prompt, **completion_kwargs)
        generated_text = response['choices'][0]['text']
        return generated_text.strip()
    except Exception as err:
        # Report in the cell output, then propagate the original exception.
        print(f"An error occurred: {err}")
        raise

In [None]:
prediction = triplextract(model, text)
print(prediction)

Llama.generate: 60 prefix-match hit, remaining 194 prompt tokens to eval

llama_print_timings:        load time =   51169.01 ms
llama_print_timings:      sample time =      14.38 ms /   239 runs   (    0.06 ms per token, 16616.84 tokens per second)
llama_print_timings: prompt eval time =   51216.10 ms /   194 tokens (  264.00 ms per token,     3.79 tokens per second)
llama_print_timings:        eval time =  102210.94 ms /   238 runs   (  429.46 ms per token,     2.33 tokens per second)
llama_print_timings:       total time =  153695.39 ms /   432 tokens


```json
{
    "entities_and_triples": [
        "[1], CITY:San Francisco",
        "[2], CITY:City and County of San Francisco",
        "[3], LOCATION:Northern California",
        "[1] LOCATION_IN [3]",
        "[4], NUMBER:808,437",
        "[1] POPULATION [4]",
        "[5], DATE:2022",
        "[6], LOCATION:California",
        "[1] LOCATION_IN [6]",
        "[7], CITY:Los Angeles",
        "[8], CITY:San Diego",
        "[9], CITY:San Jose",
        "[10], COUNTRY:United States",
        "[6] LOCATION_IN [10]",
        "[11], LOCATION:Northern California region",
        "[1] LOCATION_IN [11]"
    ]
}
```


In [None]:
# Remove the Markdown code-fence markers so only the JSON document is left.
prediction = re.sub(r'```json\s*|\s*```', '', prediction)

# The parsed object is read via its 'entities_and_triples' key in the next cell.
result = json.loads(prediction)

In [None]:
# entities: id -> {'type', 'name'} for each declared entity.
# triples:  (subject_id, predicate, object_id) tuples, in the order they were emitted.
entities = {}
triples = []

for item in result['entities_and_triples']:
    if ':' in item:
        # Entity declaration such as "[1], CITY:San Francisco". The type stops at the
        # first colon, so a name that itself contains ':' is not split in the wrong place.
        match = re.match(r'\[(\d+)\], ([^:]+):(.+)', item)
        if match:
            entity_id, entity_type, entity_name = match.groups()
            entities[entity_id] = {'type': entity_type, 'name': entity_name}
    else:
        # Relation such as "[1] POPULATION [4]".
        match = re.match(r'\[(\d+)\] (.+) \[(\d+)\]', item)
        if match:
            subject_id, predicate, object_id = match.groups()
            triples.append((subject_id, predicate, object_id))

In [None]:
# Build the triple table and swap each entity id for its name in one expression;
# an id without a matching entity is left unchanged.
df = pd.DataFrame(triples, columns=['Subject', 'Predicate', 'Object']).assign(
    Subject=lambda frame: frame['Subject'].map(
        lambda key: entities.get(key, {}).get('name', key)),
    Object=lambda frame: frame['Object'].map(
        lambda key: entities.get(key, {}).get('name', key)),
)

In [None]:
print("Extracted Entities:")

for entity_id, entity_info in entities.items():
    print(f"[{entity_id}] {entity_info['type']}: {entity_info['name']}")

Extracted Entities:
[1] CITY: San Francisco
[2] CITY: City and County of San Francisco
[3] LOCATION: Northern California
[4] NUMBER: 808,437
[5] DATE: 2022
[6] LOCATION: California
[7] CITY: Los Angeles
[8] CITY: San Diego
[9] CITY: San Jose
[10] COUNTRY: United States
[11] LOCATION: Northern California region


In [None]:
df

Unnamed: 0,Subject,Predicate,Object
0,San Francisco,LOCATION_IN,Northern California
1,San Francisco,POPULATION,808437
2,San Francisco,LOCATION_IN,California
3,California,LOCATION_IN,United States
4,San Francisco,LOCATION_IN,Northern California region


In [None]:
# Collect the schema the model actually used. The entity types are sorted because a
# plain set has no stable iteration order for strings, so the printed list could
# differ from one kernel session to the next.
entity_types = sorted({entity['type'] for entity in entities.values()})
# unique() returns values in order of first appearance, so this is already stable.
predicates = list(df['Predicate'].unique())

print("\nIdentified Entity Types:")
print(entity_types)
print("\nIdentified Predicates:")
print(predicates)


Identified Entity Types:
['DATE', 'NUMBER', 'CITY', 'LOCATION', 'COUNTRY']

Identified Predicates:
['LOCATION_IN', 'POPULATION']


In [None]:
def triplextract(model, text):
    """Extract triples with a single-instruction prompt and greedy decoding.

    Compared with the earlier variants, no stop sequences are passed and
    ``temperature`` is set to 0.

    Args:
        model: Object exposing ``create_completion``.
        text: Passage to extract from.

    Returns:
        The text of the first completion choice with surrounding whitespace
        removed.

    Raises:
        Exception: Any error from the completion call or from reading its
            result is printed and then re-raised unchanged.
    """
    prompt_template = """Perform Name Entity Recognition (NER) and extract knowledge graph triplets from the text. Identify relevant entity types and predicates in the text given.

        **Instructions:**
        1. Extract triplets (subject-predicate-object) based on the identified entities and predicates.

        **Text:**
        {text}

        """

    prompt = prompt_template.format(text=text)

    try:
        completion = model.create_completion(
            prompt, max_tokens=2048, echo=False, temperature=0
        )
        first_choice = completion['choices'][0]
        return first_choice['text'].strip()
    except Exception as err:
        # Show the failure in the cell output before propagating it.
        print(f"An error occurred: {err}")
        raise


In [None]:
text = """
Joseph Robinette Biden Jr. (/ˈbaɪdən/; born November 20, 1942) is an American politician who is the 46th and current president of the United States.
    A member of the Democratic Party, he previously served as the 47th vice president from 2009 to 2017 under President Barack Obama, and represented Delaware in the United States Senate from 1973 to 2009.
    Biden was born and raised in Scranton, Pennsylvania, and moved with his family to Delaware in 1953 when he was ten years old.
    He studied at the University of Delaware before earning his law degree from Syracuse University.
    He was elected to the New Castle County Council in 1970 and became the sixth-youngest senator in U.S. history after he was elected to the United States Senate from Delaware in 1972, at age 29.
    Biden was the chair or ranking member of the Senate Foreign Relations Committee for 12 years.
    He also chaired the Senate Judiciary Committee from 1987 to 1995; led the effort to pass the Violent Crime Control and Law Enforcement Act and the Violence Against Women Act; and oversaw six U.S. Supreme Court confirmation hearings, including the contentious hearings for Robert Bork and Clarence Thomas.
    Biden ran unsuccessfully for the Democratic presidential nomination in 1988 and 2008, before becoming Obama's vice president after they won the 2008 presidential election.
    During his two terms as vice president, Biden frequently represented the administration in negotiations with congressional Republicans and was a close counselor to President Obama.
    Biden and his running mate, Kamala Harris, defeated incumbent Donald Trump in the 2020 presidential election.
    Upon inauguration, he became the oldest president in U.S. history and the first to have a female vice president.
    Biden signed the American Rescue Plan Act to help the U.S. recover from the COVID-19 pandemic and subsequent recession.
    He proposed the American Jobs Plan, aspects of which were incorporated into the bipartisan Infrastructure Investment and Jobs Act.
    He proposed the American Families Plan, which was merged with other aspects of the American Jobs Plan into the proposed Build Back Better Act.
    After facing opposition in the Senate, the Build Back Better Act's size was reduced and it was comprehensively reworked into the Inflation Reduction Act of 2022, covering deficit reduction, climate change, healthcare, and tax reform.
    Biden appointed Ketanji Brown Jackson to the Supreme Court. In foreign policy, he restored the U.S. into the Paris Agreement on climate change.
    He completed the withdrawal of U.S. troops from Afghanistan, during which the Afghan government collapsed and the Taliban seized control.
    He responded to the 2022 Russian invasion of Ukraine by imposing sanctions on Russia and authorizing foreign aid and weapons shipments to Ukraine. (en)
"""

In [None]:
prediction = triplextract(model, text)
print(prediction)


llama_print_timings:        load time =  129525.20 ms
llama_print_timings:      sample time =       1.10 ms /    21 runs   (    0.05 ms per token, 19108.28 tokens per second)
llama_print_timings: prompt eval time =  209369.89 ms /   818 tokens (  255.95 ms per token,     3.91 tokens per second)
llama_print_timings:        eval time =    9360.09 ms /    20 runs   (  468.00 ms per token,     2.14 tokens per second)
llama_print_timings:       total time =  218759.50 ms /   838 tokens


```json
{
    "entities_and_triples": []
}
```


In [None]:
def triplextract(model, text):
    """Extract triples with placeholder schema sections in the prompt.

    The prompt keeps the "Entity Types" and "Predicates" headings but fills
    them with an instruction to identify them, instead of concrete labels.
    No stop sequences are passed and ``temperature`` is 0.

    Args:
        model: Object exposing ``create_completion``.
        text: Passage to extract from.

    Returns:
        The text of the first completion choice with surrounding whitespace
        removed.

    Raises:
        Exception: Any error from the completion call or from reading its
            result is printed and then re-raised unchanged.
    """
    prompt_template = """Perform Name Entity Recognition (NER) and extract knowledge graph triplets from the text. Identify relevant entity types and predicates in the text given.

        **Instructions:**
        1. Extract triplets (subject-predicate-object) based on the identified entities and predicates.

        **Entity Types:**
        identify the entity types yourself

        **Predicates:**
        identify the predicates yourself

        **Text:**
        {text}

        """

    prompt = prompt_template.format(text=text)
    completion_kwargs = dict(max_tokens=2048, echo=False, temperature=0)

    try:
        response = model.create_completion(prompt, **completion_kwargs)
        generated_text = response['choices'][0]['text']
        return generated_text.strip()
    except Exception as err:
        # Show the failure in the cell output before propagating it.
        print(f"An error occurred: {err}")
        raise


In [None]:
text = """
Joseph Robinette Biden Jr. (/ˈbaɪdən/; born November 20, 1942) is an American politician who is the 46th and current president of the United States.
    A member of the Democratic Party, he previously served as the 47th vice president from 2009 to 2017 under President Barack Obama, and represented Delaware in the United States Senate from 1973 to 2009.
    Biden was born and raised in Scranton, Pennsylvania, and moved with his family to Delaware in 1953 when he was ten years old.
    He studied at the University of Delaware before earning his law degree from Syracuse University.
    He was elected to the New Castle County Council in 1970 and became the sixth-youngest senator in U.S. history after he was elected to the United States Senate from Delaware in 1972, at age 29.
    Biden was the chair or ranking member of the Senate Foreign Relations Committee for 12 years.
    He also chaired the Senate Judiciary Committee from 1987 to 1995; led the effort to pass the Violent Crime Control and Law Enforcement Act and the Violence Against Women Act; and oversaw six U.S. Supreme Court confirmation hearings, including the contentious hearings for Robert Bork and Clarence Thomas.
    Biden ran unsuccessfully for the Democratic presidential nomination in 1988 and 2008, before becoming Obama's vice president after they won the 2008 presidential election.
    During his two terms as vice president, Biden frequently represented the administration in negotiations with congressional Republicans and was a close counselor to President Obama.
    Biden and his running mate, Kamala Harris, defeated incumbent Donald Trump in the 2020 presidential election.
    Upon inauguration, he became the oldest president in U.S. history and the first to have a female vice president.
    Biden signed the American Rescue Plan Act to help the U.S. recover from the COVID-19 pandemic and subsequent recession.
    He proposed the American Jobs Plan, aspects of which were incorporated into the bipartisan Infrastructure Investment and Jobs Act.
    He proposed the American Families Plan, which was merged with other aspects of the American Jobs Plan into the proposed Build Back Better Act.
    After facing opposition in the Senate, the Build Back Better Act's size was reduced and it was comprehensively reworked into the Inflation Reduction Act of 2022, covering deficit reduction, climate change, healthcare, and tax reform.
    Biden appointed Ketanji Brown Jackson to the Supreme Court. In foreign policy, he restored the U.S. into the Paris Agreement on climate change.
    He completed the withdrawal of U.S. troops from Afghanistan, during which the Afghan government collapsed and the Taliban seized control.
    He responded to the 2022 Russian invasion of Ukraine by imposing sanctions on Russia and authorizing foreign aid and weapons shipments to Ukraine. (en)
"""

In [None]:
prediction = triplextract(model, text)
print(prediction)

Llama.generate: 72 prefix-match hit, remaining 778 prompt tokens to eval

llama_print_timings:        load time =  129525.20 ms
llama_print_timings:      sample time =       1.08 ms /    21 runs   (    0.05 ms per token, 19498.61 tokens per second)
llama_print_timings: prompt eval time =  208348.44 ms /   778 tokens (  267.80 ms per token,     3.73 tokens per second)
llama_print_timings:        eval time =    9252.35 ms /    20 runs   (  462.62 ms per token,     2.16 tokens per second)
llama_print_timings:       total time =  217633.24 ms /   798 tokens


```json
{
    "entities_and_triples": []
}
```


In [None]:
def triplextract(model, text):
    """Extract triples with an instruction-only prompt (no NER preamble).

    The prompt opens directly with an "Instruction" section and tells the
    model to identify entity types and predicates from the text. No stop
    sequences are passed and ``temperature`` is 0.

    Args:
        model: Object exposing ``create_completion``.
        text: Passage to extract from.

    Returns:
        The text of the first completion choice with surrounding whitespace
        removed.

    Raises:
        Exception: Any error from the completion call or from reading its
            result is printed and then re-raised unchanged.
    """
    prompt_template = """

        **Instruction:**
        Extract triplets (subject-predicate-object) based on the identified entities and predicates available in the given text.

        **Entity Types:**
        identify the entity types from the text using your knowledge

        **Predicates:**
        identify the predicates from the text using your knowledge

        **Text:**
        {text}

        """

    prompt = prompt_template.format(text=text)

    try:
        completion = model.create_completion(
            prompt,
            temperature=0,
            echo=False,
            max_tokens=2048,
        )
        choices = completion['choices']
        return choices[0]['text'].strip()
    except Exception as err:
        # Show the failure in the cell output before propagating it.
        print(f"An error occurred: {err}")
        raise


In [None]:
text = """
Joseph Robinette Biden Jr. (/ˈbaɪdən/; born November 20, 1942) is an American politician who is the 46th and current president of the United States.
    A member of the Democratic Party, he previously served as the 47th vice president from 2009 to 2017 under President Barack Obama, and represented Delaware in the United States Senate from 1973 to 2009.
    Biden was born and raised in Scranton, Pennsylvania, and moved with his family to Delaware in 1953 when he was ten years old.
    He studied at the University of Delaware before earning his law degree from Syracuse University.
    He was elected to the New Castle County Council in 1970 and became the sixth-youngest senator in U.S. history after he was elected to the United States Senate from Delaware in 1972, at age 29.
    Biden was the chair or ranking member of the Senate Foreign Relations Committee for 12 years.
    He also chaired the Senate Judiciary Committee from 1987 to 1995; led the effort to pass the Violent Crime Control and Law Enforcement Act and the Violence Against Women Act; and oversaw six U.S. Supreme Court confirmation hearings, including the contentious hearings for Robert Bork and Clarence Thomas.
    Biden ran unsuccessfully for the Democratic presidential nomination in 1988 and 2008, before becoming Obama's vice president after they won the 2008 presidential election.
    During his two terms as vice president, Biden frequently represented the administration in negotiations with congressional Republicans and was a close counselor to President Obama.
    Biden and his running mate, Kamala Harris, defeated incumbent Donald Trump in the 2020 presidential election.
    Upon inauguration, he became the oldest president in U.S. history and the first to have a female vice president.
    Biden signed the American Rescue Plan Act to help the U.S. recover from the COVID-19 pandemic and subsequent recession.
    He proposed the American Jobs Plan, aspects of which were incorporated into the bipartisan Infrastructure Investment and Jobs Act.
    He proposed the American Families Plan, which was merged with other aspects of the American Jobs Plan into the proposed Build Back Better Act.
    After facing opposition in the Senate, the Build Back Better Act's size was reduced and it was comprehensively reworked into the Inflation Reduction Act of 2022, covering deficit reduction, climate change, healthcare, and tax reform.
    Biden appointed Ketanji Brown Jackson to the Supreme Court. In foreign policy, he restored the U.S. into the Paris Agreement on climate change.
    He completed the withdrawal of U.S. troops from Afghanistan, during which the Afghan government collapsed and the Taliban seized control.
    He responded to the 2022 Russian invasion of Ukraine by imposing sanctions on Russia and authorizing foreign aid and weapons shipments to Ukraine. (en)
"""

In [None]:
prediction = triplextract(model, text)
print(prediction)


llama_print_timings:        load time =  129525.20 ms
llama_print_timings:      sample time =       1.07 ms /    21 runs   (    0.05 ms per token, 19644.53 tokens per second)
llama_print_timings: prompt eval time =  212854.66 ms /   830 tokens (  256.45 ms per token,     3.90 tokens per second)
llama_print_timings:        eval time =    9412.98 ms /    20 runs   (  470.65 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  222288.03 ms /   850 tokens


```json
{
    "entities_and_triples": []
}
```


### **Entity Linking**

In [None]:
!pip install funcy

Collecting funcy
  Downloading funcy-2.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading funcy-2.0-py2.py3-none-any.whl (30 kB)
Installing collected packages: funcy
Successfully installed funcy-2.0


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


# Tokenizer and seq2seq model for the "facebook/genre-linking-blink" checkpoint.
# .eval() is called on the model; in this notebook it is only used through generate().
genre_tokenizer = AutoTokenizer.from_pretrained("facebook/genre-linking-blink")
genre_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/genre-linking-blink").eval()

In [None]:
def EL_GENRE(annotated_sentences, model, tokenizer):
    """Link annotated entity mentions to page titles with the GENRE model.

    Each sentence marks the mention to link with ``[START_ENT]`` and
    ``[END_ENT]``, for example::

        sentences = [
            "[START_ENT] England [END_ENT] won the cricket world cup in 2019",
            "I just finished reading [START_ENT] 'The Jungle Book' [END_ENT]",
        ]
        EL_GENRE(annotated_sentences=sentences, model=model, tokenizer=tokenizer)

    Args:
        annotated_sentences (list): Sentences annotated with entity mentions.
        model: GENRE model from the Hugging Face hub.
        tokenizer: Tokenizer matching the GENRE model.

    Returns:
        The decoded generations, one per returned sequence. They are in the
        form of Wikipedia page titles; turning them into DBpedia resource URLs
        is left to the caller.
    """
    encoded = tokenizer(annotated_sentences, return_tensors="pt", padding=True)

    # Plain beam search; constrained decoding (prefix_allowed_tokens_fn) is not used.
    generated = model.generate(**encoded, num_beams=5, num_return_sequences=1)

    return tokenizer.batch_decode(generated, skip_special_tokens=True)

In [None]:
def annotate_sentence(sentence, mention):
    """Wrap the first occurrence of ``mention`` in GENRE entity markers.

    The mention is matched literally and case-insensitively. If it does not
    occur in ``sentence``, the marked-up mention is appended to the end
    instead.

    Args:
        sentence (str): Text that should contain the mention.
        mention (str): Surface form of the entity to mark.

    Returns:
        str: ``sentence`` with `` [START_ENT] `` / `` [END_ENT] `` inserted
        around the mention (or appended after the sentence when not found).
    """
    # Search the original string with IGNORECASE instead of comparing lower-cased
    # copies: str.lower() can change a string's length (e.g. "İ" becomes two
    # characters), which made the match offsets point at the wrong characters
    # of the original sentence.
    match = re.search(re.escape(mention), sentence, flags=re.IGNORECASE)
    if match is None:
        return f"{sentence} [START_ENT] {mention} [END_ENT]"

    start, end = match.span()
    return f"{sentence[:start]} [START_ENT] {sentence[start:end]} [END_ENT] {sentence[end:]}"

In [None]:
from funcy import print_durations

@print_durations
def get_triple_from_triple(sub, relation, obj, sentence):
    """Link the subject and object of one triple to DBpedia resource URLs.

    Each mention is marked inside ``sentence`` with ``annotate_sentence``,
    linked with ``EL_GENRE`` (using the notebook-level ``genre_model`` and
    ``genre_tokenizer``), and the returned title is turned into a URL by
    joining its whitespace-separated words with underscores.

    Args:
        sub (str): Subject mention.
        relation (str): Predicate of the triple; passed through unchanged.
        obj (str): Object mention.
        sentence (str): Text in which the mentions are looked up.

    Returns:
        tuple: ``(subject_url, relation, object_url)``.
    """
    # EL_GENRE documents a list of sentences, so wrap the single annotated sentence.
    subject_entity = EL_GENRE(
        [annotate_sentence(sentence, sub)], genre_model, genre_tokenizer)[0]
    subject_entity = "https://dbpedia.org/resource/" + "_".join(subject_entity.split())

    object_entity = EL_GENRE(
        [annotate_sentence(sentence, obj)], genre_model, genre_tokenizer)[0]
    object_entity = "https://dbpedia.org/resource/" + "_".join(object_entity.split())

    # Return the `relation` argument. The previous code returned the notebook-level
    # `predicate` variable, which only gave the right value when the caller's loop
    # variable happened to hold the same predicate.
    return (subject_entity, relation, object_entity)


In [None]:
# NOTE(review): `text` is reassigned to the Biden passage in earlier cells, while the
# recorded output of this cell shows the San Francisco passage. On a fresh
# Restart & Run All this loop would look up San Francisco mentions in the wrong text —
# confirm the intended passage and give it its own variable name.
linked_triples = []

for subject_id, predicate, object_id in triples:
    # Same id -> name fallback as the DataFrame cell, so an id the model never
    # declared does not raise KeyError.
    subject = entities.get(subject_id, {}).get('name', subject_id)
    # Named `obj` rather than `object` so the builtin is not shadowed.
    obj = entities.get(object_id, {}).get('name', object_id)

    linked_triple = get_triple_from_triple(subject, predicate, obj, text)
    linked_triples.append(linked_triple)

    7.46 s in get_triple_from_triple('San Francisco', 'LOCATION_IN', 'Northern California', '\nSan Francisco,[24] ...)
    6.15 s in get_triple_from_triple('San Francisco', 'POPULATION', '808,437', '\nSan Francisco,[24] ...)
    6.45 s in get_triple_from_triple('San Francisco', 'LOCATION_IN', 'California', '\nSan Francisco,[24] ...)
    5.56 s in get_triple_from_triple('California', 'LOCATION_IN', 'United States', '\nSan Francisco,[24] ...)
    7.44 s in get_triple_from_triple('San Francisco', 'LOCATION_IN', 'Northern California r..., '\nSan Francisco,[24] ...)


In [None]:
# Table of linked triples; the bare `df` on the last line gives the notebook's rich display.
df = pd.DataFrame(linked_triples, columns=['Subject', 'Predicate', 'Object'])

df

Extracted and Linked Triples:
                                      Subject    Predicate  \
0  https://dbpedia.org/resource/San_Francisco  LOCATION_IN   
1  https://dbpedia.org/resource/San_Francisco   POPULATION   
2  https://dbpedia.org/resource/San_Francisco  LOCATION_IN   
3     https://dbpedia.org/resource/California  LOCATION_IN   
4  https://dbpedia.org/resource/San_Francisco  LOCATION_IN   

                                              Object  
0   https://dbpedia.org/resource/Northern_California  
1  https://dbpedia.org/resource/List_of_cities_an...  
2            https://dbpedia.org/resource/California  
3         https://dbpedia.org/resource/United_States  
4   https://dbpedia.org/resource/Northern_California  


Unnamed: 0,Subject,Predicate,Object
0,https://dbpedia.org/resource/San_Francisco,LOCATION_IN,https://dbpedia.org/resource/Northern_California
1,https://dbpedia.org/resource/San_Francisco,POPULATION,https://dbpedia.org/resource/List_of_cities_an...
2,https://dbpedia.org/resource/San_Francisco,LOCATION_IN,https://dbpedia.org/resource/California
3,https://dbpedia.org/resource/California,LOCATION_IN,https://dbpedia.org/resource/United_States
4,https://dbpedia.org/resource/San_Francisco,LOCATION_IN,https://dbpedia.org/resource/Northern_California
