In [1]:
import os
import re
import pandas as pd
import pymupdf4llm
import json
import shutil
from langchain_openai import ChatOpenAI
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import InMemoryStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain.retrievers import ParentDocumentRetriever
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.memory import ConversationBufferMemory
from langchain.memory import ChatMessageHistory
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from tempfile import NamedTemporaryFile
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import OpenAI
from langchain_core.prompts import ChatPromptTemplate

In [2]:
# Read the OpenAI API key from the OPENAI_API_KEY_SBR environment variable.
# The value is None when the variable is not set.
openai_api_key = os.getenv("OPENAI_API_KEY_SBR")

In [3]:
def load_the_page(pdf_path, page_numbers):
    """Return the markdown text of the requested pages of a PDF.

    Args:
        pdf_path: Path of the PDF, passed to ``pymupdf4llm.to_markdown``.
        page_numbers: Iterable of 1-based page numbers; the output follows
            this order.

    Returns:
        The ``'text'`` entry of every requested page chunk, joined with a
        newline. An empty ``page_numbers`` yields an empty string.

    Raises:
        IndexError: If a page number is smaller than 1 or larger than the
            number of page chunks returned for the document.
    """
    # page_chunks=True gives one entry per page instead of a single string
    docs = pymupdf4llm.to_markdown(pdf_path, page_chunks=True)
    total_pages = len(docs)

    pages_content = []
    for page_number in page_numbers:
        # Page numbers are 1-based. Without this check, 0 or a negative number
        # would silently wrap around (Python negative indexing) and return the
        # wrong page instead of failing.
        if not 1 <= page_number <= total_pages:
            raise IndexError(
                f"page number {page_number} is out of range "
                f"(document has {total_pages} pages, numbered from 1)"
            )
        pages_content.append(docs[page_number - 1]['text'])

    return "\n".join(pages_content)

In [4]:
def LCI_pathway_extraction(pdf_path, table_number, page_number, persist_directory):
    """Extract the pathway and activity names from one inventory table.

    The requested PDF pages are loaded as markdown, split into chunks, indexed
    through a ``ParentDocumentRetriever`` backed by a Chroma collection, and
    queried with a few-shot prompt via a ``ConversationalRetrievalChain``.

    Args:
        pdf_path: Path of the PDF file to read.
        table_number: Identifier of the table; interpolated into the question.
        page_number: 1-based page number(s) that contain the table. Either a
            single int or an iterable of ints.
        persist_directory: Directory handed to Chroma as ``persist_directory``.

    Returns:
        A tuple ``(result1, retriever)``: ``result1`` is a one-element list with
        the model's answer text (surrounding code fence removed) and
        ``retriever`` is the populated retriever, so later steps can reuse it.
    """

    # Use the function argument rather than a notebook-level global, and accept
    # a bare int as well as a list of pages.
    page_numbers = [page_number] if isinstance(page_number, int) else list(page_number)
    docs = load_the_page(pdf_path, page_numbers)

    #pre-split the document to create an iterable document
    text_splitter = CharacterTextSplitter(
        separator="\n\n",
        chunk_size=3000,
        chunk_overlap=400,
        length_function=len,
    )

    split_text = text_splitter.create_documents([docs])

    #initial the child text splitter
    child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)

    #initial the embedding model and vector database
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    vectorstore = Chroma(collection_name="full_documents", embedding_function=embeddings, persist_directory=persist_directory)
    store = InMemoryStore()

    retriever = ParentDocumentRetriever(
        vectorstore=vectorstore,
        docstore=store,
        child_splitter=child_splitter,
    )

    retriever.add_documents(split_text)

    #initial the llm
    llm = ChatOpenAI(
        model_name="gpt-4-1106-preview",
        temperature=0,
        max_tokens=4000,
        openai_api_key=openai_api_key
    )

    #initial the memory
    msgs = ChatMessageHistory()
    memory = ConversationBufferMemory(memory_key="chat_history", chat_memory=msgs, return_messages=True)

    #initial the prompt
    # Few-shot example 2 originally listed "coal mining and processing" twice and
    # omitted "coal transportation"; example 3 was labelled "Tanle". Both are
    # corrected so the examples agree with their tables and with each other.
    template1 = """
        You are a helpful assistant specializing in extracting life cycle assessment (LCA) data. Based on the provided context, use the definitions below to answer the question at the end. If you are unsure of the answer, state "I don't know" rather than conjecture.

        ### Extract the information based on the definitions below: 
        - **Pathway:** The sequence of processes and transformations that raw materials undergo to become a final product. This includes all intermediate steps, inputs, and outputs. Synonyms include "Processes," "Technical route," "Scenario," "Conversion." 
        - **Activity:** A unit process of converting raw materials into intermediates or products. It represents the smallest element in the product system for which input and output data are quantified. Examples include raw material production, transport, assembly, synthesis, production, and end-of-life treatment. 

        ### Instructions for Data Extraction: 
        - Identify and list the pathways and activities with minimum detail to understand their relationships. 
        - Only extract the pathway and activity from the given table's meta. Ensure completeness and precision in your extraction. 
        - Avoid extra explanations and format your response as follows:
            - "Pathway": [] # List format 
            - "Activity": [] # List format 
        - If you do not find any information about the pathway or activity, leave them as the empty list.

        ###Examples:
        1. Table: <The inventory data for PV/CCU-CH 3 OH technical route\n\nInputs Values Units Outputs Values Units\n\n**Hydrogen generation**\nElectricity 10070 kWh Hydrogen 190 kg\n\n
        Water 1710 MJ Waste water 87.4 kg\n\nPotassium hydroxide 0.57 kg Oxygen 1501 kg\n\nHydrogen plant 1.02E-6 Unit\n\n**Methanol synthesis and purification**\nElectricity 42.75 kWh Methanol 1000 kg\n\nHydrogen 190 MJ Waste water 630 kg\n\n
        Carbon dioxide (HP) 1455 kg\n\nAluminium oxide 0.012 kg\n\nCopper oxide 0.062 kg\n\nZinc oxide 0.029 kg>
        Answer: 
        - “Pathway”: [“PV/CCU-CH 3 OH technical route”]
        - “Activity”: [“Hydrogen generation”, “Methanol synthesis and purification”]
        2. Table: <Table. Life Cycle Energy Consumption Inventory Data of the CtEG Route for Producing 1 ton EG\n\nparameter unit coal mining and processing coal transportation coal to EG total life cycle\n\nMaterials Consumption [8]\n\ncoal t 3.17 × 10 [0] 3.17 × 10 [0]\n\n
        Energy Consumption [8] [,] [22] [,] [31]\n\nfuel coal GJ 2.89 × 10 [0] 4.73 × 10 [1] 5.02 × 10 [1]\n\ndiesel GJ 5.79 × 10 [−] [2] 2.47 × 10 [−] [1] 3.05 × 10 [−] [1]\n\ngasoline GJ 5.79 × 10 [−] [2] 1.73 × 10 [−] [2] 7.52 × 10 [−] [2]\n\nelectricity GJ 5.79 × 10 [−] [1] 1.47 × 10 [−] [1] 3.16 × 10 [0] 3.89 × 10 [0]\n\n
        Pollutant Emission [8] [,] [22] [,] [32]\n\nCO 2 kg 1.26 × 10 [2] 6.30 × 10 [1] 6.72 × 10 [3] 6.91 × 10 [3]\n\nCH 4 kg 2.37 × 10 [0] 3.42 × 10 [−] [1] 2.44 × 10 [1] 2.71 × 10 [1]\n\nN 2 O kg 2.13 × 10 [−] [3] 1.47 × 10 [−] [4] 7.69 × 10 [−] [3] 9.97 × 10 [−] [3]\n\nCO kg 1.51 × 10 [−] [2] 5.41 × 10 [−] [1] 1.06 × 10 [1] 1.12 × 10 [1]\n\n
        NO x kg 4.67 × 10 [−] [1] 1.17 × 10 [−] [1] 3.53 × 10 [0] 4.11 × 10 [0]\n\nSO 2 kg 1.69 × 10 [−] [1] 2.69 × 10 [−] [2] 1.44 × 10 [0] 1.64 × 10 [0]\n\nPM 10 kg 5.23 × 10 [−] [1] 1.08 × 10 [−] [1] 2.92 × 10 [0] 3.55 × 10 [0]\n\nVOC kg 8.24 × 10 [−] [3] 8.00 × 10 [−] [3] 8.58 × 10 [−] [2] 1.02 × 10 [−] [1]\n\n>
        Answer: 
        - “Pathway”: [“CtEG Route”]
        - “Activity”: [“Coal mining and processing”, “Coal transportation”, “Coal to EG”]
        3. Table: <Table.** Inventory to produce 1 metric ton of EG via different pathways[14, 18]\n\nUnit Petro-EG Coal-EG\n\nFeedstock\n\nCoal t 3.17\n\nMethanol t 0.60\n\nNH 3 t 0.30\n\nEthylene t 0.75\n\nWater t 3\n\nUtilites\n\nElectricity kWh 300 1200\n\n4.2 MPa steam t 0.10 0.42\n\n
        1.5 MPa steam t 0.40 4.50\n\n0.5 MPa steam t 0.10 3.90\n\nDirect CO 2 emissions t 0.94 5.44\n\n17\n>
        Answer:
        - "Pathway": ["Petro-EG", "Coal-EG"]
        - "Activity": [ ]

        {chat_history}
        {context}
        Question: {question}
        """

    prompt1 = PromptTemplate(
        template=template1, input_variables=["context", "chat_history", "question"], output_parser=CommaSeparatedListOutputParser()
    )

    #set up chain
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        memory=memory,
        combine_docs_chain_kwargs={'prompt': prompt1}
    )

    #get the result
    query1 = f"extract the Pathway and Activity from the Table [{table_number}]."
    # invoke() replaces the deprecated direct call chain({...})
    result = chain.invoke({"question": query1})

    # Remove a surrounding markdown code fence as a whole token.
    # str.strip('```json') treats its argument as a set of characters and would
    # also eat leading/trailing 'j', 's', 'o', 'n' characters of the answer.
    answer = re.sub(r"^```(?:json)?\s*|\s*```$", "", result["answer"].strip()).strip()
    result1 = [answer]

    return result1, retriever

In [5]:
def LCI_data_extraction(table_number, pathway, activity, retriever):
    """Extract the LCI inputs and outputs of one activity/pathway from a table.

    Args:
        table_number: Identifier of the table; interpolated into the question.
        pathway: Pathway name to extract data for.
        activity: Activity name to extract data for (may be an empty string).
        retriever: Retriever supplying the table context to the chain, e.g. the
            one returned by ``LCI_pathway_extraction``.

    Returns:
        A one-element list containing the model's answer text with any
        surrounding markdown code fence removed.
    """

    #initial the llm
    llm = ChatOpenAI(
        model_name="gpt-4-1106-preview",
        temperature=0,
        max_tokens=4000,
        openai_api_key=openai_api_key
    )

    #initial the memory
    msgs = ChatMessageHistory()
    memory = ConversationBufferMemory(memory_key="chat_history", chat_memory=msgs, return_messages=True)

    #initial the prompt
    # Few-shot example 1 originally named the flow "Potassium" although the table
    # row is "Potassium hydroxide", and example 2 lacked the "Answer:" line that
    # examples 1 and 3 have; both are corrected so the examples are consistent.
    template2 = """
        You are a helpful assistant specializing in extracting life cycle assessment (LCA) data. Based on the provided context, use the definitions below to answer the question at the end. If you are unsure of the answer, state "I don't know" rather than conjecture.

        ### Extract the information based on the definitions below: 
        - **Inputs**: is defined as input substances used to manufacture intermediates or products. Input materials generally include human-made systems and resources from the environment. The examples of inputs are feedstocks, energy, natural resources, intermediates, devices, transports, solvents, and chemicals. These input materials are quantified by a numeric value with a unit.
        - **Outputs**: is defined as output substances from the production activity. Output materials typically include emissions, wastes, and output products. These outputs are also quantified using specific numeric values with units.

        ### Instructions for data Extraction: 
        - Identify and list the inputs and outputs with minimum detail to understand their relationships. 
        - Inputs include both human-made products like raw materials, chemicals, intermediates, products, components, devices, solvents, electricity,  fuel and resources from the environment like water.
        - Outputs mainly include emissions to air, emissions to water, wastes to treatment, and output products.
        - Extract all parameters related to each input and output category from the given context. Ensure completeness and precision in your extraction. 
        - If the input or output has multiple distinct values, each value should be represented by a separate input/output entity.
        - The value of the input and output must be numeric and accompanied by a unit, and cannot be a string or text
        - Avoid extra explanations and format your response as follows:
        - "Inputs": [] # list format 
        - "Outputs": [] # list format 

        ### Steps of data Extraction: 
        1. Understand the format of the markdown table, including the meaning of each column and row.
        2. Based on the provided pathway and activity, extract all relevant inputs and outputs within the table.

        ###Examples:
        1. Question - Extract inputs and outputs of activity “Hydrogen generation” of pathway “PV/CCU-CH 3 OH technical route” from the Table: <The inventory data for PV/CCU-CH 3 OH technical route\n\nInputs Values Units Outputs Values Units\n\n**Hydrogen generation**\n
        Electricity 10070 kWh Hydrogen 190 kg\n\nWater 1710 MJ Waste water 87.4 kg\n\nPotassium hydroxide 0.57 kg Oxygen 1501 kg\n\nHydrogen plant 1.02E-6 Unit\n\n**Methanol synthesis and purification**\nElectricity 42.75 kWh Methanol 1000 kg\n\nHydrogen 190 MJ Waste water 630 kg\n\nCarbon dioxide (HP) 1455 kg\n\n
        Aluminium oxide 0.012 kg\n\nCopper oxide 0.062 kg\n\nZinc oxide 0.029 kg>
        Answer: 
        - “Inputs”: [“Electricity”: 10070 kWh, “Water”: 1710 MJ, “Potassium hydroxide”: 0.57 kg, “Hydrogen plant”: 1.02E-6 Unit]
        - “Outputs”: [“Hydrogen”: 190 kg, “Waste water”: 87.4 kg, “Oxygen”: 1501 kg]
        2. Question - Extract inputs and outputs of activity “coal to EG” of pathway “CtEG route” from the Table: <Table. Life Cycle Energy Consumption Inventory Data of the CtEG Route for Producing 1 ton EG\n\nparameter unit coal mining and processing coal transportation coal to EG total life cycle\n\nMaterials Consumption [8]\n\n
        coal t 3.17 × 10 [0] 3.17 × 10 [0]\n\nEnergy Consumption [8] [,] [22] [,] [31]\n\nfuel coal GJ 2.89 × 10 [0] 4.73 × 10 [1] 5.02 × 10 [1]\n\ndiesel GJ 5.79 × 10 [−] [2] 2.47 × 10 [−] [1] 3.05 × 10 [−] [1]\n\ngasoline GJ 5.79 × 10 [−] [2] 1.73 × 10 [−] [2] 7.52 × 10 [−] [2]\n\nelectricity GJ 5.79 × 10 [−] [1] 1.47 × 10 [−] [1] 3.16 × 10 [0] 3.89 × 10 [0]\n\n
        Pollutant Emission [8] [,] [22] [,] [32]\n\nCO 2 kg 1.26 × 10 [2] 6.30 × 10 [1] 6.72 × 10 [3] 6.91 × 10 [3]\n\nCH 4 kg 2.37 × 10 [0] 3.42 × 10 [−] [1] 2.44 × 10 [1] 2.71 × 10 [1]\n\nN 2 O kg 2.13 × 10 [−] [3] 1.47 × 10 [−] [4] 7.69 × 10 [−] [3] 9.97 × 10 [−] [3]\n\nCO kg 1.51 × 10 [−] [2] 5.41 × 10 [−] [1] 1.06 × 10 [1] 1.12 × 10 [1]\n\nNO x kg 4.67 × 10 [−] [1] 1.17 × 10 [−] [1] 3.53 × 10 [0] 4.11 × 10 [0]\n\n
        SO 2 kg 1.69 × 10 [−] [1] 2.69 × 10 [−] [2] 1.44 × 10 [0] 1.64 × 10 [0]\n\nPM 10 kg 5.23 × 10 [−] [1] 1.08 × 10 [−] [1] 2.92 × 10 [0] 3.55 × 10 [0]\n\nVOC kg 8.24 × 10 [−] [3] 8.00 × 10 [−] [3] 8.58 × 10 [−] [2] 1.02 × 10 [−] [1]\n\n>
        Answer: 
        - “Inputs”: [“coal”: 3.17 t, “fuel coal”: 47.3 GJ, “electricity”: 3.16 GJ]
        - “Outputs”: [“CO2”: 6720 kg, “CH4”: 24.4 kg, “N2O”: 0.00769 kg, “CO”: 10.6 kg, “NOx”: 3.53 kg, “SO2”: 1.44 kg, “PM10”: 2.92 kg, “VOC”: 0.0858 kg]
        3. Question - Extract inputs and outputs of activity “” of pathway “Petro-EG” from the Table: <Table.** Inventory to produce 1 metric ton of EG via different pathways[14, 18]\n\nUnit Petro-EG Coal-EG\n\nFeedstock\n\nCoal t 3.17\n\nMethanol t 0.60\n\nNH 3 t 0.30\n\nEthylene t 0.75\n\nWater t 3\n\nUtilites\n\nElectricity kWh 300 1200\n\n4.2 MPa steam t 0.10 0.42\n\n
        1.5 MPa steam t 0.40 4.50\n\n0.5 MPa steam t 0.10 3.90\n\nDirect CO 2 emissions t 0.94 5.44\n\n17\n>
        Answer:
        - “Inputs”: [“Ethylene”: 0.75 t, “Water”: 3 t, “Electricity”: 300 kWh, “4.2 MPa steam”: 0.1 t, “1.5 MPa steam”: 0.4 t, “0.5 MPa steam”: 0.1 t]
        - “Outputs”: [“Direct CO2 emissions”: 0.94 t]


        {chat_history}
        {context}
        Question: {question}
        """

    prompt2 = PromptTemplate(
        template=template2, input_variables=["context", "chat_history", "question"], output_parser=CommaSeparatedListOutputParser()
    )

    #set up chain
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        memory=memory,
        combine_docs_chain_kwargs={'prompt': prompt2}
    )

    #get the result
    query2 = f"Extract inputs and outputs of the activity {activity} from the pathway {pathway} from the Table [{table_number}]."
    # invoke() replaces the deprecated direct call chain({...})
    result = chain.invoke({"question": query2})

    # Remove a surrounding markdown code fence as a whole token.
    # str.strip('```json') treats its argument as a set of characters and would
    # also eat leading/trailing 'j', 's', 'o', 'n' characters of the answer.
    answer = re.sub(r"^```(?:json)?\s*|\s*```$", "", result["answer"].strip()).strip()
    result2 = [answer]

    msgs.clear()
    memory.clear()

    return result2

In [6]:
def _parse_data_list(reply_text):
    """Pull the JSON array of flow records out of the model's reply text."""
    # The prompt asks for "Data_list = [...]"; accept any capitalisation of the
    # marker and fall back to the whole reply when the marker is absent.
    marker = re.search(r"data_list\s*=", reply_text, flags=re.IGNORECASE)
    payload = reply_text[marker.end():] if marker else reply_text

    # Keep only the outermost [...] so code fences or trailing prose around the
    # list do not break JSON parsing.
    start, end = payload.find("["), payload.rfind("]")
    if start == -1 or end < start:
        raise ValueError(f"No JSON list found in model reply: {reply_text!r}")
    json_str = payload[start:end + 1]

    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        # Legacy behaviour: convert the escaped new line characters and quotes
        # to actual characters, then retry. Done only as a fallback because the
        # replacement corrupts replies that are already valid JSON.
        return json.loads(json_str.replace('\\n', '\n').replace('\\"', '"'))


def convert_to_dataframe (raw_text):
    """Convert extracted inputs/outputs text into a list of flow records.

    Despite the name, the function returns a list of dicts (ready to be passed
    to ``pd.DataFrame``), not a DataFrame.

    Args:
        raw_text: The extraction result to convert; it is interpolated into the
            question sent to the model.

    Returns:
        The list parsed from the model's ``Data_list = [...]`` reply; the prompt
        asks for dicts with the keys "Flow name", "Input/Output", "Amount" and
        "Unit".

    Raises:
        ValueError: If the reply contains no JSON list or the list cannot be
            parsed (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """

    llm = ChatOpenAI(
        model_name="gpt-4-1106-preview",
        temperature=0,
        max_tokens=4000,
        openai_api_key=openai_api_key
    )

    # Examples 2 and 3 originally answered with "data_list = " while the format
    # instruction (and the parser) use "Data_list = "; they now match.
    template3 = """
    You are a helpful assistant specializing in converting life cycle assessment (LCA) data format. Based on the provided context, use the definitions below to answer the question at the end.

    ### Convert the provided list into a structured format based on the definitions below: 
    - **Flow name**: the name of the flow.
    - **Input/Outputs**: specifies whether it is an input or an output flow.
    - **Amount**: the quantity of the flow.
    - **Unit**: the unit in which the amount is measured.
    
    ###Avoid extra explanations and format your response as follows::
    Data_list = [{{"Flow name": "", "Input/Output": "", "Amount": "", "Unit": ""}}, {{"Flow name": "", "Input/Output": "", "Amount": "", "Unit": ""}}]

    
    ###Examples:
    1. Question: <['- "Inputs": ["Electricity": 10070 kWh, "Water": 1710 MJ, "Potassium hydroxide": 0.57 kg, "Hydrogen plant": 1.02E-6 Unit]\n- "Outputs": ["Hydrogen": 190 kg, "Waste water": 87.4 kg, "Oxygen": 1501 kg]']>
    Answer: 
    Data_list = [
    {{"Flow name": "Electricity", "Input/Output": "Input", "Amount": 10070, "Unit": "kWh"}},
    {{"Flow name": "Water", "Input/Output": "Input", "Amount": 1710, "Unit": "MJ"}},
    {{"Flow name": "Potassium hydroxide", "Input/Output": "Input", "Amount": 0.57, "Unit": "kg"}},
    {{"Flow name": "Hydrogen plant", "Input/Output": "Input", "Amount": 1.02e-6, "Unit": "Unit"}},
    {{"Flow name": "Hydrogen", "Input/Output": "Output", "Amount": 190, "Unit": "kg"}},
    {{"Flow name": "Waste water", "Input/Output": "Output", "Amount": 87.4, "Unit": "kg"}},
    {{"Flow name": "Oxygen", "Input/Output": "Output", "Amount": 1501, "Unit": "kg"}}
    ]
    2. Question: <['- "Inputs": ["coal": 3.17 t, "fuel coal": 50.2 GJ, "diesel": 0.0305 GJ, "gasoline": 0.00752 GJ, "electricity": 3.16 GJ]\n- "Outputs": ["CO2": 6720 kg, "CH4": 24.4 kg, "N2O": 0.00769 kg, "CO": 10.6 kg, "NOx": 3.53 kg, "SO2": 1.44 kg, "PM10": 2.92 kg, "VOC": 0.0858 kg]']>
    Answer: 
    Data_list = [
    {{"Flow name": "Coal", "Input/Output": "Input", "Amount": 3.17, "Unit": "t"}},
    {{"Flow name": "Fuel Coal", "Input/Output": "Input", "Amount": 50.2, "Unit": "GJ"}},
    {{"Flow name": "Diesel", "Input/Output": "Input", "Amount": 0.0305, "Unit": "GJ"}},
    {{"Flow name": "Gasoline", "Input/Output": "Input", "Amount": 0.00752, "Unit": "GJ"}},
    {{"Flow name": "Electricity", "Input/Output": "Input", "Amount": 3.16, "Unit": "GJ"}},
    {{"Flow name": "CO2", "Input/Output": "Output", "Amount": 6720, "Unit": "kg"}},
    {{"Flow name": "CH4", "Input/Output": "Output", "Amount": 24.4, "Unit": "kg"}},
    {{"Flow name": "N2O", "Input/Output": "Output", "Amount": 0.00769, "Unit": "kg"}},
    {{"Flow name": "CO", "Input/Output": "Output", "Amount": 10.6, "Unit": "kg"}},
    {{"Flow name": "NOx", "Input/Output": "Output", "Amount": 3.53, "Unit": "kg"}},
    {{"Flow name": "SO2", "Input/Output": "Output", "Amount": 1.44, "Unit": "kg"}},
    {{"Flow name": "PM10", "Input/Output": "Output", "Amount": 2.92, "Unit": "kg"}},
    {{"Flow name": "VOC", "Input/Output": "Output", "Amount": 0.0858, "Unit": "kg"}}
    ]
    3. Question: <['- "Inputs": ["Coal": 3.17 t, "Methanol": 0.60 t, "NH3": 0.30 t, "Ethylene": 0.75 t, "Water": 3 t, "Electricity": 300 kWh, "4.2 MPa steam": 0.10 t, "1.5 MPa steam": 0.40 t, "0.5 MPa steam": 0.10 t]\n- "Outputs": ["Direct CO2 emissions": 0.94 t]']>
    Answer: 
    Data_list = [
    {{"Flow name": "Coal", "Input/Output": "Input", "Amount": 3.17, "Unit": "t"}},
    {{"Flow name": "Methanol", "Input/Output": "Input", "Amount": 0.60, "Unit": "t"}},
    {{"Flow name": "NH3", "Input/Output": "Input", "Amount": 0.30, "Unit": "t"}},
    {{"Flow name": "Ethylene", "Input/Output": "Input", "Amount": 0.75, "Unit": "t"}},
    {{"Flow name": "Water", "Input/Output": "Input", "Amount": 3, "Unit": "t"}},
    {{"Flow name": "Electricity", "Input/Output": "Input", "Amount": 300, "Unit": "kWh"}},
    {{"Flow name": "4.2 MPa steam", "Input/Output": "Input", "Amount": 0.10, "Unit": "t"}},
    {{"Flow name": "1.5 MPa steam", "Input/Output": "Input", "Amount": 0.40, "Unit": "t"}},
    {{"Flow name": "0.5 MPa steam", "Input/Output": "Input", "Amount": 0.10, "Unit": "t"}},
    {{"Flow name": "Direct CO2 emissions", "Input/Output": "Output", "Amount": 0.94, "Unit": "t"}}
    ]
    """

    prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", template3),
        ("human", "Respond to question: {question}")
    ])

    # Insert a question into the template and call the model
    query3 = f"Convert the list {raw_text} into defined data format"
    full_prompt = prompt_template.format_messages(question=query3)
    result = llm.invoke(full_prompt)

    # Load the JSON payload of the reply into a Python list of dictionaries
    data_list = _parse_data_list(result.content)

    return data_list

In [7]:
def df_to_csv(df, file_path):
    """Save ``df`` as a CSV file at ``file_path``.

    The index column is omitted and a backslash is used as the escape
    character.
    """
    csv_options = {"index": False, "escapechar": "\\"}
    df.to_csv(file_path, **csv_options)

In [8]:
def remove_vectordb(base_persist_directory, number_of_papers):
    """Delete the numbered vector-store folders under a base directory.

    Sub-folders named ``1`` .. ``number_of_papers`` are removed together with
    their contents when they exist; missing ones are skipped. Each removal is
    reported on stdout.
    """
    for paper_no in range(1, number_of_papers + 1):
        target = os.path.join(base_persist_directory, str(paper_no))

        # nothing stored for this paper
        if not os.path.exists(target):
            continue

        shutil.rmtree(target)
        print(f"Removed directory: {target}")

In [9]:
#remove vectordb from the directory
number_of_papers = 20
base_persist_directory = 'C:/Users/89751/LangChain-Practise/embedding/chroma/demo1'  #directory of the vector database
remove_vectordb (base_persist_directory, number_of_papers)

Removed directory: C:/Users/89751/LangChain-Practise/embedding/chroma/demo1\1


In [10]:
# Step 1: find the pathways/activities of the chosen table and keep the
# retriever for the data-extraction step.
table_number = "1"
page_numbers = [6]
pdf_path = "C:/Users/89751/OneDrive/Desktop/2.pdf"
persist_directory = 'C:/Users/89751/LangChain-Practise/embedding/chroma/demo1/1'

result1, retriever = LCI_pathway_extraction(
    pdf_path=pdf_path,
    table_number=table_number,
    page_number=page_numbers,
    persist_directory=persist_directory,
)
print(result1)

  warn_deprecated(


['- "Pathway": ["Coal Gasification to MeOH (CGTM)"]\n- "Activity": ["Coal mining and processing"]']


In [11]:
# Step 2: extract inputs/outputs for the selected pathway; the activity is left
# empty here.
pathway = "Coal Gasification to MeOH (CGTM)"
activity = ""
table_number = "1"

result2 = LCI_data_extraction(
    table_number=table_number,
    pathway=pathway,
    activity=activity,
    retriever=retriever,
)
print(result2)

['- "Inputs": [\n    "Electricity": 53.67 kWh,\n    "Fresh water": 0.69 t,\n    "Hard coal": 1.533 t,\n    "Oil": 1.53 kg,\n    "Hard coal": 1.38 t,\n    "HP-steam": 1.77 t,\n    "Electricity": 1108.38 kWh,\n    "Air": 4348.2 kg,\n    "Water": 134.01 t,\n    "Ammonia": 0.03 t,\n    "Synthetic gas": 1.23 t,\n    "Electricity": 825 kWh,\n    "NaOH (10%)": 1.00 t,\n    "Water": 14.2 kg\n]\n- "Outputs": [\n    "Hard coal": 1.38 t,\n    "Gangue": 153 kg,\n    "Methane": 7.64 kg,\n    "Synthetic gas": 1.23 t,\n    "Slag": 0.15 t,\n    "MP-N2": 1523.7 kg,\n    "CO2": 1.74 kg,\n    "Sulphur": 30 kg,\n    "Methanol": 1.00 t,\n    "Flue gas": 0.07 t,\n    "Effluents": 0.266 t\n]']


In [12]:
# Step 3: turn the extracted text into a list of flow records and display it.
result3 = convert_to_dataframe(raw_text=result2)
result3

[{'Flow name': 'Electricity',
  'Input/Output': 'Input',
  'Amount': 53.67,
  'Unit': 'kWh'},
 {'Flow name': 'Fresh water',
  'Input/Output': 'Input',
  'Amount': 0.69,
  'Unit': 't'},
 {'Flow name': 'Hard coal',
  'Input/Output': 'Input',
  'Amount': 1.533,
  'Unit': 't'},
 {'Flow name': 'Oil', 'Input/Output': 'Input', 'Amount': 1.53, 'Unit': 'kg'},
 {'Flow name': 'Hard coal',
  'Input/Output': 'Input',
  'Amount': 1.38,
  'Unit': 't'},
 {'Flow name': 'HP-steam',
  'Input/Output': 'Input',
  'Amount': 1.77,
  'Unit': 't'},
 {'Flow name': 'Electricity',
  'Input/Output': 'Input',
  'Amount': 1108.38,
  'Unit': 'kWh'},
 {'Flow name': 'Air', 'Input/Output': 'Input', 'Amount': 4348.2, 'Unit': 'kg'},
 {'Flow name': 'Water',
  'Input/Output': 'Input',
  'Amount': 134.01,
  'Unit': 't'},
 {'Flow name': 'Ammonia',
  'Input/Output': 'Input',
  'Amount': 0.03,
  'Unit': 't'},
 {'Flow name': 'Synthetic gas',
  'Input/Output': 'Input',
  'Amount': 1.23,
  'Unit': 't'},
 {'Flow name': 'Electricity

In [13]:
# Build a DataFrame from the flow records and display it.
df = pd.DataFrame(data=result3)
df

Unnamed: 0,Flow name,Input/Output,Amount,Unit
0,Electricity,Input,53.67,kWh
1,Fresh water,Input,0.69,t
2,Hard coal,Input,1.533,t
3,Oil,Input,1.53,kg
4,Hard coal,Input,1.38,t
5,HP-steam,Input,1.77,t
6,Electricity,Input,1108.38,kWh
7,Air,Input,4348.2,kg
8,Water,Input,134.01,t
9,Ammonia,Input,0.03,t


In [15]:
# Step 4: write the flow table to a CSV file.
file_path = "C:/Users/89751/OneDrive/Desktop/LCA ontology/LCI_output.csv"
df_to_csv(df=df, file_path=file_path)