In [12]:
import os
import yaml
from pyprojroot import here
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from dotenv import load_dotenv


class PrepareVectorDB:
    """
    A class to prepare and manage a Vector Database (VectorDB) using documents from a specified directory.
    The class performs the following tasks:
    - Loads and splits documents (PDFs).
    - Splits the text into chunks based on the specified chunk size and overlap.
    - Embeds the document chunks using a specified embedding model.
    - Stores the embedded vectors in a persistent VectorDB directory.

    Attributes:
        doc_dir (str): Path to the directory containing documents (PDFs) to be processed.
        chunk_size (int): The maximum size of each chunk (in characters) into which the document text will be split.
        chunk_overlap (int): The number of overlapping characters between consecutive chunks.
        embedding_model (str): The name of the embedding model to be used for generating vector representations of text.
        vectordb_dir (str): Directory where the resulting vector database will be stored.
        collection_name (str): The name of the collection to be used within the vector database.

    Methods:
        path_maker(file_name: str, doc_dir: str) -> str:
            Creates a full file path by joining the given directory and file name.

        run() -> None:
            Executes the process of reading documents, splitting text, embedding them into vectors, and 
            saving the resulting vector database. If the vector database directory already exists, it skips
            the creation process.
    """

    def __init__(self,
                 doc_dir: str,
                 chunk_size: int,
                 chunk_overlap: int,
                 embedding_model: str,
                 vectordb_dir: str,
                 collection_name: str
                 ) -> None:

        self.doc_dir = doc_dir
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.embedding_model = embedding_model
        self.vectordb_dir = vectordb_dir
        self.collection_name = collection_name

    def path_maker(self, file_name: str, doc_dir):
        """
        Creates a full file path by joining the provided directory and file name.

        Args:
            file_name (str): Name of the file.
            doc_dir (str): Path of the directory.

        Returns:
            str: Full path of the file.
        """
        return os.path.join(here(doc_dir), file_name)

    def run(self):
        """
        Executes the main logic to create and store document embeddings in a VectorDB.

        If the vector database directory doesn't exist:
        - It loads PDF documents from the `doc_dir`, splits them into chunks,
        - Embeds the document chunks using the specified embedding model,
        - Stores the embeddings in a persistent VectorDB directory.

        If the directory already exists, it skips the embedding creation process.

        Prints the creation status and the number of vectors in the vector database.

        Returns:
            None
        """
        if not os.path.exists(here(self.vectordb_dir)):
            # If it doesn't exist, create the directory and create the embeddings
            os.makedirs(here(self.vectordb_dir))
            print(f"Directory '{self.vectordb_dir}' was created.")

            file_list = os.listdir(here(self.doc_dir))
            docs = [PyPDFLoader(self.path_maker(
                fn, self.doc_dir)).load_and_split() for fn in file_list]
            docs_list = [item for sublist in docs for item in sublist]

            text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
                chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
            )
            doc_splits = text_splitter.split_documents(docs_list)
            # Add to vectorDB
            vectordb = Chroma.from_documents(
                documents=doc_splits,
                collection_name=self.collection_name,
                embedding=OpenAIEmbeddings(model=self.embedding_model),
                persist_directory=str(here(self.vectordb_dir))
            )
            print("VectorDB is created and saved.")
            print("Number of vectors in vectordb:",
                  vectordb._collection.count(), "\n\n")
        else:
            print(f"Directory '{self.vectordb_dir}' already exists.")


if __name__ == "__main__":
    load_dotenv()
    os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")

    with open(here("configs/app_config.yml")) as cfg:
        app_config = yaml.load(cfg, Loader=yaml.FullLoader)
        print("app_config loaded:", app_config)

    # Uncomment the following configs to run for swiss airline policy document
    chunk_size = app_config["paper_rag_configs"]["chunk_size"]
    chunk_overlap = app_config["paper_rag_configs"]["chunk_overlap"]
    embedding_model = app_config["paper_rag_configs"]["embedding_model"]
    vectordb_dir = app_config["paper_rag_configs"]["vectordb"]
    collection_name = app_config["paper_rag_configs"]["collection_name"]
    doc_dir = app_config["paper_rag_configs"]["unstructured_docs"]

    prepare_db_instance = PrepareVectorDB(
        doc_dir=doc_dir,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        embedding_model=embedding_model,
        vectordb_dir=vectordb_dir,
        collection_name=collection_name)

    prepare_db_instance.run()


app_config loaded: {'directories': {'stored_csv_xlsx_directory': 'data/csv_xlsx', 'sqldb_directory': 'data/sqldb.db', 'uploaded_files_sqldb_directory': 'data/uploaded_files_sqldb.db', 'stored_csv_xlsx_sqldb_directory': 'data/csv_xlsx_sqldb.db', 'persist_directory': 'data/papers_vectordb'}, 'llm_config': {'agent_llm_system_role': 'Given the following user question, corresponding SQL query, and SQL result, answer the user question.\n Question: {question}\n SQL Query: {query}\n SQL Result: {result}\n Answer: ', 'rag_llm_system_role': "You will recieve the user's question along with the search results of that question over a database. Give the user the proper answer.", 'engine': 'gpt-4', 'temperature': 0.0}, 'comments_rag_config': {'collection_name': 'comments_energy', 'top_k': 1}, 'content_table_rag_config': {'unstructured_docs': 'data/table-definitions', 'vectordb': 'data/table_definitions_vectordb', 'collection_name': 'table-definitions-rag-chroma', 'llm': 'gpt-4', 'llm_temperature': 0.

In [13]:
import sys
import os
from pyprojroot import here
# Add the src directory to the Python path
sys.path.append(os.path.join(here(), "src"))
sys.path.append(str(here()))
from langchain_community.document_loaders import PyPDFLoader
from src.utils.load_config import LoadConfig
cfg = LoadConfig()
print("tenant:", getattr(cfg.chroma_client, "tenant", None))
print("database:", getattr(cfg.chroma_client, "database", None))

collections = cfg.chroma_client.list_collections()
print("collections:", collections)
print("Available collections:")
for c in collections:
    print("-", c)

tenant: default_tenant
database: default_database
collections: ['synthetis-papers']
Available collections:
- synthetis-papers


In [14]:
# Load the OpenAI API key from the environment variable
from dotenv import load_dotenv
load_dotenv()
# Set the OpenAI API key
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
from openai import OpenAI
import os
# Set the OpenAI API key
model_name = "gpt-4"
openai_api_key = os.environ["OPENAI_API_KEY"]

# instantiate the OpenAI client
client = OpenAI()

In [15]:
from src.utils.load_config import LoadConfig
cfg = LoadConfig()
print("tenant:", getattr(cfg.chroma_client, "tenant", None))
print("database:", getattr(cfg.chroma_client, "database", None))

collections = cfg.chroma_client.list_collections()
print("collections:", collections)
print("Available collections:")
for c in collections:
    print("-", c)

tenant: default_tenant
database: default_database
collections: ['synthetis-papers']
Available collections:
- synthetis-papers


In [16]:

vectordb = Chroma(
    collection_name=collection_name,
    persist_directory=str(here(vectordb_dir)),
    embedding_function=OpenAIEmbeddings(model=embedding_model)
)


In [17]:
# instantiate the OpenAI client
results = vectordb.similarity_search("Experimental synthesis procedure describing the preparation of materials, including chemical precursor mixing, dissolution, heating, annealing, combustion, quenching, washing, filtration steps, with specific temperatures, times, and atmosphere conditions.", k=5)
results

[Document(id='595441b5-0434-440b-aea7-052502ec53bf', metadata={'creationdate': '2016-04-04T15:36:18-04:00', 'creator': 'Arbortext Advanced Print Publisher 10.0.1465/W Unicode', 'jcalibrarybuild': '3.6.i11', 'moddate': '2025-04-28T12:24:04-07:00', 'page': 2, 'page_label': '3', 'producer': 'Acrobat Distiller 8.1.0 (Windows); modified using iText 4.2.0 by 1T3XT', 'source': '/Users/arashkhajeh/GitHub/LLM-for-Materials/data/papers/paper_1.pdf', 'title': 'cm5b04557 1..11', 'total_pages': 11}, page_content='electron microscopy (FESEM) was performed using a Zeiss Supra 55\nFESEM. A Dual-view Optima 5300 DV system was used for ICP-OES.\nEnergy dispersive X-ray spectroscopy (EDS) was performed on a\nJEOL JEM-2100. For the atomic absorption spectroscopy (AAS) a\nVarian SpectrAA 220FS spectrometer was used.\nElectrochemistry. Galvanostatic cycling was performed on a\nNeware battery tester system in a coin cell setup. Galvanostatic cycling\nfor theoperando XRD was conducted on a MTI CT-3008W-5 V1 m

In [18]:
# Create query embedding using OpenAI
# instantiate the OpenAI client
client = OpenAI()
query_texts = "synthesis dissolve mix stir heat combust anneal quench filter wash precursor powder chemical solution combustion solid-state hydrothermal sol-gel temperature time atmosphere air nitrogen argon"
response = client.embeddings.create(
        input = query_texts,
        model= "text-embedding-ada-002"
    )
query_embeddings = response.data[0].embedding
print("Query embedding:", query_embeddings)

Query embedding: [0.004105729050934315, 0.01737852953374386, -0.027043886482715607, -0.0023224842734634876, -0.015153313986957073, 0.02053205668926239, -0.00969266053289175, -0.00905785895884037, -0.03167179226875305, -0.024245301261544228, -0.0010605612769722939, 0.012402509339153767, 0.0011219936423003674, 0.006402616389095783, -0.0030835627112537622, -0.006467461585998535, 0.025624116882681847, 0.023890359327197075, 0.015385391190648079, 0.0056893182918429375, -0.02042284421622753, -0.0009052740060724318, -0.0017004816327244043, -0.015726681798696518, -0.024436425417661667, 0.006337771192193031, 0.01788363978266716, -0.024914231151342392, -0.008812129497528076, 0.00809541903436184, 0.0028548978734761477, -0.0002450894971843809, -0.021173683926463127, -0.012327425181865692, -0.006150061264634132, -0.0069043138064444065, -0.0211463812738657, 0.012614109553396702, 0.0025903976056724787, -0.01680516079068184, 0.006815578322857618, -0.02820427529513836, 0.0020409193821251392, 0.015412694

In [19]:
results_with_scores = vectordb.similarity_search_by_vector_with_relevance_scores(
    embedding=query_embeddings,
    k=5
)

for i, (doc, score) in enumerate(results_with_scores):
    print(f"\nResult {i+1}:")
    print(f"Relevance Score: {score:.4f}")
    print(f"Content: {doc.page_content[:300]}...")


Result 1:
Relevance Score: 0.4067
Content: mixed (molar ratio Na:Mn = 0.7) and subsequently dissolved in
deionized water. For NCO, additional Co(NO3)2 was added, such that
the molar ratio of Na:Co:Mn was 0.7:0.1:0.9. Concentrated HNO3
(≥69%, Honeywell) was added, followed by 1.5 g of gelatin. The
solution was heated until spontaneous combust...

Result 2:
Relevance Score: 0.4084
Content: electron microscopy (FESEM) was performed using a Zeiss Supra 55
FESEM. A Dual-view Optima 5300 DV system was used for ICP-OES.
Energy dispersive X-ray spectroscopy (EDS) was performed on a
JEOL JEM-2100. For the atomic absorption spectroscopy (AAS) a
Varian SpectrAA 220FS spectrometer was used.
Ele...

Result 3:
Relevance Score: 0.4333
Content: cells with a 3 mm hole drilled through the casing. Subsequently, they
were sealed by Kapton foil with a thickness of 25μm and epoxy resin.
A Na-metal ring was used as anode (Ø = 16 mm, hole = 3 mm in
diameter). To ensure a suﬃcient amount of active material in

In [120]:
# Print the first result with its score
doc, score = results_with_scores[2]
print("Similarity Score:", score)
print("Content:", doc.page_content)

Similarity Score: 1.957720934864532
Content: mixed (molar ratio Na:Mn = 0.7) and subsequently dissolved in
deionized water. For NCO, additional Co(NO3)2 was added, such that
the molar ratio of Na:Co:Mn was 0.7:0.1:0.9. Concentrated HNO3
(≥69%, Honeywell) was added, followed by 1.5 g of gelatin. The
solution was heated until spontaneous combustion occurred. The
resulting dark brownish powder was annealed at 800 °C for 4 h,
followed by another step at 610°C for 9 h and quenching to room
temperature. Elemental composition, as determined by inductively
coupled plasma optical emission spectrometry (ICP-OES), was
Na0.6MnO2+z for the NMO ﬂakes and Na0.6Co0.1Mn0.9O2+z for the
NCO ﬂakes; “z” in the above-mentioned formula units accounts for
Mn-vacancies and is typically between 0.05 and 0.25 for the P2
phase.13 Spherical NMO was synthesized as reported previously, with
a slightly modiﬁed annealing procedure.35 NH4HCO3 was dissolved in
deionized water, followed by a dropwise addition of ethanol 

# FAISS pipeline from Eunomia

In [26]:
from langchain.schema import Document
from typing import List, Optional
from copy import deepcopy


class LoadDoc:
    """
    A class to handle the loading and processing of different Docs.

    Attributes:
    paper_id : str
        ID of the paper to be loaded.
    paper_path : str
        Path of the paper to be loaded.
    loader : PyPDFLoader
        Instance of PyPDFLoader to load the document.
    pages : list
        Pages of the loaded document.
    """

    def __init__(self, file_name: str = None, text_input: str = None, **kwargs):
        """
        Parameters:
        file_name : str
            Path to file.
        text_input : str
            Direct text input.
        **kwargs are passed to the CSVLoader class.
        """
        if file_name is None and text_input is None:
            raise ValueError("Either 'file_name' or 'text_input' must be provided.")
        elif file_name and text_input:
            raise ValueError(
                "Only one of 'file_name' or 'text_input' should be provided as input."
            )

        if file_name:
            extension = file_name.split(".")[-1].lower()
            self._check_extension(extension)
            self.doc_path = file_name
            if self.type == "pdf":
                from langchain.document_loaders import PyPDFLoader

                self.loader = PyPDFLoader(file_name)
            if self.type == "md":
                from langchain.document_loaders import UnstructuredMarkdownLoader

                self.loader = UnstructuredMarkdownLoader(file_name)
            if self.type == "csv":
                from langchain.document_loaders.csv_loader import CSVLoader

                self.loader = CSVLoader(file_name, **kwargs)
            if self.type == "txt":
                from langchain.document_loaders import TextLoader

                self.loader = TextLoader(file_name)
            self.pages = self.loader.load_and_split()
        else:
            self.pages = [
                Document(page_content=text_input, metadata={"source": "local"})
            ]

    def _check_extension(self, extension: str):
        """
        Checks the provided file extension against the supported extensions.

        Parameters:
        extension : str
            File extension to check.

        Raises:
        Exception:
            If the file extension is not supported.
        NotImplementedError:
            If the file extension is 'xml', which is not yet implemented.
        """
        supported_extensions = {"pdf", "txt", "md", "csv"}
        if extension in supported_extensions:
            self.type = extension
        elif extension == "xml":
            raise NotImplementedError
        else:
            raise Exception(f"Eunomia supports {supported_extensions} doc files.")

    @staticmethod
    def cut_text(text: str, keywords: List[str]) -> str:
        """
        Cuts the given text up to the first found keyword.

        Parameters:
        text : str
            The text to be cut.
        keywords : List[str]
            List of keywords to find in the text.

        Returns:
        str
            The cut text.
        """
        lower_text = text.lower()
        indices = [
            lower_text.find(keyword)
            for keyword in keywords
            if lower_text.find(keyword) != -1
        ]
        min_index = min(indices)
        return text[:min_index].strip()  # remove any trailing spaces

    @staticmethod
    def find_in_document(document: Document, search_strings: List[str]) -> bool:
        """
        Searches for the given strings in the document content.

        Parameters:
        document : Document
            Document in which to search.
        search_strings : List[str]
            List of strings to search for.

        Returns:
        bool
            True if any of the search strings are found, False otherwise.
        """
        return any(
            search_string.lower() in document.page_content.lower()
            for search_string in search_strings
        )

    def filter_documents(
        self, documents: List[Document], search_strings: List[str]
    ) -> List[Document]:
        """
        Filters documents based on the presence of search strings.
        use this if you wish to remove "Acknowledgments or "References"
        in a long research article.

        Parameters:
        documents : List[Document]
            List of documents to filter.
        search_strings : List[str]
            List of strings to search for.

        Returns:
        List[Document]
            List of filtered documents.
        """
        filtered_documents = deepcopy(documents)  # Create a deep copy of documents
        for i, doc in enumerate(filtered_documents):
            if self.find_in_document(doc, search_strings):
                filtered_documents[i].page_content = self.cut_text(
                    filtered_documents[i].page_content,
                    keywords=search_strings,
                )
                filtered_documents = filtered_documents[: i + 1]
                break
        return filtered_documents

    def process(
        self,
        filter_words: List[str] = [],
        chunk_size: Optional[int] = None,
        chunk_overlap: Optional[int] = 0,
        chunking_type="fixed-size",
    ) -> List[Document]:
        """
        Process the document pages based on the search strings. Additionally, this function
        will split the document into chunks if a chunk size is provided.

        Parameters:
        filter_words : List[str]
            List of words to search and filter.
        chunk_size : Optional[int]
            The size of the chunks in which the document will be split. If this parameter
            is not provided, the document will not be split into chunks.
        chunk_overlap : Optional[int]
            The size of the overlap between chunks. If chunk_size is not provided, this
            parameter will not be used.

        Returns:
        List[Document]
            List of processed document chunks.
        """
        sliced_pages = self.filter_documents(self.pages, filter_words)
        text_splitter = None
        if chunk_size is not None:
            if chunking_type == "fixed-size":
                from langchain.text_splitter import (
                    CharacterTextSplitter,
                )

                text_splitter = CharacterTextSplitter(
                    chunk_size=chunk_size, chunk_overlap=chunk_overlap
                )
            if chunking_type == "latex":
                from langchain.text_splitter import LatexTextSplitter

                text_splitter = LatexTextSplitter(
                    chunk_size=chunk_size, chunk_overlap=chunk_overlap
                )
            if chunking_type == "NLTK":
                from langchain.text_splitter import NLTKTextSplitter

                text_splitter = NLTKTextSplitter(
                    chunk_size=chunk_size, chunk_overlap=chunk_overlap
                )
            if chunking_type == "spacy":
                from langchain.text_splitter import SpacyTextSplitter

                text_splitter = SpacyTextSplitter(
                    chunk_size=chunk_size, chunk_overlap=chunk_overlap
                )

            sliced_pages = text_splitter.split_documents(sliced_pages)

        return sliced_pages

In [101]:
paper_id = 1
data_dir = str(here('data/papers'))
paper_file = data_dir + f"/paper_{paper_id}.pdf"
print("paper_file:", paper_file)
docs_processor = LoadDoc(file_name=paper_file, encoding="utf8")
sliced_pages = docs_processor.process(['references ', 'acknowledgement', 'acknowledgments', 'references\n'],
                                              chunk_size=500, chunk_overlap=100, chunking_type='fixed-size')

paper_file: /Users/arashkhajeh/GitHub/LLM-for-Materials/data/papers/paper_1.pdf


In [102]:
sliced_pages

[Document(metadata={'producer': 'Acrobat Distiller 8.1.0 (Windows); modified using iText 4.2.0 by 1T3XT', 'creator': 'Arbortext Advanced Print Publisher 10.0.1465/W Unicode', 'creationdate': '2016-04-04T15:36:18-04:00', 'moddate': '2025-04-28T12:24:04-07:00', 'title': 'cm5b04557 1..11', 'jcalibrarybuild': '3.6.i11', 'source': '/Users/arashkhajeh/GitHub/LLM-for-Materials/data/papers/paper_1.pdf', 'total_pages': 11, 'page': 0, 'page_label': '1'}, page_content='P2−NaxCoyMn1−yO2 (y = 0, 0.1) as Cathode Materials in Sodium-Ion\nBatteries\ue0d5Eﬀects of Doping and Morphology To Enhance Cycling\nStability\nNicolas Bucher,∇,†,‡,§ Steﬀen Hartung,∇,†,‡,§ Joseph B. Franklin,∥ Anna M. Wise,⊥ Linda Y. Lim,⊥,#\nHan-Yi Chen,†,‡,§ Johanna Nelson Weker,⊥ Michael F. Toney,⊥ and Madhavi Srinivasan*,†,§,∥\n†TUM CREATE, Singapore 138602, Singapore\n‡Technical University of Munich, Garching 85748, Germany\n§School of Materials Science and Engineering, Nanyang Technological University, Singapore 639798, Sing

In [103]:
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings

Embedding_model = 'text-embedding-ada-002' 
faiss_index = FAISS.from_documents(sliced_pages, OpenAIEmbeddings(model=Embedding_model))
faiss_index

<langchain_community.vectorstores.faiss.FAISS at 0x1785badb0>

In [104]:
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings

# Assume faiss_index is already created as you did before

def retrieve_text(faiss_index, query: str, top_k: int = 3):
    """
    Retrieve the top_k most relevant chunks from the FAISS index based on a query.
    
    Args:
        faiss_index: A FAISS vectorstore object already built.
        query (str): The query string.
        top_k (int): Number of top results to return.

    Returns:
        List of (Document, similarity score) pairs.
    """
    # Search the FAISS index
    results = faiss_index.similarity_search_with_score(query, k=top_k)

    # Print the top results
    for i, (doc, score) in enumerate(results):
        print(f"\nResult {i+1} (score={score:.4f}):\n{doc.page_content}\n")
    
    return results

# Example usage:
query = "synthesis dissolve mix stir heat combust anneal quench filter wash precursor powder chemical solution combustion solid-state hydrothermal sol-gel temperature time atmosphere air nitrogen argon"
results = retrieve_text(faiss_index, query)




Result 1 (score=0.4174):
mixed (molar ratio Na:Mn = 0.7) and subsequently dissolved in
deionized water. For NCO, additional Co(NO3)2 was added, such that
the molar ratio of Na:Co:Mn was 0.7:0.1:0.9. Concentrated HNO3
(≥69%, Honeywell) was added, followed by 1.5 g of gelatin. The
solution was heated until spontaneous combustion occurred. The
resulting dark brownish powder was annealed at 800 °C for 4 h,
followed by another step at 610°C for 9 h and quenching to room
temperature. Elemental composition, as determined by inductively
coupled plasma optical emission spectrometry (ICP-OES), was
Na0.6MnO2+z for the NMO ﬂakes and Na0.6Co0.1Mn0.9O2+z for the
NCO ﬂakes; “z” in the above-mentioned formula units accounts for
Mn-vacancies and is typically between 0.05 and 0.25 for the P2
phase.13 Spherical NMO was synthesized as reported previously, with
a slightly modiﬁed annealing procedure.35 NH4HCO3 was dissolved in
deionized water, followed by a dropwise addition of ethanol (10%
volume of the 

In [108]:
results[0]

(Document(id='3af916d7-802f-4fd8-b8f9-8fc987fb6eca', metadata={'producer': 'Acrobat Distiller 8.1.0 (Windows); modified using iText 4.2.0 by 1T3XT', 'creator': 'Arbortext Advanced Print Publisher 10.0.1465/W Unicode', 'creationdate': '2016-04-04T15:36:18-04:00', 'moddate': '2025-04-28T12:24:04-07:00', 'title': 'cm5b04557 1..11', 'jcalibrarybuild': '3.6.i11', 'source': '/Users/arashkhajeh/GitHub/LLM-for-Materials/data/papers/paper_1.pdf', 'total_pages': 11, 'page': 1, 'page_label': '2'}, page_content='mixed (molar ratio Na:Mn = 0.7) and subsequently dissolved in\ndeionized water. For NCO, additional Co(NO3)2 was added, such that\nthe molar ratio of Na:Co:Mn was 0.7:0.1:0.9. Concentrated HNO3\n(≥69%, Honeywell) was added, followed by 1.5 g of gelatin. The\nsolution was heated until spontaneous combustion occurred. The\nresulting dark brownish powder was annealed at 800 °C for 4 h,\nfollowed by another step at 610°C for 9 h and quenching to room\ntemperature. Elemental composition, as det

mixed (molar ratio Na:Mn = 0.7) and subsequently dissolved in
deionized water. For NCO, additional Co(NO3)2 was added, such that
the molar ratio of Na:Co:Mn was 0.7:0.1:0.9. Concentrated HNO3
(≥69%, Honeywell) was added, followed by 1.5 g of gelatin. The
solution was heated until spontaneous combustion occurred. The
resulting dark brownish powder was annealed at 800 °C for 4 h,
followed by another step at 610°C for 9 h and quenching to room
temperature. Elemental composition, as determined by inductively
coupled plasma optical emission spectrometry (ICP-OES), was
Na0.6MnO2+z for the NMO ﬂakes and Na0.6Co0.1Mn0.9O2+z for the
NCO ﬂakes; “z” in the above-mentioned formula units accounts for
Mn-vacancies and is typically between 0.05 and 0.25 for the P2
phase.13 Spherical NMO was synthesized as reported previously, with
a slightly modiﬁed annealing procedure.35 NH4HCO3 was dissolved in
deionized water, followed by a dropwise addition of ethanol (10%
volume of the NH4HCO3 solution) and a so

In [106]:
print(results[1][0].page_content)

electron microscopy (FESEM) was performed using a Zeiss Supra 55
FESEM. A Dual-view Optima 5300 DV system was used for ICP-OES.
Energy dispersive X-ray spectroscopy (EDS) was performed on a
JEOL JEM-2100. For the atomic absorption spectroscopy (AAS) a
Varian SpectrAA 220FS spectrometer was used.
Electrochemistry. Galvanostatic cycling was performed on a
Neware battery tester system in a coin cell setup. Galvanostatic cycling
for theoperando XRD was conducted on a MTI CT-3008W-5 V1 mA-
S1 instrument. It should be noted that during the initial charge process
for the NCO spheres an interruption in the operando experiment
occurred due to technical diﬃculties. In typical coin cell tests, Co-
doped and undoped spheres show similar initial capacities (Figure 2).
GITT was performed on a Biologic VMP3 potentiostat. A constant
current pulse of 50 mA was applied for 200 s, followed by an OCV
relaxation period of 5 h. When the voltage changed by less than 2 mV
h−1 the next current pulse was applie

In [107]:
print(results[2][0].page_content)

promise regarding cycling stability and capacity.18−26 In these
Received: November 23, 2015
Revised: March 14, 2016
Published: March 15, 2016
Article
pubs.acs.org/cm
© 2016 American Chemical Society 2041 DOI: 10.1021/acs.chemmater.5b04557
Chem. Mater. 2016, 28, 2041−2051
Downloaded via TOYOTA MOTOR MFG & ENGRG NORTH AMER INC on April 28, 2025 at 19:24:04 (UTC).
See https://pubs.acs.org/sharingguidelines for options on how to legitimately share published articles.


In [111]:
# Print the top results
for i, (doc, score) in enumerate(results):
    print(f"\nResult {i+1} (score={score:.4f}):\n{doc.page_content[:100]}\n")


Result 1 (score=0.4174):
mixed (molar ratio Na:Mn = 0.7) and subsequently dissolved in
deionized water. For NCO, additional C


Result 2 (score=0.4327):
electron microscopy (FESEM) was performed using a Zeiss Supra 55
FESEM. A Dual-view Optima 5300 DV s


Result 3 (score=0.4435):
promise regarding cycling stability and capacity.18−26 In these
Received: November 23, 2015
Revised:

