In [1]:
import nest_asyncio

from llama_index.embeddings import HuggingFaceEmbedding

# Patch asyncio so nested event loops are allowed; later cells build the
# indexes and the sub-question engine with use_async=True from inside the notebook.
nest_asyncio.apply()

# Embedding model handed to the ServiceContext and used for indexing and retrieval.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from llama_index.llms import LlamaCPP

# Local llama.cpp model. Instead of model_path, a model_url can be given, e.g.
# "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_M.gguf".
llm = LlamaCPP(
    # path to a pre-downloaded model file
    model_path="./llm/llama-2-7b-chat.Q4_K_M.gguf",
    # llama2 has a 4096-token context window; stay a little below it for wiggle room
    context_window=3900,
    max_new_tokens=256,
    temperature=0.1,
    # kwargs passed to __init__(); n_gpu_layers must be at least 1 to use the GPU
    model_kwargs={"n_gpu_layers": 1},
    # kwargs passed to __call__()
    generate_kwargs={},
    verbose=True,
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [3]:
from llama_index import ServiceContext
from llama_index.callbacks import CallbackManager, LlamaDebugHandler

# Debug handler configured to print a trace each time a traced operation ends.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

# Bundle the local LLM, the embedding model and the callbacks into one context
# that the indexes and query engines below all share.
service_context = ServiceContext.from_defaults(
    callback_manager=callback_manager,
    embed_model=embed_model,
    llm=llm,
)

In [4]:
import re
import glob
import chromadb

from llama_index import SimpleDirectoryReader, VectorStoreIndex
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext

def parse_string(input_string):
    """Turn an arbitrary title into a lowercase snake_case identifier.

    Spaces and hyphens become underscores, every character outside
    ``[a-z0-9_]`` is dropped, runs of underscores collapse to a single one,
    and leading/trailing underscores are removed so the result starts and
    ends with an alphanumeric character (Chroma rejects collection names
    that do not).

    Args:
        input_string: Text to normalise, e.g. a paper title.

    Returns:
        The normalised identifier; an empty string if nothing usable remains.
    """
    parsed_string = input_string.replace(' ', '_').replace('-', '_')
    parsed_string = re.sub(r'[^a-z0-9_]', '', parsed_string.lower())
    # Punctuation removal can leave several separators side by side ("a - b" -> "a___b").
    parsed_string = re.sub(r'_+', '_', parsed_string)

    # A title such as "(draft) notes" would otherwise yield "_draft_notes".
    return parsed_string.strip('_')

input_dir = "./ainu_papers/"
supported_files = [".pdf", ".csv", ".docx", ".txt", ".epub", ".hwp", ".mbox", ".ppt", ".pptm", ".pptx", ".ipynb", ".md"]
# Recursively collect every path under input_dir whose last dot-suffix is a supported extension.
files = [f for f in glob.glob(f"{input_dir}**/*", recursive=True) if f".{f.split('.')[-1]}" in supported_files]

# On-disk Chroma database: one collection per paper, reused across notebook runs.
db = chromadb.PersistentClient(path="./db")
query_engine_tools = []

for file in files:
    # Title = file name without its extension (any inner dots become spaces).
    title = " ".join(file.replace("\\", "/").split("/")[-1].split(".")[:-1])
    # Chroma limits collection names to 63 characters and requires an alphanumeric
    # character at both ends, so strip underscores again after truncating.
    collection_name = parse_string(title)[:63].strip("_")
    print(f"Loading file titled: {title}")

    # get_or_create avoids a bare `except:` for the "collection missing" case,
    # which would also swallow unrelated errors and KeyboardInterrupt.
    chroma_collection = db.get_or_create_collection(collection_name)
    # An empty collection is either brand new or left over from an indexing run
    # that failed part-way; in both cases the paper still has to be embedded.
    needs_indexing = chroma_collection.count() == 0

    print(f"Storing and Indexing file title: {title}")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    if needs_indexing:
        documents = SimpleDirectoryReader(input_files=[file]).load_data()
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        index = VectorStoreIndex.from_documents(
            documents,
            storage_context=storage_context,
            service_context=service_context,
            use_async=True,
        )
    else:
        # Embeddings are already persisted; just wrap the existing collection.
        index = VectorStoreIndex.from_vector_store(
            vector_store,
            service_context=service_context,
            use_async=True,
        )

    print("Init query engine and add to qe_tools")
    # Expose the paper as a named tool; the description is built from the paper title.
    query_engine = index.as_query_engine()
    query_engine_tools.append(
        QueryEngineTool(
            query_engine=query_engine,
            metadata=ToolMetadata(
                name=collection_name,
                description=f"A Paper titled: {title}",
            ),
        )
    )

Loading file titled: Grain size strengthening in terms of dislocation density measured by resistivity
Storing and Indexing file title: Grain size strengthening in terms of dislocation density measured by resistivity
**********
Trace: index_construction
**********
Init query engine and add to qe_tools
Loading file titled: [Armstrong] 60 Years of Hall-Petch - Past to Present Nano-Scale Connections
Storing and Indexing file title: [Armstrong] 60 Years of Hall-Petch - Past to Present Nano-Scale Connections
**********
Trace: index_construction
**********
Init query engine and add to qe_tools


In [5]:
from llama_index.query_engine import SubQuestionQueryEngine

# Sub-question engine built over the per-paper query engine tools collected above.
subq_engine = SubQuestionQueryEngine.from_defaults(
    use_async=True,
    service_context=service_context,
    query_engine_tools=query_engine_tools,
)

In [6]:
# Run one question through the sub-question engine and print the returned response.
response = subq_engine.query("Why Nickel is chosen as material for the experimentation?")
print(response)

Generated 1 sub questions.
[1;3;38;2;237;90;200m[grain_size_strengthening_in_terms_of_dislocation_density_measur] Q: What are the advantages of using nickel in experimental studies?
[0m

Llama.generate: prefix-match hit


[1;3;38;2;237;90;200m[grain_size_strengthening_in_terms_of_dislocation_density_measur] A:  Nickel has a high stacking fault energy, which makes it suitable for measuring dislocation density by resistivity.
[0m

Llama.generate: prefix-match hit


**********
Trace: query
    |_CBEventType.QUERY ->  178.281414 seconds
      |_CBEventType.TEMPLATING ->  0.0 seconds
      |_CBEventType.LLM ->  39.038289 seconds
      |_CBEventType.SUB_QUESTION ->  130.008607 seconds
        |_CBEventType.QUERY ->  130.007607 seconds
          |_CBEventType.RETRIEVE ->  0.599619 seconds
            |_CBEventType.EMBEDDING ->  0.552491 seconds
          |_CBEventType.SYNTHESIZE ->  129.407988 seconds
            |_CBEventType.TEMPLATING ->  0.0 seconds
            |_CBEventType.LLM ->  129.397978 seconds
            |_CBEventType.LLM ->  129.397978 seconds
      |_CBEventType.SYNTHESIZE ->  9.233516 seconds
        |_CBEventType.TEMPLATING ->  0.0 seconds
        |_CBEventType.LLM ->  9.231398 seconds
**********
 Nickel has a high stacking fault energy, which makes it suitable for measuring dislocation density by resistivity.


In [7]:
# iterate through sub_question items captured in SUB_QUESTION event
from llama_index.callbacks.schema import CBEventType, EventPayload

for i, (start_event, end_event) in enumerate(llama_debug.get_event_pairs(CBEventType.SUB_QUESTION)):
    # The question/answer pair is read from the end event's payload.
    qa_pair = end_event.payload[EventPayload.SUB_QUESTION]
    print(f"Sub Question {i}: {qa_pair.sub_q.sub_question.strip()}")
    print(f"Answer: {qa_pair.answer.strip()}")
    print("====================================")

Sub Question 0: What are the advantages of using nickel in the experiment
Answer: There are several advantages to using nickel in the experiment:

1. Nickel has a high stacking fault energy, which makes it an ideal material for studying dislocation density and its effect on flow stress.
2. Electrical resistivity measurements can be used to determine dislocation density up to larger strains than transmission electron microscopy.
3. The experiment allows for the measurement of dislocation density in a wider range of grain sizes than previously possible.
4. The relationship between dislocation density and flow stress is independent of grain size, deformation temperature, and plastic strain, making it possible to obtain a general equation for the Hall-Petch relation.
