In [67]:
import langchain
from langchain.chains import RetrievalQA
from langchain.document_loaders import ArxivLoader
from langchain.embeddings import LlamaCppEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate

In [68]:
# Globals
instruction_model_path = "/Users/bsantanna/dev/workspace/community/the_bloke/llama-2-7b-32k-instruct.Q5_K_M.gguf"
n_gpu_layers = 1
n_batch = 512
n_ctx = 4096

In [69]:
# initialize embeddings model
embedding = LlamaCppEmbeddings(
    model_path=instruction_model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    f16_kv=True)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/bsantanna/dev/workspace/community/the_bloke/llama-2-7b-32k-instruct.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q6_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q5_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    6:            blk.0.ffn_down.weight q6_K     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    7:              

In [70]:
# Load LLM instruction following model
llm = LlamaCpp(
    model_path=instruction_model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=n_ctx,
    f16_kv=True)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/bsantanna/dev/workspace/community/the_bloke/llama-2-7b-32k-instruct.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q6_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q5_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    6:            blk.0.ffn_down.weight q6_K     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    7:              

In [71]:
# Initialize documents
loader = ArxivLoader(query="dynamic scaling in self-similar systems", load_max_docs=20)
documents = loader.load()

In [72]:
# Initialize index
index = VectorstoreIndexCreator(embedding=embedding).from_documents(documents)


llama_print_timings:        load time =  1329.42 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  1328.58 ms /   283 tokens (    4.69 ms per token,   213.01 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  1330.44 ms

llama_print_timings:        load time =  1329.42 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =   990.27 ms /   242 tokens (    4.09 ms per token,   244.38 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =   991.84 ms

llama_print_timings:        load time =  1329.42 ms
llama_print_timings:   

In [73]:
# initialize retriever
retriever = index.vectorstore.as_retriever()

In [74]:
# Initialize prompt 
prompt_template = """[INST]
Use the following pieces of context to choose the best answer the question at the end.

{context}

Question:

{question}
[/INST]"""

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)

In [75]:
# initialize retrieval qa chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={
        'prompt': prompt
    },
)

In [76]:
# Format input
problem = """Which of the following is an accurate definition of dynamic scaling in self-similar systems?"""
alternatives = """
A - Dynamic scaling refers to the evolution of self-similar systems, where data obtained from snapshots at fixed times exhibits similarity to the respective data taken from snapshots of any earlier or later time. This similarity is tested by a certain time-dependent stochastic variable x.

B - Dynamic scaling refers to the non-evolution of self-similar systems, where data obtained from snapshots at fixed times is similar to the respective data taken from snapshots of any earlier or later time. This similarity is tested by a certain time-dependent stochastic variable x.
	
C - Dynamic scaling refers to the evolution of self-similar systems, where data obtained from snapshots at fixed times is dissimilar to the respective data taken from snapshots of any earlier or later time. This dissimilarity is tested by a certain time-independent stochastic variable y.

D - Dynamic scaling refers to the non-evolution of self-similar systems, where data obtained from snapshots at fixed times is dissimilar to the respective data taken from snapshots of any earlier or later time. This dissimilarity is tested by a certain time-independent stochastic variable y.

E - Dynamic scaling refers to the evolution of self-similar systems, where data obtained from snapshots at fixed times is independent of the respective data taken from snapshots of any earlier or later time. This independence is tested by a certain time-dependent stochastic variable z.
"""

In [77]:
query = prompt.format_prompt(
    context="",
    question=f"{problem}\n{alternatives}"
)

In [78]:
# langchain.debug = True
result = qa_chain.run(query.text)
# langchain.debug = False


llama_print_timings:        load time =  1329.42 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  1725.04 ms /   365 tokens (    4.73 ms per token,   211.59 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  1726.42 ms

llama_print_timings:        load time =  2917.89 ms
llama_print_timings:      sample time =   180.55 ms /   256 runs   (    0.71 ms per token,  1417.91 tokens per second)
llama_print_timings: prompt eval time =  6789.09 ms /  1017 tokens (    6.68 ms per token,   149.80 tokens per second)
llama_print_timings:        eval time =  8374.55 ms /   255 runs   (   32.84 ms per token,    30.45 tokens per second)
llama_print_timings:       total time = 15685.62 ms


In [79]:
print(result)



The correct answer to the question you provided is option A:  "Dynamic scaling refers to the evolution of self-similar systems, where data obtained from snapshots at fixed times exhibits similarity to the respective data taken from snapshots of any earlier or later time. This similarity is tested by a certain time-dependent stochastic variable x."

This answer choice accurately defines dynamic scaling as a process that involves the evolution of self-similar systems and the testing of time-dependent stochastic variables. It also matches the description provided in the context you provided, which mentions that dynamic scaling refers to the similarity between data from different times.

Options B, C, D, and E do not accurately define dynamic scaling. Option B states that dynamic scaling refers to a non-evolutionary process, which is incorrect because self-similar systems are known to evolve over time. Options C and D describe dynamic scaling as a dissimilarity between data from differen