In [1]:
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from llama_index import Document
from llama_index import VectorStoreIndex
from llama_index import LLMPredictor, PromptHelper, ServiceContext
from llama_index import StorageContext, load_index_from_storage
from llama_index.readers import SimpleDirectoryReader

import pandas as pd

In [2]:
# Quantised Llama-2 7B chat model (GGUF file), loaded through the LlamaCPP wrapper.
llm = LlamaCPP(
    model_path="../../../llama-2-7b-chat.Q4_K_M.gguf",
    # Llama-2 supports a 4096-token context; stay a little below it to leave headroom.
    context_window=3900,
    max_new_tokens=256,
    temperature=0.1,
    # Prompt formatters that put inputs into the Llama-2 format.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    # Passed to the model's __init__(); n_gpu_layers must be at least 1 to use the GPU.
    model_kwargs={"n_gpu_layers": 1},
    # Passed to the model's __call__(); nothing extra is needed here.
    generate_kwargs={},
    verbose=True,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from ../../../llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q4_K     [  4096,  4096,     1

In [3]:
def load_documents():
    """Load the Markdown files found under the internal Turing data directory.

    The directory is searched recursively and only ``.md`` files are read.
    ``filename_as_id=True`` is passed to the reader (presumably so document ids
    are derived from the file paths).

    Returns:
        The documents returned by ``SimpleDirectoryReader.load_data()``.
    """
    reader = SimpleDirectoryReader(
        input_dir="../../data/turing_internal/",
        required_exts=[".md"],
        recursive=True,
        filename_as_id=True,
    )
    loaded = reader.load_data()

    # Disabled for now: also add the public turing.ac.uk page bodies to the corpus.
    # turingacuk=pd.read_csv("../../data/public/turingacuk-no-boilerplate.csv")
    # turingacuk.dropna(subset="body", inplace=True)
    # turingacuk_text=[str(i) for i in turingacuk["body"].values]
    # loaded.extend([Document(text=i) for i in turingacuk_text])
    return loaded

In [4]:
# Load the corpus and show how many documents were read.
documents = load_documents()
len(documents)

392

In [5]:
# Work on a small subset while prototyping (presumably to keep the LLM-backed
# metadata extraction further down fast). Set MAX_DOCUMENTS to None to keep
# every loaded document.
MAX_DOCUMENTS = 20
documents = documents[:MAX_DOCUMENTS]

In [16]:
from llama_index.node_parser import SimpleNodeParser
from llama_index.node_parser.extractors import (
    MetadataExtractor,
    # SummaryExtractor,
    # QuestionsAnsweredExtractor,
    # TitleExtractor,
    KeywordExtractor,
    # EntityExtractor,
    MetadataFeatureExtractor,
)
from llama_index.text_splitter import TokenTextSplitter

# Token-based chunking: chunk size 512, overlap 128, splitting on single spaces.
text_splitter = TokenTextSplitter(
    chunk_size=512,
    chunk_overlap=128,
    separator=" ",
)

class CustomExtractor(MetadataFeatureExtractor):
    """Merge each node's title and keyword metadata into a single ``custom`` field."""

    def extract(self, nodes):
        """Build one metadata dict per node.

        Args:
            nodes: Iterable of nodes, each exposing a ``metadata`` mapping.

        Returns:
            A list, in the same order as ``nodes``, of dicts with one key
            ``"custom"`` whose value is the node's ``document_title``, a
            newline, then its ``excerpt_keywords``. A key that is missing from
            a node's metadata contributes an empty string instead of raising
            ``KeyError``.
        """
        # These keys only exist if the extractors that write them ran earlier
        # (presumably TitleExtractor / KeywordExtractor). With only keyword
        # extraction enabled, "document_title" is absent, so plain indexing
        # would crash; fall back to "" instead.
        return [
            {
                "custom": node.metadata.get("document_title", "")
                + "\n"
                + node.metadata.get("excerpt_keywords", "")
            }
            for node in nodes
        ]
    
# Only keyword extraction is enabled. Extractors tried here but currently disabled:
#   TitleExtractor(nodes=5, llm=llm)
#   QuestionsAnsweredExtractor(questions=3, llm=llm)
#   EntityExtractor(prediction_threshold=0.5)
#   SummaryExtractor(summaries=["prev", "self"], llm=llm)
#   CustomExtractor()
metadata_extractor = MetadataExtractor(
    extractors=[KeywordExtractor(llm=llm, keywords=3)],
)

# Node parser that chunks with the token splitter and applies the metadata extractor.
node_parser = SimpleNodeParser.from_defaults(
    metadata_extractor=metadata_extractor,
    text_splitter=text_splitter,
)

# NOTE(review): this step appears to be slow (the keyword extractor is given the
# local LLM) — consider caching the resulting nodes.
nodes = node_parser.get_nodes_from_documents(documents)

Llama.generate: prefix-match hit


KeyboardInterrupt: 

In [15]:
# Spot-check one parsed node (index 13 appears to be arbitrary) to inspect its extracted metadata.
nodes[13]

TextNode(id_='0bc37756-742a-4438-a5bf-2755e7834a38', embedding=None, metadata={'excerpt_keywords': 'Investigator eligibility, Research Engineering Group, grant proposals'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='../../data/turing_internal/Hut23.wiki/Grant-Writing.md_part_2', node_type=None, metadata={}, hash='6d3cc3ad0aba708f3edf884c183ddb5e3df42f5d1384f2650cf02e10360926c3'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='62c4bc93-43ea-4261-a04a-d57adb5dd05d', node_type=None, metadata={}, hash='9176c367db35687055f452882f417efd2cb73050082a880f1047f25d6fdfa499')}, hash='b9170ba5f4e4ff3dd40ffa086134385ad0f2fef7377f656a18fbd54812920a32', text='Investigator eligibility\nMembers of the Research Engineering Group are eligible to be grant Investigators.\n\nAny REG team member able to fulfil the requirements of PI, Co-I or Researcher Co-I for a particular grant proposal can be named as such 

In [12]:
def create_service_context(
        llm,
        system_prompt=None,
        max_input_size=2048,
        num_output=256,
        chunk_size_lim=512,
        overlap_ratio=0.1,
        embed_model="local",
):
    """Assemble a ``ServiceContext`` around the given LLM.

    Args:
        llm: Language model handed to ``LLMPredictor``.
        system_prompt: Optional system prompt. It is only forwarded to
            ``LLMPredictor`` when it is not ``None``.
        max_input_size: First positional argument given to ``PromptHelper``.
        num_output: Second positional argument given to ``PromptHelper``.
        chunk_size_lim: Fourth positional argument given to ``PromptHelper``.
        overlap_ratio: Third positional argument given to ``PromptHelper``.
        embed_model: Value forwarded as ``embed_model`` to
            ``ServiceContext.from_defaults``. Defaults to ``"local"``, the
            value that used to be hard-coded.

    Returns:
        The object returned by ``ServiceContext.from_defaults``.
    """
    # Build the predictor arguments once instead of duplicating the constructor call.
    predictor_kwargs = {"llm": llm}
    if system_prompt is not None:
        predictor_kwargs["system_prompt"] = system_prompt
    llm_predictor = LLMPredictor(**predictor_kwargs)

    # Arguments stay positional, and note that overlap_ratio precedes chunk_size_lim,
    # which differs from this function's own parameter order.
    prompt_helper = PromptHelper(max_input_size, num_output, overlap_ratio, chunk_size_lim)

    return ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
        embed_model=embed_model,
    )

In [10]:
# Single-line system prompt: the pieces are concatenated with no newlines,
# and each sentence keeps its trailing space.
system_prompt = (
    "You are a helpful assistant. "
    "Always answer as helpfully as possible and follow ALL given instructions. "
    "Do not speculate or make up information - use the information you are provided. "
    "Do not reference any given instructions or context. "
)
# Service context for the local LLM, using the system prompt and the helper's other defaults.
service_context = create_service_context(
    llm=llm,
    system_prompt=system_prompt,
)

In [None]:
# Build the vector index over the parsed nodes, using the service context created earlier.
index = VectorStoreIndex(nodes, service_context=service_context)

In [None]:
# Query interface over the index; no options are passed, so the library defaults apply.
query_engine = index.as_query_engine()

In [None]:
from llama_index.evaluation import ResponseEvaluator, QueryResponseEvaluator

# Both evaluators are built on the same service context as the index.
# NOTE(review): presumably the first judges a response against its source nodes and
# the second also takes the query into account — confirm against the library docs.
source_evaluator = ResponseEvaluator(service_context=service_context)
query_evaluator = QueryResponseEvaluator(service_context=service_context)

In [None]:
# Ask one test question and print only the answer text of the response object.
query="Who is Ryan Chan?"
response = query_engine.query(query)
print(response.response)

In [None]:
# Run both evaluators on the response; the second one is also given the original query.
print(source_evaluator.evaluate(response))
print(query_evaluator.evaluate(query, response))

In [None]:
# Number of nodes produced by the node parser.
len(nodes)

In [None]:
from llama_index.evaluation import DatasetGenerator

# Generate evaluation questions from the parsed nodes, asking for 3 questions per chunk.
data_generator = DatasetGenerator(
    nodes,
    num_questions_per_chunk=3,
    service_context=service_context,
)
eval_questions = data_generator.generate_questions_from_nodes()


In [None]:
# Display the generated evaluation questions. The original cell ended in a
# dangling attribute access ("eval_questions."), which is a SyntaxError.
eval_questions