References - https://github.com/openai/openai-cookbook/blob/main/examples/third_party/financial_document_analysis_with_llamaindex.ipynb

In [1]:
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage
from llama_index.llms.openai import OpenAI
from llama_index.core import ServiceContext
from llama_index.core.readers.json import JSONReader
from llama_index.core import Settings

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from llama_index.core.prompts.prompts import SimpleInputPrompt
import torch

In [3]:
# os.environ['OPENAI_API_KEY'] = ''

In [4]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model_path = '../Model/tinylmma-1b'
model_name_litteral = "tinylmma-1b"

In [5]:
# Download and save the model
model = AutoModelForCausalLM.from_pretrained(model_name).to('cuda')
model.save_pretrained(model_path)

# Download and save the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
# tokenizer.save_pretrained(model_path)



Load Documents for Dataset

In [6]:
docs = SimpleDirectoryReader('../Summarizer result/').load_data()

Service Context

In [None]:
llm = OpenAI(temperature=1, model="gpt-3.5-turbo-16k")
service_context = ServiceContext.from_defaults(llm=llm)

  service_context = ServiceContext.from_defaults(llm=llm)


Vectoring Dataset and Save


In [6]:
import nest_asyncio
nest_asyncio.apply()

Define Model for embedding

In [7]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(
    model_name=model_name, 
    device="cuda",
    max_length=512
)

No sentence-transformers model found with name TinyLlama/TinyLlama-1.1B-Chat-v1.0. Creating a new one with MEAN pooling.


In [None]:
Settings.embed_model = embed_model
index = VectorStoreIndex.from_documents(docs)  
index.storage_context.persist(persist_dir="../VectorizedData/{model_name_litteral}/")

  attn_output = torch.nn.functional.scaled_dot_product_attention(


Load Dataset (VectorDB)

In [8]:
Settings.embed_model = embed_model
storage_context = StorageContext.from_defaults(persist_dir="../VectorizedData/{model_name_litteral}/") #fill with dataset that has been saved
index = load_index_from_storage(storage_context)

Query Engine

In [14]:
import tiktoken
from transformers import AutoTokenizer, AutoModelForCausalLM
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
from llama_index.core import Settings

system_prompt = """# TinyLlama Game Recommendation Model
The TinyLlama Game Recommendation Model is an innovative language model created by the TinyLlama project, aimed at enhancing game recommendation systems within the Steam platform.
This model excels in understanding and generating diverse linguistic inputs, empowering it to provide highly personalized game recommendations.
Built on a sophisticated transformer architecture, the TinyLlama Game Recommendation Model is optimized to suggest games that align closely with individual user preferences and gaming behaviors, elevating the gaming journey for Steam enthusiasts.
"""
query_wrapper_prompt = SimpleInputPrompt("{query_str}")

llm = HuggingFaceLLM(
    context_window=1450, 
    max_new_tokens=400,
    system_prompt=system_prompt,
    generate_kwargs={"temperature": 0.2, "do_sample": True},
    model_name=model_name,
    model=model,
    tokenizer=tokenizer,
    tokenizer_kwargs={"truncation": True},
    model_kwargs={"torch_dtype": torch.float32, "pad_token_id": tokenizer.pad_token_id, "device": "cuda"},
)

print("mask_token_id:", tokenizer.mask_token_id)
print("sep_token_id:", tokenizer.sep_token_id)
print("pad_token_id:", tokenizer.pad_token_id)
print("eos_token_id:", tokenizer.eos_token_id)
print("cls_token_id:", tokenizer.cls_token_id)
query_engine = index.as_query_engine(similarity_top_k=3, llm=llm)
response = query_engine.query("What do players do in 'Stardew Valley'?")
print("Response overall:")
print(response)
for node in response.source_nodes:
    file_name = node.node.metadata['file_name']
    node_id = node.node.id_
    node_score = node.score
    print(f"File ID: {file_name}, Node ID: {node_id},Score: {node_score},")

The model `TinyLlama/TinyLlama-1.1B-Chat-v1.0` and tokenizer `StabilityAI/stablelm-tuned-alpha-3b` are different, please ensure that they are compatible.


mask_token_id: None
sep_token_id: None
pad_token_id: 2
eos_token_id: 2
cls_token_id: None
