### Install required packages


In [2]:
!pip install -q -U torch datasets transformers tensorflow langchain playwright html2text sentence_transformers faiss-cpu
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7

### Imports


In [1]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

  from .autonotebook import tqdm as notebook_tqdm


### Load quantized Mistral 7B


In [2]:
#################################################################
# Tokenizer
#################################################################

# Local directory containing the model; the same path is used below for both
# the tokenizer and the model weights.
# NOTE(review): hard-coded absolute path — adjust for your machine.
model_name = "/mnt/sda/Mistral_v0.2/"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token, and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models (name of a torch dtype attribute,
# resolved with getattr(torch, ...) below)
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
# Turn the dtype name into the actual torch dtype object (torch.float16 here).
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16.
# This is informational only: it prints a hint when the device's major compute
# capability is >= 8 and does not change compute_dtype or bnb_config.
# NOTE(review): torch.cuda.get_device_capability() presumably needs a visible
# CUDA device — confirm behaviour on CPU-only machines.
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load the pre-trained model with the quantization config above
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

Your GPU supports bfloat16: accelerate training with bf16=True


Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.04s/it]


### Count number of trainable parameters


In [3]:
def print_number_of_trainable_model_parameters(model):
    """Summarize how many of a model's parameters are trainable.

    Despite its name, this function prints nothing: it returns the summary
    as a string and leaves printing to the caller.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` attribute.

    Returns:
        A three-line string with the trainable parameter count, the total
        parameter count, and the trainable share as a percentage with two
        decimals. A model without parameters reports ``0.00%`` instead of
        raising ``ZeroDivisionError``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        param_count = param.numel()
        all_model_params += param_count
        if param.requires_grad:
            trainable_model_params += param_count

    # Guard against a parameter-free model, which would otherwise divide by zero.
    if all_model_params:
        trainable_percentage = 100 * trainable_model_params / all_model_params
    else:
        trainable_percentage = 0.0

    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {trainable_percentage:.2f}%"
    )


print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


### Build Mistral text generation pipeline


In [4]:
# Text-generation pipeline around the quantized model and its tokenizer.
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    # `temperature` is only honoured in sampling mode; without do_sample=True
    # generation is greedy and the temperature setting is silently ignored.
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    # Keep the prompt in the generated text (the original setting).
    return_full_text=True,
    max_new_tokens=1000,
)

In [5]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline so it can
# be passed as the `llm` of an LLMChain.
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

### Load and chunk documents. Load chunked documents into FAISS index


In [7]:
# Root directory of the source tree that is loaded and indexed below.
# NOTE(review): hard-coded absolute local path — make configurable before sharing.
repo_path = "/mnt/nvme1n1/Aamir/CurePilot/WebServices/HISApi"

In [8]:
# NOTE(review): these imports sit mid-notebook; consider moving them to the
# import cell at the top so all dependencies are visible in one place.
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter


# Load files under repo_path, restricted to the ".cs" suffix, and parse them
# with the C# language parser.
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".cs"],
    # NOTE(review): this pattern names a .py file while only .cs files are
    # selected — looks like a leftover; confirm whether it is still needed.
    exclude=["**/non-utf8-encoding.py"],
    parser=LanguageParser(language=Language.CSHARP, parser_threshold=500),
)
documents = loader.load()
# Last expression of the cell: shows how many documents were loaded.
len(documents)

229

In [11]:
# Splitter configured for C# sources (the variable keeps its historical
# `python_splitter` name): 2000-character chunks with a 200-character overlap.
python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.CSHARP,
    chunk_size=2000,
    chunk_overlap=200,
)
chunked_documents = python_splitter.split_documents(documents)

# Embed the chunks with a sentence-transformers model and index them in FAISS.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
db = FAISS.from_documents(chunked_documents, embedding_model)

# Retriever view over the index, used later as the "context" source.
retriever = db.as_retriever()

No sentence-transformers model found with name /mnt/sda/Mistral_v0.2/. Creating a new one with MEAN pooling.
Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  1.59it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 224.00 MiB. GPU 0 has a total capacty of 23.68 GiB of which 129.06 MiB is free. Including non-PyTorch memory, this process has 23.52 GiB memory in use. Of the allocated memory 23.08 GiB is allocated by PyTorch, and 147.20 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

### Create PromptTemplate and LLMChain


In [22]:
# Prompt with two placeholders: {context} (supporting text) and {question}.
# NOTE(review): the instruction refers to fantasy football, while the documents
# indexed earlier in this notebook are C# source files — confirm the intended
# domain before relying on the answers.
prompt_template = """
### [INST] Instruction: Answer the question based on your fantasy football knowledge. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
 """

# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain: fills the prompt and sends it to the Mistral pipeline LLM
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

In [30]:
# Baseline without retrieval: the context is left empty, so the model gets
# only the question.
llm_chain.invoke(
    {"context": "", "question": "Should I start Gibbs in week 16 for fantasy?"}
)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': '',
 'question': 'Should I start Gibbs in week 16 for fantasy?',
 'text': "\nBased on my fantasy football knowledge, it depends on the specific league and roster you have. If you are looking for a wide receiver option for week 16, Gibbs could be a decent choice if he is available on your waiver wire. However, it's important to consider other options as well and make sure you have a solid lineup before making any decisions. Additionally, keep an eye on any potential injuries or changes to Gibbs' status that could impact his availability for week 16."}

### Build RAG Chain


In [31]:
# Compose the RAG chain: the input string goes to `retriever` to produce
# "context" and is forwarded as "question" via RunnablePassthrough; both are
# then fed to llm_chain.
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | llm_chain

# The result is indexed by "context" and "text" in the following cells.
result = rag_chain.invoke("Should I start Gibbs next week for fantasy?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [32]:
result["context"]

[Document(page_content='This week, Harris faces the bottom-of-the-barrel Packers’ run defense that\nallows the ninth-most fantasy points per game to the running back position.\nHarris will give you a higher-volume RB with a low rostership percentage this\nweek.', metadata={'source': 'https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/'}),
 Document(page_content='could start cutting into his workload. Furthermore, his rest of the season\nschedule isn’t fantasy-friendly. Try to flip Edwards and a WR3 for Kenneth\nWalker or Tony Pollard', metadata={'source': 'https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/'}),
 Document(page_content='“ **Gus Edwards** has been on fire lately. He is the RB1 over the past three\nweeks, averaging 22.2 half-point PPR fantasy points and two rushing touchdowns\nper game. However, over 54% of his fantasy production came from the six\nrushing touchdowns. Meanwhile, t

In [33]:
print(result["text"])


Based on the information provided, it seems like there are several factors to consider before deciding whether to start Gibbs next week for fantasy. Here are some key points to keep in mind:

* Gibbs has been performing well so far this season, averaging 18.9 fantasy points per game and scoring four touchdowns in his last five games.
* However, he may face some competition for touches next week, as the Packers' run defense has allowed the ninth-most fantasy points per game to the running back position. This could potentially cut into Gibbs' workload.
* Additionally, Gibbs' schedule for the rest of the season may not be particularly favorable for fantasy owners. He has some tough matchups coming up against teams with strong defenses, such as the Bills and the Steelers.
* On the other hand, Gus Edwards has been on fire lately and is currently the RB1 over the past three weeks. However, he is a touchdown-or-bust player and his production comes mostly from rushing touchdowns.
* Finally, B