In [1]:
!pip install -U transformers

Collecting transformers
  Downloading transformers-4.43.3-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m163.5 kB/s[0m eta [36m0:00:00[0m [36m0:00:01[0mm
Downloading transformers-4.43.3-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m827.3 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.36.2
    Uninstalling transformers-4.36.2:
      Successfully uninstalled transformers-4.36.2
Successfully installed transformers-4.43.3


In [3]:
import os
import torch
from transformers import (
  AutoTokenizer, 
  AutoModelForCausalLM, 
  BitsAndBytesConfig,
  pipeline
)

from transformers import BitsAndBytesConfig
import transformers
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

import nest_asyncio
#################################################################
# Tokenizer
#################################################################

from huggingface_hub import login
login(token='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')

model_name='mistralai/Mistral-7B-Instruct-v0.1'

model_config = transformers.AutoConfig.from_pretrained(
    model_name,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)


def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
)

mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/vinay/.cache/huggingface/token
Login successful


`low_cpu_mem_usage` was None, now set to True since model is quantized.


Your GPU supports bfloat16: accelerate training with bf16=True


Downloading shards: 100%|██████████| 2/2 [27:50<00:00, 835.48s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [05:55<00:00, 177.65s/it]


trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


  warn_deprecated(


In [4]:
# !pip install -U tokenizers
# !pip install playwright
# !pip install sentence-transformers

In [30]:
%pip install --upgrade --quiet html2text
import nest_asyncio
nest_asyncio.apply()

from langchain_community.document_loaders import AsyncHtmlLoader

# # Articles to index
articles = ["https://www.mayoclinic.org/diseases-conditions/skin-cancer/symptoms-causes/syc-20377605",
            "https://www.skincancer.org/skin-cancer-information/",
            "https://www.cancer.gov/types/skin",
            "https://www.cancer.gov/types/skin/patient/skin-treatment-pdq",
            "https://www.aad.org/media/stats-skin-cancer"]

# # Scrapes the blogs above
# loader = AsyncChromiumLoader(articles)
# docs = loader.load()


loader = AsyncHtmlLoader(articles)
docs = loader.load()
# Converts HTML to plain text 
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=100, 
                                      chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents, 
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={'k': 4}
)

# Create prompt template
prompt_template = """
### [INST] Instruction: Answer the question based on your fantasy football knowledge. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
 """

# Create prompt from prompt template 
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain 
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

rag_chain = ( 
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

data = rag_chain.invoke("Should I start Gibbs next week for fantasy?")

Note: you may need to restart the kernel to use updated packages.


Fetching pages: 100%|##########| 5/5 [00:00<00:00,  7.62it/s]
Created a chunk of size 203, which is longer than the specified 100
Created a chunk of size 105, which is longer than the specified 100
Created a chunk of size 296, which is longer than the specified 100
Created a chunk of size 154, which is longer than the specified 100
Created a chunk of size 109, which is longer than the specified 100
Created a chunk of size 214, which is longer than the specified 100
Created a chunk of size 113, which is longer than the specified 100
Created a chunk of size 181, which is longer than the specified 100
Created a chunk of size 139, which is longer than the specified 100
Created a chunk of size 293, which is longer than the specified 100
Created a chunk of size 261, which is longer than the specified 100
Created a chunk of size 132, which is longer than the specified 100
Created a chunk of size 234, which is longer than the specified 100
Created a chunk of size 341, which is longer than the 

In [36]:
res = rag_chain.invoke("What kind of treatments are available for invasive melanoma?")['text']
print(res[(res.find('[/INST]') + len('[/INST]')):])




 
According to the National Cancer Institute (NCI), there are several treatments available for invasive melanoma, including:

1. Biological therapy: This type of therapy uses substances made from living cells to treat cancer. It works by boosting the body's immune system to fight the cancer.
2. Chemotherapy: This is a type of drug therapy that uses chemicals to kill cancer cells. It may be used alone or in combination with other treatments.
3. Mohs surgery: This is a surgical procedure that removes layers of skin until all cancerous tissue has been removed. It is often used for skin cancer that has not spread to other parts of the body.
4. Photodynamic therapy: This is a type of light therapy that uses a special light to destroy cancer cells. It is often used in combination with other treatments.
5. Radiation therapy: This is a type of radiation treatment that uses high-energy particles to destroy cancer cells. It may be used alone or in combination with other treatments.
6. Skin biop

In [33]:
res = rag_chain.invoke("What should I do at home if I have benign keratosis-like lesions?")['text']
print(res[(res.find('[/INST]') + len('[/INST]')):])




 
If you have benign keratosis-like lesions, it's important to monitor them regularly for any changes in size, shape, color, or texture. If you notice any changes or if the lesion becomes painful or bleeds, you should see a healthcare provider for evaluation. In the meantime, you can use sunscreen with at least SPF 30 to protect the area from further sun damage. It's also a good idea to avoid picking or scratching the lesion, as this can introduce bacteria and potentially lead to infection.


In [38]:
!pip install flask
from flask import Flask

app = Flask(__name__)

@app.route("/")
def hello_world():
    return "<p>Hello, World!</p>"

Collecting flask
  Using cached flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug>=3.0.0 (from flask)
  Downloading werkzeug-3.0.3-py3-none-any.whl.metadata (3.7 kB)
Collecting itsdangerous>=2.1.2 (from flask)
  Using cached itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)
Collecting blinker>=1.6.2 (from flask)
  Downloading blinker-1.8.2-py3-none-any.whl.metadata (1.6 kB)
Using cached flask-3.0.3-py3-none-any.whl (101 kB)
Downloading blinker-1.8.2-py3-none-any.whl (9.5 kB)
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Downloading werkzeug-3.0.3-py3-none-any.whl (227 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.3/227.3 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: Werkzeug, itsdangerous, blinker, flask
Successfully installed Werkzeug-3.0.3 blinker-1.8.2 flask-3.0.3 itsdangerous-2.2.0


In [41]:
!flask --app RAG run

Usage: flask run [OPTIONS]
Try 'flask run --help' for help.

Error: Could not import 'RAG'.
