In [6]:
!pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-

In [1]:
# Install the third-party packages used by this notebook into the kernel's
# environment (one %pip invocation per package; versions are not pinned).
%pip install chromadb
%pip install tiktoken
%pip install PyPDF2
%pip install llamaapi
%pip install langchain
%pip install langchain_ollama
%pip install langchain-experimental

Collecting chromadb
  Downloading chromadb-1.0.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi==0.115.9 (from chromadb)
  Downloading fastapi-0.115.9-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.34.1-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.24.3-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.21.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentele

# Introdução ao LangChain

In [2]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain_ollama.llms import OllamaLLM
from langchain_ollama import OllamaEmbeddings
from langchain.chains import RetrievalQA

from PyPDF2 import PdfReader

In [3]:
# Extract the text of every page of the PDF and store it as a UTF-8 text file.
doc = PdfReader('/content/Chapter1.pdf')

# Extract everything *before* opening the output file: opening with 'w'
# truncates document.txt, so a failure during extraction would otherwise
# leave an empty file behind. `or ""` guards against a page that yields no
# text; join avoids repeated string concatenation in the loop.
full_text = "".join(page.extract_text() or "" for page in doc.pages)

with open('document.txt', 'w', encoding='utf-8') as f:
  f.write(full_text)

# Read the file back so `text` holds exactly what was persisted on disk.
with open('document.txt', 'r', encoding='utf-8') as f:
  text = f.read()

In [4]:
# document.txt is written as UTF-8, so read it back with the same encoding
# instead of relying on the platform's default encoding.
loader = TextLoader("document.txt", encoding="utf-8")
documents = loader.load()

In [5]:
# Split on single newlines: CharacterTextSplitter's default separator is a
# blank line ("\n\n"), which text extracted from a PDF rarely contains, so the
# default would produce chunks much longer than chunk_size.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=512,
    chunk_overlap=24
)
texts = text_splitter.split_documents(documents)

## Embedding e Vectorstores

In [5]:
import transformers
from torch import cuda, bfloat16
import torch

# LLaMA 2 HF Model Setup
import os
from getpass import getpass

model_id = 'meta-llama/Llama-2-13b-hf'

# Never hard-code access tokens in a notebook: they leak through version
# control and shared copies. Read the token from the HF_TOKEN environment
# variable, or prompt for it interactively when it is not set.
# NOTE(review): the token that used to be embedded here should be revoked.
hf_auth = os.environ.get('HF_TOKEN') or getpass('Hugging Face access token: ')
if not hf_auth:
    raise RuntimeError('A Hugging Face access token is required to download ' + model_id)

# Target device for the input tensors created later in the notebook.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# `token` replaces the deprecated `use_auth_token` keyword.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_auth
)

# trust_remote_code is left at its default: enabling it lets code shipped in
# the model repository run locally, and the Llama architecture is provided by
# transformers itself, so it should not be needed here.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth
)

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    token=hf_auth
)

# Switch to inference mode (disables dropout etc.).
model.eval()

# Wrapper Class
class HuggingFaceLLM:
    """Minimal callable wrapper around a causal language model and its tokenizer.

    Calling an instance with a prompt tokenizes it, runs ``model.generate``
    and returns the decoded first output sequence (special tokens removed).
    """

    def __init__(self, model, tokenizer, device):
        """Store the model, its tokenizer and the device inputs are moved to."""
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def __call__(self, prompt, max_new_tokens=256, temperature=0.7):
        """Generate text for ``prompt``.

        Args:
            prompt: Text passed to the tokenizer.
            max_new_tokens: Upper bound on the number of generated tokens.
            temperature: Sampling temperature. A positive value enables
                sampling; ``0``, a negative value or ``None`` selects greedy
                (deterministic) decoding instead of failing, because
                sampling requires a strictly positive temperature.

        Returns:
            The decoded text of the first sequence returned by ``generate``.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        generation_kwargs = {
            "max_new_tokens": max_new_tokens,
            # Reuse EOS as the padding token id for generation.
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if temperature is not None and temperature > 0:
            generation_kwargs["do_sample"] = True
            generation_kwargs["temperature"] = temperature
        else:
            # Greedy decoding; temperature is not forwarded in this mode.
            generation_kwargs["do_sample"] = False

        # Inference only: no gradients needed.
        with torch.no_grad():
            outputs = self.model.generate(**inputs, **generation_kwargs)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

# Initialize your LLM: wrap the model, tokenizer and device created earlier
# in this cell so the model can be called like a function with a prompt.
llm = HuggingFaceLLM(model, tokenizer, device)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



In [7]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import os
from torch import cuda, bfloat16
import shutil

# Set device: the current CUDA device when one is available, otherwise the CPU.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# 1. Load and split the text
# NOTE: this rebinds `text` (and `text_splitter` below), which earlier cells
# also assign; later cells see the values produced here.
with open('/content/Deep learning dataset.txt.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Split on newlines into chunks of at most ~1000 characters (measured with
# len) with a 200-character overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)
# List of chunk strings that get embedded and indexed below.
text_chunks = text_splitter.split_text(text)

# 2. Create embeddings
# Sentence-transformers model used to embed the text chunks; it is loaded on
# the same device string computed above.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={'device': device}
)

# 3. Create ChromaDB - Modified approach
#
# The store is built in memory. If you need persistence, use this instead
# (kept as a real comment; a bare triple-quoted string inside the try body
# is an expression statement that is evaluated and thrown away):
#
#     persist_dir = "./chroma_store"
#     if os.path.exists(persist_dir):
#         shutil.rmtree(persist_dir)
#
#     vector_db = Chroma.from_texts(
#         texts=text_chunks,
#         embedding=embedding_model,
#         persist_directory=persist_dir
#     )
#     vector_db.persist()
try:
    # Try in-memory first
    vector_db = Chroma.from_texts(
        texts=text_chunks,
        embedding=embedding_model
    )

except Exception as e:
    print(f"Error creating ChromaDB: {e}")
    # Fallback to FAISS if Chroma fails
    # NOTE(review): the fallback presumably needs the faiss package, which is
    # not among the packages installed at the top of this notebook - confirm.
    from langchain.vectorstores import FAISS
    vector_db = FAISS.from_texts(text_chunks, embedding_model)
    print("Using FAISS as fallback")

# 4. Query functions
def get_relevant_chunks(query, k=1):
    """Return the text of the ``k`` indexed chunks most similar to ``query``.

    The lookup runs against the module-level ``vector_db``; the
    ``page_content`` of each hit is joined with a blank line in between.
    """
    hits = vector_db.similarity_search(query, k=k)
    passages = (hit.page_content for hit in hits)
    return "\n\n".join(passages)

def ask_llama(query, max_new_tokens=256, temperature=0.7):
    """Answer ``query`` with the LLM, grounded in the most relevant stored chunk.

    Retrieves context via ``get_relevant_chunks``, builds a prompt, generates
    a continuation with the module-level ``model``/``tokenizer`` and returns
    only the newly generated text.

    Args:
        query: The user's question.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. A positive value enables sampling
            (without ``do_sample=True`` the temperature would be ignored);
            ``0``, a negative value or ``None`` selects greedy decoding.

    Returns:
        The generated answer with surrounding whitespace stripped.
    """
    context = get_relevant_chunks(query)

    # Built line by line so the indentation of this function does not leak
    # into the prompt (the model tends to mimic it in its answer).
    prompt = (
        "Answer based ONLY on this context:\n"
        f"{context}\n"
        "\n"
        f"Question: {query}\n"
        "Answer:"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        # Reuse EOS as the padding token id for generation.
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = temperature
    else:
        generation_kwargs["do_sample"] = False

    # Inference only: no gradients needed.
    with torch.no_grad():
        output = model.generate(**inputs, **generation_kwargs)

    # The returned sequence starts with the prompt tokens; keep only what was
    # generated after them instead of splitting the decoded text on
    # "Answer:", which breaks when the model emits that marker itself.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = output[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

In [8]:
# Example question: retrieve context for it, ask the model and print the answer.
query = "What are deep learning techniques in healthcare?"
answer = ask_llama(query)
print(answer)

In healthcare, deep learning techniques are used to analyze medical images, such as X-rays and MRIs, for disease diagnosis and treatment planning. Deep learning models can also be used for drug discovery, by analyzing large datasets of chemical compounds and identifying potential candidates for further testing. Additionally, deep learning can be used for personalized medicine, by analyzing an individual's genetic information and other biological data to predict their risk of developing certain diseases or their response to certain treatments.

    Overall, deep learning techniques in healthcare have the potential to revolutionize medical decision-making and improve patient outcomes by providing more accurate and efficient diagnoses and treatments.
