<a href="https://colab.research.google.com/github/fritzmartin003/RAG-System-Projekt/blob/main/Hugging_Face_Fritz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Notwendige Bibliotheken installieren
!pip install faiss-cpu transformers sentence-transformers pymupdf numpy scipy


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting pymupdf
  Downloading pymupdf-1.25.3-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==

In [2]:
# Imports
import fitz  # PyMuPDF
import numpy as np
import faiss
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from sentence_transformers import SentenceTransformer

In [3]:
# Extract the text of a PDF
def extract_text_from_pdf(pdf_path):
    """Return the plain text of all pages of a PDF as one string.

    Args:
        pdf_path: Location of the PDF; handed unchanged to ``fitz.open``.

    Returns:
        The text of every page in document order, each page followed by a
        newline character.
    """
    with fitz.open(pdf_path) as doc:
        page_texts = [page.get_text("text") for page in doc]
    # Terminate every page with a newline, then glue the pages together.
    return "".join(f"{page_text}\n" for page_text in page_texts)

# Source document of the knowledge base (path is relative to the working directory).
pdf_path = "SakowskiBuch.pdf"
# Full text of the book; this is the input of the chunking step.
pdf_text = extract_text_from_pdf(pdf_path=pdf_path)


In [4]:
# Split text into chunks
def split_text(text, chunk_size=500, overlap=100):
    """Split ``text`` into fixed-size character chunks that overlap.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so the
    last ``overlap`` characters of one chunk are repeated at the start of the
    next. The final chunks may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by neighbouring chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        The chunks in document order; an empty list for empty ``text``.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range. Without
            this check ``overlap >= chunk_size`` would never advance the
            start position (endless loop) and a negative ``overlap`` would
            silently skip parts of the text.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]

# Chunk the whole book with the default chunk size and overlap, then report the count.
chunks = split_text(text=pdf_text)
print(f"✅ PDF in {len(chunks)} Chunks unterteilt!")


✅ PDF in 1559 Chunks unterteilt!


In [12]:
from google.colab import userdata
# NOTE(review): this looks like the name of a Colab secret rather than a token;
# neither it nor the imported `userdata` is used in the visible code - confirm
# whether `userdata.get(...)` was intended.
HuggingFaceAPIKey = "HF2"
# Embeddings via a Hugging Face sentence-transformer model
embedding_model = SentenceTransformer(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2")

def get_embedding(text):
    """Encode ``text`` with the module-level ``embedding_model``.

    Args:
        text: Input that is forwarded unchanged to ``embedding_model.encode``.

    Returns:
        The result of ``encode`` called with ``convert_to_numpy=True``.
    """
    embedding = embedding_model.encode(text, convert_to_numpy=True)
    return embedding

# Embed all chunks with ONE encode call: the encoder accepts a list of strings and
# returns one row per chunk, so it can batch the work instead of running a separate
# forward pass for each chunk. float32 is made explicit because FAISS stores float32.
chunk_embeddings = np.asarray(get_embedding(chunks), dtype=np.float32)


In [13]:
# Set up the FAISS vector index.
# The second axis of the embedding matrix is the vector size the index is created with.
dimension = chunk_embeddings.shape[1]
# Flat index that compares vectors by L2 distance.
index = faiss.IndexFlatL2(dimension)
# Row i of the index corresponds to chunks[i]; the retrieval step relies on this ordering.
index.add(chunk_embeddings)
print("✅ FAISS Vektordatenbank erstellt!")


✅ FAISS Vektordatenbank erstellt!


In [14]:
# Similarity search in FAISS
def get_relevant_chunks(query, top_k=3):
    """Return the text chunks whose embeddings are closest to ``query``.

    Args:
        query: Question to search for; embedded with ``get_embedding``.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` entries of ``chunks`` in the order returned by the
        index search. Fewer entries are returned when the index holds fewer
        than ``top_k`` vectors.
    """
    # The index expects a 2-D batch of queries, hence the (1, dim) reshape.
    query_embedding = get_embedding(query).reshape(1, -1)
    _, indices = index.search(query_embedding, top_k)
    # FAISS fills missing results with the label -1. Indexing `chunks[-1]` would
    # silently return the LAST chunk as if it were a hit, so drop those labels.
    return [chunks[i] for i in indices[0] if i >= 0]


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import os

!huggingface-cli login

os.environ["HUGGINGFACE_API_KEY"] = "HF2"
model_name = "mistralai/Mistral-7B-v0.1"  # Alternativ: "meta-llama/Llama-2-7b-chat-hf"

# Tokenizer & Modell mit explizitem Token laden
pipe = pipeline("text-generation", model="mistralai/Mistral-7B-v0.1")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")


def generate_answer(query):
    relevant_chunks = get_relevant_chunks(query, top_k=3)
    context = "\n".join(relevant_chunks)
    prompt = f"Beantworte die Frage basierend auf diesem Kontext:\n\n{context}\n\nFrage: {query}\nAntwort:"

    response = pipe(prompt, max_length=512, truncation=True)
    return response[0]["generated_text"]

# 🔥 Test
frage = "Welche Schutzrechte gibt es für werdende Mütter im Arbeitsrecht?"
antwort = generate_answer(frage)
print("Antwort:", antwort)

# Tokenizer & Modell mit explizitem Token laden
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HUGGINGFACE_API_KEY)
model = AutoModelForCausalLM.from_pretrained(model_name, token=HUGGINGFACE_API_KEY)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def generate_answer(query):
    relevant_chunks = get_relevant_chunks(query, top_k=3)
    context = "\n".join(relevant_chunks)
    prompt = f"Beantworte die Frage basierend auf diesem Kontext:\n\n{context}\n\nFrage: {query}\nAntwort:"

    response = pipe(prompt, max_length=512, truncation=True)
    return response[0]["generated_text"]

# 🔥 Test
frage = "Welche Schutzrechte gibt es für werdende Mütter im Arbeitsrecht?"
antwort = generate_answer(frage)
print("Antwort:", antwort)



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineG

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Smoke test: ask one question and print the generated answer.
frage = "Welche Schutzrechte gibt es für werdende Mütter im Arbeitsrecht?"
antwort = generate_answer(query=frage)
print("Antwort:", antwort)