## 1. Install Ollama / Chat with the model in the terminal

## 2. Install the requirements

In [None]:
import chromadb
import pandas as pd
from llama_index.core import (
    PromptTemplate,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.chroma import ChromaVectorStore

### Ollama

In [None]:
# Language model served through Ollama.
# Alternative model: Ollama(model="llama3.2:1b", request_timeout=120.0)
llm = Ollama(
    model="llama3",
    request_timeout=120.0,
)

# Register it as the LlamaIndex-wide default LLM.
Settings.llm = llm

In [None]:
# Smoke-test the model directly (no retrieval involved): stream the
# completion and echo each piece as soon as it arrives.
prompt = "What is EPFL?"
response = llm.stream_complete(prompt)

for chunk in response:
    # `delta` holds only the newly generated text, so print without a newline.
    print(chunk.delta, end="")

EPFL stands for École Polytechnique Fédérale de Lausanne, which is a Swiss federal institute of technology located in Lausanne, Switzerland. It is one of the two Swiss Federal Institutes of Technology, the other being ETH Zurich.

EPFL is a research-focused institution that offers undergraduate and graduate programs in various fields such as engineering, natural sciences, mathematics, computer science, and management. It has a strong reputation for its research excellence, innovation, and entrepreneurship.

EPFL is known for its interdisciplinary approach to education and research, with a focus on applied sciences and technology. The institute has a strong industry connection, with many partnerships and collaborations with leading companies, startups, and research institutions around the world.

Some of the notable programs and specializations offered by EPFL include:

* Engineering: mechanical engineering, electrical engineering, computer science, etc.
* Natural Sciences: physics, che

### Embedding model

In [None]:
# Embedding model loaded through the HuggingFace integration.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-base-en-v1.5",
)

# Register it as the LlamaIndex-wide default embedding model.
Settings.embed_model = embed_model

### Reading documents with LlamaIndex

In [None]:
# Load every file found under ./docs (sub-folders included) as documents.
documents = SimpleDirectoryReader(
    input_dir="./docs",
    recursive=True,
).load_data()

In [None]:
# Peek at the first five loaded documents (text plus file metadata).
documents[:5]

[Document(id_='e4f4fc99-ccb2-48a3-ad4c-0bc368e0c3ce', embedding=None, metadata={'page_label': '1', 'file_name': 'red-blue-teams.pdf', 'file_path': '/Users/victor.gillioz/Documents/Projects/LauzHack/local-rag-lauzhack-workshop/docs/red-blue-teams.pdf', 'file_type': 'application/pdf', 'file_size': 2587768, 'creation_date': '2024-11-08', 'last_modified_date': '2024-11-08'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='3/23/24, 1:18 PM The Role of Red Team and Blue Team in Cybersecurity\nhttps://maddevs.io/blog/red-team-vs-blue-team-in-cybersecurity/ 1/245.0 article rate Created:Sep 20, 202314 min read\nRed Team vs. Blue\nTeam in Cybersecurity\nMekan Bairyev\nCybersecurity LeadCybersecurity\nCybersecurity is becoming increasingly relevant in an era of eve

In [None]:
# Other ways to create documents? Other splitting methods?

### ChromaDB

In [None]:
# Ephemeral (in-memory) Chroma client
chroma_client = chromadb.EphemeralClient()
# get_or_create keeps the cell re-runnable; the strict variant would be:
# chroma_collection = chroma_client.create_collection("mydocs")
chroma_collection = chroma_client.get_or_create_collection(name="mydocs")

# Expose the collection to LlamaIndex as a vector store ...
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
# ... and wrap it in the storage context used when building the index.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# To customise chunk size and splitting, see the node parser guide:
# https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/

In [None]:
# Parse the documents into nodes, embed them and store the vectors in the
# Chroma-backed storage context; progress bars are shown for both steps.
index = VectorStoreIndex.from_documents(
    documents=documents,
    storage_context=storage_context,
    show_progress=True,
    embed_model=embed_model,
)

Parsing nodes: 100%|██████████| 46/46 [00:00<00:00, 3475.61it/s]
Generating embeddings: 100%|██████████| 46/46 [00:01<00:00, 29.41it/s]


In [None]:
# Fetch what is currently stored in the Chroma collection and list the
# fields available in the returned payload.
result = chroma_collection.get()
result.keys()

dict_keys(['ids', 'embeddings', 'metadatas', 'documents', 'uris', 'data', 'included'])

In [None]:
# Tabulate the stored entries: one row per entry, with its id, its text and
# its metadata dictionary.
data = dict(
    IDs=result["ids"],
    Documents=result["documents"],
    Metadata=result["metadatas"],
)
df = pd.DataFrame(data)

In [None]:
# Show the first rows of the table built from the collection contents.
df.head()

Unnamed: 0,IDs,Documents,Metadata
0,04631968-0389-4189-8c01-323cc685f3f4,"3/23/24, 1:17 PM The Role of Red Teaming in Re...","{'_node_content': '{""id_"": ""04631968-0389-4189..."
1,07ef73eb-4139-485c-af04-c6ca0d16553d,"3/23/24, 1:17 PM The Role of Red Teaming in Re...","{'_node_content': '{""id_"": ""07ef73eb-4139-485c..."
2,09595b36-e757-46ce-82b1-94e45619273e,"3/23/24, 1:18 PM The Role of Red Team and Blue...","{'_node_content': '{""id_"": ""09595b36-e757-46ce..."
3,0a108b75-3da0-4e17-b44b-b7157ee83774,"3/23/24, 1:08 PM Red teaming 101: An introduct...","{'_node_content': '{""id_"": ""0a108b75-3da0-4e17..."
4,121495c8-616c-41d5-a0a3-5e76029fc87e,"3/23/24, 1:18 PM The Role of Red Team and Blue...","{'_node_content': '{""id_"": ""121495c8-616c-41d5..."


In [None]:
# Metadata fields stored alongside the first entry of the collection.
result["metadatas"][0].keys()

dict_keys(['_node_content', '_node_type', 'creation_date', 'doc_id', 'document_id', 'file_name', 'file_path', 'file_size', 'file_type', 'last_modified_date', 'page_label', 'ref_doc_id'])

## Indexer and engines

### Retrieve

In [None]:
# Retriever configured to return the 3 most similar nodes for a query.
retriever = index.as_retriever(similarity_top_k=3)

# Inspect the raw retrieval result (scored nodes); no answer is generated here.
retriever.retrieve("What is Red Teaming?")

[NodeWithScore(node=TextNode(id_='3de935a5-f100-4af3-87d9-2971ed12c653', embedding=None, metadata={'page_label': '2', 'file_name': 'red-team-regulatory.pdf', 'file_path': '/Users/victor.gillioz/Documents/Projects/LauzHack/local-rag-lauzhack-workshop/docs/red-team-regulatory.pdf', 'file_type': 'application/pdf', 'file_size': 4589421, 'creation_date': '2024-11-08', 'last_modified_date': '2024-11-08'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='fab49662-8d3b-400c-96c2-d789ab28787b', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '2', 'file_name': 'red-team-regulatory.pdf', 'file_path': '/Users/victor.gillioz/Documents/Projects/LauzHack/local-rag-lauzhack-workshop/docs/red-team-regulatory.pd

### Query

In [None]:
# Build a query engine on top of the index: it retrieves context nodes and
# has the LLM write an answer from them.
query_engine = index.as_query_engine(
    llm=llm,
    # Number of nodes to retrieve per query. The keyword must be spelled
    # exactly `similarity_top_k`: a misspelled keyword is silently ignored
    # and the library default is used instead.
    similarity_top_k=3,
)

response = query_engine.query("What is Red Teaming?")

In [None]:
# Print the generated answer text.
print(response)

A structured and strategic approach to evaluating an organization's security measures. It simulates real-world cyber threats and assesses an organization's overall preparedness by probing for weaknesses and vulnerabilities within its security systems and procedures. A red team consists of cybersecurity experts who play the role of adversaries, mimicking the tactics, techniques, and procedures of real-world malicious threat actors to highlight gaps in the security response.


In [None]:
# List the source nodes (id and text excerpt) the answer was built from.
print(response.get_formatted_sources())

> Source (Doc id: 3de935a5-f100-4af3-87d9-2971ed12c653): 3/23/24, 1:17 PM The Role of Red Teaming in Regulatory Compliance and Risk Management | by John N...

> Source (Doc id: b86dfac4-2359-4f0d-aed8-e43d2d7a1f49): 3/23/24, 1:08 PM Red teaming 101: An introduction to red teaming and how it improves your cyber s...


In [None]:
# Query engine with streaming enabled, so the answer can be consumed
# incrementally instead of only after the full completion is available.
query_engine = index.as_query_engine(
    llm=llm,
    # Must be spelled exactly `similarity_top_k`; a misspelled keyword is
    # silently ignored and the library default is used instead.
    similarity_top_k=3,
    streaming=True,
)

response = query_engine.query("What is Red Teaming?")

In [None]:
# Consume the streaming response and print the answer as it is generated.
response.print_response_stream()

A structured and strategic approach to evaluating an organization's security measures, simulating real-world cyber threats, and assessing overall preparedness. It involves a team of cybersecurity experts playing the role of adversaries, probing for weaknesses and vulnerabilities within an organization's security systems and procedures.

In [None]:
# Re-create the streaming query engine and issue the query again.
query_engine = index.as_query_engine(
    llm=llm,
    # Must be spelled exactly `similarity_top_k`; a misspelled keyword is
    # silently ignored and the library default is used instead.
    similarity_top_k=3,
    streaming=True,
)

response = query_engine.query("What is Red Teaming?")

In [None]:
# Custom prompt template for the answer-synthesis step.
# `{context_str}` and `{query_str}` are the placeholders the query engine
# fills with the retrieved context and the user question respectively.
template = (
    "Imagine you are an advanced AI expert in cyber security laws, with access to all current and relevant legal documents, "
    "case studies, and expert analyses. Your goal is to provide insightful, accurate, and concise answers to questions in this domain.\n\n"
    "Here is some context related to the query:\n"
    "-----------------------------------------\n"
    "{context_str}\n"
    "-----------------------------------------\n"
    "Considering the above information, please respond to the following inquiry with detailed references to applicable laws, "
    "precedents, or principles where appropriate:\n\n"
    "Question: {query_str}\n\n"
    "Answer succinctly, starting with the phrase 'According to cyber security law,' and ensure your response is understandable to someone without a legal background."
)
qa_template = PromptTemplate(template)


# Streaming query engine that uses the custom template instead of the
# default question-answering prompt.
query_engine = index.as_query_engine(
    llm=llm,
    # Must be spelled exactly `similarity_top_k`; a misspelled keyword is
    # silently ignored and the library default is used instead.
    similarity_top_k=3,
    streaming=True,
    text_qa_template=qa_template,
)

response = query_engine.query("What is Red Teaming?")

In [None]:
# Consume the streaming response and print the answer as it is generated.
response.print_response_stream()

According to cyber security law, Red Teaming refers to a structured and strategic approach to evaluating an organization's security measures by simulating real-world cyber threats and assessing its overall preparedness. This involves a team of cybersecurity experts playing the role of adversaries, probing for weaknesses and vulnerabilities within an organization's security systems and procedures.

Red Teaming services are designed to simulate how an organization could be targeted in a real-world attack, testing its capabilities to detect and respond to such an attack. The process is informed by threat intelligence reports, identifying relevant threat actors or current trends seen in the wild (such as human-operated ransomware campaigns).

[References: 

* John Nathan's article on "The Role of Red Teaming in Regulatory Compliance and Risk Management" (Medium)
* PwC UK's insight paper on "What is Red Teaming?" (PwC UK)

Precedents: The concept of Red Teaming is rooted in the idea that or

In [None]:
# Chat mode (not presented)

### Persistent DB

In [None]:
# Persistent Chroma client: data is kept on disk under ./chroma
chroma_client = chromadb.PersistentClient(path="./chroma")
# Reuse the "mydocs" collection if it already exists, otherwise create it.
chroma_collection = chroma_client.get_or_create_collection(name="mydocs")

In [None]:
# Vector store wrapping the current Chroma collection
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

In [None]:
# Build the index directly on top of the existing vector store instead of
# from a list of documents.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model,
)

In [None]:
# Load every file found under ./docs (sub-folders included) as documents.
documents = SimpleDirectoryReader(
    input_dir="./docs",
    recursive=True,
).load_data()

In [None]:
# Add each loaded document to the index, one at a time.
# NOTE(review): the index sits on the on-disk Chroma collection, so re-running
# this cell presumably stores the same documents again — confirm whether
# de-duplication (or clearing the store first) is needed.
for doc in documents:
    index.insert(doc)

In [None]:
# Clear
# index.vector_store.clear()

In [None]:
# Custom prompt template for the answer-synthesis step.
# `{context_str}` and `{query_str}` are the placeholders the query engine
# fills with the retrieved context and the user question respectively.
template = (
    "Imagine you are an advanced AI expert in cyber security laws, with access to all current and relevant legal documents, "
    "case studies, and expert analyses. Your goal is to provide insightful, accurate, and concise answers to questions in this domain.\n\n"
    "Here is some context related to the query:\n"
    "-----------------------------------------\n"
    "{context_str}\n"
    "-----------------------------------------\n"
    "Considering the above information, please respond to the following inquiry with detailed references to applicable laws, "
    "precedents, or principles where appropriate:\n\n"
    "Question: {query_str}\n\n"
    "Answer succinctly, starting with the phrase 'According to cyber security law,' and ensure your response is understandable to someone without a legal background."
)
qa_template = PromptTemplate(template)


# Streaming query engine that uses the custom template instead of the
# default question-answering prompt.
query_engine = index.as_query_engine(
    llm=llm,
    # Must be spelled exactly `similarity_top_k`; a misspelled keyword is
    # silently ignored and the library default is used instead.
    similarity_top_k=3,
    streaming=True,
    text_qa_template=qa_template,
)

In [None]:
# Run the question through the current query engine.
response = query_engine.query("What is Red Teaming?")

In [None]:
# Consume the streaming response and print the answer as it is generated.
response.print_response_stream()

According to cybersecurity law, Red Teaming refers to a structured and strategic approach to evaluating an organization's security measures by simulating real-world cyber threats and assessing overall preparedness. This involves a team of cybersecurity experts playing the role of adversaries, probing for weaknesses and vulnerabilities within an organization's security systems and procedures.

(Citations: John Nathan, "The Role of Red Teaming in Regulatory Compliance and Risk Management" (2023), Medium; relevant legal documents and precedents not explicitly stated).

In simpler terms, Red Teaming is a simulated cyberattack designed to test the robustness of an organization's cybersecurity measures. It goes beyond traditional security assessments by mimicking real-world attacks, helping organizations identify vulnerabilities and improve their overall preparedness.

Please note that this response focuses on the context provided, which primarily discusses the concept of Red Teaming in the 