# ATT&CK Knowledge Graph Exploration

This notebook walks through building and querying the MITRE ATT&CK knowledge graph.

## Setup

First, let's import the modules and set up paths.

In [None]:
import sys
from pathlib import Path

# Make the repository root importable so the `src.*` imports in later cells
# resolve when the notebook's working directory is one level below it.
sys.path.insert(0, '..')

# Data locations shared by the remaining cells.
DATA_DIR = Path('../data')
GRAPH_DIR = DATA_DIR / 'graph'
VECTOR_DIR = DATA_DIR / 'vectors'

## 1. Download STIX Data

Download the ATT&CK STIX bundle from GitHub.

In [None]:
from src.ingest.download import download_attack_data, load_stix_bundle, print_stix_summary

# Download (uses cache if already downloaded)
# NOTE(review): presumably returns the local path of the bundle file -- confirm
# against src.ingest.download.
stix_file = download_attack_data(DATA_DIR)

# Load and inspect
# `bundle` is reused by the later cells that group objects by type and convert
# the data to RDF.
bundle = load_stix_bundle(stix_file)
print_stix_summary(bundle)

## 2. Explore STIX Structure

Let's look at what a STIX technique object looks like.

In [None]:
import json

from src.ingest.download import get_objects_by_type

# Bucket the bundle's objects by type and pick out the 'attack-pattern' bucket.
by_type = get_objects_by_type(bundle)
techniques = by_type['attack-pattern']

# Take the first technique whose name mentions Password Spraying; stays None
# when nothing matches.
password_spraying = None
for candidate in techniques:
    if 'Password Spraying' in candidate.get('name', ''):
        password_spraying = candidate
        break

# Pretty-print the raw object so its structure is easy to read.
if password_spraying:
    print(json.dumps(password_spraying, indent=2))

## 3. Convert to RDF

Convert the STIX bundle to RDF triples.

In [None]:
from src.ingest.stix_to_rdf import StixToRdfConverter

# Destination of the Turtle file; the graph-loading cell reads it back from here.
ttl_file = DATA_DIR / 'attack.ttl'

# Convert the loaded bundle, then have the converter write its result to disk.
# NOTE(review): `save` takes only the path, so it presumably serializes the
# graph produced by the preceding `convert` call -- confirm in StixToRdfConverter.
converter = StixToRdfConverter()
graph = converter.convert(bundle)
converter.save(ttl_file)

## 4. Query with SPARQL

Load the saved Turtle file into an in-memory graph, then run some SPARQL queries and helper lookups against it.

In [None]:
from src.store.graph import AttackGraph

# Create the graph wrapper (in-memory for exploration) and fill it from the
# Turtle file written by the conversion step.
attack_graph = AttackGraph()
attack_graph.load_from_file(ttl_file)

# Print every statistic the graph reports, one indented "name: value" per line.
stats = attack_graph.get_stats()
print("Knowledge Graph Stats:")
for stat_name, stat_value in stats.items():
    print(f"  {stat_name}: {stat_value}")

In [None]:
# Query: What techniques are in the Credential Access tactic?
#
# The tactic is written as a full IRI on purpose: in SPARQL 1.1 the local part
# of a prefixed name cannot contain an unescaped "/", so
# `attack:tactic/credential-access` does not parse as a single term and the
# query is rejected. The IRI below is exactly what that prefixed name was meant
# to expand to under the `attack:` prefix.
# NOTE(review): assumes the converter mints tactic IRIs as
# <https://attack.mitre.org/tactic/<shortname>> -- confirm in stix_to_rdf.
attack_graph.print_query_results("""
PREFIX attack: <https://attack.mitre.org/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?id ?name WHERE {
    ?technique a attack:Technique ;
               attack:tactic <https://attack.mitre.org/tactic/credential-access> ;
               attack:attackId ?id ;
               rdfs:label ?name .
}
ORDER BY ?id
LIMIT 20
""", title="Credential Access Techniques")

In [None]:
# Which groups does the graph link to Password Spraying (T1110.003)?
# The header shows the total; only the first ten entries are listed.
groups = attack_graph.get_groups_using_technique('T1110.003')
print(f"\nGroups using Password Spraying (T1110.003): {len(groups)}")
for group in groups[:10]:
    print(f"  - {group['name']} ({group['attack_id']})")

In [None]:
# Which mitigations does the graph link to Password Spraying (T1110.003)?
# Every returned mitigation is listed, with its name and ATT&CK ID.
mitigations = attack_graph.get_mitigations_for_technique('T1110.003')
print(f"\nMitigations for Password Spraying: {len(mitigations)}")
for mitigation in mitigations:
    print(f"  - {mitigation['name']} ({mitigation['attack_id']})")

In [None]:
# Which techniques does the graph attribute to APT29 (group G0016)?
apt29_techniques = attack_graph.get_techniques_for_group('G0016')
technique_count = len(apt29_techniques)
print(f"\nAPT29 Techniques: {technique_count}")

# List the first 15 and summarize whatever is left over.
for technique in apt29_techniques[:15]:
    print(f"  - {technique['name']} ({technique['attack_id']})")
remaining = technique_count - 15
if remaining > 0:
    print(f"  ... and {remaining} more")

## 5. Build Vector Store

Generate embeddings for semantic search.

In [None]:
from src.ingest.embeddings import build_vector_store

# Build vector store (this may take a minute)
# Takes the loaded graph and the vector directory; the semantic-search and
# hybrid cells below are pointed at the same VECTOR_DIR.
vector_store = build_vector_store(attack_graph, VECTOR_DIR)

## 6. Semantic Search

Find techniques using natural language.

In [None]:
from src.store.vectors import SemanticSearch

# Point the searcher at the directory passed to build_vector_store above.
searcher = SemanticSearch(VECTOR_DIR)

# Search for techniques
# The query is free text rather than an ATT&CK ID or exact technique name.
searcher.print_search_results("credential stuffing against web applications")

In [None]:
# Search with a finding description
# The multi-line finding text is passed as the query string unchanged.
searcher.print_search_results("""
The OWA instance accepts basic authentication, enabling password 
spraying attacks without triggering conditional access policies.
""")

In [None]:
# Search for lateral movement techniques
# The query describes the behavior without naming the tactic itself.
searcher.print_search_results("using stolen credentials to access other systems")

## 7. Hybrid Queries

Combine semantic search with graph queries.

In [None]:
from src.query.hybrid import HybridQueryEngine

# NOTE(review): no visible cell writes anything to GRAPH_DIR (the Turtle file is
# saved directly under DATA_DIR) -- confirm what HybridQueryEngine expects to
# find in that directory.
hybrid = HybridQueryEngine(GRAPH_DIR, VECTOR_DIR)

# Run one free-text query; each returned technique carries its tactics, groups
# and mitigations alongside the similarity score.
result = hybrid.query("password spraying attacks")

print(f"Found {len(result.techniques)} techniques:\n")
for hit in result.techniques:
    print(f"\n{hit.name} ({hit.attack_id}) - Similarity: {hit.similarity:.3f}")
    tactic_names = ', '.join(hit.tactics)
    print(f"  Tactics: {tactic_names}")
    # Groups and mitigations are optional and capped at five and three names.
    if hit.groups:
        group_names = ', '.join(group['name'] for group in hit.groups[:5])
        print(f"  Used by: {group_names}")
    if hit.mitigations:
        mitigation_names = ', '.join(
            mitigation['name'] for mitigation in hit.mitigations[:3]
        )
        print(f"  Mitigated by: {mitigation_names}")

In [None]:
# Map a free-text finding to techniques, then to the mitigations that cover them.
finding = """
The Azure AD tenant allows password spray attacks due to:
- No smart lockout policy configured
- Legacy authentication protocols enabled
- No conditional access policies blocking risky sign-ins
"""

recommendations = hybrid.find_defenses_for_finding(finding)

# Techniques matched to the finding, with similarity to two decimals.
print("Matched Techniques:")
for matched in recommendations['techniques']:
    print(f"  - {matched['name']} ({matched['attack_id']}) [{matched['similarity']:.2f}]")

# Each mitigation, followed by the techniques it addresses.
print("\nRecommended Mitigations:")
for mitigation in recommendations['recommended_mitigations']:
    print(f"  - {mitigation['name']} ({mitigation['attack_id']})")
    addressed = ', '.join(mitigation['addresses_techniques'])
    print(f"    Addresses: {addressed}")

## 8. Threat Context

Get the full context for a single technique: its description, the threat actors that use it, and similar techniques.

In [None]:
# Fetch the combined context for Password Spraying (T1110.003) and print its
# name, description, threat actors and similar techniques.
context = hybrid.get_threat_context('T1110.003')

print(f"Technique: {context['technique']['name']}")

# Show at most 500 characters of the description. The ellipsis is appended only
# when text was actually cut, so a short description is not made to look
# truncated.
# NOTE(review): assumes the description is a string -- confirm.
description = context['technique']['description']
suffix = "..." if len(description) > 500 else ""
print(f"\nDescription: {description[:500]}{suffix}")

# The header shows the total count; only the first ten actors are listed.
print(f"\nThreat Actors ({len(context['threat_actors'])}):\n")
for actor in context['threat_actors'][:10]:
    print(f"  - {actor['name']} ({actor['attack_id']})")

# Plain string: there is nothing to interpolate in this header.
print("\nSimilar Techniques:")
for sim in context['similar_techniques']:
    print(f"  - {sim['name']} ({sim['attack_id']}) - {sim['similarity']:.3f}")

## Next Steps

- Add LLM integration for natural language Q&A
- Build CLI for interactive use
- Create API for Rails integration