# Target Pack Generator 
* Uses MITRE ATT&CK to generate a threat hunt based on the known techniques of a selected group.
    - https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json
* Searches for matching Sigma rules.
    - https://github.com/SigmaHQ/sigma

### Fetch required data

In [None]:
import os
import requests

# Local cache location and upstream source of the ATT&CK Enterprise data file.
file_path = "mitre/enterprise-attack.json"
url = "https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json"

if not os.path.exists(file_path):
    # open() does not create missing parent directories, so make "mitre/"
    # first; otherwise the very first run fails with FileNotFoundError.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # A timeout keeps the cell from hanging indefinitely on a stalled connection.
    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of caching an error page as the
    # data file (which would then be reported as "already exists" next run).
    response.raise_for_status()

    with open(file_path, "wb") as file:
        file.write(response.content)
    # Report success only after the file has been written and closed.
    print("File downloaded successfully.")
else:
    print("File already exists.")

### Load ATT&CK groups from enterprise-attack

In [None]:
from mitreattack.stix20 import MitreAttackData

# Load the local enterprise-attack.json file into a MitreAttackData object;
# later cells query it for groups, techniques and ATT&CK IDs.
mitre_attack_data = MitreAttackData("mitre/enterprise-attack.json")
# Fetch the groups, asking the library to leave out revoked/deprecated entries.
groups = mitre_attack_data.get_groups(remove_revoked_deprecated=True)
print(f"Retrieved {len(groups)} ATT&CK groups.")

### Select group to target

In [None]:
import ipywidgets as widgets

# Names of all loaded groups, in the order they were retrieved.
group_names = [attack_group.name for attack_group in groups]

# Selection list over those names; the chosen name is later read back
# from selected_group.value.
selected_group = widgets.Select(
    description='Select Group:',
    options=group_names,
)
display(selected_group)

In [None]:
# Resolve the name chosen in the widget back to its group object
# (None when no group carries the selected name, e.g. nothing is selected).
group = next((candidate for candidate in groups if candidate.name == selected_group.value), None)
if group is None:
    # Stop with an actionable message instead of an AttributeError on None below.
    raise ValueError(
        "No ATT&CK group is selected - pick one in the selection widget above and re-run this cell."
    )

# 'description' is an optional property of a STIX intrusion-set, so fall back
# to an empty string rather than failing when a group has none.
print(getattr(group, "description", "") + "\n")
techniques_used_by_group = mitre_attack_data.get_techniques_used_by_group(group.id)
print(f"Number of techniques used by {group.name} : {len(techniques_used_by_group)}")

# ATT&CK IDs of the techniques used by the selected group; consumed by the
# Sigma rule search further down.
mitre_techniques_used_by_group = []

for t in techniques_used_by_group:
    if t["object"]:
        technique = t["object"]
        # Look the ID up once and reuse it for both the listing and the result.
        attack_id = mitre_attack_data.get_attack_id(technique.id)
        print(f"* {technique.name} ({attack_id})")
        # Only keep real IDs: a missing ID cannot be searched for.
        if attack_id is not None:
            mitre_techniques_used_by_group.append(attack_id)
    else:
        # Entry without a technique object: show it raw so it is not silently lost.
        print(str(t))


In [None]:
import os
import shutil
from git import Repo
import yaml
import pandas as pd

# URL of the Sigma rule repository that is cloned on first use
SIGMA_REPO_URL = 'https://github.com/SigmaHQ/sigma.git'
# Local folder to clone the repository into; also the folder searched for rules below
LOCAL_FOLDER = 'sigma-rules'

# Clone the repository only if the local folder does not exist yet.
# NOTE: an existing folder is used as-is - it is neither updated nor validated here.
if not os.path.exists(LOCAL_FOLDER):
    Repo.clone_from(SIGMA_REPO_URL, LOCAL_FOLDER)

# Matched Sigma rules, one single-row DataFrame per rule. Kept at module level
# so other cells can still read it; search_sigma_rules() replaces its contents
# on every call instead of appending, so repeated searches do not pile up.
sigma_rules = []


def _read_rule_documents(filepath):
    """Read one rule file and return ``(raw_text, parsed_documents)``.

    Problems are reported with ``print`` and never raised, so one bad file
    cannot abort the whole search. If the file cannot be read, the result is
    ``("", [])``. If YAML parsing fails part-way through, the documents parsed
    before the error are still returned.
    """
    documents = []
    try:
        # Explicit encoding so the result does not depend on the platform default.
        with open(filepath, "r", encoding="utf-8") as file:
            file_content = file.read()
    except FileNotFoundError:
        print(f"File not found: {filepath}")
        return "", documents
    except (OSError, UnicodeDecodeError) as e:
        print(f"An error occurred while processing {filepath}: {e}")
        return "", documents

    try:
        # safe_load_all handles files that contain several YAML documents.
        for data in yaml.safe_load_all(file_content):
            documents.append(data)
    except (yaml.YAMLError, ValueError) as e:
        # ValueError covers scalar values the loader cannot convert.
        print(f"Error parsing YAML in {filepath}: {e}")
    return file_content, documents


def search_sigma_rules(techniques, local_folder):
    """
    Search for Sigma rules in the specified local folder that match the given techniques.

    A rule document matches when any technique ID occurs in one of its tags
    (compared case-insensitively, because Sigma tags are written in lower case,
    e.g. ``attack.t1059``, while ATT&CK IDs are upper case) or occurs verbatim
    anywhere in the text of the file containing it.

    The directory tree is walked once and every ``.yml`` file is read and
    parsed once, regardless of how many techniques are given. Each matching
    rule is reported once, even if it matches several techniques.

    Parameters:
    techniques (list): A list of technique IDs to search for in the Sigma rules.
        Empty/None entries and duplicates are ignored.
    local_folder (str): The local directory to search within.

    Returns:
    pd.DataFrame: A DataFrame containing the matched Sigma rules, one row per
        rule (empty if nothing matched).
    """
    # Drop empty/None IDs (an empty string is a substring of everything) and
    # duplicates, keeping the original order.
    wanted = list(dict.fromkeys(str(t) for t in (techniques or ()) if t))
    wanted_lower = [t.lower() for t in wanted]

    matched_rules = []
    for root, dirs, files in os.walk(local_folder):
        dirs.sort()  # make the traversal (and so the row order) deterministic
        for filename in sorted(files):
            if not filename.endswith(".yml"):
                continue
            filepath = os.path.join(root, filename)
            file_content, documents = _read_rule_documents(filepath)

            # Verbatim match against the raw file text; computed once per file.
            content_hit = any(t in file_content for t in wanted)

            for data in documents:
                # Empty documents load as None and some files are not
                # mappings at all; neither can be a rule.
                if not isinstance(data, dict):
                    continue
                tags = data.get("tags") or []
                if not isinstance(tags, (list, tuple)):
                    tags = [tags]
                tag_hit = any(
                    t in str(tag).lower() for tag in tags for t in wanted_lower
                )
                if tag_hit or content_hit:
                    matched_rules.append(data)

    print(f"Total Sigma rules matched: {len(matched_rules)}")

    # Refresh the module-level list in place so existing references stay valid.
    sigma_rules[:] = [pd.DataFrame([rule]) for rule in matched_rules]
    return pd.concat(sigma_rules, ignore_index=True) if sigma_rules else pd.DataFrame()


# Search the local Sigma checkout (LOCAL_FOLDER) for rules matching the ATT&CK IDs
# collected for the selected group; the resulting DataFrame feeds the cells below.
sigma_detection_rules = search_sigma_rules(mitre_techniques_used_by_group, LOCAL_FOLDER)

In [None]:
# Create visualizations for sigma_detection_rules DataFrame
import matplotlib.pyplot as plt
import seaborn as sns

def _has_column(frame, column):
    """Return True if ``column`` exists in ``frame``; otherwise print a notice and return False."""
    if column in frame.columns:
        return True
    print(f"Column '{column}' not found in the DataFrame.")
    return False


def _show_bar_chart(counts, title, xlabel):
    """Show a bar chart with one bar per index entry of the ``counts`` Series."""
    plt.figure(figsize=(12, 6))
    sns.barplot(x=counts.index, y=counts.values, palette='viridis')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.show()


def _show_count_chart(frame, column, title, xlabel):
    """Show a count plot of the values found in ``frame[column]``."""
    plt.figure(figsize=(12, 6))
    sns.countplot(data=frame, x=column, palette='viridis')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.show()


# Flatten the nested 'logsource' mapping into two plain columns for plotting.
# Skipped when no matched rule provided a 'logsource' (e.g. an empty result),
# in which case plots 3 and 4 report the missing column instead of failing.
if 'logsource' in sigma_detection_rules.columns:
    sigma_detection_rules['logsource_category'] = sigma_detection_rules['logsource'].apply(lambda x: x.get('category') if isinstance(x, dict) else None)
    sigma_detection_rules['logsource_product'] = sigma_detection_rules['logsource'].apply(lambda x: x.get('product') if isinstance(x, dict) else None)

# Plot 1: Bar plot for Sigma Detection Rule Status Counts
if _has_column(sigma_detection_rules, 'status'):
    status_counts = sigma_detection_rules['status'].value_counts()
    _show_bar_chart(status_counts, 'Sigma Detection Rule Status Counts', 'Status')

# Plot 2: Bar plot for Distribution of Rules by Tags
if _has_column(sigma_detection_rules, 'tags'):
    # Each rule carries a list of tags; explode() yields one row per tag.
    tags_counts = sigma_detection_rules['tags'].explode().value_counts()
    _show_bar_chart(tags_counts, 'Distribution of Rules by Tags', 'Tags')

# Plot 3: Count plot for Rules by Category
if _has_column(sigma_detection_rules, 'logsource_category'):
    _show_count_chart(sigma_detection_rules, 'logsource_category', 'Rules Count by Logsource Category', 'Logsource Category')

# Plot 4: Count plot for Rules by Product
if _has_column(sigma_detection_rules, 'logsource_product'):
    _show_count_chart(sigma_detection_rules, 'logsource_product', 'Rules Count by Logsource Product', 'Logsource Product')

# Plot 5: PIE chart the column 'level' in sigma_detection_rules
if _has_column(sigma_detection_rules, 'level'):
    level_counts = sigma_detection_rules['level'].value_counts()
    plt.figure(figsize=(12, 6))
    plt.pie(level_counts, labels=level_counts.index, autopct='%1.1f%%', startangle=140, colors=sns.color_palette('viridis', len(level_counts)))
    plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
    plt.title('Distribution of Rules by Level')
    plt.show()


In [None]:
# Table of Sigma detection rule titles: selects only the 'title' column of the
# matched rules as a one-column DataFrame. As the last expression of the cell,
# it is what the notebook displays as the cell's output.
sigma_detection_rules[['title']]