add elasticsearch support, add --depth to git clone commands + other improvements #147

Merged
merged 1 commit into from Aug 13, 2020
5 changes: 4 additions & 1 deletion Dockerfile
@@ -4,6 +4,7 @@ ENV PYTHONUNBUFFERED 1
ENV DJANGO_SETTINGS_MODULE intel_owl.settings
ENV PYTHONPATH /opt/deploy/intel_owl
ENV LOG_PATH /var/log/intel_owl
ENV ELASTICSEARCH_DSL_VERSION 7.1.4

RUN mkdir -p ${LOG_PATH} \
${LOG_PATH}/django ${LOG_PATH}/uwsgi \
@@ -21,7 +22,9 @@ RUN pip3 install --upgrade pip
COPY requirements.txt $PYTHONPATH/requirements.txt
WORKDIR $PYTHONPATH

RUN pip3 install --compile -r requirements.txt
RUN pip3 install --no-cache-dir --compile -r requirements.txt
# install the appropriate version of django-elasticsearch-dsl as specified by the user
RUN pip3 install --no-cache-dir django-elasticsearch-dsl==${ELASTICSEARCH_DSL_VERSION}

COPY . $PYTHONPATH

6 changes: 3 additions & 3 deletions api_app/api.py
@@ -245,9 +245,9 @@ def send_analysis_request(request):

# save the arrived data plus new params into a new job object
serializer.save(**params)
job_id = serializer.data.get("id", "")
job_id = serializer.data.get("id", None)
md5 = serializer.data.get("md5", "")
logger.info(f"new job_id {job_id} for md5 {md5}")
logger.info(f"New Job added with ID: #{job_id} and md5: {md5}.")
if not job_id:
return Response({"error": "815"}, status=status.HTTP_400_BAD_REQUEST)

@@ -258,7 +258,7 @@
{"error": error_message}, status=status.HTTP_400_BAD_REQUEST
)

is_sample = serializer.data.get("is_sample", "")
is_sample = serializer.data.get("is_sample", False)
if not test:
general.start_analyzers(
params["analyzers_to_execute"], analyzers_config, job_id, md5, is_sample
48 changes: 48 additions & 0 deletions api_app/documents.py
@@ -0,0 +1,48 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from .models import Job


@registry.register_document
class JobDocument(Document):
# Object/List fields
analyzers_requested = fields.ListField(fields.KeywordField())
analyzers_to_execute = fields.ListField(fields.KeywordField())
analysis_reports = fields.ObjectField()
# Normal fields
errors = fields.TextField()
    # Keyword fields to allow aggregations/visualizations
source = fields.KeywordField()
md5 = fields.KeywordField()
status = fields.KeywordField()
observable_name = fields.KeywordField()
observable_classification = fields.KeywordField()
file_name = fields.KeywordField()
file_mimetype = fields.KeywordField()
# Nested (ForeignKey) fields
tags = fields.NestedField(
properties={"label": fields.KeywordField(), "color": fields.TextField()}
)

def prepare_analysis_reports(self, instance):
"""
https://github.com/django-es/django-elasticsearch-dsl/issues/36
"""
return instance.analysis_reports

class Index:
# Name of the Elasticsearch index
name = "jobs"

class Django:
model = Job # The model associated with this Document

# The fields of the model you want to be indexed in Elasticsearch
fields = [
"is_sample",
"run_all_available_analyzers",
"received_request_time",
"finished_analysis_time",
"force_privacy",
"disable_external_analyzers",
]
15 changes: 8 additions & 7 deletions api_app/models.py
@@ -6,7 +6,8 @@


def file_directory_path(instance, filename):
return f"job_{timezone.now().strftime('%Y_%m_%d_%H_%M_%S')}_{filename}"
now = timezone.now().strftime("%Y_%m_%d_%H_%M_%S")
return f"job_{now}_{filename}"


STATUS = [
@@ -29,20 +30,20 @@ def __str__(self):
class Job(models.Model):
source = models.CharField(max_length=50, blank=False, default="none")
is_sample = models.BooleanField(blank=False, default=False)
md5 = models.CharField(max_length=50, blank=False)
observable_name = models.CharField(max_length=128, blank=True)
observable_classification = models.CharField(max_length=50, blank=True)
md5 = models.CharField(max_length=32, blank=False)
observable_name = models.CharField(max_length=512, blank=True)
observable_classification = models.CharField(max_length=12, blank=True)
file_name = models.CharField(max_length=50, blank=True)
file_mimetype = models.CharField(max_length=50, blank=True)
status = models.CharField(
max_length=32, blank=False, choices=STATUS, default="pending"
)
analyzers_requested = postgres_fields.ArrayField(
models.CharField(max_length=900), blank=True, default=list
models.CharField(max_length=128), blank=True, default=list
)
run_all_available_analyzers = models.BooleanField(blank=False, default=False)
analyzers_to_execute = postgres_fields.ArrayField(
models.CharField(max_length=900), blank=True, default=list
models.CharField(max_length=128), blank=True, default=list
)
analysis_reports = postgres_fields.JSONField(default=list, null=True, blank=True)
received_request_time = models.DateTimeField(auto_now_add=True)
@@ -63,7 +64,7 @@ def object_by_job_id(cls, job_id, transaction=False):
else:
job_object = cls.objects.get(id=job_id)
except cls.DoesNotExist:
raise AnalyzerRunException(f"no job_id {job_id} retrieved")
raise AnalyzerRunException(f"No Job with ID:{job_id} retrieved")

return job_object

16 changes: 8 additions & 8 deletions api_app/script_analyzers/classes.py
@@ -133,14 +133,14 @@ def __init__(

def before_run(self):
logger.info(
"STARTED analyzer: {}, job_id: {}, observable: {}"
"".format(self.analyzer_name, self.job_id, self.observable_name)
f"STARTED analyzer: {self.__repr__()} -> "
f"Observable: {self.observable_name}."
)

def after_run(self):
logger.info(
f"ENDED analyzer: {self.analyzer_name}, job_id: {self.job_id},"
f"observable: {self.observable_name}"
f"FINISHED analyzer: {self.__repr__()} -> "
f"Observable: {self.observable_name}."
)


@@ -166,14 +166,14 @@ def __init__(

def before_run(self):
logger.info(
f"STARTED analyzer: {self.analyzer_name}, job_id: #{self.job_id}"
f" ({self.filename}, md5: {self.md5})"
f"STARTED analyzer: {self.__repr__()} -> "
f"File: ({self.filename}, md5: {self.md5})"
)

def after_run(self):
logger.info(
f"ENDED analyzer: {self.analyzer_name}, job_id: #{self.job_id},"
f" ({self.filename}, md5: {self.md5})"
f"FINISHED analyzer: {self.__repr__()} -> "
f"File: ({self.filename}, md5: {self.md5})"
)


8 changes: 4 additions & 4 deletions api_app/script_analyzers/yara_repo_downloader.sh
@@ -9,13 +9,13 @@
cd /opt/deploy/yara

# Intezer rules
git clone https://github.com/intezer/yara-rules intezer_rules
git clone --depth 1 https://github.com/intezer/yara-rules intezer_rules

# McAfee rules
git clone https://github.com/advanced-threat-research/Yara-Rules mcafee_rules
git clone --depth 1 https://github.com/advanced-threat-research/Yara-Rules mcafee_rules

# Yara community rules
git clone https://github.com/Yara-Rules/rules.git
git clone --depth 1 https://github.com/Yara-Rules/rules.git
community_yara_index="/opt/deploy/yara/rules/index.yar"

# remove broken or unwanted rules in Yara community rules
@@ -34,7 +34,7 @@ sed -i "/RANSOM_acroware.yar/d" $community_yara_index
sed -i "/TOOLKIT_THOR_HackTools.yar/d" $community_yara_index

# Florian Roth rules
git clone https://github.com/Neo23x0/signature-base.git
git clone --depth 1 https://github.com/Neo23x0/signature-base.git

# removed signatures that use external variables
cd /opt/deploy/yara/signature-base/yara
3 changes: 2 additions & 1 deletion api_app/serializers.py
@@ -81,7 +81,8 @@ class JobSerializer(ObjectPermissionsAssignmentMixin, serializers.ModelSerialize

class Meta:
model = Job
exclude = ("file",)
fields = "__all__"
extra_kwargs = {"file": {"write_only": True}}

def get_permissions_map(self, created):
"""
9 changes: 9 additions & 0 deletions configuration/Kibana-Saved-Conf.ndjson

Large diffs are not rendered by default.

97 changes: 97 additions & 0 deletions docs/source/Advanced-Usage.md
@@ -0,0 +1,97 @@
# Advanced Usage

This page describes some advanced features that Intel Owl provides, all of which can be optionally enabled. Namely,

- [Elastic Search (with Kibana)](#elastic-search)
- [Django Groups & Permissions](#django-groups--permissions)
- [Optional Analyzers](#optional-analyzers)

## Elastic Search

Intel Owl makes use of [django-elasticsearch-dsl](https://django-elasticsearch-dsl.readthedocs.io/en/latest/about.html) to index Job results into Elasticsearch. The `save` and `delete` operations are auto-synced, so you always have the latest data in ES.

In `env_file_app_template`, you will find several Elasticsearch-related environment variables. You should spin up your own Elasticsearch instance and configure these variables accordingly.
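
As a minimal sketch, the relevant part of the env file might look like the following (only `ELASTICSEARCH_ENABLED` and `ELASTICSEARCH_HOST` appear in the example configuration below; the values are illustrative):

```bash
# env_file_app — Elasticsearch settings (illustrative values)
ELASTICSEARCH_ENABLED=True
ELASTICSEARCH_HOST=elk:9200
```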

#### Kibana

Intel Owl provides a saved configuration (with example dashboard and visualizations) for Kibana. It can be downloaded from [here](https://github.com/intelowlproject/IntelOwl/blob/develop/configuration/Kibana-Saved-Conf.ndjson) and can be imported into Kibana.
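
Besides the Kibana UI (Management → Saved Objects → Import), the import can also be scripted against Kibana's saved objects API. A hedged sketch, assuming Kibana is reachable at `kibana:5601`:

```bash
# Import the saved configuration (dashboard + visualizations) into Kibana;
# host and port are assumptions, adjust them to your setup.
curl -X POST "http://kibana:5601/api/saved_objects/_import" \
  -H "kbn-xsrf: true" \
  --form file=@Kibana-Saved-Conf.ndjson
```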

#### Example Configuration

1. Set up [Elasticsearch and Kibana](https://hub.docker.com/r/nshou/elasticsearch-kibana/) and suppose the instance is running in a Docker service named `elk` on port `9200`, exposed to the shared Docker network.
2. In the `env_file_app`, set `ELASTICSEARCH_ENABLED` to `True` and `ELASTICSEARCH_HOST` to `elk:9200`.
3. In the `Dockerfile`, set the correct value of `ELASTICSEARCH_DSL_VERSION` [depending on the version](https://django-elasticsearch-dsl.readthedocs.io/en/latest/about.html#features) of your Elasticsearch server. The default value is `7.1.4`.
4. Rebuild the Docker images with `docker-compose build` (required only if `ELASTICSEARCH_DSL_VERSION` was changed).
5. Now start the Docker containers and execute:

```bash
docker exec -ti intel_owl_uwsgi python manage.py search_index --rebuild
```

This will build the `jobs` index and populate it with all existing job objects.
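
To sanity-check the result, you can query Elasticsearch directly (reusing the `elk:9200` host from the example above):

```bash
# Count the documents indexed into "jobs"
curl -s "http://elk:9200/jobs/_count?pretty"
```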


## Django Groups & Permissions
The application makes use of [Django's built-in permissions system](https://docs.djangoproject.com/en/3.0/topics/auth/default/#permissions-and-authorization). It provides a way to assign permissions to specific users and groups of users.

As an administrator, here's what you need to know:
- Each user should belong to at least one group, and permissions should be assigned to these groups. Please refrain from assigning user-level permissions.
- When the first normal user is created, a group named `DefaultGlobal` is created with all permissions granted. Every new user is automatically added to this group.
- This is done because most admins won't need to deal with user permissions, and this way they don't have to.
- If you don't want a global group (with all permissions) but custom groups with custom permissions, just strip `DefaultGlobal` of all permissions but do *not* delete it; a minimal sketch of this follows below.
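
A minimal sketch of how to do that from the Django shell, using only Django's standard auth API (nothing IntelOwl-specific is assumed here):

```python
# python manage.py shell
from django.contrib.auth.models import Group

# Remove every permission from DefaultGlobal, but keep the group itself,
# since new users are still added to it automatically.
group = Group.objects.get(name="DefaultGlobal")
group.permissions.clear()
```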

The permissions work the way one would expect:
- `api_app | job | Can view job` allows users to fetch the list of all jobs they have permission for, or a particular job by its ID.
- `api_app | job | Can create job` allows users to request a new analysis. When a user creates a job (requests a new analysis),
    - the object-level `view` permission is applied either to all groups the requesting user belongs to or to all groups (depending on the parameters passed).
    - the object-level `change` and `delete` permissions are restricted to superusers/admins.
- `api_app | tag | Can create tag` allows users to create new tags. When a user creates a new tag,
    - the new tag is visible (object-level `view` permission) to every group, but
    - the object-level `change` and `delete` permissions are given only to the groups the requesting user belongs to.
    - This is done because tag labels and colors are unique columns, and in most cases the admin would want to define tags that are usable (but not modifiable) by users of all groups.
- `api_app | tag | Can view tag` allows users to fetch the list of all tags or a particular tag by its ID.
- `api_app | tag | Can change tag` allows users to edit a tag, provided the user has the object-level permission for that particular tag.
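
Since these are object-level permissions, code that needs to honor them can rely on [django-guardian](https://django-guardian.readthedocs.io/)'s shortcuts. A hedged sketch (guardian as the backing library is an assumption, suggested by the `ObjectPermissionsAssignmentMixin` used in `api_app/serializers.py`):

```python
from guardian.shortcuts import get_objects_for_user

# All Job objects the given user may view ("api_app | job | Can view job")
visible_jobs = get_objects_for_user(user, "api_app.view_job")
```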

## Optional Analyzers
Some analyzers run in their own Docker containers and are kept disabled by default, to avoid accidentally starting too many containers and making your computer unresponsive.

<style>
table, th, td {
padding: 5px;
border: 1px solid black;
border-collapse: collapse;
}
</style>
<table style="width:100%">
<tr>
<th>Name</th>
<th>Analyzers</th>
</tr>
<tr>
<td>PEframe</td>
<td><code>PEframe_Scan</code></td>
</tr>
<tr>
<td>Thug</td>
<td><code>Thug_URL_Info_*</code>, <code>Thug_HTML_Info_*</code></td>
</tr>
<tr>
<td>FireEye Capa</td>
<td><code>Capa_Info</code></td>
</tr>
<tr>
<td>Box-JS</td>
<td><code>BoxJS_Scan_JavaScript</code></td>
</tr>
<tr>
<td>APK Analyzers</td>
<td><code>APKiD_Scan_APK_DEX_JAR</code></td>
</tr>
</table>

In the project, you can find template files named `.env_template` and `.env_file_integrations_template`.
You have to create new files named `.env` and `env_file_integrations` from these two templates.

Docker services defined in the compose files listed in the `COMPOSE_FILE` variable of the `.env` file are run on `docker-compose up`. So, modify it to include only the analyzers you wish to use; a sketch follows below.
Such compose files are available under `integrations/`.
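
As a hedged sketch of what that could look like (the exact compose file names under `integrations/` are illustrative and may differ in your checkout; `COMPOSE_FILE` takes colon-separated paths on Linux):

```bash
# .env — run the core services plus a single optional analyzer
COMPOSE_FILE=docker-compose.yml:integrations/compose.thug.yml
```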