add elasticsearch support, add --depth to git clone commands + other improvements #147

Merged
merged 1 commit into from Aug 13, 2020
5 changes: 4 additions & 1 deletion Dockerfile
@@ -4,6 +4,7 @@ ENV PYTHONUNBUFFERED 1
ENV DJANGO_SETTINGS_MODULE intel_owl.settings
ENV PYTHONPATH /opt/deploy/intel_owl
ENV LOG_PATH /var/log/intel_owl
ENV ELASTICSEARCH_DSL_VERSION 7.1.4

RUN mkdir -p ${LOG_PATH} \
${LOG_PATH}/django ${LOG_PATH}/uwsgi \
@@ -21,7 +22,9 @@ RUN pip3 install --upgrade pip
COPY requirements.txt $PYTHONPATH/requirements.txt
WORKDIR $PYTHONPATH

RUN pip3 install --compile -r requirements.txt
RUN pip3 install --no-cache-dir --compile -r requirements.txt
# install the appropriate version of django-elasticsearch-dsl as specified by the user
RUN pip3 install --no-cache-dir django-elasticsearch-dsl==${ELASTICSEARCH_DSL_VERSION}

COPY . $PYTHONPATH

6 changes: 3 additions & 3 deletions api_app/api.py
@@ -245,9 +245,9 @@ def send_analysis_request(request):

# save the arrived data plus new params into a new job object
serializer.save(**params)
job_id = serializer.data.get("id", "")
job_id = serializer.data.get("id", None)
md5 = serializer.data.get("md5", "")
logger.info(f"new job_id {job_id} for md5 {md5}")
logger.info(f"New Job added with ID: #{job_id} and md5: {md5}.")
if not job_id:
return Response({"error": "815"}, status=status.HTTP_400_BAD_REQUEST)

@@ -258,7 +258,7 @@
{"error": error_message}, status=status.HTTP_400_BAD_REQUEST
)

is_sample = serializer.data.get("is_sample", "")
is_sample = serializer.data.get("is_sample", False)
if not test:
general.start_analyzers(
params["analyzers_to_execute"], analyzers_config, job_id, md5, is_sample
48 changes: 48 additions & 0 deletions api_app/documents.py
@@ -0,0 +1,48 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from .models import Job


@registry.register_document
class JobDocument(Document):
# Object/List fields
analyzers_requested = fields.ListField(fields.KeywordField())
analyzers_to_execute = fields.ListField(fields.KeywordField())
analysis_reports = fields.ObjectField()
# Normal fields
errors = fields.TextField()
    # Keyword fields to allow aggregations/visualizations
source = fields.KeywordField()
md5 = fields.KeywordField()
status = fields.KeywordField()
observable_name = fields.KeywordField()
observable_classification = fields.KeywordField()
file_name = fields.KeywordField()
file_mimetype = fields.KeywordField()
# Nested (ForeignKey) fields
tags = fields.NestedField(
properties={"label": fields.KeywordField(), "color": fields.TextField()}
)

def prepare_analysis_reports(self, instance):
"""
https://github.com/django-es/django-elasticsearch-dsl/issues/36
"""
return instance.analysis_reports

class Index:
# Name of the Elasticsearch index
name = "jobs"

class Django:
model = Job # The model associated with this Document

# The fields of the model you want to be indexed in Elasticsearch
fields = [
"is_sample",
"run_all_available_analyzers",
"received_request_time",
"finished_analysis_time",
"force_privacy",
"disable_external_analyzers",
]
15 changes: 8 additions & 7 deletions api_app/models.py
@@ -6,7 +6,8 @@


def file_directory_path(instance, filename):
return f"job_{timezone.now().strftime('%Y_%m_%d_%H_%M_%S')}_{filename}"
now = timezone.now().strftime("%Y_%m_%d_%H_%M_%S")
return f"job_{now}_{filename}"


STATUS = [
@@ -29,20 +30,20 @@ def __str__(self):
class Job(models.Model):
source = models.CharField(max_length=50, blank=False, default="none")
is_sample = models.BooleanField(blank=False, default=False)
md5 = models.CharField(max_length=50, blank=False)
observable_name = models.CharField(max_length=128, blank=True)
observable_classification = models.CharField(max_length=50, blank=True)
md5 = models.CharField(max_length=32, blank=False)
observable_name = models.CharField(max_length=512, blank=True)
observable_classification = models.CharField(max_length=12, blank=True)
file_name = models.CharField(max_length=50, blank=True)
file_mimetype = models.CharField(max_length=50, blank=True)
status = models.CharField(
max_length=32, blank=False, choices=STATUS, default="pending"
)
analyzers_requested = postgres_fields.ArrayField(
models.CharField(max_length=900), blank=True, default=list
models.CharField(max_length=128), blank=True, default=list
)
run_all_available_analyzers = models.BooleanField(blank=False, default=False)
analyzers_to_execute = postgres_fields.ArrayField(
models.CharField(max_length=900), blank=True, default=list
models.CharField(max_length=128), blank=True, default=list
)
analysis_reports = postgres_fields.JSONField(default=list, null=True, blank=True)
received_request_time = models.DateTimeField(auto_now_add=True)
@@ -63,7 +64,7 @@ def object_by_job_id(cls, job_id, transaction=False):
else:
job_object = cls.objects.get(id=job_id)
except cls.DoesNotExist:
raise AnalyzerRunException(f"no job_id {job_id} retrieved")
raise AnalyzerRunException(f"No Job with ID:{job_id} retrieved")

return job_object

16 changes: 8 additions & 8 deletions api_app/script_analyzers/classes.py
@@ -133,14 +133,14 @@ def __init__(

def before_run(self):
logger.info(
"STARTED analyzer: {}, job_id: {}, observable: {}"
"".format(self.analyzer_name, self.job_id, self.observable_name)
f"STARTED analyzer: {self.__repr__()} -> "
f"Observable: {self.observable_name}."
)

def after_run(self):
logger.info(
f"ENDED analyzer: {self.analyzer_name}, job_id: {self.job_id},"
f"observable: {self.observable_name}"
f"FINISHED analyzer: {self.__repr__()} -> "
f"Observable: {self.observable_name}."
)


@@ -166,14 +166,14 @@ def __init__(

def before_run(self):
logger.info(
f"STARTED analyzer: {self.analyzer_name}, job_id: #{self.job_id}"
f" ({self.filename}, md5: {self.md5})"
f"STARTED analyzer: {self.__repr__()} -> "
f"File: ({self.filename}, md5: {self.md5})"
)

def after_run(self):
logger.info(
f"ENDED analyzer: {self.analyzer_name}, job_id: #{self.job_id},"
f" ({self.filename}, md5: {self.md5})"
f"FINISHED analyzer: {self.__repr__()} -> "
f"File: ({self.filename}, md5: {self.md5})"
)


8 changes: 4 additions & 4 deletions api_app/script_analyzers/yara_repo_downloader.sh
@@ -9,13 +9,13 @@
cd /opt/deploy/yara

# Intezer rules
git clone https://github.com/intezer/yara-rules intezer_rules
git clone --depth 1 https://github.com/intezer/yara-rules intezer_rules

# McAfee rules
git clone https://github.com/advanced-threat-research/Yara-Rules mcafee_rules
git clone --depth 1 https://github.com/advanced-threat-research/Yara-Rules mcafee_rules

# Yara community rules
git clone https://github.com/Yara-Rules/rules.git
git clone --depth 1 https://github.com/Yara-Rules/rules.git
community_yara_index="/opt/deploy/yara/rules/index.yar"

# remove broken or unwanted rules in Yara community rules
@@ -34,7 +34,7 @@ sed -i "/RANSOM_acroware.yar/d" $community_yara_index
sed -i "/TOOLKIT_THOR_HackTools.yar/d" $community_yara_index

# Florian Roth rules
git clone https://github.com/Neo23x0/signature-base.git
git clone --depth 1 https://github.com/Neo23x0/signature-base.git

# removed signatures that use external variables
cd /opt/deploy/yara/signature-base/yara
3 changes: 2 additions & 1 deletion api_app/serializers.py
@@ -81,7 +81,8 @@ class JobSerializer(ObjectPermissionsAssignmentMixin, serializers.ModelSerialize

class Meta:
model = Job
exclude = ("file",)
fields = "__all__"
extra_kwargs = {"file": {"write_only": True}}

def get_permissions_map(self, created):
"""
9 changes: 9 additions & 0 deletions configuration/Kibana-Saved-Conf.ndjson

Large diffs are not rendered by default.

97 changes: 97 additions & 0 deletions docs/source/Advanced-Usage.md
@@ -0,0 +1,97 @@
# Advanced Usage

This page describes some advanced features that Intel Owl provides, all of which can be optionally enabled. Namely,

- [Elastic Search (with Kibana)](#elastic-search)
- [Django Groups & Permissions](#django-groups--permissions)
- [Optional Analyzers](#optional-analyzers)

## Elastic Search

Intel Owl makes use of [django-elasticsearch-dsl](https://django-elasticsearch-dsl.readthedocs.io/en/latest/about.html) to index Job results into Elasticsearch. The `save` and `delete` operations are auto-synced, so you always have the latest data in ES.

In `env_file_app_template`, you will find several Elasticsearch-related environment variables. You should spin up your own Elasticsearch instance and configure these variables accordingly.
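
As a minimal sketch, the relevant part of the env file might look like the following (only `ELASTICSEARCH_ENABLED` and `ELASTICSEARCH_HOST` appear in the example configuration below; the values are illustrative):

```bash
# env_file_app — Elasticsearch settings (illustrative values)
ELASTICSEARCH_ENABLED=True
ELASTICSEARCH_HOST=elk:9200
```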

#### Kibana

Intel Owl provides a saved configuration (with example dashboard and visualizations) for Kibana. It can be downloaded from [here](https://github.com/intelowlproject/IntelOwl/blob/develop/configuration/Kibana-Saved-Conf.ndjson) and can be imported into Kibana.
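
Besides the Kibana UI (Management → Saved Objects → Import), the import can also be scripted against Kibana's saved objects API. A hedged sketch, assuming Kibana is reachable at `kibana:5601`:

```bash
# Import the saved configuration (dashboard + visualizations) into Kibana;
# host and port are assumptions, adjust them to your setup.
curl -X POST "http://kibana:5601/api/saved_objects/_import" \
  -H "kbn-xsrf: true" \
  --form file=@Kibana-Saved-Conf.ndjson
```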

#### Example Configuration

1. Set up [Elasticsearch and Kibana](https://hub.docker.com/r/nshou/elasticsearch-kibana/) and suppose the instance is running in a Docker service named `elk` on port `9200`, exposed to the shared Docker network.
2. In the `env_file_app`, set `ELASTICSEARCH_ENABLED` to `True` and `ELASTICSEARCH_HOST` to `elk:9200`.
3. In the `Dockerfile`, set the correct value of `ELASTICSEARCH_DSL_VERSION` [depending on the version](https://django-elasticsearch-dsl.readthedocs.io/en/latest/about.html#features) of your Elasticsearch server. The default value is `7.1.4`.
4. Rebuild the Docker images with `docker-compose build` (required only if `ELASTICSEARCH_DSL_VERSION` was changed).
5. Now start the Docker containers and execute:

```bash
docker exec -ti intel_owl_uwsgi python manage.py search_index --rebuild
```

This will build the `jobs` index and populate it with all existing job objects.
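
To sanity-check the result, you can query Elasticsearch directly (reusing the `elk:9200` host from the example above):

```bash
# Count the documents indexed into "jobs"
curl -s "http://elk:9200/jobs/_count?pretty"
```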


## Django Groups & Permissions
The application makes use of [Django's built-in permissions system](https://docs.djangoproject.com/en/3.0/topics/auth/default/#permissions-and-authorization). It provides a way to assign permissions to specific users and groups of users.

As an administrator, here's what you need to know:
- Each user should belong to at least one group, and permissions should be assigned to these groups. Please refrain from assigning user-level permissions.
- When the first normal user is created, a group named `DefaultGlobal` is created with all permissions granted. Every new user is automatically added to this group.
- This is done because most admins won't need to deal with user permissions, and this way they don't have to.
- If you don't want a global group (with all permissions) but custom groups with custom permissions, just strip `DefaultGlobal` of all permissions but do *not* delete it; a minimal sketch of this follows below.
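
A minimal sketch of how to do that from the Django shell, using only Django's standard auth API (nothing IntelOwl-specific is assumed here):

```python
# python manage.py shell
from django.contrib.auth.models import Group

# Remove every permission from DefaultGlobal, but keep the group itself,
# since new users are still added to it automatically.
group = Group.objects.get(name="DefaultGlobal")
group.permissions.clear()
```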

The permissions work the way one would expect:
- `api_app | job | Can view job` allows users to fetch the list of all jobs they have permission for, or a particular job by its ID.
- `api_app | job | Can create job` allows users to request a new analysis. When a user creates a job (requests a new analysis),
    - the object-level `view` permission is applied either to all groups the requesting user belongs to or to all groups (depending on the parameters passed).
    - the object-level `change` and `delete` permissions are restricted to superusers/admins.
- `api_app | tag | Can create tag` allows users to create new tags. When a user creates a new tag,
    - the new tag is visible (object-level `view` permission) to every group, but
    - the object-level `change` and `delete` permissions are given only to the groups the requesting user belongs to.
    - This is done because tag labels and colors are unique columns, and in most cases the admin would want to define tags that are usable (but not modifiable) by users of all groups.
- `api_app | tag | Can view tag` allows users to fetch the list of all tags or a particular tag by its ID.
- `api_app | tag | Can change tag` allows users to edit a tag, provided the user has the object-level permission for that particular tag.
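
Since these are object-level permissions, code that needs to honor them can rely on [django-guardian](https://django-guardian.readthedocs.io/)'s shortcuts. A hedged sketch (guardian as the backing library is an assumption, suggested by the `ObjectPermissionsAssignmentMixin` used in `api_app/serializers.py`):

```python
from guardian.shortcuts import get_objects_for_user

# All Job objects the given user may view ("api_app | job | Can view job")
visible_jobs = get_objects_for_user(user, "api_app.view_job")
```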

## Optional Analyzers
Some analyzers run in their own Docker containers and are kept disabled by default, to avoid accidentally starting too many containers and making your computer unresponsive.

<style>
table, th, td {
padding: 5px;
border: 1px solid black;
border-collapse: collapse;
}
</style>
<table style="width:100%">
<tr>
<th>Name</th>
<th>Analyzers</th>
</tr>
<tr>
<td>PEframe</td>
<td><code>PEframe_Scan</code></td>
</tr>
<tr>
<td>Thug</td>
<td><code>Thug_URL_Info_*</code>, <code>Thug_HTML_Info_*</code></td>
</tr>
<tr>
<td>FireEye Capa</td>
<td><code>Capa_Info</code></td>
</tr>
<tr>
<td>Box-JS</td>
<td><code>BoxJS_Scan_JavaScript</code></td>
</tr>
<tr>
<td>APK Analyzers</td>
<td><code>APKiD_Scan_APK_DEX_JAR</code></td>
</tr>
</table>

In the project, you can find template files named `.env_template` and `.env_file_integrations_template`.
You have to create new files named `.env` and `env_file_integrations` from these two templates.

Docker services defined in the compose files listed in the `COMPOSE_FILE` variable of the `.env` file are run on `docker-compose up`. So, modify it to include only the analyzers you wish to use; a sketch follows below.
Such compose files are available under `integrations/`.
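
As a hedged sketch of what that could look like (the exact compose file names under `integrations/` are illustrative and may differ in your checkout; `COMPOSE_FILE` takes colon-separated paths on Linux):

```bash
# .env — run the core services plus a single optional analyzer
COMPOSE_FILE=docker-compose.yml:integrations/compose.thug.yml
```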