Merge pull request #565 from cisagov/DJ_Reportlab_generator_PR

Update report generator to use reportlab
cisagov · Sep 19, 2023 · 315a0a1 · 315a0a1
2 parents 998fa20 + b0ce88b
commit 315a0a1
Show file tree

Hide file tree

Showing 15 changed files with 4,122 additions and 463 deletions.
diff --git a/setup.py b/setup.py
@@ -106,6 +106,7 @@ def get_version(version_file):
         "chevron == 0.14.0",
         "celery",
         "click",
+        "demoji",
         "dnstwist",
         "docopt",
         "DShield",

diff --git a/src/pe_reports/__init__.py b/src/pe_reports/__init__.py
@@ -46,6 +46,7 @@
 app.config["CELERY_RESULT_BACKEND"] = "redis://localhost:6379/0"
 
 CENTRAL_LOGGING_FILE = "pe_reports_logging.log"
+PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
 DEBUG = False
 # Setup Logging
 """Set up logging and call the run_pe_script function."""

diff --git a/src/pe_reports/charts.py b/src/pe_reports/charts.py
@@ -4,9 +4,13 @@
 import os
 
 # Third-Party Libraries
+import matplotlib
 import matplotlib.pyplot as plt
 from matplotlib.ticker import MaxNLocator
 
+matplotlib.use("Agg")
+
+
 # Factor to convert cm to inches
 CM_CONVERSION_FACTOR = 2.54
 
@@ -161,43 +165,122 @@ def line_chart(self):
         width = self.width
         height = self.height
         name = self.name
-        color = ["#1357BE", "#D0342C"]
+        color = ["#7aa5c1", "#e08493"]
         fig, ax = plt.subplots()
         ax.spines["right"].set_visible(False)
         ax.spines["top"].set_visible(False)
         plt.set_loglevel("WARNING")
-        # Generate lines for chart and add data labels
-        for col in range(len(df.columns)):
+        # Plot first line on chart
+        plt.plot(
+            df.index,
+            df[df.columns[0]],
+            color=color[0],
+            label=df.columns[0],
+            linewidth=3,
+            marker=".",
+            markersize=10,
+        )
+        # If there is a second column, plot it as a second (dashed) line
+        if len(df.columns) == 2:
             plt.plot(
-                df.index, df[df.columns[col]], color=color[col], label=df.columns[col]
+                df.index,
+                df[df.columns[1]],
+                color=color[1],
+                label=df.columns[1],
+                linewidth=3,
+                linestyle="dashed",
+                marker=".",
+                markersize=10,
             )
-            for i, j in df[df.columns[col]].items():
-                if int(j):
-                    plt.annotate(
-                        str(int(j)),
-                        xy=(i, j),
-                        textcoords="offset points",  # how to position the text
-                        xytext=(
-                            0,
-                            5,
-                        ),  # distance from text to points (x,y)
-                        ha="center",  # horizontal alignment can be left, right or center
-                    )
-        # Specify axis attributes
-        plt.ylim(ymin=0, ymax=int(df[df.columns].max().max() * 1.15))
+        # Compute y_max as 110% of the largest value; the axis limit below adds another 10% of headroom on top of that
+        y_max = int(df[df.columns].max().max() * 1.1)
+        plt.ylim(ymin=0, ymax=y_max * 1.10)
+        # Place the legend in the upper right corner
+        plt.legend(loc="upper right")
+        # Set size of the chart
+        plt.gcf().set_size_inches(
+            width / CM_CONVERSION_FACTOR, height / CM_CONVERSION_FACTOR
+        )
+        # Format tick marks and grid layout
         plt.xticks(fontsize=7)
         plt.yticks(fontsize=7)
         plt.gca().set_ylabel(y_label, labelpad=10, fontdict={"size": 8})
         plt.xlabel(x_label, labelpad=10, fontdict={"size": 8})
         plt.xticks(rotation=0)
         plt.grid(axis="y")
-        # Add legend
-        plt.legend(loc="upper right")
-        # Set sizing for image
-        plt.gcf().set_size_inches(
-            width / CM_CONVERSION_FACTOR, height / CM_CONVERSION_FACTOR
-        )
         plt.tight_layout()
+
+        # Add data labels
+        # Loop through the dataframe
+        for row in df.itertuples():
+            # Check if there is only one column of values (row = index + one value)
+            if len(row) == 2:
+                plt.annotate(
+                    str(int(row[1])),
+                    xy=(row[0], row[1]),
+                    textcoords="offset points",  # Set the manner to position the text
+                    xytext=(
+                        0,
+                        8,
+                    ),  # Distance from text to points (x,y)
+                    ha="center",  # Set horizontal alignment to center
+                    color="#003e67",
+                )
+            # Check if there are two columns of data (row = index + two values)
+            elif len(row) == 3:
+                # Check if the two values are within 1/10th of the max y value
+                value_diff = abs(row[1] - row[2])
+                if value_diff < y_max / 10:
+                    # If the values are on the bottom quarter of the graph don't label below values
+                    if min(row[1], row[2]) < y_max / 4:
+                        y1 = y2 = max(row[1], row[2])
+                        if row[1] > row[2]:
+                            y1_offset = 18
+                            y2_offset = 8
+                        else:
+                            y1_offset = 8
+                            y2_offset = 18
+                    # If the values are not in the bottom quarter place the lower value below the point
+                    else:
+                        y1 = row[1]
+                        y2 = row[2]
+                        if row[1] > row[2]:
+                            y1_offset = 8
+                            y2_offset = -17
+                        else:
+                            y1_offset = -17
+                            y2_offset = 8
+                # If values are not close to each other put the labels directly above the value
+                else:
+                    y1 = row[1]
+                    y2 = row[2]
+                    y1_offset = 8
+                    y2_offset = 8
+
+                # Annotate the data points
+                plt.annotate(
+                    str(int(row[1])),
+                    xy=(row[0], y1),
+                    textcoords="offset points",  # Set how to position the text
+                    xytext=(
+                        0,
+                        y1_offset,
+                    ),  # Distance from text to points (x,y)
+                    ha="center",  # Horizontal alignment can be left, right or center
+                    color="#005288",
+                )
+                plt.annotate(
+                    str(int(row[2])),
+                    xy=(row[0], y2),
+                    textcoords="offset points",  # Set how to position the text
+                    xytext=(
+                        0,
+                        y2_offset,
+                    ),  # Distance from text to points (x,y)
+                    ha="center",  # Set horizontal alignment to center
+                    # fontsize=2,
+                    color="#c41230",
+                )
         # Save chart to assets directory
         plt.savefig(
             BASE_DIR + "/assets/" + name, transparent=True, dpi=500, bbox_inches="tight"

diff --git a/src/pe_reports/data/db_query.py b/src/pe_reports/data/db_query.py
@@ -170,12 +170,36 @@ def query_breachdetails_view(org_uid, start_date, end_date):
 
 
 def query_domMasq(org_uid, start_date, end_date):
-    """Query domain masquerading table."""
+    """Query domain_permutations associated with a given org."""
     conn = connect()
     try:
-        sql = """SELECT * FROM dnstwist_domain_masq
+        sql = """SELECT * FROM domain_permutations
         WHERE organizations_uid = %(org_uid)s
-        AND date_observed BETWEEN %(start_date)s AND %(end_date)s"""
+        AND date_active BETWEEN %(start_date)s AND %(end_date)s"""
+        df = pd.read_sql(
+            sql,
+            conn,
+            params={
+                "org_uid": org_uid,
+                "start_date": start_date,
+                "end_date": end_date,
+            },
+        )
+        return df
+    except (Exception, psycopg2.DatabaseError) as error:
+        LOGGER.error("There was a problem with your database query %s", error)
+    finally:
+        if conn is not None:
+            close(conn)
+
+
+def query_domMasq_alerts(org_uid, start_date, end_date):
+    """Query domain alerts table."""
+    conn = connect()
+    try:
+        sql = """SELECT * FROM domain_alerts
+        WHERE organizations_uid = %(org_uid)s
+        AND date BETWEEN %(start_date)s AND %(end_date)s"""
         df = pd.read_sql(
             sql,
             conn,
@@ -205,10 +229,15 @@ def query_shodan(org_uid, start_date, end_date, table):
     """Query Shodan table."""
     conn = connect()
     try:
+        df = pd.DataFrame()
+        df_list = []
+        chunk_size = 1000
         sql = """SELECT * FROM %(table)s
         WHERE organizations_uid = %(org_uid)s
         AND timestamp BETWEEN %(start_date)s AND %(end_date)s"""
-        df = pd.read_sql(
+        count = 0
+        # Batch SQL call to reduce memory (https://pythonspeed.com/articles/pandas-sql-chunking/)
+        for chunk_df in pd.read_sql(
             sql,
             conn,
             params={
@@ -217,7 +246,24 @@ def query_shodan(org_uid, start_date, end_date, table):
                 "start_date": start_date,
                 "end_date": end_date,
             },
-        )
+            chunksize=chunk_size,
+        ):
+            count += 1
+            df_list.append(chunk_df)
+
+        if len(df_list) == 0:
+            df = pd.read_sql(
+                sql,
+                conn,
+                params={
+                    "table": AsIs(table),
+                    "org_uid": org_uid,
+                    "start_date": start_date,
+                    "end_date": end_date,
+                },
+            )
+        else:
+            df = pd.concat(df_list, ignore_index=True)
         return df
     except (Exception, psycopg2.DatabaseError) as error:
         LOGGER.error("There was a problem with your database query %s", error)

diff --git a/src/pe_reports/fonts/FranklinGothicBook.ttf b/src/pe_reports/fonts/FranklinGothicBook.ttf
diff --git a/src/pe_reports/fonts/FranklinGothicBookItalic.ttf b/src/pe_reports/fonts/FranklinGothicBookItalic.ttf
diff --git a/src/pe_reports/fonts/FranklinGothicDemiRegular.ttf b/src/pe_reports/fonts/FranklinGothicDemiRegular.ttf
diff --git a/src/pe_reports/fonts/FranklinGothicMediumItalic.ttf b/src/pe_reports/fonts/FranklinGothicMediumItalic.ttf
diff --git a/src/pe_reports/fonts/FranklinGothicMediumRegular.ttf b/src/pe_reports/fonts/FranklinGothicMediumRegular.ttf