In [1]:
import requests
import pyspark
from pyspark.sql import SparkSession

# Build (or reuse) a Spark session with master "local" and the Iceberg SQL
# extensions enabled. Two catalogs are configured:
#   * spark_catalog - Iceberg's SparkSessionCatalog, type "hive"
#   * local         - Iceberg's SparkCatalog, type "hadoop", with its
#                     warehouse under /workspaces/react-iceberg/data/warehouse
spark = (
    SparkSession.builder
    .master("local")
    .appName("react-iceberg")
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog")
    .config("spark.sql.catalog.spark_catalog.type", "hive")
    .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.local.type", "hadoop")
    .config("spark.sql.catalog.local.warehouse", "/workspaces/react-iceberg/data/warehouse")
    .getOrCreate()
)

# Smoke test: run a trivial one-row query and print it to confirm the session works.
df = spark.sql("select 'spark' as hello ")
df.show()

+-----+
|hello|
+-----+
|spark|
+-----+



In [2]:
import requests
from io import BytesIO
from zipfile import ZipFile

# Make Spark SQL identifier matching case-sensitive for this session.
# NOTE(review): presumably needed because the dataset contains field names
# that differ only by case - confirm against the JSON schema.
spark.conf.set("spark.sql.caseSensitive", "true")

# Zipped dataset from the OTRF/mordor repository (ProxyLogon SSRF/RCE PoC).
url = 'https://raw.githubusercontent.com/OTRF/mordor/master/datasets/small/windows/persistence/host/proxylogon_ssrf_rce_poc.zip'

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# loudly on a non-2xx response instead of handing an error page to ZipFile
# (which would otherwise surface as a confusing BadZipFile).
zipFileRequest = requests.get(url, timeout=60)
zipFileRequest.raise_for_status()

# Extract the first archive member into ./data. The context manager closes
# the in-memory archive once the member has been written to disk.
with ZipFile(BytesIO(zipFileRequest.content)) as zipFile:
    jsonFilePath = zipFile.extract(zipFile.namelist()[0], "./data")

# Display the path of the extracted file as the cell output.
jsonFilePath

'/workspaces/react-iceberg/proxylogon_ssrf_rce_poc_2021-03-14T01401970.json'

In [3]:
# Read the extracted JSON file into a Spark DataFrame.
df2 = spark.read.json(jsonFilePath)

# Register the DataFrame as a temporary view so it can be queried with SQL.
df2.createOrReplaceTempView("mordorTable")

# Number of events per (Hostname, Channel, EventID), most frequent first.
event_counts_query = '''
SELECT Hostname,Channel,EventID, Count(*) as count
FROM mordorTable
GROUP BY Hostname,Channel,EventID
ORDER BY count DESC
'''

# truncate=False prints the full column values.
event_counts = spark.sql(event_counts_query)
event_counts.show(truncate=False)

+----------------------+---------------------------------------------------+-------+-----+
|Hostname              |Channel                                            |EventID|count|
+----------------------+---------------------------------------------------+-------+-----+
|MXS01.azsentinel.local|Microsoft-Windows-Sysmon/Operational               |10     |3626 |
|MXS01.azsentinel.local|Security                                           |4658   |2398 |
|MXS01.azsentinel.local|Security                                           |4656   |1199 |
|MXS01.azsentinel.local|Security                                           |4690   |1199 |
|MXS01.azsentinel.local|Microsoft-Windows-Sysmon/Operational               |7      |427  |
|MXS01.azsentinel.local|Microsoft-Windows-Sysmon/Operational               |11     |301  |
|MXS01.azsentinel.local|Microsoft-Exchange-ActiveMonitoring/MonitorResult  |3      |293  |
|MXS01.azsentinel.local|Microsoft-Exchange-ActiveMonitoring/ResponderResult|4      |167  |

In [4]:
# Persist the events as an Iceberg table named db.table in the "local" catalog.
#
# The save mode is set explicitly: without it, saveAsTable uses the default
# error-if-exists mode, so re-running this cell (or Restart & Run All) fails
# once the table already exists in the on-disk warehouse. "overwrite"
# replaces the table contents on each run, which keeps the cell idempotent.
df2.write.format("iceberg").mode("overwrite").saveAsTable("local.db.table")

# NOTE: on Spark 3.1+ the DataFrameWriterV2 form of this write is
#     df2.writeTo("local.db.table").append()
# The original author recorded that it did not yet work with Iceberg at the
# time of writing - re-check against the Iceberg version in use.