In [74]:
!pip install pyspark
import logging
import os
from getpass import getpass

from pyspark.sql import SparkSession
from pyspark.streaming import StreamingContext
from pyspark.sql.types import *

Defaulting to user installation because normal site-packages is not writeable


In [75]:
# Build (or reuse) the Spark session for this notebook.
# The PostgreSQL JDBC driver jar is registered through `spark.jars`; the path
# is relative to the directory the kernel was started from.
sessionBuilder = SparkSession.builder.appName("itoss-ai")
sessionBuilder = sessionBuilder.config("spark.jars", "./postgresql-42.7.3.jar")
spark = sessionBuilder.getOrCreate()

In [76]:
# --- JDBC connection settings ------------------------------------------------
# The URL and user keep their previous values as defaults but can be overridden
# through the environment. The password is deliberately NOT given a default:
# it is read from ITOSS_DB_PASSWORD or, when that is unset/empty, prompted for
# interactively, so that no credential is stored in the notebook itself.
import os
from getpass import getpass

jdbcUrl = os.environ.get(
    "ITOSS_JDBC_URL", "jdbc:postgresql://localhost:5432/itossconfig"
)
connection_properties = {
    "driver": "org.postgresql.Driver",
    "user": os.environ.get("ITOSS_DB_USER", "itoss"),
    "password": os.environ.get("ITOSS_DB_PASSWORD")
    or getpass("PostgreSQL password (ITOSS_DB_PASSWORD is not set): "),
}

# --- ct snapshot query ---------------------------------------------------------
# Spark's JDBC reader takes a table expression, so the SELECT is wrapped in
# parentheses and aliased. Each ct row is enriched with:
#   * ctWorkgroup / ctLocation - display names from workgroup / location
#   * ctStatus                 - 'down' when ct_status.down is true, else 'up'
#   * last_status_change       - taken from ct_status
# The ct_type join contributes no columns, but as an INNER join it still drops
# ct rows without a matching type, so it is kept to preserve the row set.
# NOTE(review): the aliases are unquoted, and the captured schema shows them
# lower-cased (e.g. `ctworkgroup`); later references such as "ctStatus"
# presumably rely on Spark's default case-insensitive resolution - confirm if
# spark.sql.caseSensitive is ever enabled.
query = """(
    select c.*,
           w.name as ctWorkgroup,
           l.name as ctLocation,
           case when cs.down then 'down' else 'up' end as ctStatus,
           cs.last_status_change
      from ct c
           inner join ct_status cs on c.id = cs.id
           inner join workgroup w on c.workgroup_id = w.id
           inner join location l on c.location_id = l.id
           inner join ct_type ct on c.type_id = ct.id
) as subquery"""

ctDf = spark.read.jdbc(url=jdbcUrl, table=query, properties=connection_properties)
ctDf.printSchema()



root
 |-- id: long (nullable = true)
 |-- created_by: string (nullable = true)
 |-- creation_date: timestamp (nullable = true)
 |-- last_modified_by: string (nullable = true)
 |-- last_modified_date: timestamp (nullable = true)
 |-- attributes: string (nullable = true)
 |-- key: string (nullable = true)
 |-- environment: string (nullable = true)
 |-- name: string (nullable = true)
 |-- old_password: string (nullable = true)
 |-- state: string (nullable = true)
 |-- collector_id: long (nullable = true)
 |-- company_id: long (nullable = true)
 |-- contact_id: long (nullable = true)
 |-- location_id: long (nullable = true)
 |-- monitoring_profile_id: long (nullable = true)
 |-- support_user_id: long (nullable = true)
 |-- type_id: long (nullable = true)
 |-- workgroup_id: long (nullable = true)
 |-- instrumentation_parameter_values: string (nullable = true)
 |-- old_crypted_property_values: string (nullable = true)
 |-- integration_id: string (nullable = true)
 |-- ctworkgroup: string (nu

In [77]:
# Load every row of ct_status_delta over the same JDBC connection.
# The table is passed as an aliased subquery, the form the JDBC reader expects
# when given a query instead of a bare table name.
queryStatusDelta = "(select * from ct_status_delta) as subquery"
ctStatusDeltaDf = spark.read.jdbc(
    url=jdbcUrl,
    table=queryStatusDelta,
    properties=connection_properties,
)
ctStatusDeltaDf.printSchema()


root
 |-- ct_id: long (nullable = true)
 |-- status: string (nullable = true)
 |-- last_status_change: timestamp (nullable = true)
 |-- timestamp: timestamp (nullable = true)



In [78]:
from pyspark.sql.functions import col, count, lit, when, current_timestamp, expr, collect_list, struct
from pyspark.sql.types import StringType, IntegerType
import json

# Step 1 - restrict ct_status_delta to rows whose `timestamp` lies within the
# past hour (relative to the time the query is evaluated).
one_hour_ago = current_timestamp() - expr('INTERVAL 1 HOUR')
filtered_status_delta = ctStatusDeltaDf.where(col("timestamp") >= one_hour_ago)

# Step 2 - attach those recent delta rows to their ct record (ct.id = ct_id).
joinedDf = ctDf.join(
    filtered_status_delta,
    on=ctDf["id"] == filtered_status_delta["ct_id"],
    how="inner",
)

# Step 3 - number of joined (last-hour) rows per ctStatus.
# NOTE(review): this counts joined rows, so a ct with several delta rows in the
# window is counted once per delta row - confirm that is the intended metric.
rowsLastHour = count("*").alias("count_last_hour")
statusCounts = joinedDf.groupBy("ctStatus").agg(rowsLastHour)
#envCounts = joinedDf.groupBy("environment").agg(count("*").alias("count_last_hour"))

# Baseline - number of ct rows per ctStatus across the whole ct snapshot.
rowsTotal = count("*").alias("total_count")
total_statusCounts = ctDf.groupBy("ctStatus").agg(rowsTotal)
#total_envCounts = ctDf.groupBy("environment").agg(count("*").alias("total_count"))

# Show both aggregates, last-hour counts first.
statusCounts.show(5)
#envCounts.show(5)
total_statusCounts.show(5)
#total_envCounts.show(5)



+--------+---------------+
|ctStatus|count_last_hour|
+--------+---------------+
|    down|            927|
|      up|          10175|
+--------+---------------+

+--------+-----------+
|ctStatus|total_count|
+--------+-----------+
|    down|        924|
|      up|      10172|
+--------+-----------+



In [79]:

# 4. Calculate quantity and percentage variations
#
# statusCounts is first projected back to its two base columns. Without this,
# re-running the cell would join a frame that already carries `total_count`
# (from the previous run) against total_statusCounts again, producing two
# `total_count` columns and an ambiguous-reference error in the withColumn
# calls below. With the projection the cell yields the same result no matter
# how many times it is executed.
#
# The join is an inner join on ctStatus, so a status present in only one of
# the two frames does not appear in the result.
statusCounts = (
    statusCounts.select("ctStatus", "count_last_hour")
    .join(total_statusCounts, "ctStatus")
    # Absolute difference between last-hour rows and the ct baseline.
    .withColumn("quantityVariation", col("count_last_hour") - col("total_count"))
    # Same difference expressed as a percentage of the baseline.
    .withColumn("percentageVariation", col("quantityVariation") / col("total_count") * 100)
)
statusCounts.printSchema()
statusCounts.show(5)


root
 |-- ctStatus: string (nullable = true)
 |-- count_last_hour: long (nullable = false)
 |-- total_count: long (nullable = false)
 |-- quantityVariation: long (nullable = false)
 |-- percentageVariation: double (nullable = true)

+--------+---------------+-----------+-----------------+--------------------+
|ctStatus|count_last_hour|total_count|quantityVariation| percentageVariation|
+--------+---------------+-----------+-----------------+--------------------+
|    down|            927|        924|                3|  0.3246753246753247|
|      up|          10175|      10172|                3|0.029492725127801808|
+--------+---------------+-----------+-----------------+--------------------+



In [80]:

#envCounts = envCounts.join(total_envCounts, "environment") \
#    .withColumn("quantityVariation", col("count_last_hour") - col("total_count")) \
#    .withColumn("percentageVariation", col("quantityVariation") / col("total_count") * 100)

#envCounts.printSchema()
#envCounts.show(5)

In [81]:

# 5. Calculate the trend
# Label each status by comparing its last-hour count with its baseline count:
#   more than baseline  -> "up"
#   fewer than baseline -> "down"
#   equal               -> "stable"
lastHour = col("count_last_hour")
baseline = col("total_count")
trendLabel = (
    when(lastHour > baseline, "up")
    .when(lastHour < baseline, "down")
    .otherwise("stable")
)
statusCounts = statusCounts.withColumn("trend", trendLabel)

#envCounts = envCounts.withColumn("trend", when(col("count_last_hour") > col("total_count"), "up")
 #                                 .when(col("count_last_hour") < col("total_count"), "down")
 #                                 .otherwise("stable"))

statusCounts.printSchema()
statusCounts.show(5)

#envCounts.printSchema()
#envCounts.show(5)


root
 |-- ctStatus: string (nullable = true)
 |-- count_last_hour: long (nullable = false)
 |-- total_count: long (nullable = false)
 |-- quantityVariation: long (nullable = false)
 |-- percentageVariation: double (nullable = true)
 |-- trend: string (nullable = false)

+--------+---------------+-----------+-----------------+--------------------+-----+
|ctStatus|count_last_hour|total_count|quantityVariation| percentageVariation|trend|
+--------+---------------+-----------+-----------------+--------------------+-----+
|    down|            927|        924|                3|  0.3246753246753247|   up|
|      up|          10175|      10172|                3|0.029492725127801808|   up|
+--------+---------------+-----------+-----------------+--------------------+-----+

