In [28]:
from pyspark.sql.types import *
from pyspark.sql.functions import *


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [29]:
def is_physical_port(interface):
    """Return True when ``interface`` looks like a physical port name.

    A name is considered physical when it starts with one of the prefixes
    "ge-", "xe-", "re" or "et".

    Args:
        interface: Interface name. ``None`` or an empty string is accepted
            and reported as not physical, so callers that pass values coming
            from nullable Spark columns do not crash.

    Returns:
        bool: True if the name starts with one of the known prefixes.
    """
    # Null column values reach Python as None; treat them as "not physical"
    # instead of raising AttributeError on .startswith().
    if not interface:
        return False
    # str.startswith accepts a tuple and returns True if any prefix matches.
    return interface.startswith(("ge-", "xe-", "re", "et"))


"""
Only the External Interface field is required to calculate the uplink port, so the other fields are eliminated.
This method drops the fields that are not required and returns the list of external interfaces.
"""
@udf(returnType=ArrayType(StringType()))
def get_external_interface(ipsec):
    """Extract the external-interface field from every ipsec entry.

    Args:
        ipsec: Sequence of ipsec entries. The value at position 22 of each
            entry is taken as its external interface (per the notebook's
            own description of this field). May be ``None`` when the
            source column is null.

    Returns:
        list: One value per ipsec entry, in input order. An empty list is
        returned when ``ipsec`` is ``None`` so downstream UDFs can still
        iterate over the result.
    """
    # A null array column arrives as None; iterating it would raise
    # TypeError and fail the whole Spark task.
    if ipsec is None:
        return []
    return [item[22] for item in ipsec]


@udf(returnType=ArrayType(StringType()))
def external_interface_ordering(external_interfaces, svistats):
    """Reorder the external interfaces based on the first svistats entry.

    The first field of the first ``svistats`` entry is used as the reference
    device name. While copying ``external_interfaces`` into a new list, each
    element equal to that name is placed at the front of the list; the
    element that previously held the front position is moved to the end.

    Args:
        external_interfaces: List of external interface names.
        svistats: Sequence of svistats entries; only ``svistats[0][0]`` is read.

    Returns:
        list: The reordered list, or ``external_interfaces`` itself when no
        reference device is available or the reordered list is empty.
    """
    first_dev = None
    if svistats and svistats[0]:
        first_dev = svistats[0][0]
    if not first_dev:
        return external_interfaces

    reordered = []
    for name in external_interfaces:
        if first_dev == name and reordered:
            # Swap: the current head goes to the tail, the match becomes head.
            reordered.append(reordered[0])
            reordered[0] = name
        else:
            reordered.append(name)
    return reordered or external_interfaces

@udf(returnType=BooleanType())
def match_external_interface(external_interfaces, reordered_external_interfaces):
    """Tell whether the reordered external-interface list equals the input one.

    Args:
        external_interfaces: Original list of external interfaces.
        reordered_external_interfaces: List produced by the reordering step.

    Returns:
        bool: Result of comparing the two values with ``==``.
    """
    order_unchanged = external_interfaces == reordered_external_interfaces
    return order_unchanged

"""
Only destinations of broadcast type (those starting with 0.0.0.0) are required for calculating the uplink by the Static Route method.
This method eliminates the ip4routes whose destination is not of that type.
"""

@udf(returnType=ArrayType(StructType([
    StructField("dst", StringType(), False),
    StructField("gw", StringType(), False)
])))
def get_filtered_ip4routes(ip4routes):
    """Keep only the routes whose destination starts with "0.0.0.0".

    Args:
        ip4routes: Sequence of route entries whose first field is the
            destination. May be ``None`` when the source column is null, and
            individual entries or destinations may be ``None`` as well.

    Returns:
        list: The matching route entries, unchanged and in input order.
        Empty when ``ip4routes`` is ``None`` or nothing matches.
    """
    # A null array column arrives as None; return an empty list so the
    # downstream UDF can still iterate over the result.
    if ip4routes is None:
        return []
    return [
        route
        for route in ip4routes
        # Skip null entries / null destinations instead of raising.
        if route is not None
        and route[0] is not None
        and route[0].startswith("0.0.0.0")
    ]

@udf(returnType=StringType())
def uplink_static_route_ipsec(external_interfaces, ip4route, svistats):
    """Pick the uplink port, giving the static route preference over ipsec.

    Step 1 (static route): for every route whose destination starts with
    "0.0.0.0", the first three octets of its gateway are treated as the
    subnet. The first physical sub-interface (a device name containing ".")
    in ``svistats`` that owns an IP in that subnet wins, and its name up to
    the first "." is returned.

    Step 2 (ipsec fallback): otherwise the first external interface whose
    name up to the first "." is a physical port is returned in that
    shortened form.

    Args:
        external_interfaces: List of external interface names (may be None).
        ip4route: Sequence of route entries; field 0 is the destination and
            field 1 the gateway (may be None).
        svistats: Sequence of entries; field 0 is the device name and field 2
            a sequence of IP strings (may be None).

    Returns:
        str: The uplink port name, or "" when none could be determined.
    """
    for route in ip4route or []:
        dst = route[0]
        if not dst or not dst.startswith("0.0.0.0"):
            continue
        gateway = route[1]
        if not gateway:
            # Without a gateway there is no subnet to match against; an empty
            # prefix would otherwise match every IP.
            continue
        # Trailing dot keeps the match on an octet boundary, so subnet
        # "10.0.1" no longer matches addresses such as "10.0.10.5".
        subnet_prefix = ".".join(gateway.split(".")[0:3]) + "."
        for svistat in svistats or []:
            dev = svistat[0]
            # Only physical sub-interfaces (names containing ".") qualify.
            if not dev or "." not in dev or not is_physical_port(dev):
                continue
            for ip in svistat[2] or []:
                if ip and ip.startswith(subnet_prefix):
                    return dev.split(".")[0]

    for external_interface in external_interfaces or []:
        if not external_interface:
            continue
        interface = external_interface.split(".")[0]
        if is_physical_port(interface):
            return interface

    return ""

@udf(returnType=BooleanType())
def match_uplink(uplink_generated, uplink_heuristic):
    """Compare a generated uplink with the first heuristic uplink.

    Args:
        uplink_generated: Uplink name produced in this notebook; only the
            part before the first "." takes part in the comparison.
        uplink_heuristic: Sequence of heuristic uplinks (described in the
            notebook as coming from the storm topology); only element 0
            is read.

    Returns:
        bool: True when the shortened generated name equals
        ``uplink_heuristic[0]``.
    """
    generated_base, _, _ = uplink_generated.partition(".")
    return generated_base == uplink_heuristic[0]

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [30]:
# Bucket root, handed to the Spark reader through its "basePath" option.
basePath = 's3://mist-secorapp-production/'

# Input glob: every directory matching dt=2021-10-1* under the stats prefix.
paths = [
    "s3://mist-secorapp-production/oc-stats-analytics/oc-stats-analytics-production/dt=2021-10-1*/",
]

# Rows whose org_id is in this list are dropped from the input.
org_list = [
    '9777c1a0-6ef6-11e6-8bbf-02e208b2d34f',
    '313cd174-b2e1-40cf-8908-b4ba11a7c85f',
    'e98954f8-7833-43b7-b41a-3032937815d3',
    '9291176a-6e1e-11e5-9cdd-02e208b2d34f',
]

df = (
    spark.read
    .option("basePath", basePath)
    .parquet(*paths)
    .where(~col('org_id').isin(org_list))
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [31]:
# Fetch the device_type == 2 rows whose model is not "SSR" and for which an
# uplink was calculated by the heuristic logic (oc_stats_topology), i.e.
# uplink_heuristic is non-empty.
# NOTE(review): the original note said "SSR gateways", but the filter keeps
# rows whose model is NOT "SSR" - confirm which one is intended.
df_heuristic = (
    df
    .where((col("device_type") == 2) & (col("model") != "SSR"))
    .where(size(col("uplink_heuristic")) > 0)
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [32]:
# Keep only the columns used by the comparison and drop duplicate rows.
df_heuristic = df_heuristic.select(
    "mac",
    "ipsec",
    "ipv4_route",
    "svistats",
    "uplink_heuristic",
).distinct()


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [33]:

# Derive the external-interface list, the filtered routes, the reordered
# external-interface list and a flag telling whether reordering changed
# anything; the raw ipsec / ipv4_route columns are dropped afterwards.
df_heuristic = (
    df_heuristic
    .withColumn("external_interface", get_external_interface(col("ipsec")))
    .withColumn("ip4routes_filtered", get_filtered_ip4routes(col("ipv4_route")))
    .withColumn(
        "external_interface_reorder",
        external_interface_ordering(col("external_interface"), col("svistats")),
    )
    .withColumn(
        "is_equal_external_interface_external_interface_reorder",
        match_external_interface(col("external_interface"), col("external_interface_reorder")),
    )
    .drop("ipsec", "ipv4_route")
)


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [34]:
# Show the rows whose external-interface order changed after reordering the
# IPSEC external interfaces.
(
    df_heuristic
    .select(
        "mac",
        "external_interface",
        "external_interface_reorder",
        "is_equal_external_interface_external_interface_reorder",
    )
    .where(col("is_equal_external_interface_external_interface_reorder") == False)
    .distinct()
    .show(truncate=False)
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------------+--------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+------------------------------------------------------+
|mac         |external_interface                                                                                |external_interface_reorder                                                                        |is_equal_external_interface_external_interface_reorder|
+------------+--------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+------------------------------------------------------+
|94bf94b65501|[ge-0/0/2, ge-0/0/2.0, ge-0/0/2.0, ge-0/0/2.0, ge-0/0/2.0, ge-0/0/2.0, ge-0/0/2.0, , ]            |[ge-0/0/2.0, ge-0/0/2, ge-0/0/2.0, ge-0/0/2.0, ge-0/0/2.0, ge-0/0/2.0, ge-0/0/2.0, 

In [35]:
# Compute the uplink twice: once from the original external-interface order
# and once from the reordered one.
df_heuristic = (
    df_heuristic
    .withColumn(
        "uplink_staticroute",
        uplink_static_route_ipsec(
            col("external_interface"), col("ip4routes_filtered"), col("svistats")
        ),
    )
    .withColumn(
        "uplink_staticroute_reorder",
        uplink_static_route_ipsec(
            col("external_interface_reorder"), col("ip4routes_filtered"), col("svistats")
        ),
    )
)

# Mark the frame holding both uplink columns for caching.
df_heuristic.persist()

df_heuristic = df_heuristic.select(
    "mac",
    "external_interface",
    "ip4routes_filtered",
    "svistats",
    "uplink_heuristic",
    "uplink_staticroute",
    "uplink_staticroute_reorder",
)

# Flag whether each generated uplink agrees with the heuristic uplink.
df_heuristic = (
    df_heuristic
    .withColumn(
        "is_staicroute_match",
        match_uplink(col("uplink_staticroute"), col("uplink_heuristic")),
    )
    .withColumn(
        "is_staicroute_reoder_match",
        match_uplink(col("uplink_staticroute_reorder"), col("uplink_heuristic")),
    )
)



FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [None]:
# Show the rows where the uplink matched the heuristic one before reordering
# the IPSEC external interfaces but no longer matches after reordering.
(
    df_heuristic
    .where(
        (col("is_staicroute_match") == True)
        & (col("is_staicroute_reoder_match") == False)
    )
    .distinct()
    .show(truncate=False)
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [None]:
# Final cell: prints a fixed marker string.
print("End")