In [42]:
from pyspark.sql.types import *
from pyspark.sql.functions import *


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [43]:
def is_physical_port(interface):
    """Tell whether an interface name carries one of the physical-port prefixes.

    The recognised prefixes are "ge-", "xe-", "re" and "et".

    Args:
        interface: interface name as a string.

    Returns:
        True when the name starts with any recognised prefix, False otherwise.
    """
    physical_prefixes = ("ge-", "xe-", "re", "et")
    return interface.startswith(physical_prefixes)


"""
Only the External Interface field is required to calculate the uplink port, so the other fields are eliminated.
This method drops the fields that are not required and returns the list of external interfaces.
"""
@udf(returnType= ArrayType(StringType()))
def get_external_interface(ipsec):
	"""Extract the external interface of every ipsec record.

	Args:
		ipsec: array of ipsec records, or None when the column is null.

	Returns:
		List holding positional field 22 (the external interface) of each
		record, in input order. An empty list when ipsec is null.
	"""
	# A null array reaches the UDF as None; iterating it would fail the whole task.
	if ipsec is None:
		return []
	return [item[22] for item in ipsec]

"""
Only destinations of type broadcast (those starting with 0.0.0.0) are required for calculating the uplink by the Static Route method.
This method eliminates the ip4routes whose destination is not of that type.
"""

@udf(returnType= ArrayType(StructType([
	StructField("dst", StringType(), False),
	StructField("gw", StringType(), False)
])))
def get_filtered_ip4routes(ip4routes):
	"""Keep only the routes whose destination starts with "0.0.0.0".

	Args:
		ip4routes: array of route records whose first field is the
			destination, or None when the column is null.

	Returns:
		List of the matching route records, in input order. An empty list
		when ip4routes is null. Records with a null destination are dropped.
	"""
	# A null array or a null destination would otherwise raise inside the UDF.
	if ip4routes is None:
		return []
	return [
		item for item in ip4routes
		if item[0] is not None and item[0].startswith("0.0.0.0")
	]


def _uplink_from_ipsec(external_interfaces):
	"""Return the first IPsec external interface (unit suffix removed) that is a physical port, else ""."""
	for external_interface in external_interfaces or []:
		if not external_interface:
			# Null or empty names cannot be a physical port.
			continue
		interface = external_interface.split(".")[0]
		if is_physical_port(interface):
			return interface
	return ""


def _uplink_from_static_route(ip4route, svistats):
	"""Return the physical port that has an address sharing the first three octets with the gateway of a 0.0.0.0 route, else ""."""
	for item in ip4route or []:
		dst = item[0]
		if not dst or not dst.startswith("0.0.0.0"):
			continue
		gw = item[1]
		if not gw:
			# Without a gateway there is nothing to match interface addresses against.
			continue
		# The trailing "." stops a prefix such as "10.0.1" from also matching "10.0.10.x".
		subnet_prefix = ".".join(gw.split(".")[0:3]) + "."
		for svistat in svistats or []:
			if svistat is None:
				continue
			dev = svistat[0]
			# Only physical sub-interfaces (name contains a ".") are candidates.
			if not dev or "." not in dev or not is_physical_port(dev):
				continue
			for ip in svistat[2] or []:
				if ip and ip.startswith(subnet_prefix):
					return dev.split(".")[0]
	return ""


def _first_svistat_is_st0(svistats):
	"""Return True when the first svistats entry has a device name starting with "st0"."""
	if not svistats or svistats[0] is None:
		return False
	dev = svistats[0][0]
	return bool(dev) and dev.startswith("st0")


@udf(returnType = StringType())
def uplink_ipsec_static_route(external_interfaces, ip4route, svistats):
	"""Calculate the uplink port, giving higher preference to IPsec.

	Args:
		external_interfaces: list of IPsec external interface names.
		ip4route: list of (dst, gw) route records.
		svistats: list of (dev, vlan, ips) records.

	Returns:
		The port found through IPsec; otherwise the port found through the
		static route; otherwise "".
	"""
	return _uplink_from_ipsec(external_interfaces) or _uplink_from_static_route(ip4route, svistats)


@udf(returnType = StringType())
def uplink_static_route_ipsec(external_interfaces, ip4route, svistats):
	"""Calculate the uplink port, giving higher preference to the static route.

	Args:
		external_interfaces: list of IPsec external interface names.
		ip4route: list of (dst, gw) route records.
		svistats: list of (dev, vlan, ips) records.

	Returns:
		The port found through the static route; otherwise the port found
		through IPsec; otherwise "".
	"""
	return _uplink_from_static_route(ip4route, svistats) or _uplink_from_ipsec(external_interfaces)


@udf(returnType = StringType())
def uplink_static_route_ipsec_st0_handling(external_interfaces, ip4route, svistats):
	"""Calculate the uplink port with static-route preference, except when st0 comes first.

	When the first svistats entry is an "st0" device only the IPsec lookup is
	used; otherwise the static route is tried first and IPsec second.

	Args:
		external_interfaces: list of IPsec external interface names.
		ip4route: list of (dst, gw) route records.
		svistats: list of (dev, vlan, ips) records; may be empty or null.

	Returns:
		The selected port name, or "" when nothing matches.
	"""
	if _first_svistat_is_st0(svistats):
		return _uplink_from_ipsec(external_interfaces)
	return _uplink_from_static_route(ip4route, svistats) or _uplink_from_ipsec(external_interfaces)

@udf(returnType = BooleanType())
def match_uplink(uplink_generated, uplink_heuristic):
    """Compare a generated uplink with the first entry of uplink_heuristic.

    Args:
        uplink_generated: uplink port name produced by one of the uplink UDFs.
        uplink_heuristic: array of uplink port names (the uplink generated by
            the topology).

    Returns:
        True when the generated name, cut at its first ".", equals
        uplink_heuristic[0]. False when either input is null or the
        heuristic array is empty.
    """
    # An empty or null heuristic has no first element to compare against.
    if uplink_generated is None or not uplink_heuristic:
        return False
    return uplink_generated.split(".")[0] == uplink_heuristic[0]


@udf(returnType = BooleanType())
def is_st0_interface(svistats):
    """Flag SRX devices whose first svistats entry is an st0 interface.

    Args:
        svistats: array of (dev, vlan, ips) records; may be empty or null.

    Returns:
        True when the device name of the first record starts with "st0".
        False otherwise, including when svistats is empty or null or the
        first record or its device name is null.
    """
    if not svistats or svistats[0] is None:
        return False
    first_dev = svistats[0][0]
    return first_dev is not None and first_dev.startswith("st0")

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [44]:
# Value passed as the "basePath" option of the parquet reader below.
basePath = 's3://mist-secorapp-production/'

# Partitions to read: all of dt=2021-10-13, plus the hr=1* and hr=2* globs of dt=2021-10-12.
paths = [
    "s3://mist-secorapp-production/oc-stats-analytics/oc-stats-analytics-production/dt=2021-10-13/",
    "s3://mist-secorapp-production/oc-stats-analytics/oc-stats-analytics-production/dt=2021-10-12/hr=1*/",
    "s3://mist-secorapp-production/oc-stats-analytics/oc-stats-analytics-production/dt=2021-10-12/hr=2*/",
]

# Rows belonging to these org_ids are excluded from the analysis.
org_list = [
    '9777c1a0-6ef6-11e6-8bbf-02e208b2d34f',
    '313cd174-b2e1-40cf-8908-b4ba11a7c85f',
    'e98954f8-7833-43b7-b41a-3032937815d3',
    '9291176a-6e1e-11e5-9cdd-02e208b2d34f',
]

df = (
    spark.read
    .option("basePath", basePath)
    .parquet(*paths)
    .where(~col('org_id').isin(org_list))
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [45]:
# Number of distinct macs among rows with device_type == 2 and model != "SSR".
(
    df.where((col("device_type") == 2) & (col("model") != "SSR"))
    .select("mac", "uplink_heuristic")
    .distinct()
    .groupBy("mac")
    .agg(count("uplink_heuristic").alias("count_record"))
    .count()
)




FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

422

In [46]:
# macs (device_type == 2, model != "SSR") that report more than one distinct uplink_heuristic value.
macs = [
    row["mac"]
    for row in (
        df.where((col("device_type") == 2) & (col("model") != "SSR"))
        .select("mac", "uplink_heuristic")
        .distinct()
        .groupBy("mac")
        .agg(count("uplink_heuristic").alias("count_record"))
        .where(col("count_record") > 1)
        .select("mac")
        .collect()
    )
]


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [47]:
# Print the macs that reported more than one distinct uplink_heuristic value
# (an empty heuristic list counts as one of the distinct values).
print("SRX Macs which have two heuristic uplink:", macs)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

SRX Macs which have two heuristic uplink: ['4071836a8e00', '0c81262978c8', '4c96140c5d00', 'e8b6c2cdf1c0', '00c52c3cd7a4', '2c2131520480']

In [48]:
# Distinct (mac, uplink_heuristic) pairs for the macs that have more than one heuristic uplink.
(
    df.where((col("device_type") == 2) & (col("model") != "SSR"))
    .select("mac", "uplink_heuristic")
    .distinct()
    .where(col("mac").isin(macs))
    .orderBy("mac")
    .show(1000, truncate=False)
)


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------------+----------------+
|mac         |uplink_heuristic|
+------------+----------------+
|00c52c3cd7a4|[ge-5/0/0]      |
|00c52c3cd7a4|[ge-0/0/0]      |
|0c81262978c8|[ge-0/0/0]      |
|0c81262978c8|[]              |
|2c2131520480|[ge-0/0/0]      |
|2c2131520480|[]              |
|4071836a8e00|[]              |
|4071836a8e00|[reth0]         |
|4071836a8e00|[reth2]         |
|4c96140c5d00|[ge-0/0/0]      |
|4c96140c5d00|[]              |
|e8b6c2cdf1c0|[ge-0/0/0]      |
|e8b6c2cdf1c0|[]              |
+------------+----------------+

In [49]:
# Rows with device_type == 2, model != "SSR" and a non-empty uplink_heuristic, reduced to the
# distinct columns needed for the uplink calculation, then enriched with the UDF-derived columns.
df_srx_gateway = (
    df.where((col("device_type") == 2) & (col("model") != "SSR"))
    .where(size(col("uplink_heuristic")) > 0)
    .select("mac", "ipsec", "ipv4_route", "svistats", "uplink_heuristic")
    .distinct()
    .withColumn("external_interface", get_external_interface(col("ipsec")))
    .withColumn("ip4routes_filtered", get_filtered_ip4routes(col("ipv4_route")))
    .withColumn("is_starts_with_st0", is_st0_interface("svistats"))
    .drop("ipsec", "ipv4_route")
)

# Show the resulting schema.
df_srx_gateway.printSchema()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

root
 |-- mac: string (nullable = true)
 |-- svistats: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- dev: string (nullable = true)
 |    |    |-- vlan: integer (nullable = true)
 |    |    |-- ips: array (nullable = true)
 |    |    |    |-- element: string (containsNull = true)
 |-- uplink_heuristic: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- external_interface: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- ip4routes_filtered: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- dst: string (nullable = false)
 |    |    |-- gw: string (nullable = false)
 |-- is_starts_with_st0: boolean (nullable = true)

In [50]:
# One group per mac, so the row count of the grouped frame is the number of macs in df_srx_gateway.
total_macs_valid_heuristic = (
    df_srx_gateway
    .select("mac", "uplink_heuristic")
    .distinct()
    .groupBy("mac")
    .agg(count("uplink_heuristic").alias("count_record"))
    .count()
)

print("Total macs count for valid heuristic:", total_macs_valid_heuristic)


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Total macs count for valid heuristic: 326

In [51]:
# macs in df_srx_gateway that carry more than one distinct uplink_heuristic value.
# df_srx_gateway was already restricted to device_type == 2 / model != "SSR" when it was built and
# no longer exposes those columns, so that filter is not repeated here.
total_macs_valid_heuristic_duplicate = [
    row["mac"]
    for row in (
        df_srx_gateway
        .select("mac", "uplink_heuristic")
        .distinct()
        .groupBy("mac")
        .agg(count("uplink_heuristic").alias("count_record"))
        .where(col("count_record") > 1)
        .select("mac")
        .collect()
    )
]

if total_macs_valid_heuristic_duplicate:
    print("Total macs count which have more than one record for valid heuristic:", len(total_macs_valid_heuristic_duplicate))
    print("Macs which have more than one record for valid heuristic:", total_macs_valid_heuristic_duplicate)
    # Full rows of the affected macs, for manual inspection.
    (
        df_srx_gateway
        .where(col("mac").isin(total_macs_valid_heuristic_duplicate))
        .distinct()
        .orderBy("mac")
        .show(1000, truncate=False)
    )


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Total macs count which have more than one record for valid heuristic: 2
Macs which have more than one record for valid heuristic: ['4071836a8e00', '00c52c3cd7a4']
+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------+------------------+----------------------------+------------------+
|mac         |svistats                                                                                                                                                                                                                                                     

In [52]:
# Inputs shared by the three uplink UDFs.
uplink_udf_args = (col("external_interface"), col("ip4routes_filtered"), col("svistats"))

# (output column, UDF that fills it), applied in this order.
for uplink_column, uplink_udf in (
    ("uplink_ipsec", uplink_ipsec_static_route),
    ("uplink_staticroute", uplink_static_route_ipsec),
    ("uplink_staticroute_st0", uplink_static_route_ipsec_st0_handling),
):
    df_srx_gateway = df_srx_gateway.withColumn(uplink_column, uplink_udf(*uplink_udf_args))

# Mark the enriched frame for caching; the later cells query it repeatedly.
df_srx_gateway.persist()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

DataFrame[mac: string, svistats: array<struct<dev:string,vlan:int,ips:array<string>>>, uplink_heuristic: array<string>, external_interface: array<string>, ip4routes_filtered: array<struct<dst:string,gw:string>>, is_starts_with_st0: boolean, uplink_ipsec: string, uplink_staticroute: string, uplink_staticroute_st0: string]

In [53]:
# Fix the column order, then flag whether each generated uplink equals the heuristic uplink.
df_srx_gateway = (
    df_srx_gateway
    .select(
        "mac",
        "external_interface",
        "ip4routes_filtered",
        "svistats",
        "uplink_heuristic",
        "uplink_ipsec",
        "uplink_staticroute",
        "is_starts_with_st0",
        "uplink_staticroute_st0",
    )
    .withColumn("is_ipsec_match", match_uplink(col("uplink_ipsec"), col("uplink_heuristic")))
    .withColumn("is_staicroute_match", match_uplink(col("uplink_staticroute"), col("uplink_heuristic")))
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [54]:
# Print the schema of df_srx_gateway after the uplink and match columns were added.
df_srx_gateway.printSchema()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

root
 |-- mac: string (nullable = true)
 |-- external_interface: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- ip4routes_filtered: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- dst: string (nullable = false)
 |    |    |-- gw: string (nullable = false)
 |-- svistats: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- dev: string (nullable = true)
 |    |    |-- vlan: integer (nullable = true)
 |    |    |-- ips: array (nullable = true)
 |    |    |    |-- element: string (containsNull = true)
 |-- uplink_heuristic: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- uplink_ipsec: string (nullable = true)
 |-- uplink_staticroute: string (nullable = true)
 |-- is_starts_with_st0: boolean (nullable = true)
 |-- uplink_staticroute_st0: string (nullable = true)
 |-- is_ipsec_match: boolean (nullable = true)
 |-- is_staicroute_match: boolean (nullable = true)

In [55]:
# Rows where the IPsec-first uplink matches the heuristic but the static-route-first uplink does not.
df_srx_gateway_swap = df_srx_gateway.where(
    ~col("is_staicroute_match") & col("is_ipsec_match")
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [56]:
# Side-by-side view of the three uplink values for the swapped rows.
(
    df_srx_gateway_swap
    .select(
        "mac",
        col("uplink_heuristic").alias("Uplink By Topology"),
        col("uplink_staticroute").alias("Uplink By provding higher priority to Staticroute"),
        col("uplink_ipsec").alias("Uplink By provding higher priority to Ipsec"),
    )
    .distinct()
    .show(1000, truncate=False)
)


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+---+------------------+-------------------------------------------------+-------------------------------------------+
|mac|Uplink By Topology|Uplink By provding higher priority to Staticroute|Uplink By provding higher priority to Ipsec|
+---+------------------+-------------------------------------------------+-------------------------------------------+
+---+------------------+-------------------------------------------------+-------------------------------------------+

In [57]:
# Number of distinct macs present in the swap frame.
swapped_macs = df_srx_gateway_swap.select("mac").distinct()
mismatch_mac_count = swapped_macs.count()
print("Total mac count for which uplink port swap happened:", mismatch_mac_count)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Total mac count for which uplink port swap happened: 0

In [58]:
# Share of macs with a valid heuristic whose uplink port was swapped, as a percentage.
swap_percentage = mismatch_mac_count * 100 / total_macs_valid_heuristic
print("%swap happened:", swap_percentage)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

%swap happened: 0.0

In [59]:
# Distinct (mac, site, org) triples for the rows with device_type == 2, model != "SSR" and a
# non-empty heuristic; mac is renamed so the join below has no ambiguous column.
df_mac = (
    df.where((col("device_type") == 2) & (col("model") != "SSR"))
    .where(size(col("uplink_heuristic")) > 0)
    .select(col("mac").alias("mac_id"), "site_id", "org_id")
    .distinct()
)

# Distinct uplink values of the swapped rows.
df_srx_gateway_swap_mac = (
    df_srx_gateway_swap
    .select("mac", "uplink_heuristic", "uplink_ipsec", "uplink_staticroute")
    .distinct()
)

# Attach site_id / org_id to every swapped mac and display the result.
(
    df_srx_gateway_swap_mac
    .join(df_mac, df_srx_gateway_swap_mac["mac"] == df_mac["mac_id"], "left_outer")
    .drop("mac_id")
    .select(
        "mac",
        col("uplink_heuristic").alias("Uplink By Topology"),
        col("uplink_staticroute").alias("Uplink By provding higher priority to Staticroute"),
        col("uplink_ipsec").alias("Uplink By provding higher priority to Ipsec"),
        "site_id",
        "org_id",
    )
    .show(1000, truncate=False)
)





FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+---+------------------+-------------------------------------------------+-------------------------------------------+-------+------+
|mac|Uplink By Topology|Uplink By provding higher priority to Staticroute|Uplink By provding higher priority to Ipsec|site_id|org_id|
+---+------------------+-------------------------------------------------+-------------------------------------------+-------+------+
+---+------------------+-------------------------------------------------+-------------------------------------------+-------+------+

In [60]:
# Rows where the uplink calculated with static-route preference differs from the one calculated
# with IPsec preference. This covers both:
# 1. Cases that are correct when the static route gets higher priority than IPsec.
# 2. Cases where traffic passes through st0.x interfaces and the static route yields an incorrect uplink.
(
    df_srx_gateway
    .select(
        "mac",
        "external_interface",
        "ip4routes_filtered",
        "svistats",
        "uplink_heuristic",
        "uplink_ipsec",
        "uplink_staticroute",
        "uplink_staticroute_st0",
    )
    .where(col("uplink_ipsec") != col("uplink_staticroute"))
    .where(size(col("ip4routes_filtered")) > 0)
    .distinct()
    .show(100, truncate=False)
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------------+-------------------------------------------------------------------+-----------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [61]:
# Summary of the step above: rows where the static-route-first uplink matches the heuristic uplink
# but the IPsec-first uplink does not.
(
    df_srx_gateway
    .where(col("is_staicroute_match") & ~col("is_ipsec_match"))
    .select(
        "mac",
        col("uplink_heuristic").alias("Uplink By Topology"),
        col("uplink_staticroute").alias("Uplink By provding higher priority to Staticroute"),
        col("uplink_ipsec").alias("Uplink By provding higher priority to Ipsec"),
        col("uplink_staticroute_st0").alias("Uplink By calculating by Ipsec for st0.x interfaces"),
    )
    .distinct()
    .show(1000, truncate=False)
)


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------------+------------------+-------------------------------------------------+-------------------------------------------+---------------------------------------------------+
|mac         |Uplink By Topology|Uplink By provding higher priority to Staticroute|Uplink By provding higher priority to Ipsec|Uplink By calculating by Ipsec for st0.x interfaces|
+------------+------------------+-------------------------------------------------+-------------------------------------------+---------------------------------------------------+
|045c6c1722d9|[reth2]           |reth2                                            |reth0                                      |reth2                                              |
|94f7ad24d960|[ge-0/0/2]        |ge-0/0/2                                         |ge-0/0/5                                   |ge-0/0/5                                           |
|4c6d5824a310|[ge-0/0/14]       |ge-0/0/14                                        |ge-0/0/15        

In [62]:
# Cases where traffic passes through st0.x and giving higher preference to the static route
# calculates an incorrect uplink.
# These cases will be handled by https://mistsys.atlassian.net/browse/MIST-52729
(
    df_srx_gateway
    .where(
        col("is_staicroute_match")
        & ~col("is_ipsec_match")
        & col("is_starts_with_st0")
    )
    .select(
        "mac",
        col("uplink_heuristic").alias("Uplink By Topology"),
        col("uplink_staticroute").alias("Uplink By provding higher priority to Staticroute"),
        col("uplink_ipsec").alias("Uplink By provding higher priority to Ipsec"),
        col("uplink_staticroute_st0").alias("Uplink By calculating by Ipsec for st0.x interfaces"),
    )
    .distinct()
    .show(1000, truncate=False)
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------------+------------------+-------------------------------------------------+-------------------------------------------+---------------------------------------------------+
|mac         |Uplink By Topology|Uplink By provding higher priority to Staticroute|Uplink By provding higher priority to Ipsec|Uplink By calculating by Ipsec for st0.x interfaces|
+------------+------------------+-------------------------------------------------+-------------------------------------------+---------------------------------------------------+
|94f7ad24d960|[ge-0/0/2]        |ge-0/0/2                                         |ge-0/0/5                                   |ge-0/0/5                                           |
|4c6d5824a310|[ge-0/0/14]       |ge-0/0/14                                        |ge-0/0/15                                  |ge-0/0/15                                          |
+------------+------------------+-------------------------------------------------+-----------------

In [63]:
# Rows whose uplink changes once the st0.x handling is applied.
# Used to check whether the fix impacts the current implementation; it seems the fix only
# applies to the incorrect uplinks detected by the static route.
(
    df_srx_gateway
    .where(col("uplink_staticroute") != col("uplink_staticroute_st0"))
    .select(
        "mac",
        "external_interface",
        "svistats",
        "uplink_heuristic",
        "uplink_staticroute",
        "uplink_staticroute_st0",
    )
    .distinct()
    .show(1000, truncate=False)
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------------+-----------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [64]:
# Marker printed when the final cell of the notebook runs.
print("End")

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

End