# Event pipeline demo visualization

In [1]:
# Init: import the data-object classes and create the Spark session
from multimno.core.spark_session import generate_spark_session
from multimno.core.configuration import parse_configuration
from multimno.core.data_objects.bronze.bronze_event_data_object import BronzeEventDataObject
from multimno.core.data_objects.silver.silver_event_data_object import SilverEventDataObject

# Build the Spark session from the parsed testing configuration file.
config_path = "/opt/dev/tests/test_resources/testing_spark.ini"
spark_config = parse_configuration(config_path)
spark = generate_spark_session(spark_config)

24/01/22 10:33:54 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


## Visualize synthetic event data (bronze)

In [2]:
# Location of the bronze events dataset.
# NOTE(review): this path is rooted at /opt/dev/sample_data, whereas the silver
# data is read from /opt/data/lakehouse — confirm both roots are intended.
bronze_events_path = "/opt/dev/sample_data/lakehouse/bronze/mno_events"

# Wrap the path in the bronze data object; read() is called before .df is used.
bdo = BronzeEventDataObject(spark, bronze_events_path)
bdo.read()

# Preview the first 10 rows.
bdo.df.show(n=10)

+--------------------+-------------------+---+---------------+--------+---------+---------+----+-----+---+
|             user_id|          timestamp|mcc|        cell_id|latitude|longitude|loc_error|year|month|day|
+--------------------+-------------------+---+---------------+--------+---------+---------+----+-----+---+
|[59 57 59 31 4E 5...|2023-01-01T00:00:00|154|864271311346020|    null|     null|     null|2023|    1|  1|
|[59 57 59 31 4E 5...|2023-01-01T19:12:00|154|739270974630659|    null|     null|     null|2023|    1|  1|
|[59 32 51 77 4E 4...|2023-01-01T00:00:00|154|956403845718259|    null|     null|     null|2023|    1|  1|
|[59 32 51 77 4E 4...|2023-01-01T19:12:00|154|322967902442235|    null|     null|     null|2023|    1|  1|
|[59 32 51 79 4E 6...|2023-01-01T00:00:00|154|582985296058444|    null|     null|     null|2023|    1|  1|
|[59 32 51 79 4E 6...|2023-01-01T19:12:00|154|956101528121349|    null|     null|     null|2023|    1|  1|
|[59 57 59 31 4E 5...|2023-01-04T04:4

In [3]:
# Total number of rows in the bronze events DataFrame loaded in the previous cell.
bdo.df.count()

15

## Visualize silver event data

In [4]:
# Root of the silver layer; also reused by the quality-metrics cells further down.
silver_path = "/opt/data/lakehouse/silver"
silver_events_path = f"{silver_path}/mno_events"

# Wrap the path in the silver data object; read() is called before .df is used.
sdo = SilverEventDataObject(spark, silver_events_path)
sdo.read()

# Preview the first 10 rows.
sdo.df.show(n=10)

+--------------------+-------------------+---+---------------+--------+---------+---------+----+-----+---+
|             user_id|          timestamp|mcc|        cell_id|latitude|longitude|loc_error|year|month|day|
+--------------------+-------------------+---+---------------+--------+---------+---------+----+-----+---+
|[59 32 51 77 4E 4...|2023-01-01 08:00:00|154|956403845718259|    null|     null|     null|2023|    1|  1|
|[59 32 51 79 4E 6...|2023-01-01 08:00:00|154|582985296058444|    null|     null|     null|2023|    1|  1|
|[59 57 59 31 4E 5...|2023-01-01 08:00:00|154|864271311346020|    null|     null|     null|2023|    1|  1|
|[59 32 51 77 4E 4...|2023-01-02 03:12:00|154|322967902442235|    null|     null|     null|2023|    1|  2|
|[59 32 51 79 4E 6...|2023-01-02 03:12:00|154|956101528121349|    null|     null|     null|2023|    1|  2|
|[59 57 59 31 4E 5...|2023-01-02 03:12:00|154|739270974630659|    null|     null|     null|2023|    1|  2|
|[59 32 51 77 4E 4...|2023-01-04 12:4

In [5]:
# Total number of rows in the silver events DataFrame loaded in the previous cell.
sdo.df.count()

11

## Event Quality checks

In [6]:
# Parquet dataset holding the by-column syntactic quality metrics (under silver_path).
input_path = f"{silver_path}/event_syntactic_quality_metrics_by_column"

df = spark.read.load(input_path, format='parquet')
# Print up to 20 rows with full, untruncated column values.
df.show(n=20, truncate=False)

+--------------------------+-----------------+---------------+---------+-------------+----------------------+-----+
|result_timestamp          |data_period_start|data_period_end|variable |type_of_error|type_of_transformation|value|
+--------------------------+-----------------+---------------+---------+-------------+----------------------+-----+
|2024-01-22 10:33:30.852916|2023-01-01       |2023-01-05     |timestamp|null         |1                     |11   |
|2024-01-22 10:33:30.852916|2023-01-01       |2023-01-05     |timestamp|2            |null                  |1    |
|2024-01-22 10:33:30.852916|2023-01-01       |2023-01-05     |timestamp|3            |null                  |0    |
|2024-01-22 10:33:30.852916|2023-01-01       |2023-01-05     |timestamp|9            |null                  |11   |
|2024-01-22 10:33:30.852916|2023-01-01       |2023-01-05     |null     |6            |null                  |0    |
|2024-01-22 10:33:30.852916|2023-01-01       |2023-01-05     |null     |

In [7]:
# Parquet dataset holding the frequency-distribution quality metrics (under silver_path).
input_path = f"{silver_path}/event_syntactic_quality_metrics_frequency_distribution"

df = spark.read.load(input_path, format='parquet')
# Default show(): prints the first rows and truncates long values.
df.show()

+---------------+--------------------+-----------------+---------------+----------+
|        cell_id|             user_id|initial_frequency|final_frequency|      date|
+---------------+--------------------+-----------------+---------------+----------+
|322967902442235|[59 32 51 77 4E 4...|                1|              1|2023-01-01|
|956403845718259|[59 32 51 77 4E 4...|                1|              1|2023-01-01|
|739270974630659|[59 57 59 31 4E 5...|                1|              1|2023-01-01|
|864271311346020|[59 57 59 31 4E 5...|                1|              1|2023-01-01|
|956101528121349|[59 32 51 79 4E 6...|                1|              1|2023-01-01|
|582985296058444|[59 32 51 79 4E 6...|                1|              1|2023-01-01|
|543163741337240|[59 32 51 79 4E 6...|                1|              1|2023-01-02|
|725540903844898|[59 32 51 77 4E 4...|                1|              1|2023-01-02|
|700894158774042|[59 57 59 31 4E 5...|                1|              0|2023