# Data enginner challenge


In [121]:
import pyspark

spark = pyspark.sql.SparkSession.builder.appName("inconsistences_report").getOrCreate()
CURRENT_DAY= '2024-12-03'

In [122]:
import pyspark.sql.functions as spark_f

def flatten_column(df,column):
    sub_columns = df.select(f"{column}.*").columns
    for sub_column in sub_columns:
        df = df.withColumn(f"{column}_{sub_column}", spark_f.col(f"{column}.{sub_column}"))
    return df.drop(column)

def histogram(df, column,title):
    import plotly.express as px
    fig = px.histogram(
        df,
        x=column,
        title=title,
    )
    fig.show()

def pie_chart(df, column, title, category_orders=None):
    import plotly.express as px
    df = df.groupby(column).count().rename(columns={df.columns[0]: "count"}).reset_index().sort_values(column)
    # print(df.head())
    fig = px.pie(
        df,
        names=column,
        values="count",
        category_orders = category_orders,
        title=title,
    )
    # fig.update_xaxes(categoryorder="category ascending")
    fig.show()

In [123]:
raw_allowance_backend_table = spark.read.csv("../data/allowance_backend_table.csv", header=True)
raw_allowance_events = spark.read.json("../data/allowance_events.json", multiLine = True)
raw_payments_schedule_backend_table = spark.read.csv("../data/payments_schedule_backend_table.csv", header=True)

### cleaned allowance events

In [124]:
allowance_events = flatten_column(raw_allowance_events, "allowance")
allowance_events = flatten_column(allowance_events, "allowance_scheduled")
allowance_events = flatten_column(allowance_events, "event")
allowance_events = flatten_column(allowance_events, "user")

print(allowance_events.dtypes)
cleaned_events = allowance_events.withColumn(
    "event_timestamp", allowance_events.event_timestamp.cast("timestamp")
).alias("cleaned_events")

cleaned_events = cleaned_events.dropDuplicates()
cleaned_events.show()


# payments_schedule_backend_table.show()

[('allowance_amount', 'bigint'), ('allowance_scheduled_day', 'string'), ('allowance_scheduled_frequency', 'string'), ('event_name', 'string'), ('event_timestamp', 'string'), ('user_id', 'string')]
+----------------+-----------------------+-----------------------------+-----------------+-------------------+--------------------+
|allowance_amount|allowance_scheduled_day|allowance_scheduled_frequency|       event_name|    event_timestamp|             user_id|
+----------------+-----------------------+-----------------------------+-----------------+-------------------+--------------------+
|              20|               saturday|                       weekly|allowance.created|2024-11-28 07:10:16|38f8d838-ea08-4fd...|
|               5|          fifteenth_day|                      monthly|allowance.created|2024-09-05 13:30:38|9748cdad-69a4-400...|
|               5|               saturday|                       weekly|allowance.created|2024-10-21 17:27:50|8252c070-2698-49d...|
|          

### Cleaned allowance bacckend table

doing some cleaning and selecting only the enable allowances to build the `cleaned_backend_table`

In [125]:
allowance_backend_table = raw_allowance_backend_table.withColumnRenamed(
    "uuid", "user_id"
)
# to see all possible values of column status
allowance_backend_table.select("status").dropDuplicates().show()

# get only the enabled rows
allowance_backend_table = allowance_backend_table.filter(
    allowance_backend_table.status == "enabled"
)
allowance_backend_table = allowance_backend_table.withColumn(
    "corrected_updated_at",
    spark_f.when(
        spark_f.rlike(
            allowance_backend_table.updated_at,
            spark_f.lit(r"\d+-\d+-\d+T\d+:\d+:\d+.\d+Z"),
        ),
        spark_f.to_timestamp(allowance_backend_table.updated_at),
    )
    .otherwise(spark_f.from_unixtime(allowance_backend_table.updated_at))
    .cast("timestamp"),
)
allowance_backend_table = allowance_backend_table.withColumn(
    "creation_date",
    spark_f.from_unixtime(allowance_backend_table.creation_date).cast("timestamp"),
)


cleaned_backend_table = allowance_backend_table.drop("updated_at").withColumnRenamed(
    "corrected_updated_at", "updated_at"
).alias("cleaned_backend_table")
print(cleaned_backend_table.dtypes)
cleaned_backend_table.show(truncate=False)

+--------+
|  status|
+--------+
| enabled|
|disabled|
+--------+

[('user_id', 'string'), ('creation_date', 'timestamp'), ('frequency', 'string'), ('day', 'string'), ('next_payment_day', 'string'), ('status', 'string'), ('updated_at', 'timestamp')]
+------------------------------------+-------------------+---------+-------------+----------------+-------+--------------------------+
|user_id                             |creation_date      |frequency|day          |next_payment_day|status |updated_at                |
+------------------------------------+-------------------+---------+-------------+----------------+-------+--------------------------+
|30f4e25e-3e37-462e-8c3c-42f24f54350f|2024-08-28 06:51:49|monthly  |fifteenth_day|15              |enabled|2024-10-15 05:00:41.445627|
|6da398ad-079d-49b9-8668-6d7ce4d22683|2024-08-26 05:10:29|monthly  |fifteenth_day|15              |enabled|2024-08-26 05:10:29       |
|2d30fe2d-6c32-4b8a-a19b-906184f64f62|2024-11-11 04:12:39|monthly  |fifteen

## truth backend table

Building the allowance_backend_table from the events to compare with the actual allowance_backend_table:

In [126]:
# We have events of creation and edition, so we need to get the most recent event for each user to get
# a snapshot of what the table allowance_backend_table should be
from pyspark.sql.window import Window

# user_window = Window.partitionBy("user_id").orderBy(spark_f.desc("event_timestamp"))

# cleaned_events.show()
last_event_by_user = (
    cleaned_events.groupBy("user_id").agg(
        spark_f.max("event_timestamp").alias("event_timestamp")
    )
).alias("last_event_by_user")

truth_backend_table = last_event_by_user.join(
    cleaned_events, on=["user_id", "event_timestamp"]
).selectExpr("cleaned_events.*","event_name as last_event_name")

###QUALITY TEST to see if we can have more than one created event per user
# If we have some user with more than one created event, the allowance_backend_table could have more than one row
# per user and this code to generate the truth table would not have the real truth (only the most 
# recent created or updated allowance)
more_than_one_allowance = (
    cleaned_events.groupBy("user_id", "event_name")
    .count()
    .filter("count > 1 and event_name = 'allowance.created'")
    .count()
)
assert (
    more_than_one_allowance == 0
), f"we have {more_than_one_allowance} users with more than one allowance.created event"


###QUALITY TEST to see if we have more than one event per user
duplicated_users = (
    truth_backend_table.groupBy("user_id").count().filter("count > 1").count()
)
assert (
    duplicated_users == 0
), f"we have {duplicated_users} users with more than one line in truth_backend_table"

# cleaned_events.filter("user_id = 'ea7a6ea8-ff78-4a5b-848b-9f532a7a653c'").show(truncate=False)
# cleaned_events.filter("user_id = 'e7fc3804-fb1e-416e-8788-3dd0256a7d54'").show(truncate=False)
truth_backend_table = truth_backend_table.withColumnRenamed("allowance_scheduled_frequency", "frequency")
truth_backend_table = truth_backend_table.withColumnRenamed("allowance_scheduled_day", "day")
truth_backend_table = truth_backend_table.withColumnRenamed("allowance_amount", "amount")
truth_backend_table = truth_backend_table.alias("truth_backend_table")

truth_backend_table.show()


+--------------------+-------------------+------+-------------+---------+-----------------+-----------------+
|             user_id|    event_timestamp|amount|          day|frequency|       event_name|  last_event_name|
+--------------------+-------------------+------+-------------+---------+-----------------+-----------------+
|38f8d838-ea08-4fd...|2024-11-28 07:10:16|    20|     saturday|   weekly|allowance.created|allowance.created|
|113d8189-3ce0-47b...|2024-09-21 17:02:11|    20|    first_day|  monthly| allowance.edited| allowance.edited|
|8c212fdf-895b-4d3...|2024-08-10 07:21:42|    15|       sunday|   weekly| allowance.edited| allowance.edited|
|138a678d-b0eb-46a...|2024-09-07 08:14:12|     4|       monday| biweekly|allowance.created|allowance.created|
|03bc2115-7d48-4be...|2024-08-08 08:12:00|    10|       friday|   weekly| allowance.edited| allowance.edited|
|e48bb548-a87a-406...|2024-11-10 07:28:22|     5|    first_day|  monthly| allowance.edited| allowance.edited|
|9af3c140-

# Analysing  discrepancies between the events and the backend table 

There are some allowances that has no events in the events table, as show bellow:

In [127]:
### Aconding to the events tables it should not be possible for a user to have more than one allowance
more_than_one_allowances = cleaned_backend_table.groupBy('user_id').count().filter('count > 1').count()
assert more_than_one_allowances == 0 

comparisson_backend_table = cleaned_backend_table.join(truth_backend_table, on="user_id", how="left")

### This lines of the allowance_backend_table should not exist because there are no events to support them
# on the events table
comparisson_backend_table.filter("truth_backend_table.user_id is null").select('cleaned_backend_table.*').show()

+--------------------+-------------------+---------+---------+----------------+-------+--------------------+
|             user_id|      creation_date|frequency|      day|next_payment_day| status|          updated_at|
+--------------------+-------------------+---------+---------+----------------+-------+--------------------+
|bb950bcb-0760-417...|2024-09-20 23:53:12|   weekly|   friday|              27|enabled| 2024-09-20 23:53:12|
|59a14e2a-27a1-4cb...|2024-11-23 18:50:05|  monthly|first_day|               1|enabled|2024-12-01 05:00:...|
|a616b4c6-482d-4e5...|2024-11-23 18:49:36|  monthly|first_day|               1|enabled|2024-12-01 05:01:...|
|a6ca6993-3b73-4a8...|2024-11-23 18:49:28|  monthly|first_day|               1|enabled|2024-12-01 05:01:...|
|cd2d5904-7ca0-4d0...|2024-11-23 18:49:47|  monthly|first_day|               1|enabled| 2024-11-23 18:49:47|
+--------------------+-------------------+---------+---------+----------------+-------+--------------------+



As shown below, there are allowances with different values for the `day` and `frequency` columns compared to the values in the last event for those allowances.

In [128]:
print("Users with discrepancies in the day column:")
comparisson_backend_table.filter("truth_backend_table.day != cleaned_backend_table.day").selectExpr("user_id","cleaned_backend_table.day","truth_backend_table.day as truth_day").show(truncate =False)

print("Users with discrepancies in the frequency column:")
comparisson_backend_table.filter("truth_backend_table.frequency != cleaned_backend_table.frequency").selectExpr("user_id","cleaned_backend_table.frequency", "truth_backend_table.frequency as truth_frequency").show(truncate =False)



Users with discrepancies in the day column:
+------------------------------------+---------+---------+
|user_id                             |day      |truth_day|
+------------------------------------+---------+---------+
|308b2b9c-d49d-4b2d-947c-5b2370da090f|first_day|sunday   |
+------------------------------------+---------+---------+

Users with discrepancies in the frequency column:
+------------------------------------+---------+---------------+
|user_id                             |frequency|truth_frequency|
+------------------------------------+---------+---------------+
|308b2b9c-d49d-4b2d-947c-5b2370da090f|monthly  |weekly         |
+------------------------------------+---------+---------------+



In [129]:
def build_backend_truth_table(frequency, truth_function):
    return (
        comparisson_backend_table.filter(f"truth_backend_table.frequency = '{frequency}'")
        .withColumn(
            "truth_next_payment_day",
            truth_function(
                "truth_backend_table.day", "truth_backend_table.event_timestamp"
            ),
        )
        .selectExpr(
            "user_id",
            "cleaned_backend_table.frequency",
            "cleaned_backend_table.day",
            "cleaned_backend_table.creation_date",
            "cleaned_backend_table.updated_at",
            "cleaned_backend_table.next_payment_day",
            "last_event_name",
            "event_timestamp as last_event_timestamp",
            "truth_next_payment_day",
        )
        .withColumn(
            "correct",
            spark_f.when(
                spark_f.col("truth_next_payment_day") == spark_f.col("next_payment_day"),
                spark_f.lit("correct"),
            ).otherwise(spark_f.lit("wrong")),
        )
        .withColumn(
            "diference_next_payment_day",
            spark_f.col("next_payment_day") - spark_f.col("truth_next_payment_day"),
        )
    )

#### Daily allowancces

In [130]:
## daily discrepancies


def next_payment_day_daily_frequency(day, event_timestamp, current_day = None):
    if current_day is None:
        return spark_f.day(spark_f.date_add(spark_f.lit(CURRENT_DAY), 1))
    return spark_f.day(spark_f.date_add(spark_f.col(current_day), 1))


backend_table_daily_truth = build_backend_truth_table("daily", next_payment_day_daily_frequency)

pd_comparisson_backend = backend_table_daily_truth.toPandas()
histogram(pd_comparisson_backend, "correct", "Correctness of the next_payment_day for daily frequency")
pie_chart(pd_comparisson_backend, "correct", "proportion", category_orders={"correct": ["correct", "wrong"]})

#### monthly

In [131]:
comparisson_backend_table.filter("truth_backend_table.frequency = 'monthly'").select("cleaned_backend_table.day").drop_duplicates().show()

+-------------+
|          day|
+-------------+
|fifteenth_day|
|    first_day|
+-------------+



In [132]:
# comparisson_backend_table.show()


def next_payment_day_montlhy_frequency(day_column, event_timestamp):
    # current_day = spark_f.day(spark_f.to_date(spark_f.lit(CURRENT_DAY)))

    day_column = (
        spark_f.when(spark_f.col(day_column) == "fifteenth_day", spark_f.lit(15))
        .when(spark_f.col(day_column) == "first_day", spark_f.lit(1))
        .otherwise(spark_f.lit(None))
    )

    return day_column


backend_table_monthly_truth = build_backend_truth_table(
    "monthly", next_payment_day_montlhy_frequency
)

comparisson_backend_table.filter("truth_backend_table.frequency = 'monthly'").show()
pd_comparisson_backend = backend_table_monthly_truth.toPandas()

histogram(
    pd_comparisson_backend,
    "correct",
    "Correctness of the next_payment_day for monthly frequency",
)

pie_chart(
    pd_comparisson_backend,
    "correct",
    "proportion",
    category_orders={"correct": ["correct", "wrong"]},
)

+--------------------+-------------------+---------+-------------+----------------+-------+--------------------+-------------------+------+-------------+---------+-----------------+-----------------+
|             user_id|      creation_date|frequency|          day|next_payment_day| status|          updated_at|    event_timestamp|amount|          day|frequency|       event_name|  last_event_name|
+--------------------+-------------------+---------+-------------+----------------+-------+--------------------+-------------------+------+-------------+---------+-----------------+-----------------+
|0e4ede31-e71b-4c8...|2024-11-13 23:15:30|  monthly|    first_day|               1|enabled|2024-12-01 05:00:...|2024-11-13 18:15:30|    40|    first_day|  monthly|allowance.created|allowance.created|
|e7f3a4c3-443d-476...|2024-11-17 13:49:44|  monthly|    first_day|               1|enabled|2024-12-01 05:01:...|2024-11-17 08:49:44|     8|    first_day|  monthly|allowance.created|allowance.created|


### weekly

In [133]:
comparisson_backend_table.filter("truth_backend_table.frequency = 'weekly'").select("truth_backend_table.day").dropDuplicates().show()

+---------+
|      day|
+---------+
|   sunday|
| thursday|
|   monday|
|wednesday|
|   friday|
| saturday|
|  tuesday|
+---------+



In [134]:
# comparisson_backend_table.show()
def int_day_of_week(column):
    DAYS_OF_WEEK = [
        "sunday",
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
    ]

    day_of_week = spark_f.when(
        spark_f.col(column).isin(DAYS_OF_WEEK),
        spark_f.expr(f"array_position(array({','.join([ f"'{day}'" for day in DAYS_OF_WEEK])}), {column})")
    ).otherwise(spark_f.lit(None)).cast("int")
    return day_of_week

def next_day_of_week(timestamp,day_of_week):
    
    dif = day_of_week - spark_f.dayofweek(timestamp)
    next_day = spark_f.when(dif > 0, spark_f.date_add(timestamp,dif)).otherwise(spark_f.date_add(timestamp ,dif+7))
    return next_day

def next_payment_day_weekly_frequency(day_column, event_timestamp, current_day = None):
    if current_day is None:
        current_day = spark_f.to_date(spark_f.lit(CURRENT_DAY))
    else:
        current_day = spark_f.to_date(current_day)
    day_of_week = int_day_of_week(day_column)
    
    next_day = next_day_of_week(current_day,day_of_week)
    return spark_f.day(next_day)


backend_table_weekly_truth = build_backend_truth_table("weekly", next_payment_day_weekly_frequency)
# comparisson_backend_table.filter("truth_backend_table.frequency = 'weekly'").show()
pd_comparisson_backend = backend_table_weekly_truth.toPandas()

histogram(
    pd_comparisson_backend,
    "correct",
    "Correctness of the next_payment_day for monthly frequency",
)

pie_chart(pd_comparisson_backend, "correct", "proportion",category_orders={"correct": ["correct", "wrong"]})

### biweekly

In [135]:
def next_payment_day_biweekly_frequency(day_column, last_event_date_column,current_day=None):
    if current_day is None:
        current_day = spark_f.to_date(spark_f.lit(CURRENT_DAY))
    else:
        current_day = spark_f.to_date(spark_f.col(current_day))
    day_of_week = int_day_of_week(day_column)
    current_day_of_week = spark_f.dayofweek(current_day)

    
    ## I'm considering that the first week of the payment is the first occurence of the day
    #  after or during the last edition day of the allowance
    first_payment_day = next_day_of_week(last_event_date_column, day_of_week)

    dif_days = spark_f.date_diff(current_day, first_payment_day)  
    
    next_biweek_day = spark_f.date_add(current_day,14 - (dif_days % (7*2)))


    next_day = spark_f.when(
        first_payment_day > current_day, first_payment_day
    ).otherwise(next_biweek_day)

    return spark_f.day(next_day)


backend_table_biweekly_truth = build_backend_truth_table("biweekly", next_payment_day_biweekly_frequency)
# comparisson_backend_table.filter("truth_backend_table.frequency = 'weekly'").show()
pd_comparisson_backend = backend_table_biweekly_truth.toPandas()

histogram(
    pd_comparisson_backend,
    "correct",
    "Correctness of the next_payment_day for monthly frequency",
)

pie_chart(pd_comparisson_backend, "correct", "proportion",category_orders={"correct": ["correct", "wrong"]})

Among all possible frequencies, only the `monthly` frequency has no errors. This is likely because the `monthly` frequency does not require updates to change the value of the `next_payment_day` column. It remains the same unless the user edits the schedule day. **All frequencies that require updates to the `next_payment_day` have errors.**

Considering that the longest possible period for an allowance is monthly, it is expected that no allowance has an `updated_at` date older than 30 days. However, there are some allowances with an `updated_at` date older than 30 days, as shown in the next graphic. It is possible for an allowance to have an `updated_at` date older than 30 days if the user re-enables an allowance that was previously disabled. Unfortunately, I do not have information about the specific times when allowances were enabled or disabled to confirm this hypothesis.

In [136]:
all_backend_table_truth = (
    backend_table_daily_truth.unionByName(backend_table_monthly_truth)
    .unionByName(backend_table_biweekly_truth)
    .unionByName(backend_table_weekly_truth)
)
all_backend_table_truth= all_backend_table_truth.withColumn("how_old_was_last_update",spark_f.date_diff(spark_f.to_timestamp(spark_f.lit(CURRENT_DAY)),spark_f.col("updated_at")))

histogram(
    all_backend_table_truth.toPandas(),
    "how_old_was_last_update",
    "Time in days from last update",
)

As observed in the following graphs, the distribution of errors for allowances that were edited is similar to those that were never edited. This indicates that the issue is not related to the edit action of an allowance.

In [137]:
to_graph=all_backend_table_truth.filter("last_event_name like '%created'")
histogram(to_graph.toPandas(), "correct", "correctness")

to_graph=all_backend_table_truth.filter("last_event_name like '%edited'")
histogram(to_graph.toPandas(), "correct", "correctness")

In the daily frequency, 86.4% of the allowances have the `next_payment_day` equal to `updated_at + 1 day`. Considering this and the fact that almost every daily allowance has an incorrect `next_payment_day` when using the current date of `2024-11-03`, it suggests that the backend job responsible for updating the `next_payment_day` column for the daily frequency is either not running every day or is encountering issues. This can be observed in the following graph.

In [138]:
# all_backend_table_truth.filter("correct = 'correct' and day = 'daily'").show()
test_next_update_daily = all_backend_table_truth.withColumn(
    "next_day_from_update", next_payment_day_daily_frequency(None,None,"updated_at")
).filter(
    "frequency = 'daily'"
)  # .show()
test_next_update_daily = test_next_update_daily.withColumn(
    "update_correct",
    spark_f.when(
        spark_f.col("next_day_from_update") == spark_f.col("next_payment_day"),
        spark_f.lit("correct"),
    ).otherwise(spark_f.lit("wrong")),
)

# test_next_update.show()

pie_chart(
    test_next_update_daily.toPandas(),
    "update_correct",
    "proportion",
    category_orders={"update_correct": ["correct", "wrong"]},
)
# all_backend_table_truth.filter("date(updated_at) = '2024-11-03'").show()

The majority of the discrepancies in the `next_payment_day` column, when compared to the `next_day_from_update`, show a difference of 1 day. This suggests that there may have been instances of double updating, where the update process increments the `next_payment_day` without considering the current day.

In [139]:
test_next_update_errors = test_next_update_daily.filter(
    "update_correct = 'wrong'"
).withColumn(
    "next_day_to_next_day_from_update_dif",
    test_next_update_daily.next_payment_day - test_next_update_daily.next_day_from_update,
).select(
    "user_id",
    "frequency",
    "updated_at",
    "next_day_from_update",
    "next_payment_day",
    "next_day_to_next_day_from_update_dif",
)
test_next_update_errors.show()

+--------------------+---------+--------------------+--------------------+----------------+------------------------------------+
|             user_id|frequency|          updated_at|next_day_from_update|next_payment_day|next_day_to_next_day_from_update_dif|
+--------------------+---------+--------------------+--------------------+----------------+------------------------------------+
|ddcfef7f-f431-4d2...|    daily|2024-09-11 10:51:...|                  12|               4|                                -8.0|
|9be9e091-4f3a-4a6...|    daily|2024-10-15 22:26:...|                  16|              17|                                 1.0|
|e792e7d1-67ae-444...|    daily| 2024-11-25 21:18:00|                  26|              27|                                 1.0|
|7f1f0ab3-4eb3-418...|    daily|2024-10-06 21:44:...|                   7|               8|                                 1.0|
|337875a6-6c2b-49a...|    daily| 2024-11-25 21:18:13|                  26|              27|      

In the **weekly** frequency, almost all allowances have a correct `next_payment_day` if we consider the current day as the `updated_at` date. This is similar to what happened with the daily frequency. Therefore, it indicates that the job responsible for updating the `next_payment_day` is either not running daily or is encountering issues.

In [140]:
test_next_update_weekly = all_backend_table_truth.filter(
    "frequency = 'weekly'"
).withColumn(
    "next_day_from_update", next_payment_day_weekly_frequency("day", None, "updated_at")
)

test_next_update_weekly = test_next_update_weekly.withColumn(
    "update_correct",
    spark_f.when(
        spark_f.col("next_day_from_update") == spark_f.col("next_payment_day"),
        "correct",
    ).otherwise("wrong"),
)


pie_chart(
    test_next_update_weekly.toPandas(),
    "update_correct",
    "proportion",
    category_orders={"update_correct": ["correct", "wrong"]},
)

In [141]:
test_next_update_weekly.filter(
    "update_correct = 'wrong'"
).withColumn(
    "next_day_to_next_day_from_update_dif",
    test_next_update_weekly.next_payment_day - test_next_update_weekly.next_day_from_update,
).select(
    "user_id",
    "frequency",
    "updated_at",
    "next_day_from_update",
    "next_payment_day",
    "next_day_to_next_day_from_update_dif",
    "*"
).orderBy("next_day_to_next_day_from_update_dif").show()

+--------------------+---------+--------------------+--------------------+----------------+------------------------------------+--------------------+---------+---------+-------------------+--------------------+----------------+-----------------+--------------------+----------------------+-------+--------------------------+-----------------------+--------------------+--------------+------------------------------------+
|             user_id|frequency|          updated_at|next_day_from_update|next_payment_day|next_day_to_next_day_from_update_dif|             user_id|frequency|      day|      creation_date|          updated_at|next_payment_day|  last_event_name|last_event_timestamp|truth_next_payment_day|correct|diference_next_payment_day|how_old_was_last_update|next_day_from_update|update_correct|next_day_to_next_day_from_update_dif|
+--------------------+---------+--------------------+--------------------+----------------+------------------------------------+--------------------+-------

### Conclusion

There appears to be a scheduling issue as the update process is not occurring daily. Additionally, there are some anomalies that might be due to the job not being idempotent and running multiple times on certain days.