# How to find First and Last day of week

In [1]:
import os
os.getcwd()
os.chdir("H:\pyspark_advanced-coding_interview")
os.getcwd()

'H:\\pyspark_advanced-coding_interview'

# Spark SQL

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, DateType
from datetime import datetime

# Initialize Spark Session
spark = SparkSession.builder.appName("FirstLastDayOfWeek").getOrCreate()

# Define schema using StructType and StructField
schema = StructType([
    StructField("RecordID", IntegerType(), True),
    StructField("Date", DateType(), True)
])

# Sample data with complex dates
data = [
    (1, datetime(2024, 10, 1)),
    (2, datetime(2024, 10, 2)),
    (3, datetime(2024, 10, 3)),
    (4, datetime(2024, 10, 5)),
    (5, datetime(2024, 10, 6)),
    (6, datetime(2024, 10, 7)),
    (7, datetime(2024, 10, 8)),
    (8, datetime(2024, 10, 10)),
    (9, datetime(2024, 10, 12)),
    (10, datetime(2024, 10, 14)),
    (11, datetime(2024, 10, 15)),
    (12, datetime(2024, 10, 18)),
    (13, datetime(2024, 10, 20)),
    (14, datetime(2024, 10, 21)),
    (15, datetime(2024, 10, 22))
]

# Create DataFrame
df = spark.createDataFrame(data, schema)
df.show(truncate=False)


+--------+----------+
|RecordID|Date      |
+--------+----------+
|1       |2024-10-01|
|2       |2024-10-02|
|3       |2024-10-03|
|4       |2024-10-05|
|5       |2024-10-06|
|6       |2024-10-07|
|7       |2024-10-08|
|8       |2024-10-10|
|9       |2024-10-12|
|10      |2024-10-14|
|11      |2024-10-15|
|12      |2024-10-18|
|13      |2024-10-20|
|14      |2024-10-21|
|15      |2024-10-22|
+--------+----------+



In [3]:
# Register the DataFrame as a temporary table
df.createOrReplaceTempView("Dates")


In [10]:
# Method 1 Execution
query1 = spark.sql("""
SELECT 
    Date, 
    date_trunc('WEEK', Date) AS FirstDayOfWeek,
    date_add(date_trunc('WEEK', Date), 6) AS LastDayOfWeek
FROM Dates
""")
query1.show(truncate=False)

# Method 2 Execution
query2 = spark.sql("""
SELECT 
    Date, 
    date_sub(next_day(Date, 'Sunday'), 6) AS FirstDayOfWeek,
    next_day(Date, 'Sunday') AS LastDayOfWeek
FROM Dates
""")
query2.show(truncate=False)

# Method 3 Execution
query3 = spark.sql("""
SELECT 
    Date,
    date_sub(Date, dayofweek(Date) - 2) AS FirstDayOfWeek,
    date_add(Date, 8 - dayofweek(Date)) AS LastDayOfWeek
FROM Dates
""")
query3.show(truncate=False)

# Returns the next Sunday after the given date, which will be treated as the end of the week.
# Returns an integer (1 for Sunday, 2 for Monday, etc.)
# Returns the first day of the week (Monday) for the specified date


+----------+-------------------+-------------+
|Date      |FirstDayOfWeek     |LastDayOfWeek|
+----------+-------------------+-------------+
|2024-10-01|2024-09-30 00:00:00|2024-10-06   |
|2024-10-02|2024-09-30 00:00:00|2024-10-06   |
|2024-10-03|2024-09-30 00:00:00|2024-10-06   |
|2024-10-05|2024-09-30 00:00:00|2024-10-06   |
|2024-10-06|2024-09-30 00:00:00|2024-10-06   |
|2024-10-07|2024-10-07 00:00:00|2024-10-13   |
|2024-10-08|2024-10-07 00:00:00|2024-10-13   |
|2024-10-10|2024-10-07 00:00:00|2024-10-13   |
|2024-10-12|2024-10-07 00:00:00|2024-10-13   |
|2024-10-14|2024-10-14 00:00:00|2024-10-20   |
|2024-10-15|2024-10-14 00:00:00|2024-10-20   |
|2024-10-18|2024-10-14 00:00:00|2024-10-20   |
|2024-10-20|2024-10-14 00:00:00|2024-10-20   |
|2024-10-21|2024-10-21 00:00:00|2024-10-27   |
|2024-10-22|2024-10-21 00:00:00|2024-10-27   |
+----------+-------------------+-------------+

+----------+--------------+-------------+
|Date      |FirstDayOfWeek|LastDayOfWeek|
+----------+----------

# Pyspark

In [16]:
from pyspark.sql.functions import date_trunc, date_add

# Find the first and last day of the week using date_trunc
df_first_last_day = df.withColumn("FirstDayOfWeek", date_trunc("week", "Date")) \
                      .withColumn("LastDayOfWeek", date_add(date_trunc("week", "Date"), 6))

df_first_last_day.show(truncate=False)

# Truncates the date to the start of the week (Monday)
# Finds the next Sunday after the given date.
# Returns an integer representing the day of the week (1 for Sunday, 2 for Monday, etc.).

+--------+----------+-------------------+-------------+
|RecordID|Date      |FirstDayOfWeek     |LastDayOfWeek|
+--------+----------+-------------------+-------------+
|1       |2024-10-01|2024-09-30 00:00:00|2024-10-06   |
|2       |2024-10-02|2024-09-30 00:00:00|2024-10-06   |
|3       |2024-10-03|2024-09-30 00:00:00|2024-10-06   |
|4       |2024-10-05|2024-09-30 00:00:00|2024-10-06   |
|5       |2024-10-06|2024-09-30 00:00:00|2024-10-06   |
|6       |2024-10-07|2024-10-07 00:00:00|2024-10-13   |
|7       |2024-10-08|2024-10-07 00:00:00|2024-10-13   |
|8       |2024-10-10|2024-10-07 00:00:00|2024-10-13   |
|9       |2024-10-12|2024-10-07 00:00:00|2024-10-13   |
|10      |2024-10-14|2024-10-14 00:00:00|2024-10-20   |
|11      |2024-10-15|2024-10-14 00:00:00|2024-10-20   |
|12      |2024-10-18|2024-10-14 00:00:00|2024-10-20   |
|13      |2024-10-20|2024-10-14 00:00:00|2024-10-20   |
|14      |2024-10-21|2024-10-21 00:00:00|2024-10-27   |
|15      |2024-10-22|2024-10-21 00:00:00|2024-10

In [12]:
from pyspark.sql.functions import next_day, date_sub

# Assuming the week starts on Monday and ends on Sunday
df_first_last_day_2 = df.withColumn("FirstDayOfWeek", date_sub(next_day("Date", "Sunday"), 6)) \
                        .withColumn("LastDayOfWeek", next_day("Date", "Sunday"))

df_first_last_day_2.show(truncate=False)


+--------+----------+--------------+-------------+
|RecordID|Date      |FirstDayOfWeek|LastDayOfWeek|
+--------+----------+--------------+-------------+
|1       |2024-10-01|2024-09-30    |2024-10-06   |
|2       |2024-10-02|2024-09-30    |2024-10-06   |
|3       |2024-10-03|2024-09-30    |2024-10-06   |
|4       |2024-10-05|2024-09-30    |2024-10-06   |
|5       |2024-10-06|2024-10-07    |2024-10-13   |
|6       |2024-10-07|2024-10-07    |2024-10-13   |
|7       |2024-10-08|2024-10-07    |2024-10-13   |
|8       |2024-10-10|2024-10-07    |2024-10-13   |
|9       |2024-10-12|2024-10-07    |2024-10-13   |
|10      |2024-10-14|2024-10-14    |2024-10-20   |
|11      |2024-10-15|2024-10-14    |2024-10-20   |
|12      |2024-10-18|2024-10-14    |2024-10-20   |
|13      |2024-10-20|2024-10-21    |2024-10-27   |
|14      |2024-10-21|2024-10-21    |2024-10-27   |
|15      |2024-10-22|2024-10-21    |2024-10-27   |
+--------+----------+--------------+-------------+



In [13]:
from pyspark.sql.functions import dayofweek, date_sub, date_add

# Calculate first and last day of the week based on day of the week
df_first_last_day_3 = df.withColumn("FirstDayOfWeek", date_sub("Date", dayofweek("Date") - 2)) \
                        .withColumn("LastDayOfWeek", date_add("Date", 8 - dayofweek("Date")))

df_first_last_day_3.show(truncate=False)


+--------+----------+--------------+-------------+
|RecordID|Date      |FirstDayOfWeek|LastDayOfWeek|
+--------+----------+--------------+-------------+
|1       |2024-10-01|2024-09-30    |2024-10-06   |
|2       |2024-10-02|2024-09-30    |2024-10-06   |
|3       |2024-10-03|2024-09-30    |2024-10-06   |
|4       |2024-10-05|2024-09-30    |2024-10-06   |
|5       |2024-10-06|2024-10-07    |2024-10-13   |
|6       |2024-10-07|2024-10-07    |2024-10-13   |
|7       |2024-10-08|2024-10-07    |2024-10-13   |
|8       |2024-10-10|2024-10-07    |2024-10-13   |
|9       |2024-10-12|2024-10-07    |2024-10-13   |
|10      |2024-10-14|2024-10-14    |2024-10-20   |
|11      |2024-10-15|2024-10-14    |2024-10-20   |
|12      |2024-10-18|2024-10-14    |2024-10-20   |
|13      |2024-10-20|2024-10-21    |2024-10-27   |
|14      |2024-10-21|2024-10-21    |2024-10-27   |
|15      |2024-10-22|2024-10-21    |2024-10-27   |
+--------+----------+--------------+-------------+

