<a href="https://colab.research.google.com/github/candidlpd/pyspark-coding-interview/blob/master/find_number_of_weekdays.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install pyspark


Collecting pyspark
  Downloading pyspark-3.5.3.tar.gz (317.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.3/317.3 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.5.3-py2.py3-none-any.whl size=317840625 sha256=110270faa93e34b5fdce7fb69fb43ff64769b18affa8ff7be08f201059bb6851
  Stored in directory: /root/.cache/pip/wheels/1b/3a/92/28b93e2fbfdbb07509ca4d6f50c5e407f48dce4ddbda69a4ab
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.5.3


In [4]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import DateType

# Initialize Spark session
spark = SparkSession.builder.master("local").appName("WeekdaysBetweenDates").getOrCreate()


In [5]:
# Sample data of start and end dates
data = [
    ("2024-10-01", "2024-10-15"),
    ("2024-09-10", "2024-09-25"),
    ("2024-08-01", "2024-08-10")
]

# Create a DataFrame
columns = ["StartDate", "EndDate"]
df = spark.createDataFrame(data, columns)

# Convert the date columns from string to date type
df = df.withColumn("StartDate", df["StartDate"].cast(DateType())) \
       .withColumn("EndDate", df["EndDate"].cast(DateType()))

# Show the data
df.show()

# Register the DataFrame as a temporary SQL table
df.createOrReplaceTempView("DatesTable")


+----------+----------+
| StartDate|   EndDate|
+----------+----------+
|2024-10-01|2024-10-15|
|2024-09-10|2024-09-25|
|2024-08-01|2024-08-10|
+----------+----------+



In [14]:
spark.sql("""
select StartDate,EndDate,total_days_including_start_end,full_weekend_days,StartDay_as_Saturday,EndDay_as_Sunday,(total_days_including_start_end - full_weekend_days - StartDay_as_Saturday - EndDay_as_Sunday) as weekdays from (
SELECT StartDate, EndDate,
       (DATEDIFF(EndDate, StartDate) + 1) AS total_days_including_start_end,
       (Floor(DATEDIFF(EndDate, StartDate)/7) *2) as full_weekend_days,
       (case when DATE_FORMAT(StartDate, 'EEEE') = 'Saturday' THEN 1 ELSE 0 END) as StartDay_as_Saturday,
       (case when DATE_FORMAT(EndDate, 'EEEE') = 'Sunday' THEN 1 ELSE 0 END) as EndDay_as_Sunday

FROM DatesTable
) as weekdayCalc
""").show()



+----------+----------+------------------------------+-----------------+--------------------+----------------+--------+
| StartDate|   EndDate|total_days_including_start_end|full_weekend_days|StartDay_as_Saturday|EndDay_as_Sunday|weekdays|
+----------+----------+------------------------------+-----------------+--------------------+----------------+--------+
|2024-10-01|2024-10-15|                            15|                4|                   0|               0|      11|
|2024-09-10|2024-09-25|                            16|                4|                   0|               0|      12|
|2024-08-01|2024-08-10|                            10|                2|                   0|               0|       8|
+----------+----------+------------------------------+-----------------+--------------------+----------------+--------+

