## Date and time ranges

In [3]:
from datetime import datetime,date,time,timedelta

import polars as pl

In [6]:
start_date = date(2026,1,1)
end_date = date(2026,1,2)
interval = timedelta(hours=3)

## Vertical ranges

### Datetime

In [7]:
pl.datetime_range(
    start=start_date,
    end=end_date,
    interval=interval,
    eager=True
).head(3)

literal
datetime[μs]
2026-01-01 00:00:00
2026-01-01 03:00:00
2026-01-01 06:00:00


### Date range

In [9]:
pl.date_range(
    start=start_date,
    end=end_date,
    eager=True
).head(3)

literal
date
2026-01-01
2026-01-02


Date range defaults to a one day interval but other intervals can be specified.

### Time range

In [10]:
start_time = time(0)
end_time = time(12)
interval = timedelta(hours=3)

pl.time_range(
    start=start_time,
    end=end_time,
    interval=interval,
    eager=True
)

literal
time
00:00:00
03:00:00
06:00:00
09:00:00
12:00:00


## Lazy datetime ranges

Without setting `eager=True`, Polars uses lazy mode to run the code.

In [11]:
pl.datetime_range(
    start=start_date,
    end=end_date,
    interval=interval
)

In [13]:
type(pl.datetime_range(
    start=start_date,
    end=end_date,
    interval=interval,
    eager=False
))

polars.expr.expr.Expr

## Horizontal datetime ranges

In [14]:
df = pl.DataFrame(
    {
        "id": ["A", "B"],
        "start":    [datetime(2026,1,1), datetime(2026,1,2)],
        "end": datetime(2026,1,3)
    }
)

df

id,start,end
str,datetime[μs],datetime[μs]
"""A""",2026-01-01 00:00:00,2026-01-03 00:00:00
"""B""",2026-01-02 00:00:00,2026-01-03 00:00:00


In [15]:
df.with_columns(
    pl.date_ranges("start", "end", interval="1mo").alias("datetime_range")
)

id,start,end,datetime_range
str,datetime[μs],datetime[μs],list[date]
"""A""",2026-01-01 00:00:00,2026-01-03 00:00:00,[2026-01-01]
"""B""",2026-01-02 00:00:00,2026-01-03 00:00:00,[2026-01-02]


## Exercises

### Exercise 1 
We have a short hourly temperature record with a gap at 2 am

In [16]:
df_weather = (
    pl.DataFrame(
        {
            "time": [time(0), time(1), time(3)], 
            "temperature": [12.0, 11, 9]
        }
    )
)
df_weather

time,temperature
time,f64
00:00:00,12.0
01:00:00,11.0
03:00:00,9.0


We want to create an hourly `DataFrame` with no time gaps.

First create a `DataFrame` where the `time` column has no gaps

In [18]:
df_time = pl.DataFrame({"time": pl.time_range(time(0), time(3), eager=True)})
df_time

time
time
00:00:00
01:00:00
02:00:00
03:00:00


Now do a left join of `df_weather` to `df_time`

In [22]:
df_time.join(
    df_weather,
    how="left",
    left_on="time",
    right_on="time",
    coalesce=True
)

time,temperature
time,f64
00:00:00,12.0
01:00:00,11.0
02:00:00,
03:00:00,9.0


Fill the gaps in the `temperature` column with linear interpolation

In [23]:
df_time.join(
    df_weather,
    how="left",
    left_on="time",
    right_on="time",
    coalesce=True
).with_columns(
    pl.col("temperature").interpolate()
)

time,temperature
time,f64
00:00:00,12.0
01:00:00,11.0
02:00:00,10.0
03:00:00,9.0


### Exercise 2
Our client is a bike shop and wants to look at sales during their summer and halloween sale periods.

The client provides you with the following data for the start and end of each sale period

In [24]:
df_sales_periods = pl.DataFrame(
    {
        "sale":["Summer","Halloween"],
        "start": [date(2015, 6, 1), date(2015, 10, 15)],
        "end": [date(2015, 9, 1),date(2015, 11, 15)]
    }
)
df_sales_periods

sale,start,end
str,date,date
"""Summer""",2015-06-01,2015-09-01
"""Halloween""",2015-10-15,2015-11-15


Add a `date` column that has the range of dates between `start` and `end` on each row

In [27]:
df_sales_periods.with_columns(
    pl.date_ranges("start", "end").alias("date")
)

sale,start,end,date
str,date,date,list[date]
"""Summer""",2015-06-01,2015-09-01,"[2015-06-01, 2015-06-02, … 2015-09-01]"
"""Halloween""",2015-10-15,2015-11-15,"[2015-10-15, 2015-10-16, … 2015-11-15]"


Expand the list column to have a row for each element of the list

In [28]:
df_sales_periods.with_columns(
    pl.date_ranges("start", "end").alias("date")
).explode("date")

sale,start,end,date
str,date,date,date
"""Summer""",2015-06-01,2015-09-01,2015-06-01
"""Summer""",2015-06-01,2015-09-01,2015-06-02
"""Summer""",2015-06-01,2015-09-01,2015-06-03
"""Summer""",2015-06-01,2015-09-01,2015-06-04
"""Summer""",2015-06-01,2015-09-01,2015-06-05
…,…,…,…
"""Halloween""",2015-10-15,2015-11-15,2015-11-11
"""Halloween""",2015-10-15,2015-11-15,2015-11-12
"""Halloween""",2015-10-15,2015-11-15,2015-11-13
"""Halloween""",2015-10-15,2015-11-15,2015-11-14


The bike sales data is in the following `DataFrame`

In [17]:
df_sales = pl.read_parquet("data/bike_sales.parquet")
df_sales.head(2)

date,customer age,customer gender,country,sub category,order quantity,unit cost,unit price,cost,revenue
date,i64,str,str,str,i64,i64,i64,i64,i64
2013-01-28,31,"""M""","""Australia""","""Mountain Bikes""",1,1912,3400,1912,2856
2015-01-28,31,"""M""","""Australia""","""Mountain Bikes""",1,1912,3400,1912,2856


Join the sale periods to the full sales dataframe. 

Ensure that only rows that fall inside either the Summer or Halloween sale period are kept

In [32]:
df_sales.join(
    df_sales_periods.with_columns(pl.date_ranges("start", "end").alias("date")).explode(
        "date"
    ),
    on="date",
    how="inner",
)

date,customer age,customer gender,country,sub category,order quantity,unit cost,unit price,cost,revenue,sale,start,end
date,i64,str,str,str,i64,i64,i64,i64,i64,str,date,date
2015-07-22,31,"""M""","""Australia""","""Mountain Bikes""",2,1912,3400,3824,5712,"""Summer""",2015-06-01,2015-09-01
2015-07-04,32,"""M""","""Australia""","""Mountain Bikes""",1,1912,3400,1912,2754,"""Summer""",2015-06-01,2015-09-01
2015-07-27,32,"""M""","""Australia""","""Mountain Bikes""",1,1898,3375,1898,2835,"""Summer""",2015-06-01,2015-09-01
2015-07-04,33,"""M""","""Australia""","""Mountain Bikes""",3,1912,3400,5736,8058,"""Summer""",2015-06-01,2015-09-01
2015-07-25,33,"""M""","""Australia""","""Mountain Bikes""",1,1912,3400,1912,2686,"""Summer""",2015-06-01,2015-09-01
…,…,…,…,…,…,…,…,…,…,…,…,…
2015-10-23,39,"""M""","""Australia""","""Touring Bikes""",2,1482,2384,2964,4005,"""Halloween""",2015-10-15,2015-11-15
2015-08-14,30,"""F""","""Germany""","""Touring Bikes""",1,461,742,461,690,"""Summer""",2015-06-01,2015-09-01
2015-11-09,31,"""F""","""United Kingdom""","""Touring Bikes""",1,1482,2384,1482,2289,"""Halloween""",2015-10-15,2015-11-15
2015-07-04,22,"""M""","""United Kingdom""","""Touring Bikes""",1,755,1215,755,1166,"""Summer""",2015-06-01,2015-09-01


Aggregate the data by sale period and get the total cost and revenue for each sale period. 

Sort by revenue

In [34]:
df_sales.join(
    df_sales_periods.with_columns(pl.date_ranges("start", "end").alias("date")).explode(
        "date"
    ),
    on="date",
    how="inner",
).group_by(
    "sale"
).agg(
    pl.col("cost", "revenue").sum(),
).sort("revenue")

sale,cost,revenue
str,i64,i64
"""Halloween""",1136762,1698624
"""Summer""",2310151,3476841
