# Polars DateTime Operations - Comprehensive Guide

Master date and time manipulation in Polars.

## Topics:
- Date/Datetime/Duration types
- Parsing dates from strings
- Extracting date components
- Date arithmetic
- Time zones
- Date ranges and sequences
- Temporal aggregations
- Real-world time series examples

In [None]:
import polars as pl
from datetime import datetime, date, timedelta

## Part 1: Creating Date/DateTime Columns

### From Python objects

In [None]:
# Build a frame straight from native Python `date` and `datetime` objects;
# the schema printed below shows the dtypes Polars assigned to each column.
sample_dates = [date(2023, 1, day_of_month) for day_of_month in (1, 2, 3)]
sample_datetimes = [
    datetime(2023, 1, 1, 10, 30),
    datetime(2023, 1, 2, 14, 45),
    datetime(2023, 1, 3, 9, 15),
]
df = pl.DataFrame({'date': sample_dates, 'datetime': sample_datetimes})

print(df)
print("\nSchema:")
print(df.schema)

### Parsing from strings

In [None]:
# Raw string columns: ISO dates, month/day/year dates, and ISO-like timestamps.
df_str = pl.DataFrame({
    'date_str': ['2023-01-01', '2023-01-02', '2023-01-03'],
    'date_custom': ['01/15/2023', '02/20/2023', '03/10/2023'],
    'datetime_str': ['2023-01-01 10:30:00', '2023-01-02 14:45:00', '2023-01-03 09:15:00'],
})

# The ISO columns are parsed without a format string; the month/day/year
# column is given an explicit strptime-style pattern.
iso_date_expr = pl.col('date_str').str.to_date()
custom_date_expr = pl.col('date_custom').str.to_date('%m/%d/%Y')
iso_datetime_expr = pl.col('datetime_str').str.to_datetime()

parsed = df_str.select(
    iso_date_expr.alias('date'),
    custom_date_expr.alias('date_parsed'),
    iso_datetime_expr.alias('datetime'),
)

print("Parsed dates:")
print(parsed)

### Creating date ranges

In [None]:
# Daily sequence from 1 Jan 2023 to 10 Jan 2023, materialised immediately
# (eager=True) so it can be used directly as a DataFrame column.
first_day, last_day = date(2023, 1, 1), date(2023, 1, 10)
date_range = pl.date_range(first_day, last_day, interval='1d', eager=True)

df_range = pl.DataFrame({'date': date_range})

print("Date range:")
print(df_range)

## Part 2: Extracting Date Components

In [None]:
# Sample datetimes: one value every 30 days between the two bounds below.
range_start = datetime(2023, 1, 1, 10, 30)
range_end = datetime(2023, 12, 31, 15, 45)

dates_df = pl.DataFrame({
    'datetime': pl.datetime_range(range_start, range_end, '30d', eager=True),
})

print("Sample datetimes:")
print(dates_df.head())

### Extract year, month, day

In [None]:
# Keep the original timestamp and add one column per calendar / clock field.
stamp = pl.col('datetime')

components = dates_df.select(
    stamp,
    year=stamp.dt.year(),
    month=stamp.dt.month(),
    day=stamp.dt.day(),
    hour=stamp.dt.hour(),
    minute=stamp.dt.minute(),
    second=stamp.dt.second(),
)

print("Date components:")
print(components.head())

### Weekday, week, quarter

In [None]:
# Week-level and year-level components of each timestamp.
# NOTE: Polars' dt.weekday() uses ISO 8601 numbering (1 = Monday ... 7 = Sunday),
# unlike Python's datetime.weekday(), which is 0-based.
temporal = dates_df.select([
    pl.col('datetime'),
    pl.col('datetime').dt.weekday().alias('weekday'),  # ISO: 1=Mon, 7=Sun
    pl.col('datetime').dt.week().alias('week_of_year'),
    pl.col('datetime').dt.quarter().alias('quarter'),
    pl.col('datetime').dt.ordinal_day().alias('day_of_year')
])

print("Temporal components:")
print(temporal.head())

## Part 3: Date Arithmetic

### Adding/Subtracting durations

In [None]:
# Three anchor dates to shift forwards and backwards.
df_date = pl.DataFrame({
    'date': [date(2023, 1, 1), date(2023, 2, 15), date(2023, 6, 30)],
})

# Adding or subtracting a pl.duration shifts every date by a fixed span.
base_date = pl.col('date')

arithmetic = df_date.select(
    base_date,
    plus_7_days=base_date + pl.duration(days=7),
    plus_2_weeks=base_date + pl.duration(weeks=2),
    plus_30_days=base_date + pl.duration(days=30),
    minus_10_days=base_date - pl.duration(days=10),
)

print("Date arithmetic:")
print(arithmetic)

### Date differences

In [None]:
# Project timeline: one row per named event.
event_names = ['Start', 'Milestone 1', 'Milestone 2', 'End']
event_dates = [date(2023, 1, 1), date(2023, 3, 15), date(2023, 6, 30), date(2023, 12, 31)]
df_events = pl.DataFrame({'event': event_names, 'date': event_dates})

# Subtracting two date expressions yields the elapsed span between them:
# once relative to the first row, once relative to the row directly above.
event_date = pl.col('date')

differences = df_events.with_columns(
    days_from_start=event_date - event_date.first(),
    days_since_prev=event_date - event_date.shift(1),
)

print("Date differences:")
print(differences)

## Part 4: Truncating and Rounding

In [None]:
# Timestamps with non-zero minutes/seconds so the effect of truncation is visible.
df_times = pl.DataFrame({
    'datetime': [
        datetime(2023, 1, 15, 10, 30, 45),
        datetime(2023, 2, 20, 14, 45, 30),
        datetime(2023, 3, 10, 9, 15, 20),
    ],
})

# Output column name -> truncation interval passed to dt.truncate().
truncation_steps = {'day': '1d', 'hour': '1h', 'month': '1mo', 'year': '1y'}
raw_stamp = pl.col('datetime')

truncated = df_times.select(
    raw_stamp,
    *[
        raw_stamp.dt.truncate(step).alias(label)
        for label, step in truncation_steps.items()
    ],
)

print("Truncated dates:")
print(truncated)

## Part 5: Filtering by Dates

In [None]:
# Sample sales data: one row per calendar day of 2023, with a synthetic
# sales figure that cycles through 100..149.
days_2023 = pl.date_range(date(2023, 1, 1), date(2023, 12, 31), '1d', eager=True)
synthetic_sales = pl.Series([100 + offset % 50 for offset in range(365)])

sales_df = pl.DataFrame({'date': days_2023, 'sales': synthetic_sales})

print(f"Total records: {len(sales_df)}")

### Filter by date range

In [None]:
# Q1 2023 as a half-open interval: 1 Jan is included, 1 Apr is excluded.
q1_start, q1_end = date(2023, 1, 1), date(2023, 4, 1)

# Multiple predicates passed to filter() are combined with logical AND.
q1 = sales_df.filter(
    pl.col('date') >= q1_start,
    pl.col('date') < q1_end,
)

print(f"Q1 records: {len(q1)}")
print(q1.head())

### Filter by date components

In [None]:
# All Mondays in January.
# dt.weekday() uses ISO 8601 numbering (1 = Monday ... 7 = Sunday), so Monday
# is 1; comparing against 0 never matches and would yield an empty frame.
mondays_jan = sales_df.filter(
    (pl.col('date').dt.month() == 1) &
    (pl.col('date').dt.weekday() == 1)
)

print("Mondays in January:")
print(mondays_jan)

## Part 6: Temporal Aggregations

### Group by date components

In [None]:
# Monthly aggregation: tag each row with its year and month, then summarise
# sales per (year, month) pair in chronological order.
sale_date = pl.col('date')
sale_amount = pl.col('sales')

monthly = (
    sales_df
    .with_columns(
        year=sale_date.dt.year(),
        month=sale_date.dt.month(),
    )
    .group_by('year', 'month')
    .agg(
        total_sales=sale_amount.sum(),
        avg_daily_sales=sale_amount.mean(),
        num_days=pl.len(),
    )
    .sort('year', 'month')
)

print("Monthly sales:")
print(monthly)

### Quarterly aggregation

In [None]:
# Quarterly aggregation: derive the calendar quarter from the date, then
# total and average sales per quarter, ordered by quarter.
with_quarter = sales_df.with_columns(quarter=pl.col('date').dt.quarter())

quarterly = (
    with_quarter
    .group_by('quarter')
    .agg(
        total_sales=pl.col('sales').sum(),
        avg_sales=pl.col('sales').mean(),
    )
    .sort('quarter')
)

print("Quarterly sales:")
print(quarterly)

### Weekday vs Weekend

In [None]:
# Compare average sales on weekends against weekdays.
# dt.weekday() uses ISO 8601 numbering (1 = Monday ... 7 = Sunday), so the
# weekend is 6 (Saturday) and 7 (Sunday) -- not 5 and 6.
weekday_analysis = sales_df.with_columns([
    pl.when(pl.col('date').dt.weekday().is_in([6, 7]))
      .then(pl.lit('Weekend'))
      .otherwise(pl.lit('Weekday'))
      .alias('day_type')
]).group_by('day_type').agg([
    pl.col('sales').mean().alias('avg_sales'),
    pl.len().alias('num_days')
]).sort('day_type')  # group_by does not guarantee row order; sort for stable output

print("Weekday vs Weekend:")
print(weekday_analysis)

## Part 7: group_by_dynamic (Time-based Grouping)

In [None]:
# Hourly data: one timestamp per hour for 1-7 Jan 2023 (7 * 24 rows), paired
# with a synthetic value that cycles through 10..29.
week_start = datetime(2023, 1, 1, 0, 0)
week_end = datetime(2023, 1, 7, 23, 0)

hourly_df = pl.DataFrame({
    'timestamp': pl.datetime_range(week_start, week_end, '1h', eager=True),
    'value': pl.Series([10 + hour_index % 20 for hour_index in range(7 * 24)]),
})

print(f"Hourly data: {len(hourly_df)} records")
print(hourly_df.head())

### Aggregate to daily

In [None]:
# Collapse the hourly rows into one-day windows keyed on 'timestamp'.
hourly_value = pl.col('value')

daily = (
    hourly_df
    .group_by_dynamic('timestamp', every='1d')
    .agg(
        daily_total=hourly_value.sum(),
        daily_avg=hourly_value.mean(),
        num_hours=pl.len(),
    )
)

print("Daily aggregation:")
print(daily)

### Custom windows

In [None]:
# 6-hour windows: same dynamic grouping as the daily roll-up, with a shorter
# window length.
six_hour_windows = hourly_df.group_by_dynamic('timestamp', every='6h')

six_hour = six_hour_windows.agg(
    total=pl.col('value').sum(),
    count=pl.len(),
)

print("6-hour windows:")
print(six_hour.head(10))

## Part 8: Real-World Examples

### Example 1: Business days calculation

In [None]:
# Count business days (Monday-Friday) between two dates.
date_range_df = pl.DataFrame({
    'start_date': [date(2023, 1, 1), date(2023, 6, 1)],
    'end_date': [date(2023, 1, 31), date(2023, 6, 30)]
})

# Expand each row to its list of dates and count the weekdays.
business_days = date_range_df.select([
    pl.col('start_date'),
    pl.col('end_date'),
    # dt.weekday() uses ISO 8601 numbering (1 = Monday ... 7 = Sunday), so
    # business days are 1..5; a `< 5` test would silently drop every Friday.
    # The boolean flag is cast to an integer so the list sum is an explicit count.
    pl.date_ranges(pl.col('start_date'), pl.col('end_date'), '1d')
      .list.eval((pl.element().dt.weekday() <= 5).cast(pl.UInt32))
      .list.sum()
      .alias('business_days')
])

print("Business days calculation:")
print(business_days)

### Example 2: Age calculation

In [None]:
people_df = pl.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie'],
    'birth_date': [date(1990, 5, 15), date(1985, 10, 20), date(2000, 3, 8)]
})

# Fixed reference date so the example output is reproducible.
today = date(2023, 11, 1)

# Age in completed years, computed from calendar fields rather than
# days / 365.25: the division approach is off by one on (and just around)
# birthdays because real years are 365 or 366 days long.
birth = pl.col('birth_date')

# True when this year's birthday has not happened yet as of `today`.
birthday_pending = (
    (birth.dt.month() > today.month)
    | ((birth.dt.month() == today.month) & (birth.dt.day() > today.day))
)

ages = people_df.with_columns([
    (pl.lit(today.year) - birth.dt.year() - birthday_pending.cast(pl.Int32))
      .cast(pl.Int32)
      .alias('age')
])

print(f"Ages as of {today}:")
print(ages)

### Example 3: Fiscal year handling

In [None]:
# Fiscal year starts in April: April-December belong to the fiscal year named
# after the current calendar year, January-March to the previous one.
# Fiscal months are renumbered so that April is 1 and March is 12.
calendar_year = pl.col('date').dt.year()
calendar_month = pl.col('date').dt.month()
on_or_after_april = calendar_month >= 4

fiscal_df = sales_df.with_columns(
    fiscal_year=pl.when(on_or_after_april)
      .then(calendar_year)
      .otherwise(calendar_year - 1),
    fiscal_month=pl.when(on_or_after_april)
      .then(calendar_month - 3)
      .otherwise(calendar_month + 9),
)

fiscal_summary = (
    fiscal_df
    .group_by('fiscal_year')
    .agg(fy_sales=pl.col('sales').sum())
    .sort('fiscal_year')
)

print("Fiscal year summary:")
print(fiscal_summary)

### Example 4: Time since last event

In [None]:
# Event log, ordered by customer and then chronologically so that shift(1)
# within each customer refers to that customer's previous event.
customer_ids = [1, 1, 1, 2, 2, 3]
event_dates = [
    date(2023, 1, 1),
    date(2023, 1, 15),
    date(2023, 2, 1),
    date(2023, 1, 10),
    date(2023, 3, 1),
    date(2023, 1, 5),
]
events_df = pl.DataFrame(
    {'customer_id': customer_ids, 'event_date': event_dates}
).sort(['customer_id', 'event_date'])

# Previous event date per customer; each customer's first event has no
# predecessor, so its gap is null.
previous_event = pl.col('event_date').shift(1).over('customer_id')

with_gaps = events_df.with_columns(
    days_since_last=(pl.col('event_date') - previous_event).dt.total_days(),
)

print("Time since last event:")
print(with_gaps)

## Summary

### Key Operations:
- **Creation**: date(), datetime(), date_range()
- **Parsing**: str.to_date(), str.to_datetime()
- **Components**: year(), month(), day(), weekday(), etc. (note: weekday() is ISO-numbered, 1 = Monday … 7 = Sunday)
- **Arithmetic**: + duration(), - duration(), differences
- **Truncation**: dt.truncate() for rounding down to the start of a period (hour, day, month, year)
- **Aggregation**: group_by date components, group_by_dynamic

### Best Practices:
- Use appropriate types (`Date` for calendar days, `Datetime` for timestamps)
- Parse strings with correct format
- Extract components for grouping
- Use group_by_dynamic for time-based windows
- Consider time zones for global applications