In [None]:
import polars as pl 
from datetime import timedelta

In [None]:
# Load the Divvy trip export into a polars DataFrame and preview the first rows.
# end_station_id is read as a string instead of letting polars infer its dtype
# (presumably because the IDs are not purely numeric -- TODO confirm).
pl_df = pl.read_csv(
    "../data/202306-divvy-tripdata.csv",
    schema_overrides={"end_station_id": pl.Utf8},
)
pl_df.head()

In [None]:
# Display the frame as loaded, before the timestamp columns are parsed.
pl_df

In [None]:
# Parse the ride start/end columns from strings into real timestamps.
# The source strings carry a time of day ("%Y-%m-%d %H:%M:%S"), so they are
# parsed as Datetime: a Date column cannot hold the time component, which would
# make ride durations and any intra-day analysis impossible. Later cells that
# need a calendar day derive it explicitly with `.dt.date()`.
# NOTE: this cell overwrites `pl_df` in place, so it cannot be re-run without
# first re-running the load cell (the columns are no longer strings).
pl_df = pl_df.with_columns(
    pl.col("started_at", "ended_at").str.strptime(
        pl.Datetime, format="%Y-%m-%d %H:%M:%S"
    )
)

In [None]:
# Plain-text rendering of the frame after the timestamp conversion.
print(pl_df)

In [None]:
# Display the frame after the timestamp conversion.
pl_df

In [None]:
# Show the column name -> dtype mapping, e.g. to check the dtypes of
# started_at / ended_at after parsing.
pl_df.schema

### 1. Count the number of bike rides per day.

In [None]:
# Number of rides per calendar day (by ride start), ordered by date.
# Rows are counted with pl.len(), the same row-count expression the weekly
# aggregation in this notebook uses; calling pl.count() with no arguments is
# the deprecated spelling of the same thing.
daily_counts = (
    pl_df
    .with_columns(pl.col("started_at").dt.date().alias("date"))
    .group_by("date")
    .agg(pl.len().alias("ride_count"))
    .sort("date")  # group_by does not guarantee order; sort for display and later use
)
daily_counts

### 2. Calculate the average, max, and minimum number of rides per week of the dataset.

In [None]:
# Weekly ride statistics: bucket rides into 1-week windows on started_at,
# count the rides in each window, then reduce the per-window counts to their
# mean, maximum and minimum.
# NOTE(review): windows at the edges of the data may cover only part of a week,
# which would pull the minimum (and the mean) down -- confirm that is intended.
(
    pl_df
    .sort("started_at")
    .group_by_dynamic("started_at", every="1w", closed="left")
    .agg(pl.len().alias("ride_count"))
    .select(
        avg_rides_per_week=pl.col("ride_count").mean(),
        max_rides_per_week=pl.col("ride_count").max(),
        min_rides_per_week=pl.col("ride_count").min(),
    )
)

### 3. For each day, calculate how many more or fewer rides there were than on the same day of the previous week.

In [None]:
# Preview of rides per calendar day, ordered by date (result is displayed,
# not stored). Rows are counted with pl.len(), matching the weekly aggregation
# in this notebook; argument-less pl.count() is the deprecated spelling.
(
    pl_df
    .with_columns(pl.col("started_at").dt.date().alias("date"))
    .group_by("date")
    .agg(pl.len().alias("ride_count"))
    .sort("date")
)

In [None]:
# Week-over-week change in daily ride counts.
#
# Each day is matched to the day exactly 7 calendar days earlier by joining on
# the date itself. A positional shift of 7 rows only lines up with "the same
# day last week" when daily_counts has one row for every calendar day; the join
# stays correct even if a day is missing.
#
# Output columns: date, ride_count, last_week_ride_count, diff_from_last_week.
first_last_ride = (
    daily_counts
    # Cast to a signed integer so the difference below can be negative
    # (presumably the aggregated counts are unsigned -- TODO confirm).
    .with_columns(pl.col("ride_count").cast(pl.Int32))
    .join(
        # Same counts, re-keyed to the date one week later, so that joining on
        # "date" attaches the previous week's count to each day.
        daily_counts.select(
            pl.col("date").dt.offset_by("7d"),
            pl.col("ride_count").cast(pl.Int32).alias("last_week_ride_count"),
        ),
        on="date",
        how="left",
    )
    .sort("date")
    .with_columns(
        # Days with no counterpart a week earlier (e.g. the first week of the
        # data) keep a null last_week_ride_count and report a difference of 0.
        (pl.col("ride_count") - pl.col("last_week_ride_count"))
        .fill_null(0)
        .alias("diff_from_last_week")
    )
)
first_last_ride