# #5 Movie Metrics

In [1]:
import polars as pl

In [2]:
users = pl.read_csv(
	'user_activity/users.csv', 
	try_parse_dates=True, 
	new_columns=['user_id']
)

activity = pl.read_csv(
	'user_activity/activity.csv', 
	try_parse_dates=True, 
	columns=['user_id', 'date', 'movie_name', 'finished']
)

user_act = users.join(activity,on='user_id')

In [3]:
grouped_users = (
    user_act
    .filter(pl.col('finished') == 1)
    .sort(pl.col('user_id'), pl.col('date'))
    .group_by('user_id', maintain_order=True)
)

first_movie = (
    grouped_users
    .first()
    .select('user_id', 'created_at', 'date', 'movie_name')
    .rename({'date':'first_date', 'movie_name':'first_name'})
)

last_movie = (
    grouped_users
    .last()
    .select('user_id', 'date', 'movie_name')
    .rename({'date':'last_date', 'movie_name':'last_name'})
)

started_finished = (
    user_act
    .group_by('user_id', maintain_order=True)
    .agg(pl.col('finished').len().alias('started'), pl.col('finished').sum().alias('finished'))
)

last_first_movies = (
    first_movie
    .join(last_movie, on='user_id')
    .join(started_finished, on='user_id')
)

last_first_movies

user_id,created_at,first_date,first_name,last_date,last_name,started,finished
i64,date,date,str,date,str,u32,i64
2,2023-06-15,2023-06-22,"""The Shawshank Redemption""",2025-05-01,"""Fight Club""",15,12
1,2023-05-26,2023-09-12,"""Turning Red""",2025-03-26,"""Her""",30,26
4,2023-07-27,2023-07-27,"""Fight Club""",2025-05-09,"""Avengers: Endgame""",43,34
5,2023-09-01,2023-09-07,"""Top Gun: Maverick""",2025-03-14,"""Bohemian Rhapsody""",31,25
3,2023-07-18,2023-11-10,"""Oppenheimer""",2025-03-31,"""Nope""",10,7
…,…,…,…,…,…,…,…
21,2025-01-01,2025-01-03,"""Up""",2025-05-14,"""Forrest Gump""",35,29
22,2025-01-13,2025-01-14,"""Knives Out""",2025-05-13,"""Big Hero 6""",36,28
23,2025-01-28,2025-01-31,"""Gladiator""",2025-05-04,"""Mad Max: Fury Road""",24,17
24,2025-02-04,2025-02-05,"""Dune""",2025-05-16,"""The Social Network""",17,17


In [4]:
last_first_movies.filter(pl.col('last_name') == 'Fight Club')

user_id,created_at,first_date,first_name,last_date,last_name,started,finished
i64,date,date,str,date,str,u32,i64
2,2023-06-15,2023-06-22,"""The Shawshank Redemption""",2025-05-01,"""Fight Club""",15,12
18,2024-10-01,2024-10-01,"""Fight Club""",2025-05-16,"""Fight Club""",35,28
20,2024-11-23,2024-11-27,"""Luca""",2025-05-02,"""Fight Club""",34,19
