### Input file name here

In [77]:
filename = 'AttendeeReport.csv'

### Code running (calculating duration)

In [78]:
import polars as pl
from datetime import datetime, timedelta

In [79]:
df = pl.read_csv(filename).rename({
    'Session Id': 'id',
    'Participant Id': 'email',
    'Full Name': 'name',
    'UserAgent': 'user_agent',
    'UTC Event Timestamp': 'timestamp',
    'Action': 'action',
    'Role': 'role'
}).with_columns(
    pl.col('timestamp').str.strptime(pl.Datetime, format='%m/%d/%Y %I:%M:%S %p')
).sort(
    by=['email','id', 'timestamp']
)

df

id,email,name,user_agent,timestamp,action,role
str,str,str,str,datetime[μs],str,str
"""be6789ac-cff9-4391-9b20-f17238…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 03:45:52,"""Joined""","""Attendee"""
"""be6789ac-cff9-4391-9b20-f17238…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 04:12:32,"""Left""","""Attendee"""
"""fb225999-3098-485b-91f4-fc9e51…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0""",2025-03-07 02:18:53,"""Joined""","""Attendee"""
"""fb225999-3098-485b-91f4-fc9e51…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 03:37:16,"""Left""","""Attendee"""
"""17e7bc42-ee19-468b-a1b4-ccde44…","""An.LT2412859@sis.hust.edu.vn""","""Le Thai An 202412859""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 02:10:21,"""Joined""","""Attendee"""
…,…,…,…,…,…,…
"""555a4332-f0b9-4466-8525-23d2aa…","""Yen.NH2419910@sis.hust.edu.vn""","""Nguyen Hai Yen 202419910""","""Mozilla/5.0""",2025-03-07 02:25:16,"""Joined""","""Attendee"""
"""9380b31a-9c9a-4905-871e-e1e2bb…","""Yen.NH2419910@sis.hust.edu.vn""","""Nguyen Hai Yen 202419910""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 02:30:19,"""Joined""","""Attendee"""
"""9380b31a-9c9a-4905-871e-e1e2bb…","""Yen.NH2419910@sis.hust.edu.vn""","""Nguyen Hai Yen 202419910""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 02:31:51,"""Left""","""Attendee"""
"""9b5e7f54-9580-4ea0-84fd-a6d0ed…","""Yen.NTH2420365@sis.hust.edu.vn""","""Ngo Thi Hai Yen 202420365""","""Mozilla/5.0""",2025-03-07 02:24:56,"""Joined""","""Attendee"""


In [80]:
error_entry_sessions = df.group_by('id').agg(pl.len().alias('count')).filter(pl.col('count') != 2)
df_filtered = df.join(error_entry_sessions, on='id', how='anti')

error_entry_sessions

id,count
str,u32
"""3597ae5d-21ed-418b-83e5-1d5d18…",1
"""1e7f76e4-23e3-4853-b462-92a531…",1
"""36e7d376-319d-4676-9e21-e25c66…",1
"""989c8797-13d3-4362-9de9-5e4ed2…",1
"""57bf59fa-b7bd-409a-9962-4d4620…",1
…,…
"""a5b09975-cd9c-4d61-80f9-02e472…",1
"""5255e18d-4ec0-48be-989c-0cce18…",1
"""eb2422d9-d902-4a02-941e-44ac31…",1
"""fb2ca441-9055-4b47-9493-3d125e…",1


In [81]:
sessions = (
    df_filtered.sort(['id', 'timestamp'])
    .group_by('id')
    .agg(
        pl.col('name').first().alias('name'),
        pl.col('email').first().alias('email'),
        pl.col('timestamp').filter(pl.col('action') == 'Joined').first().alias('join_time'),
        pl.col('timestamp').filter(pl.col('action') == 'Left').first().alias('leave_time')
    )
    .with_columns(
        (pl.col('leave_time') - pl.col('join_time')).alias('duration')
    )
)

sessions

id,name,email,join_time,leave_time,duration
str,str,str,datetime[μs],datetime[μs],duration[μs]
"""09fc9e26-a9d9-4197-ac10-e81d48…","""Dang Huu Huy 202414518""","""Huy.DH2414518@sis.hust.edu.vn""",2025-03-07 02:41:32,2025-03-07 02:54:47,13m 15s
"""350baecf-5c8c-4b03-bd2d-50e930…","""Le Xuan Viet Thang 202418568""","""Thang.LXV2418568@sis.hust.edu.…",2025-03-07 02:02:04,2025-03-07 03:15:02,1h 12m 58s
"""7d63ff9d-42ac-471c-bfad-e3a6a9…","""Tran Thanh Hang 20232477""","""Hang.TT232477@sis.hust.edu.vn""",2025-03-07 02:37:17,2025-03-07 02:41:48,4m 31s
"""27d18db9-aac0-4104-92d2-ca63c4…","""Le Van Huy 202414174""","""Huy.LV2414174@sis.hust.edu.vn""",2025-03-07 02:36:57,2025-03-07 03:00:12,23m 15s
"""5e967b23-a83b-4add-a990-c39e59…","""Nguyen Thi Anh Minh 20233074""","""Minh.NTA233074@sis.hust.edu.vn""",2025-03-07 02:21:49,2025-03-07 02:22:32,43s
…,…,…,…,…,…
"""674ad83d-e76f-4dbe-ade5-865855…","""Nguyen Duc Manh 202414237""","""Manh.ND2414237@sis.hust.edu.vn""",2025-03-07 02:54:57,2025-03-07 02:58:15,3m 18s
"""17e7bc42-ee19-468b-a1b4-ccde44…","""Le Thai An 202412859""","""An.LT2412859@sis.hust.edu.vn""",2025-03-07 02:10:21,2025-03-07 02:16:12,5m 51s
"""75ad360e-427b-4cf4-88ed-8ba107…","""Bui Minh Cuong 202416146""","""Cuong.BM2416146@sis.hust.edu.v…",2025-03-07 03:48:17,2025-03-07 03:51:04,2m 47s
"""98b392ec-2c0a-4c32-8511-ef2718…","""Vu Ngoc Gia Han 202415555""","""Han.VNG2415555@sis.hust.edu.vn""",2025-03-07 02:34:02,2025-03-07 03:22:48,48m 46s


In [82]:
def merge_intervals(intervals):
    # Sort intervals by start time
    sorted_intervals = sorted(intervals, key=lambda x: x['join_time'])
    
    if not sorted_intervals:
        return timedelta(seconds=0)
    
    merged = [sorted_intervals[0]]
    for current in sorted_intervals[1:]:
        previous = merged[-1]
        if current['join_time'] <= previous['leave_time']:
            merged[-1] = {
                'join_time': previous['join_time'], 
                'leave_time': max(previous['leave_time'], current['leave_time'])
            }
        else:
            merged.append(current)
    
    # Calculate total duration using timedelta objects
    total_seconds = sum((interval['leave_time'] - interval['join_time']).total_seconds() 
                        for interval in merged)
    return timedelta(seconds=total_seconds)

In [89]:
result = (
    # Group by email to process each person
    sessions
    .group_by('email')
    .agg(
        pl.col('name').first().alias('name'),
        
        # Output all intervals as struct array
        pl.struct(['join_time', 'leave_time']).alias('intervals'),
        pl.concat_str([
            pl.col('join_time').dt.time().cast(pl.Utf8),
            pl.lit(' - '),
            pl.col('leave_time').dt.time().cast(pl.Utf8)            
        ]).alias('formatted_intervals')
    )
    .with_columns(
        pl.col('intervals').map_elements(merge_intervals, return_dtype=pl.Duration).alias('total_non_overlapping_duration'),
        pl.col('formatted_intervals')
            .list.eval(pl.format('<{}>', pl.element()))
            .list.join('')
    )
    .sort('total_non_overlapping_duration')
)

In [90]:
result

email,name,intervals,formatted_intervals,total_non_overlapping_duration
str,str,list[struct[2]],str,duration[μs]
"""Anh.NN235013@sis.hust.edu.vn""","""Nguyen Ngoc Anh 20235013""","[{2025-03-07 02:32:24,2025-03-07 02:32:55}]","""<02:32:24 - 02:32:55>""",31s
"""Thanh.HX236321@sis.hust.edu.vn""","""Ha Xuan Thanh 20236321""","[{2025-03-07 03:36:01,2025-03-07 03:36:39}]","""<03:36:01 - 03:36:39>""",38s
"""Anh.HHT2417284@sis.hust.edu.vn""","""Ha Huy Tuan Anh 202417284""","[{2025-03-07 02:19:48,2025-03-07 02:20:27}]","""<02:19:48 - 02:20:27>""",39s
"""Son.BT2421574@sis.hust.edu.vn""","""Bui Thanh Son 202421574""","[{2025-03-07 02:37:49,2025-03-07 02:38:30}]","""<02:37:49 - 02:38:30>""",41s
"""Ky.TM222574@sis.hust.edu.vn""","""Tran Minh Ky 20222574""","[{2025-03-07 02:01:51,2025-03-07 02:02:33}]","""<02:01:51 - 02:02:33>""",42s
…,…,…,…,…
"""Duy.NV2417380@sis.hust.edu.vn""","""Nguyen Vu Duy 202417380""","[{2025-03-07 02:55:27,2025-03-07 02:55:42}, {2025-03-07 02:55:48,2025-03-07 05:30:17}, {2025-03-07 02:17:20,2025-03-07 02:56:13}]","""<02:55:27 - 02:55:42><02:55:48…",3h 12m 57s
"""Anh.VTP2411537@sis.hust.edu.vn""","""Vu Thi Phuong Anh 202411537""","[{2025-03-07 02:12:06,2025-03-07 02:17:24}, {2025-03-07 02:11:54,2025-03-07 02:12:34}, … {2025-03-07 02:16:10,2025-03-07 05:32:14}]","""<02:12:06 - 02:17:24><02:11:54…",3h 23m 1s
"""Thao.LT227152@sis.hust.edu.vn""","""Le Thanh Thao 20227152""","[{2025-03-07 02:33:22,2025-03-07 02:50:47}, {2025-03-07 02:50:01,2025-03-07 06:16:59}, {2025-03-07 02:25:41,2025-03-07 02:32:23}]","""<02:33:22 - 02:50:47><02:50:01…",3h 50m 19s
"""Son.BT2417021@sis.hust.edu.vn""","""Bui Tung Son 202417021""","[{2025-03-07 03:08:30,2025-03-07 03:45:27}, {2025-03-07 02:25:11,2025-03-07 02:33:17}, … {2025-03-07 03:29:38,2025-03-07 06:51:30}]","""<03:08:30 - 03:45:27><02:25:11…",4h 4s


In [99]:
result_formatted = (
    result
    .with_columns(
        (pl.col("total_non_overlapping_duration") / 1_000_000).cast(pl.Int64).alias("duration_in_seconds")
    )
    .drop(['intervals', 'total_non_overlapping_duration'])
)

result_formatted

email,name,formatted_intervals,duration_in_seconds
str,str,str,i64
"""Anh.NN235013@sis.hust.edu.vn""","""Nguyen Ngoc Anh 20235013""","""<02:32:24 - 02:32:55>""",31
"""Thanh.HX236321@sis.hust.edu.vn""","""Ha Xuan Thanh 20236321""","""<03:36:01 - 03:36:39>""",38
"""Anh.HHT2417284@sis.hust.edu.vn""","""Ha Huy Tuan Anh 202417284""","""<02:19:48 - 02:20:27>""",39
"""Son.BT2421574@sis.hust.edu.vn""","""Bui Thanh Son 202421574""","""<02:37:49 - 02:38:30>""",41
"""Ky.TM222574@sis.hust.edu.vn""","""Tran Minh Ky 20222574""","""<02:01:51 - 02:02:33>""",42
…,…,…,…
"""Duy.NV2417380@sis.hust.edu.vn""","""Nguyen Vu Duy 202417380""","""<02:55:27 - 02:55:42><02:55:48…",11577
"""Anh.VTP2411537@sis.hust.edu.vn""","""Vu Thi Phuong Anh 202411537""","""<02:12:06 - 02:17:24><02:11:54…",12181
"""Thao.LT227152@sis.hust.edu.vn""","""Le Thanh Thao 20227152""","""<02:33:22 - 02:50:47><02:50:01…",13819
"""Son.BT2417021@sis.hust.edu.vn""","""Bui Tung Son 202417021""","""<03:08:30 - 03:45:27><02:25:11…",14404


In [102]:
result_formatted.write_csv(f"Duration_Output_{filename.replace('.csv', '')}.csv")