### Input file name here

In [45]:
filename = 'AttendeeReport.csv'
input_starting_time = '3/7/2025 9:00:00 AM'
input_ending_time = '3/7/2025 10:52:00 AM'

### Code running (calculating duration)

In [55]:
import polars as pl
from datetime import datetime, timedelta, timezone

In [56]:
# Parse time & set timezone UTC+7
ending_time = (datetime
    .strptime(input_ending_time, "%m/%d/%Y %I:%M:%S %p")
    .replace(tzinfo=timezone(timedelta(hours=7)))
)

In [57]:
df = pl.read_csv(filename).rename({
    'Session Id': 'id',
    'Participant Id': 'email',
    'Full Name': 'name',
    'UserAgent': 'user_agent',
    'UTC Event Timestamp': 'timestamp',
    'Action': 'action',
    'Role': 'role'
}).with_columns(
    pl.col('timestamp')
        .str.strptime(pl.Datetime, format='%m/%d/%Y %I:%M:%S %p')
        .dt.convert_time_zone('Asia/Ho_Chi_Minh')
).sort(
    by=['email','id', 'timestamp']
)

df

id,email,name,user_agent,timestamp,action,role
str,str,str,str,"datetime[μs, Asia/Ho_Chi_Minh]",str,str
"""be6789ac-cff9-4391-9b20-f17238…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 10:45:52 +07,"""Joined""","""Attendee"""
"""be6789ac-cff9-4391-9b20-f17238…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 11:12:32 +07,"""Left""","""Attendee"""
"""fb225999-3098-485b-91f4-fc9e51…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0""",2025-03-07 09:18:53 +07,"""Joined""","""Attendee"""
"""fb225999-3098-485b-91f4-fc9e51…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 10:37:16 +07,"""Left""","""Attendee"""
"""17e7bc42-ee19-468b-a1b4-ccde44…","""An.LT2412859@sis.hust.edu.vn""","""Le Thai An 202412859""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 09:10:21 +07,"""Joined""","""Attendee"""
…,…,…,…,…,…,…
"""555a4332-f0b9-4466-8525-23d2aa…","""Yen.NH2419910@sis.hust.edu.vn""","""Nguyen Hai Yen 202419910""","""Mozilla/5.0""",2025-03-07 09:25:16 +07,"""Joined""","""Attendee"""
"""9380b31a-9c9a-4905-871e-e1e2bb…","""Yen.NH2419910@sis.hust.edu.vn""","""Nguyen Hai Yen 202419910""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 09:30:19 +07,"""Joined""","""Attendee"""
"""9380b31a-9c9a-4905-871e-e1e2bb…","""Yen.NH2419910@sis.hust.edu.vn""","""Nguyen Hai Yen 202419910""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 09:31:51 +07,"""Left""","""Attendee"""
"""9b5e7f54-9580-4ea0-84fd-a6d0ed…","""Yen.NTH2420365@sis.hust.edu.vn""","""Ngo Thi Hai Yen 202420365""","""Mozilla/5.0""",2025-03-07 09:24:56 +07,"""Joined""","""Attendee"""


In [58]:
error_entry_sessions = df.group_by('id').agg(pl.len().alias('count')).filter(pl.col('count') != 2)
df_filtered = df.join(error_entry_sessions, on='id', how='anti')

error_entry_sessions

id,count
str,u32
"""78c7845a-524d-4f72-810c-48ba32…",1
"""b5bc9106-8989-47d2-ba77-681bfc…",1
"""bdea21e0-fbb5-4a01-bbdb-a7ab65…",1
"""7850dbe6-c773-4ddf-8038-2ff45d…",1
"""d47d1096-f148-4392-a8b0-f358fd…",1
…,…
"""24bfebc2-d325-4e77-8975-6799a6…",1
"""cbb7feaa-9968-48c5-b0e9-e51646…",1
"""ad5c9bd4-e03e-48fa-b04d-3cbd7c…",1
"""3e4a023e-95ad-4e05-82e3-d88d15…",1


In [59]:
sessions = (
    df_filtered.sort(['id', 'timestamp'])
    .group_by('id')
    .agg(
        pl.col('name').first().alias('name'),
        pl.col('email').first().alias('email'),
        pl.col('timestamp').filter(pl.col('action') == 'Joined').first().alias('join_time'),
        pl.col('timestamp').filter(pl.col('action') == 'Left').first().alias('leave_time')
    )
    .with_columns(
        (pl.col('leave_time') - pl.col('join_time')).alias('duration')
    )
)

sessions

id,name,email,join_time,leave_time,duration
str,str,str,"datetime[μs, Asia/Ho_Chi_Minh]","datetime[μs, Asia/Ho_Chi_Minh]",duration[μs]
"""001cbea3-e2bb-427d-8da4-9fc622…","""Truong Quang Thai 20224139""","""Thai.TQ224139@sis.hust.edu.vn""",2025-03-07 09:54:49 +07,2025-03-07 10:49:31 +07,54m 42s
"""002a27b2-537c-4478-b676-e8e841…","""Phuong Xuan Ngoc Linh 20241372…","""Linh.PXN2413720@sis.hust.edu.v…",2025-03-07 09:08:20 +07,2025-03-07 09:18:22 +07,10m 2s
"""00dc363e-ba12-414a-ac50-fcabc9…","""Nguyen Hoang Tung 202414596""","""Tung.NH2414596@sis.hust.edu.vn""",2025-03-07 09:11:44 +07,2025-03-07 10:06:09 +07,54m 25s
"""013706ad-fb1d-41e8-980d-045be5…","""Nguyen Van The 202418988""","""The.NV2418988@sis.hust.edu.vn""",2025-03-07 10:15:53 +07,2025-03-07 10:51:38 +07,35m 45s
"""014037ea-c777-4228-9877-cca377…","""Vu Ngoc Gia Han 202415555""","""Han.VNG2415555@sis.hust.edu.vn""",2025-03-07 10:24:41 +07,2025-03-07 10:57:34 +07,32m 53s
…,…,…,…,…,…
"""feae5aa3-80a0-418a-a6fa-3b7a5f…","""Ha Phuong Thao 20233808""","""Thao.HP233808@sis.hust.edu.vn""",2025-03-07 09:39:02 +07,2025-03-07 09:53:00 +07,13m 58s
"""ff2cb8aa-9581-4d47-9d7d-44ab01…","""Nguyen Thao Linh 20223388""","""Linh.NT223388@sis.hust.edu.vn""",2025-03-07 10:04:07 +07,2025-03-07 10:12:48 +07,8m 41s
"""ff8e4f6a-c968-4d19-bc5f-51e8dd…","""Le Anh Duc 20233324""","""Duc.LA233324@sis.hust.edu.vn""",2025-03-07 09:25:06 +07,2025-03-07 10:51:54 +07,1h 26m 48s
"""ffb35f1b-5b6c-4154-9a2f-83cb82…","""Dinh Hoang Tung 202410613""","""Tung.DH2410613@sis.hust.edu.vn""",2025-03-07 09:13:39 +07,2025-03-07 09:52:47 +07,39m 8s


In [60]:
def merge_intervals(intervals):
    # Sort intervals by start time
    sorted_intervals = sorted(intervals, key=lambda x: x['join_time'])
    
    if not sorted_intervals:
        return timedelta(seconds=0)
    
    merged = [sorted_intervals[0]]
    for current in sorted_intervals[1:]:
        previous = merged[-1]
        if current['join_time'] <= previous['leave_time']:
            merged[-1] = {
                'join_time': previous['join_time'], 
                'leave_time': max(previous['leave_time'], current['leave_time'])
            }
        else:
            merged.append(current)
    
    # Calculate total duration using timedelta objects
    total_seconds = 0
    for interval in merged:
        start = interval['join_time']
        end = interval['leave_time']
        
        # If ending_time is specified and is before the end of this interval,
        # truncate the interval at ending_time
        if ending_time is not None and end > ending_time:
            end = ending_time
        
        # If ending_time is before the start of this interval, skip the whole calculation
        if ending_time is not None and start >= ending_time:
            continue
            
        total_seconds += (end - start).total_seconds()
        
    return timedelta(seconds=total_seconds)

In [61]:
result = (
    # Group by email to process each person
    sessions
    .group_by('email')
    .agg(
        pl.col('name').first().alias('name'),
        
        # Output all intervals as struct array
        pl.struct(['join_time', 'leave_time']).alias('intervals'),
        pl.concat_str([
            pl.col('join_time').dt.time().cast(pl.Utf8),
            pl.lit(' - '),
            pl.col('leave_time').dt.time().cast(pl.Utf8)            
        ]).alias('formatted_intervals')
    )
    .with_columns(
        pl.col('intervals').map_elements(merge_intervals, return_dtype=pl.Duration).alias('total_non_overlapping_duration'),
        pl.col('formatted_intervals')
            .list.eval(pl.format('<{}>', pl.element()))
            .list.join('')
    )
    .rename({'formatted_intervals':'formatted_intervals (UTC+7)'})
    .sort('total_non_overlapping_duration')
)

In [62]:
result_formatted = (
    result
    .with_columns(
        (pl.col("total_non_overlapping_duration") / 1_000_000).cast(pl.Int64).alias("duration_in_seconds")
    )
    .drop(['intervals', 'total_non_overlapping_duration'])
)

result_formatted

email,name,formatted_intervals (UTC+7),duration_in_seconds
str,str,str,i64
"""Anh.NN235013@sis.hust.edu.vn""","""Nguyen Ngoc Anh 20235013""","""<09:32:24 - 09:32:55>""",31
"""Thanh.HX236321@sis.hust.edu.vn""","""Ha Xuan Thanh 20236321""","""<10:36:01 - 10:36:39>""",38
"""Anh.HHT2417284@sis.hust.edu.vn""","""Ha Huy Tuan Anh 202417284""","""<09:19:48 - 09:20:27>""",39
"""Son.BT2421574@sis.hust.edu.vn""","""Bui Thanh Son 202421574""","""<09:37:49 - 09:38:30>""",41
"""Dung.TA2420073@sis.hust.edu.vn""","""Tran Anh Dung 202420073""","""<08:53:58 - 08:54:40>""",42
…,…,…,…
"""Ngan.LTH234509@sis.hust.edu.vn""","""Le Thi Hong Ngan 20234509""","""<09:37:15 - 11:00:36><08:54:24…",7034
"""Hung.VS2417128@sis.hust.edu.vn""","""Vu Sinh Hung 202417128""","""<08:57:32 - 10:52:19><08:54:16…",7060
"""Duy.NQ235065@sis.hust.edu.vn""","""Nguyen Quy Duy 20235065""","""<08:52:58 - 10:54:04>""",7142
"""Huyen.NT223148@sis.hust.edu.vn""","""Nguyen Thanh Huyen 20223148""","""<08:51:35 - 10:58:08>""",7225


In [64]:
result_formatted.write_csv(f"Duration_Output_{filename.replace('.csv', '')}.csv")