In [None]:
# @title Upload file 'AttendeeReport.csv'
from google.colab import files
uploaded = files.upload()

Saving AttendeeReport.csv to AttendeeReport (2).csv


In [None]:
# @title Nhập thời gian bắt đầu, kết thúc
ngay = "2025-03-07" # @param {"type":"date"}
bat_dau = '9:00:00' # @param {"type":"string","placeholder":"9:00:00"}
ket_thuc = '10:52:00' # @param {"type":"string","placeholder":"10:00:00"}


In [None]:
print(f"Bắt đầu: {ngay} - {bat_dau}")
print(f"Kết thúc: {ngay} - {ket_thuc}")

Bắt đầu: 2025-03-07 - 9:00:00
Kết thúc: 2025-03-07 - 10:52:00


### Code running (calculating duration)

In [None]:
import polars as pl
from datetime import datetime, timedelta, timezone

In [None]:
filename = 'AttendeeReport.csv'

In [None]:
# Parse time & set timezone UTC+7
ending_datetime = datetime.combine(
    datetime.strptime(ngay, "%Y-%m-%d").date(),
    datetime.strptime(ket_thuc, "%H:%M:%S").time()
).replace(tzinfo=timezone(timedelta(hours=7)))

In [None]:
df = pl.read_csv(filename).rename({
    'Session Id': 'id',
    'Participant Id': 'email',
    'Full Name': 'name',
    'UserAgent': 'user_agent',
    'UTC Event Timestamp': 'timestamp',
    'Action': 'action',
    'Role': 'role'
}).with_columns(
    pl.col('timestamp')
        .str.strptime(pl.Datetime, format='%m/%d/%Y %I:%M:%S %p')
        .dt.convert_time_zone('Asia/Ho_Chi_Minh')
).sort(
    by=['email','id', 'timestamp']
)

df

id,email,name,user_agent,timestamp,action,role
str,str,str,str,"datetime[μs, Asia/Ho_Chi_Minh]",str,str
"""be6789ac-cff9-4391-9b20-f17238…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 10:45:52 +07,"""Joined""","""Attendee"""
"""be6789ac-cff9-4391-9b20-f17238…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 11:12:32 +07,"""Left""","""Attendee"""
"""fb225999-3098-485b-91f4-fc9e51…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0""",2025-03-07 09:18:53 +07,"""Joined""","""Attendee"""
"""fb225999-3098-485b-91f4-fc9e51…","""An.DTH2410000@sis.hust.edu.vn""","""Dao Thi Ha An 202410000""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 10:37:16 +07,"""Left""","""Attendee"""
"""17e7bc42-ee19-468b-a1b4-ccde44…","""An.LT2412859@sis.hust.edu.vn""","""Le Thai An 202412859""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 09:10:21 +07,"""Joined""","""Attendee"""
…,…,…,…,…,…,…
"""555a4332-f0b9-4466-8525-23d2aa…","""Yen.NH2419910@sis.hust.edu.vn""","""Nguyen Hai Yen 202419910""","""Mozilla/5.0""",2025-03-07 09:25:16 +07,"""Joined""","""Attendee"""
"""9380b31a-9c9a-4905-871e-e1e2bb…","""Yen.NH2419910@sis.hust.edu.vn""","""Nguyen Hai Yen 202419910""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 09:30:19 +07,"""Joined""","""Attendee"""
"""9380b31a-9c9a-4905-871e-e1e2bb…","""Yen.NH2419910@sis.hust.edu.vn""","""Nguyen Hai Yen 202419910""","""Mozilla/5.0 (iPhone; CPU iPhon…",2025-03-07 09:31:51 +07,"""Left""","""Attendee"""
"""9b5e7f54-9580-4ea0-84fd-a6d0ed…","""Yen.NTH2420365@sis.hust.edu.vn""","""Ngo Thi Hai Yen 202420365""","""Mozilla/5.0""",2025-03-07 09:24:56 +07,"""Joined""","""Attendee"""


In [None]:
error_entry_sessions = df.group_by('id').agg(pl.len().alias('count')).filter(pl.col('count') != 2)
df_filtered = df.join(error_entry_sessions, on='id', how='anti')

error_entry_sessions

id,count
str,u32
"""9bfe0244-9b03-46cc-920c-fc2ab9…",1
"""33e9125b-37de-44ce-a212-1ea6a5…",1
"""a8367a8d-b5b8-4ce7-8a40-de6b35…",1
"""bce2f862-7312-42cc-b1bd-48e2e1…",1
"""aa2581d6-baf8-4b66-975f-20117a…",1
…,…
"""774d018f-d9d8-4995-911d-137127…",1
"""76215f0b-4737-4520-9977-09e7e5…",1
"""c0651cb0-beff-4498-b34c-fad7b8…",1
"""8c8d3b09-0a09-4914-b0e6-4c1b86…",1


In [None]:
sessions = (
    df_filtered.sort(['id', 'timestamp'])
    .group_by('id')
    .agg(
        pl.col('name').first().alias('name'),
        pl.col('email').first().alias('email'),
        pl.col('timestamp').filter(pl.col('action') == 'Joined').first().alias('join_time'),
        pl.col('timestamp').filter(pl.col('action') == 'Left').first().alias('leave_time')
    )
    .with_columns(
        (pl.col('leave_time') - pl.col('join_time')).alias('duration')
    )
)

sessions

id,name,email,join_time,leave_time,duration
str,str,str,"datetime[μs, Asia/Ho_Chi_Minh]","datetime[μs, Asia/Ho_Chi_Minh]",duration[μs]
"""1e2baf55-a68f-4706-aa1d-a0f0d6…","""Vu Duc Tam 20236314""","""Tam.VD236314@sis.hust.edu.vn""",2025-03-07 10:17:03 +07,2025-03-07 10:22:11 +07,5m 8s
"""eaf3f8cc-6138-4c94-8b53-e75973…","""Le Dinh Long 20236222""","""Long.LD236222@sis.hust.edu.vn""",2025-03-07 10:22:43 +07,2025-03-07 10:53:54 +07,31m 11s
"""cb21c683-c80b-4330-93d8-2f6ca5…","""Trieu Huyen Trang 20232333""","""Trang.TH232333@sis.hust.edu.vn""",2025-03-07 10:06:31 +07,2025-03-07 11:11:14 +07,1h 4m 43s
"""4bdddd6f-ea64-40a5-b1dd-a72282…","""Tran Van Duy 20222256""","""Duy.TV222256@sis.hust.edu.vn""",2025-03-07 09:19:00 +07,2025-03-07 10:15:05 +07,56m 5s
"""b09c2350-4ef5-4d51-a2c4-765c91…","""Nguyen Hong Tan 20228142""","""Tan.NH228142@sis.hust.edu.vn""",2025-03-07 09:24:16 +07,2025-03-07 09:25:49 +07,1m 33s
…,…,…,…,…,…
"""9341c417-e0ad-407b-93f4-1f4ac9…","""Nguyen Ngoc Minh Tue 20221821""","""Tue.NNM221821@sis.hust.edu.vn""",2025-03-07 09:21:29 +07,2025-03-07 09:27:12 +07,5m 43s
"""a68eb6f1-562d-4ba5-9ce3-657aec…","""Do Dinh Sinh 202418981""","""Sinh.DD2418981@sis.hust.edu.vn""",2025-03-07 09:34:15 +07,2025-03-07 10:53:28 +07,1h 19m 13s
"""b95bbabc-0a5e-4248-bd81-9ed8d0…","""Nguyen Le Tu 202420325""","""Tu.NL2420325@sis.hust.edu.vn""",2025-03-07 09:35:12 +07,2025-03-07 09:47:26 +07,12m 14s
"""dd981ede-ce56-4d5d-9088-344798…","""Cao Thanh Hung 202416502""","""Hung.CT2416502@sis.hust.edu.vn""",2025-03-07 09:12:49 +07,2025-03-07 09:20:42 +07,7m 53s


In [None]:
def merge_intervals(intervals):
    # Sort intervals by start time
    sorted_intervals = sorted(intervals, key=lambda x: x['join_time'])

    if not sorted_intervals:
        return timedelta(seconds=0)

    merged = [sorted_intervals[0]]
    for current in sorted_intervals[1:]:
        previous = merged[-1]
        if current['join_time'] <= previous['leave_time']:
            merged[-1] = {
                'join_time': previous['join_time'],
                'leave_time': max(previous['leave_time'], current['leave_time'])
            }
        else:
            merged.append(current)

    # Calculate total duration using timedelta objects
    total_seconds = 0
    for interval in merged:
        start = interval['join_time']
        end = interval['leave_time']

        # If ending_datetime is specified and is before the end of this interval,
        # truncate the interval at ending_datetime
        if ending_datetime is not None and end > ending_datetime:
            end = ending_datetime

        # If ending_datetime is before the start of this interval, skip the whole calculation
        if ending_datetime is not None and start >= ending_datetime:
            continue

        total_seconds += (end - start).total_seconds()

    return timedelta(seconds=total_seconds)

In [None]:
result = (
    # Group by email to process each person
    sessions
    .group_by('email')
    .agg(
        pl.col('name').first().alias('name'),

        # Output all intervals as struct array
        pl.struct(['join_time', 'leave_time']).alias('intervals'),
        pl.concat_str([
            pl.col('join_time').dt.time().cast(pl.Utf8),
            pl.lit(' - '),
            pl.col('leave_time').dt.time().cast(pl.Utf8)
        ]).alias('formatted_intervals')
    )
    .with_columns(
        pl.col('intervals').map_elements(merge_intervals, return_dtype=pl.Duration).alias('total_non_overlapping_duration'),
        pl.col('formatted_intervals')
            .list.eval(pl.format('<{}>', pl.element()))
            .list.join('')
    )
    .rename({'formatted_intervals':'formatted_intervals (UTC+7)'})
    .sort('total_non_overlapping_duration')
)

In [83]:
result_formatted = (
    result
    .with_columns(
        (pl.col("total_non_overlapping_duration") / 1_000_000).cast(pl.Int64).alias("duration_seconds"),
    )
    .with_columns(
        (
          (pl.col("duration_seconds") // 3600).cast(pl.Utf8).str.zfill(2) + ":" +
          ((pl.col("duration_seconds") % 3600) // 60).cast(pl.Utf8).str.zfill(2) + ":" +
          (pl.col("duration_seconds") % 60).cast(pl.Utf8).str.zfill(2)
        ).alias("formatted_time")
    )
    .drop(['intervals', 'total_non_overlapping_duration'])
    .rename({
        'email': 'Email',
        'name': 'Name',
        'formatted_intervals (UTC+7)': 'Sessions (UTC+7)',
        'duration_seconds': 'Total time (seconds)',
        'formatted_time': 'Total time',
    })
)

result_formatted

Email,Name,Sessions (UTC+7),Total time (seconds),Total time
str,str,str,i64,str
"""Anh.NN235013@sis.hust.edu.vn""","""Nguyen Ngoc Anh 20235013""","""<09:32:24 - 09:32:55>""",31,"""00:00:31"""
"""Thanh.HX236321@sis.hust.edu.vn""","""Ha Xuan Thanh 20236321""","""<10:36:01 - 10:36:39>""",38,"""00:00:38"""
"""Anh.HHT2417284@sis.hust.edu.vn""","""Ha Huy Tuan Anh 202417284""","""<09:19:48 - 09:20:27>""",39,"""00:00:39"""
"""Son.BT2421574@sis.hust.edu.vn""","""Bui Thanh Son 202421574""","""<09:37:49 - 09:38:30>""",41,"""00:00:41"""
"""Ky.TM222574@sis.hust.edu.vn""","""Tran Minh Ky 20222574""","""<09:01:51 - 09:02:33>""",42,"""00:00:42"""
…,…,…,…,…
"""Ngan.LTH234509@sis.hust.edu.vn""","""Le Thi Hong Ngan 20234509""","""<08:54:24 - 09:36:53><09:37:15…",7034,"""01:57:14"""
"""Hung.VS2417128@sis.hust.edu.vn""","""Vu Sinh Hung 202417128""","""<08:53:24 - 08:54:05><08:54:16…",7060,"""01:57:40"""
"""Duy.NQ235065@sis.hust.edu.vn""","""Nguyen Quy Duy 20235065""","""<08:52:58 - 10:54:04>""",7142,"""01:59:02"""
"""Huyen.NT223148@sis.hust.edu.vn""","""Nguyen Thanh Huyen 20223148""","""<08:51:35 - 10:58:08>""",7225,"""02:00:25"""


In [84]:
result_formatted.write_csv(f"Tính_TG_{ngay}.csv")