In [None]:
import re
import pandas as pd
from datetime import datetime
import subprocess
import time
import os
import glob
import ast
import json

class AggregateDataProcessor:
    def __init__(self, log_input_path, yolo_input_path, ocr_input_path, output_path, test_path):
        self.log_input_path = log_input_path
        self.yolo_input_path = yolo_input_path
        self.ocr_input_path = ocr_input_path
        self.output_path = output_path
        self.test_csv = pd.read_csv(test_path)
        
        self.log_csv = pd.read_csv(log_input_path)
        self.yolo_csv = pd.read_csv(yolo_input_path)
        self.ocr_csv = pd.read_csv(ocr_input_path)
        self.ocr_csv['event_details'] = self.ocr_csv['event_details'].apply(self.safe_literal_eval)
        self.yolo_csv['event_details'] = self.yolo_csv['event_details'].apply(self.safe_literal_eval)
        self.log_csv['event_details'] = self.log_csv['event_details'].apply(self.safe_literal_eval)
        self.output_csv = pd.DataFrame(columns=["timestamp","player_ip","map","game_round","event_type","event_details"])
        
        self.item_keys = ['latency', 'item_category', 'item', 'player_id', 'player_ip',
                          'weapon_inventory', 'weapon_num', 'total_armor_picked', 'armor_picked',
                          'total_health_picked', 'health_picked', 'latency_map', 'log_line']
        self.itemLost_keys = ['latency', 'event_type', 'item_category', 'item', 'player_id', 'player_ip', 'weapon_inventory', 'weapon_num',
                              'total_armor_picked', 'armor_picked', 'total_health_picked', 'health_picked', 'latency_map', 'log_line']
        
        self.score_keys = ['latency', 'player_id', 'player_ip', 'points', 'log_line']
        
        self.suicide_keys = ['killer_id', 'victim_id', 'weapon_id', 'killer_ip', 'victim_ip', 'weapon', 'kills', 'deaths', 'kill_dict', 'death_dict',
                             'log_line']
        self.kill_keys = ['latency', 'killer_id', 'victim_id', 'weapon_id', 'killer_ip', 'victim_ip', 'player_id', 'player_ip', 'weapon',
                          'kills', 'deaths', 'kill_dict', 'death_dict', 'log_line']
        self.killed_keys = ['latency', 'killer_id', 'victim_id', 'weapon_id', 'killer_ip', 'victim_ip', 'player_id', 'player_ip', 'weapon', 
                            'kills', 'deaths', 'kill_dict', 'death_dict', 'log_line']
        
        self.yolo_keys = ['player_ip', 'yolo_obj', 'confidence', 'x', 'y', 'width', 'height', 'box_area', 'center_x', 'center_y', 'diff_height', 
                          'position_of_object']
        
        self.ocr_keys = ['player_ip', 'pred_roi_ammo', 'conf_roi_ammo', 'pred_roi_health', 'conf_roi_health', 'pred_roi_armour', 'conf_roi_armour',
                         'file_name', 'roi_ammo_file_name', 'roi_health_file_name', 'roi_armour_file_name']
        
        
    def format_datetime(self, timestamp):
        try:
            ts = float(timestamp)
            if ts < 1e12:  # If it's in seconds, convert to milliseconds
                ts = ts * 1000
            datetime_obj = datetime.fromtimestamp(ts / 1000)  # divide by 1000 back for datetime
            #date_time_str = datetime_obj.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
            date_time_str = datetime_obj.strftime('%Y-%m-%d %H:%M:%S')
            return date_time_str
        except (ValueError, TypeError, OSError) as e:
            print(f"[Warning] Invalid timestamp '{timestamp}': {e}")
            return f'{None} {None}'
        
    def safe_literal_eval(self,val): # OCR data have issue processing, thats why need to convert first
        if isinstance(val, str):
            try:
                return ast.literal_eval(val)
            except (ValueError, SyntaxError):
                return None
        return val  # If it's already a dict or NaN
    
    def get_event_details(self, event, df):
        event_dict = {}
        if event == 'suicide': key_list = self.suicide_keys
        elif event == 'kill':  key_list = self.kill_keys
        elif event == 'score': key_list = self.score_keys
        elif event == 'killed': key_list = self.killed_keys
        elif event == 'item': key_list = self.item_keys
        elif event == 'itemLost': key_list = self.itemLost_keys
        elif event == 'yolo': key_list = self.yolo_keys
        elif event == 'ocr': key_list = self.ocr_keys
        else: 
            print('Invalid Event') 
            return None
        for key in key_list:
            event_dict[key] = df[df['event_type'] == event]['event_details'].apply(lambda d: d.get(key) if isinstance(d, dict) else None)
        return event_dict
    
    def simplify_Context_df(self, csv):
        grouped = csv.groupby('timestamp')
        data = []
        # Iterate over the grouped data
        for name, group in grouped:
            print(name)
            combined_context = { # each row represents the information in that second
                "timestamp": str(name),
                # OCR
                "player_ammo": 0,
                "player_health": 0,
                "player_armor": 0,
                # Yolo     
                "detected_entities":[],
                # Log
                "suicide_event": "",
                "kill_event": "",
                "score_event": "",
                "killed_event": "",
                "itemPicked_event": [],
                "itemLost_event": "",
            }
            for event_type in group['event_type'].unique():
                temp = group[group['event_type'] == event_type][['event_details']]
                #flattened_temp = pd.json_normalize(temp['event_details'])
                flattened_temp = temp['event_details'].apply(pd.Series)
                #------------ OCR --------------------------#
                if event_type == 'ocr':
                    if flattened_temp.isnull().all().all():
                        # All values are NaN, meaning OCR failed totally
                        combined_context["player_ammo"] = 0
                        combined_context["player_health"] = 0
                        combined_context["player_armor"] = 0
                    else: 
                        ammo_series = flattened_temp[flattened_temp["conf_roi_ammo"] > 0.6]["pred_roi_ammo"] if not flattened_temp.empty else 0
                        health_series = flattened_temp[flattened_temp["conf_roi_health"] > 0.6]["pred_roi_health"] if not flattened_temp.empty else 0
                        armor_series = flattened_temp[flattened_temp["conf_roi_armour"] > 0.6]["pred_roi_armour"] if not flattened_temp.empty else 0
                        
                        combined_context["player_ammo"] = ammo_series.value_counts().idxmax() if not ammo_series.empty else 0
                        combined_context["player_health"] = health_series.value_counts().idxmax() if not health_series.empty else 0
                        combined_context["player_armor"] = armor_series.value_counts().idxmax() if not armor_series.empty else 0
                #------------------------------------------#
                
                #------------ YOLO --------------------------#
                elif event_type == 'yolo':
                    combined_context["detected_entities"] = flattened_temp.loc[flattened_temp["confidence"] > 0.6, "yolo_obj"].unique().tolist()
                #-----------------------------------------#
                
                #------------ Suicide --------------------------#
                elif event_type == 'suicide':
                    combined_context["suicide_event"] = f'{flattened_temp["victim_ip"]} died by fall off from game map'
                #-------------------------------------------#
                
                #------------ Kill --------------------------#
                elif event_type == 'kill':
                    if len(flattened_temp) == 1:
                        combined_context["kill_event"] = f'{flattened_temp["killer_ip"]} killed enemy {flattened_temp["victim_ip"]} by {flattened_temp["weapon"]}, player currently has {flattened_temp["kills"]} and {flattened_temp["deaths"]}'
                    else:
                        combined_context["kill_event"] = []
                        for idx, row in flattened_temp.iterrows():
                            combined_context["kill_event"].append(f'{row["killer_ip"]} killed enemy {row["victim_ip"]} by {row["weapon"]}, player currently has {row["kills"]} and {row["deaths"]}')
                #-------------------------------------------#
                
                #------------ Score --------------------------#
                elif event_type == 'score':
                    if len(flattened_temp) == 1:   
                        combined_context["score_event"] = f'{flattened_temp["player_ip"]} score updated to {flattened_temp["points"]}'
                    else:
                        combined_context["score_event"] = []
                        for idx in range(len(flattened_temp)):
                            combined_context["score_event"].append(f'{flattened_temp["player_ip"][idx]} score updated to {flattened_temp["points"][idx]}')
                #---------------------------------------------#
                
                #------------ Killed --------------------------#            
                elif event_type == 'killed':
                    combined_context["killed_event"] = f'{flattened_temp["victim_ip"]} killed by enemy {flattened_temp["killer_ip"]} by {flattened_temp["weapon"]}, player currently has {flattened_temp["kills"]} and {flattened_temp["deaths"]}'
                #----------------------------------------------#   
                
                #------------ Item --------------------------#
                elif event_type == 'item':
                    combined_context['itemPicked_event'] = []

                    for _, row in flattened_temp.iterrows():
                        player_inventory = {
                            "weapon": row["weapon_inventory"],
                            "armor": row["armor_picked"],
                            "health": row["health_picked"]
                        }
                        event_str = (
                            f'{row["player_ip"]} picked up {row["item_category"]} type: '
                            f'{row["item"]}, player current inventory: {player_inventory}'
                        )
                        combined_context['itemPicked_event'].append(event_str)

                    # If there's only one event, flatten it into a string (instead of a list)
                    if len(combined_context['itemPicked_event']) == 1:
                        combined_context['itemPicked_event'] = combined_context['itemPicked_event'][0]
                #---------------------------------------------#
                
                #------------ Item Lost --------------------------#
                elif event_type == 'itemLost':
                    player_inventory = {
                        "weapon": {'weapon_machinegun': 1},
                        "armor": 0,
                        "health": 0
                    }
                    combined_context['itemLost_event'] = f'{flattened_temp["player_ip"]} lost all items due to killed or suicide event. Player current inventory: {player_inventory}'
                #-------------------------------------------------#
            data.append(combined_context)

        simp_df = pd.DataFrame(data)
        return simp_df
    
    def aggregate(self):
        df = pd.concat([self.log_csv, self.yolo_csv, self.ocr_csv], ignore_index = True) #self.ocr_csv
        df = df.sort_values(by='raw_timestamp') # Sorting by 'Age' in ascending order
        df['timestamp'] = df['raw_timestamp'].apply(lambda ts: pd.Series(self.format_datetime(ts)))
        # Convert the 'date' column to datetime
        df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
        df = df.sort_values(by='timestamp')
        df['player_ip'] = df['event_details'].apply(lambda d: d.get('player_ip') if isinstance(d, dict) else None)
        df['player_ip'] = df['player_ip'].str.replace('Player_', '', regex=False)
        
        # Validate NewGameRound events
        if df.loc[df['event_type'] == 'NewGameRound', ['map', 'game_round']].isnull().any().any():
            print("Warning: Some NewGameRound events have missing map or game_round")
        # Forward fill
        df['map'] = df['map'].ffill()
        df['game_round'] = df['game_round'].ffill()
        # Handle rows before first NewGameRound (optional)
        df['map'] = df['map'].fillna('unknown')
        df['game_round'] = df['game_round'].fillna(0)
        
        out_df = pd.DataFrame()
        out_df['timestamp'] = df['timestamp']
        out_df['player_ip'] = df['player_ip']
        out_df['map'] = df['map']
        out_df['game_round'] = df['game_round']
        out_df['event_type'] = df['event_type']
        out_df['event_details'] = df['event_details']
        return out_df
        
        #out_df.to_csv(self.output_path, index=False)
        
    
        

        
        

import_dir = './data/processed'
output_dir = './data/processed/final'
latency = 0
input_log_path = f'{import_dir}/log/processedLog_{latency}ms.csv'
input_yolo_path = f'{import_dir}/yolo/processed_Yolo_{latency}ms.csv'
input_ocr_path = f'{import_dir}/ocr/processed_OCR_{latency}ms.csv'
    
output_path = f'{output_dir}/aggregated_contextMenu_{latency}ms.csv'
    
aggregator = AggregateDataProcessor(input_log_path, input_yolo_path, input_ocr_path, output_path, "./data/sample.csv")
        

In [25]:
csv = aggregator.test_csv

In [26]:
csv

Unnamed: 0,timestamp,player_ip,map,game_round,event_type,event_details,timestamp_ms
0,2025-03-16 17:14:18,,kaos2,2.0,NewGameRound,{'latency': 0},2025-03-16 17:14:18.628
1,2025-03-16 17:15:08,172.19.113.65,kaos2,2.0,item,"{'latency': 0, 'item_category': 'Ammo', 'item'...",2025-03-16 17:15:08.179
2,2025-03-16 17:15:08,,kaos2,3.0,NewGameRound,{'latency': 0},2025-03-16 17:15:08.339
3,2025-03-16 17:15:13,172.19.113.65,kaos2,3.0,ocr,"{'player_ip': '172.19.113.65', 'pred_roi_ammo'...",2025-03-16 17:15:13.504
4,2025-03-16 17:15:13,172.19.113.65,kaos2,3.0,ocr,"{'player_ip': '172.19.113.65', 'pred_roi_ammo'...",2025-03-16 17:15:13.521
...,...,...,...,...,...,...,...
24980,2025-03-16 17:18:55,172.19.113.65,wrackdm17,5.0,ocr,"{'player_ip': '172.19.113.65', 'pred_roi_ammo'...",2025-03-16 17:18:55.966
24981,2025-03-16 17:18:55,172.19.113.65,wrackdm17,5.0,ocr,"{'player_ip': '172.19.113.65', 'pred_roi_ammo'...",2025-03-16 17:18:55.983
24982,2025-03-16 17:18:55,172.19.113.65,wrackdm17,5.0,ocr,"{'player_ip': '172.19.113.65', 'pred_roi_ammo'...",2025-03-16 17:18:55.999
24983,2025-03-16 17:18:56,172.19.113.65,wrackdm17,5.0,ocr,"{'player_ip': '172.19.113.65', 'pred_roi_ammo'...",2025-03-16 17:18:56.017


In [27]:
csv['event_details'] = csv['event_details'].apply(aggregator.safe_literal_eval)

In [28]:
csv['event_details']

0                                           {'latency': 0}
1        {'latency': 0, 'item_category': 'Ammo', 'item'...
2                                           {'latency': 0}
3        {'player_ip': '172.19.113.65', 'pred_roi_ammo'...
4        {'player_ip': '172.19.113.65', 'pred_roi_ammo'...
                               ...                        
24980    {'player_ip': '172.19.113.65', 'pred_roi_ammo'...
24981    {'player_ip': '172.19.113.65', 'pred_roi_ammo'...
24982    {'player_ip': '172.19.113.65', 'pred_roi_ammo'...
24983    {'player_ip': '172.19.113.65', 'pred_roi_ammo'...
24984    {'player_ip': '172.19.113.65', 'pred_roi_ammo'...
Name: event_details, Length: 24985, dtype: object

In [63]:
grouped = csv.groupby('timestamp')
data = []
# Iterate over the grouped data
for name, group in grouped:
    print(name)
    combined_context = { # each row represents the information in that second
        "timestamp": str(name),
        # OCR
        "player_ammo": 0,
        "player_health": 0,
        "player_armor": 0,
        # Yolo     
        "detected_entities":[],
        # Log
        "suicide_event": "",
        "kill_event": "",
        "score_event": "",
        "killed_event": "",
        "itemPicked_event": [],
        "itemLost_event": "",
    }
    for event_type in group['event_type'].unique():
        temp = group[group['event_type'] == event_type][['event_details']]
        #flattened_temp = pd.json_normalize(temp['event_details'])
        flattened_temp = temp['event_details'].apply(pd.Series)
        #------------ OCR --------------------------#
        if event_type == 'ocr':
            if flattened_temp.isnull().all().all():
                # All values are NaN, meaning OCR failed totally
                combined_context["player_ammo"] = 0
                combined_context["player_health"] = 0
                combined_context["player_armor"] = 0
            else: 
                ammo_series = flattened_temp[flattened_temp["conf_roi_ammo"] > 0.6]["pred_roi_ammo"] if not flattened_temp.empty else 0
                health_series = flattened_temp[flattened_temp["conf_roi_health"] > 0.6]["pred_roi_health"] if not flattened_temp.empty else 0
                armor_series = flattened_temp[flattened_temp["conf_roi_armour"] > 0.6]["pred_roi_armour"] if not flattened_temp.empty else 0
                
                combined_context["player_ammo"] = ammo_series.value_counts().idxmax() if not ammo_series.empty else 0
                combined_context["player_health"] = health_series.value_counts().idxmax() if not health_series.empty else 0
                combined_context["player_armor"] = armor_series.value_counts().idxmax() if not armor_series.empty else 0
        #------------------------------------------#
        
        #------------ YOLO --------------------------#
        elif event_type == 'yolo':
            combined_context["detected_entities"] = flattened_temp.loc[flattened_temp["confidence"] > 0.6, "yolo_obj"].unique().tolist()
        #-----------------------------------------#
        
        #------------ Suicide --------------------------#
        elif event_type == 'suicide':
            combined_context["suicide_event"] = f'{flattened_temp["victim_ip"]} died by fall off from game map'
        #-------------------------------------------#
        
        #------------ Kill --------------------------#
        elif event_type == 'kill':
            if len(flattened_temp) == 1:
                combined_context["kill_event"] = f'{flattened_temp["killer_ip"]} killed enemy {flattened_temp["victim_ip"]} by {flattened_temp["weapon"]}, player currently has {flattened_temp["kills"]} and {flattened_temp["deaths"]}'
            else:
                combined_context["kill_event"] = []
                for idx, row in flattened_temp.iterrows():
                    combined_context["kill_event"].append(f'{row["killer_ip"]} killed enemy {row["victim_ip"]} by {row["weapon"]}, player currently has {row["kills"]} and {row["deaths"]}')
        #-------------------------------------------#
        
        #------------ Score --------------------------#
        elif event_type == 'score':
            if len(flattened_temp) == 1:   
                combined_context["score_event"] = f'{flattened_temp["player_ip"]} score updated to {flattened_temp["points"]}'
            else:
                combined_context["score_event"] = []
                for idx in range(len(flattened_temp)):
                    combined_context["score_event"].append(f'{flattened_temp["player_ip"][idx]} score updated to {flattened_temp["points"][idx]}')
        #---------------------------------------------#
        
        #------------ Killed --------------------------#            
        elif event_type == 'killed':
            combined_context["killed_event"] = f'{flattened_temp["victim_ip"]} killed by enemy {flattened_temp["killer_ip"]} by {flattened_temp["weapon"]}, player currently has {flattened_temp["kills"]} and {flattened_temp["deaths"]}'
        #----------------------------------------------#   
        
        #------------ Item --------------------------#
        elif event_type == 'item':
            if len(flattened_temp) == 1:
                player_inventory = {
                    "weapon": flattened_temp["weapon_inventory"],
                    "armor": flattened_temp["armor_picked"],
                    "health": flattened_temp["health_picked"]
                }
                combined_context['itemPicked_event'] = f'{flattened_temp["player_ip"]} picked up {flattened_temp["item_category"]} type: {flattened_temp["item"]}, player current inventory: {player_inventory}'
            else:
                combined_context['itemPicked_event'] = []

                for idx, row in flattened_temp.iterrows():
                    player_inventory = {
                        "weapon": row["weapon_inventory"],
                        "armor": row["armor_picked"],
                        "health": row["health_picked"]
                    }
                    combined_context['itemPicked_event'].append(
                        f'{row["player_ip"]} picked up {row["item_category"]} type: {row["item"]}, player current inventory: {player_inventory}'
                    )
        #---------------------------------------------#
        
        #------------ Item Lost --------------------------#
        elif event_type == 'itemLost':
            player_inventory = {
                "weapon": {'weapon_machinegun': 1},
                "armor": 0,
                "health": 0
            }
            combined_context['itemLost_event'] = f'{flattened_temp["player_ip"]} lost all items due to killed or suicide event. Player current inventory: {player_inventory}'
        #-------------------------------------------------#
    data.append(combined_context)

df = pd.DataFrame(data)
    

2025-03-16 17:14:18
2025-03-16 17:15:08
2025-03-16 17:15:13
2025-03-16 17:15:14
2025-03-16 17:15:15
2025-03-16 17:15:16
2025-03-16 17:15:17
2025-03-16 17:15:18
2025-03-16 17:15:19
2025-03-16 17:15:20
2025-03-16 17:15:21
2025-03-16 17:15:22
2025-03-16 17:15:23
2025-03-16 17:15:24
2025-03-16 17:15:25
2025-03-16 17:15:26
2025-03-16 17:15:27
2025-03-16 17:15:28
2025-03-16 17:15:29
2025-03-16 17:15:30
2025-03-16 17:15:31
2025-03-16 17:15:32
2025-03-16 17:15:33
2025-03-16 17:15:34
2025-03-16 17:15:35
2025-03-16 17:15:36
2025-03-16 17:15:37
2025-03-16 17:15:38
2025-03-16 17:15:39
2025-03-16 17:15:40
2025-03-16 17:15:41
2025-03-16 17:15:42
2025-03-16 17:15:43
2025-03-16 17:15:44
2025-03-16 17:15:45
2025-03-16 17:15:46
2025-03-16 17:15:47
2025-03-16 17:15:48
2025-03-16 17:15:49
2025-03-16 17:15:50
2025-03-16 17:15:51
2025-03-16 17:15:52
2025-03-16 17:15:53
2025-03-16 17:15:54
2025-03-16 17:15:55
2025-03-16 17:15:56
2025-03-16 17:15:57
2025-03-16 17:15:58
2025-03-16 17:15:59
2025-03-16 17:16:00


In [64]:
df

Unnamed: 0,timestamp,player_ammo,player_health,player_armor,detected_entities,suicide_event,kill_event,score_event,killed_event,itemPicked_event,itemLost_event
0,2025-03-16 17:14:18,0,0,0,[],,,,,[],
1,2025-03-16 17:15:08,0,0,0,[],,,,,"1 Player_172.19.113.65\nName: player_ip, dt...",
2,2025-03-16 17:15:13,99,125,0,[],,,,,[],
3,2025-03-16 17:15:14,99,124,0,[],,,,,[],
4,2025-03-16 17:15:15,99,123,0,[],,,,,[],
...,...,...,...,...,...,...,...,...,...,...,...
221,2025-03-16 17:18:52,0,0,0,[],,,,,[],
222,2025-03-16 17:18:53,0,0,0,[],,,,,[],
223,2025-03-16 17:18:54,0,0,0,[],,,,,[],
224,2025-03-16 17:18:55,0,0,0,[],,,,,[],
