In [1]:
import json
from pathlib import Path

# Directory that is searched recursively for the dataset's *.json files.
PATH_TO_DATASET = '../dataset/'

In [2]:
# Each entry is a (file stem, parsed JSON content) pair, one per dataset file.
raw_dataset = []

# rglob() gives no ordering guarantee (it follows the filesystem's listing order);
# sorting makes the order of raw_dataset, and of everything derived from it,
# reproducible across runs and machines.
for path in sorted(Path(PATH_TO_DATASET).rglob('*.json')):
    print(f'Processing {path}')
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # the stem (e.g. "0001" for 0001.json) is used as the sample id downstream
    raw_dataset.append((path.stem, data))

Processing ../dataset/0001.json


In [3]:
from typing import Dict, List, Union, Literal
from dataclasses import dataclass
import pandas as pd
@dataclass
class MessageFromAgent:
    """Describes one entry of an agent's `messages_from_agents` lists.

    Appears to serve only as a type hint in the visible cells; the parsed JSON
    entries themselves are read with these field names as dictionary keys.
    """
    sender_id: int  # id of the sender
    message: str  # content of the message
    type: Literal['info', 'order']  # type of the message (info or order)
    
# Maps a timestamp to the list of messages stored under that timestamp.
MessagesFromAgentsType = Dict[str, List[MessageFromAgent]]

# Messages from agents DataFrame:
#     - id: sample id, e.g. 0001, 0002, etc. (comes from 0001.json)
#     - timestamp: timestamp of the message
#     - aid: agent id
#     - sender_id: id of the sender
#     - message: content of the message
#     - type: type of the message (info or order)

# Rows are collected in a plain list and the DataFrame is built once at the end.
# Growing a frame with pd.concat inside the loop copies the whole frame on every
# row (quadratic) and, starting from an empty frame, triggers pandas'
# deprecation warning about empty entries in concat.
messages_from_agents_records = []

for sample_id, data in raw_dataset:
    for aid, agent_data in data['agents'].items():
        # all messages of this agent, keyed by timestamp
        all_messages_from_agents: MessagesFromAgentsType = agent_data['messages_from_agents']

        # iterate over all timestamps
        for timestamp, message_data in all_messages_from_agents.items():
            # several messages can be stored under the same timestamp
            for message in message_data:
                messages_from_agents_records.append({
                    "id": sample_id,
                    "timestamp": timestamp,
                    "aid": aid,
                    "sender_id": message['sender_id'],
                    "message": message['message'],
                    "type": message['type']
                })

# columns= keeps the expected schema even when no messages were found
messages_from_agents_df = pd.DataFrame(
    messages_from_agents_records,
    columns=["id", "timestamp", "aid", "sender_id", "message", "type"],
)

messages_from_agents_df.sample(10)

Unnamed: 0,id,timestamp,aid,sender_id,message,type
39,1,115,3,1,Reached western quadrant. No contacts in curre...,info
25,1,135,2,3,Scout 1 — redirect course to eastern sector. I...,order
29,1,262,2,1,Reached base. No incidents. Systems nominal.,info
10,1,185,1,2,Standing by at last extraction point.,info
36,1,70,3,1,No targets detected within current visual range.,info
35,1,40,3,2,No targets within current visual range.,info
40,1,115,3,2,"Arrived at target location (63, 44).",info
37,1,70,3,2,"Reached confirmed target at coordinates (72, 5...",info
6,1,90,1,3,Scout 1 — redeploy westward. Sweep western and...,order
42,1,185,3,1,"Target visually confirmed at (81, 15).",info


In [4]:
from typing import Dict, List, Union, Literal
from dataclasses import dataclass
import pandas as pd

@dataclass
class SendedMessages:
    """Describes one entry of an agent's `sended_messages` lists.

    Appears to serve only as a type hint in the visible cells; the parsed JSON
    entries themselves are read with these field names as dictionary keys.
    """
    receiver: int  # id of the receiver
    message: str  # content of the message
    type: Literal['info', 'order']  # type of the message (info or order)
    
# Maps a timestamp to the list of messages sent at that timestamp.
SendedMessagesT = Dict[str, List[SendedMessages]]

# Sent messages DataFrame:
#     - id: sample id, e.g. 0001, 0002, etc. (comes from 0001.json)
#     - timestamp: timestamp of the message
#     - aid: agent id
#     - receiver_id: id of the receiver
#     - message: content of the message
#     - type: type of the message (info or order)

# Collect the rows first and build the DataFrame once: pd.concat inside the
# loop re-copies the whole frame for every row and warns when it starts from
# an empty frame.
sended_messages_records = []

for sample_id, data in raw_dataset:
    for aid, agent_data in data['agents'].items():
        # all messages sent by this agent, keyed by timestamp
        all_sended_messages: SendedMessagesT = agent_data['sended_messages']

        # iterate over all timestamps
        for timestamp, message_data in all_sended_messages.items():
            # one entry per receiver / message sent at this timestamp
            for message in message_data:
                sended_messages_records.append({
                    "id": sample_id,
                    "timestamp": timestamp,
                    "aid": aid,
                    "receiver_id": message['receiver'],
                    "message": message['message'],
                    "type": message['type']
                })

# columns= keeps the expected schema even when no messages were found
sended_messages_df = pd.DataFrame(
    sended_messages_records,
    columns=["id", "timestamp", "aid", "receiver_id", "message", "type"],
)

sended_messages_df.head()

Unnamed: 0,id,timestamp,aid,receiver_id,message,type
0,1,10,1,2,No targets detected within current visual range.,info
1,1,10,1,3,No targets detected within current visual range.,info
2,1,40,1,2,"Two targets visually confirmed at (63, 44) and...",info
3,1,40,1,3,"Two targets visually confirmed at (63, 44) and...",info
4,1,70,1,2,No targets detected within current visual range.,info


In [9]:
# Positions DataFrame: one row per (sample, agent, timestamp) with the agent's
# x / y coordinates taken from the first two elements of each position entry.
# Rows are gathered in a list and turned into a DataFrame once, instead of
# calling pd.concat per row (quadratic, and deprecated with an empty frame).
positions_records = []

for sample_id, data in raw_dataset:
    for aid, agent_data in data['agents'].items():
        # positions of this agent, keyed by timestamp
        positions = agent_data['positions']

        # iterate over all timestamps
        for timestamp, position in positions.items():
            positions_records.append({
                "id": sample_id,
                "timestamp": timestamp,
                "aid": aid,
                "x": position[0],
                "y": position[1],
            })

# columns= keeps the expected schema even when no positions were found
positions_df = pd.DataFrame(
    positions_records,
    columns=["id", "timestamp", "aid", "x", "y"],
)

positions_df.head()

Unnamed: 0,id,timestamp,aid,x,y
0,1,0,1,83,64
1,1,5,1,83,69
2,1,10,1,58,84
3,1,15,1,58,79
4,1,20,1,58,74


In [11]:
# Mission progress DataFrame: one row per (sample, agent, timestamp) holding
# the value stored under that timestamp in the agent's 'mission_progress'.
# Rows are gathered in a list and turned into a DataFrame once, instead of
# calling pd.concat per row (quadratic, and deprecated with an empty frame).
mission_progress_records = []

for sample_id, data in raw_dataset:
    for aid, agent_data in data['agents'].items():
        # mission progress entries of this agent, keyed by timestamp
        mission_progresses = agent_data['mission_progress']

        # iterate over all timestamps
        for timestamp, progress in mission_progresses.items():
            mission_progress_records.append({
                "id": sample_id,
                "timestamp": timestamp,
                "aid": aid,
                "mission_progress": progress
            })

# columns= keeps the expected schema even when no entries were found
mission_progress_df = pd.DataFrame(
    mission_progress_records,
    columns=["id", "timestamp", "aid", "mission_progress"],
)

mission_progress_df.head()

Unnamed: 0,id,timestamp,aid,mission_progress
0,1,10,1,Recon unit operational. Awaiting directive for...
1,1,40,1,Target zone active. Southern sector sweep succ...
2,1,70,1,Northern sweep underway. Awaiting new contact.
3,1,85,1,Awaiting redirection.
4,1,115,1,Western sector partially scanned. No targets f...


In [13]:
# Targets-in-FOV DataFrame: one row per target listed for a given
# (sample, agent, timestamp), with the target's two coordinates.
# Rows are gathered in a list and turned into a DataFrame once, instead of
# calling pd.concat per row (quadratic, and deprecated with an empty frame).
target_in_fov_records = []

for sample_id, data in raw_dataset:
    for aid, agent_data in data['agents'].items():
        # targets listed for this agent, keyed by timestamp
        all_targets_in_fov = agent_data['target_in_fov']

        # iterate over all timestamps
        for timestamp, targets_in_fov in all_targets_in_fov.items():
            # since agent can see multiple targets, we need to iterate over all targets
            for target_pos in targets_in_fov:
                # each target entry must unpack into exactly two coordinates
                target_x, target_y = target_pos
                target_in_fov_records.append({
                    "id": sample_id,
                    "timestamp": timestamp,
                    "aid": aid,
                    "target_x": target_x,
                    "target_y": target_y
                })

# columns= keeps the expected schema even when no targets were found
target_in_fov_df = pd.DataFrame(
    target_in_fov_records,
    columns=["id", "timestamp", "aid", "target_x", "target_y"],
)

target_in_fov_df.head()

Unnamed: 0,id,timestamp,aid,target_x,target_y
0,1,5,1,72,56
1,1,10,1,72,56
2,1,30,1,72,56
3,1,35,1,72,56
4,1,35,1,63,44


In [18]:
from typing import Tuple

@dataclass
class AgentInformation:
    """Describes one agent's entry inside `latest_agents_information`.

    Appears to serve only as a type hint in the visible cells; the parsed JSON
    entries themselves are read as dictionaries.
    """
    # NOTE(review): "postion" looks like a typo for "position" — the loop that
    # consumes this data reads the 'position' key. Left as is because renaming
    # the field would change the dataclass constructor's signature.
    postion: Tuple[int, int]
    timestamp: int  # read from the data as agent_info['timestamp']
    
# Maps a timestamp to a mapping of agent id -> information known about that agent.
LatestAgentInformationType = Dict[str, Dict[str, AgentInformation]]

# Latest agents information DataFrame: one row per (sample, agent, timestamp,
# other agent) with the timestamp and position recorded for that other agent.
# Rows are gathered in a list and turned into a DataFrame once, instead of
# calling pd.concat per row (quadratic, and deprecated with an empty frame).
latest_agents_information_records = []

for sample_id, data in raw_dataset:
    for aid, agent_data in data['agents'].items():
        # what this agent knows about other agents, keyed by timestamp
        all_agents_information: LatestAgentInformationType = agent_data['latest_agents_information']

        # iterate over all timestamps
        for timestamp, agents_information in all_agents_information.items():
            # one entry per agent this agent holds information about
            for agent_id, agent_info in agents_information.items():
                agent_timestamp = agent_info['timestamp']
                agent_position = agent_info['position']
                latest_agents_information_records.append({
                    "id": sample_id,
                    "timestamp": timestamp,
                    "aid": aid,
                    "agent_id": agent_id,
                    "agent_timestamp": agent_timestamp,
                    "agent_position_x": agent_position[0],
                    "agent_position_y": agent_position[1]
                })

# columns= keeps the expected schema even when no entries were found
latest_agents_information = pd.DataFrame(
    latest_agents_information_records,
    columns=["id", "timestamp", "aid", "agent_id", "agent_timestamp", "agent_position_x", "agent_position_y"],
)

latest_agents_information.head()

Unnamed: 0,id,timestamp,aid,agent_id,agent_timestamp,agent_position_x,agent_position_y
0,1,10,1,2,10,62,78
1,1,10,1,3,10,11,77
2,1,40,1,2,40,62,78
3,1,40,1,3,40,11,77
4,1,70,1,2,70,67,62


In [22]:
# Global strategy DataFrame: one row per (sample, agent, timestamp) holding
# the value stored under that timestamp in the agent's 'global_strategy'.
# Rows are gathered in a list and turned into a DataFrame once, instead of
# calling pd.concat per row (quadratic, and deprecated with an empty frame).
global_strategy_records = []

for sample_id, data in raw_dataset:
    for aid, agent_data in data['agents'].items():
        # global strategies of this agent, keyed by timestamp
        global_strategies = agent_data['global_strategy']

        # iterate over all timestamps
        for timestamp, strategy in global_strategies.items():
            global_strategy_records.append({
                "id": sample_id,
                "timestamp": timestamp,
                "aid": aid,
                "global_strategy": strategy
            })

# columns= keeps the expected schema even when no entries were found
global_strategy_df = pd.DataFrame(
    global_strategy_records,
    columns=["id", "timestamp", "aid", "global_strategy"],
)

global_strategy_df.head()

Unnamed: 0,id,timestamp,aid,global_strategy
0,1,0,1,scout targets
1,1,0,2,Scan all targets
2,1,0,3,Scan all targets


In [24]:
# Local strategy DataFrame: one row per (sample, agent, timestamp) holding
# the value stored under that timestamp in the agent's 'local_strategy'.
# Rows are gathered in a list and turned into a DataFrame once, instead of
# calling pd.concat per row (quadratic, and deprecated with an empty frame).
local_strategy_records = []

for sample_id, data in raw_dataset:
    for aid, agent_data in data['agents'].items():
        # local strategies of this agent, keyed by timestamp
        local_strategies = agent_data['local_strategy']

        # iterate over all timestamps
        for timestamp, strategy in local_strategies.items():
            local_strategy_records.append({
                "id": sample_id,
                "timestamp": timestamp,
                "aid": aid,
                "local_strategy": strategy
            })

# columns= keeps the expected schema even when no entries were found
local_strategy_df = pd.DataFrame(
    local_strategy_records,
    columns=["id", "timestamp", "aid", "local_strategy"],
)

local_strategy_df.head()

Unnamed: 0,id,timestamp,aid,local_strategy
0,1,10,1,Standing by for movement order; sensors active.
1,1,40,1,Repositioning northward to continue search for...
2,1,70,1,Maintaining northern vector; continuing broad ...
3,1,85,1,Holding position. Requesting new search vector.
4,1,115,1,Maintaining lateral sweep pattern across weste...


In [25]:
# Meta information DataFrame: one row per (sample, agent) with the agent's
# 'role' and 'mission' values.
# Rows are gathered in a list and turned into a DataFrame once, instead of
# calling pd.concat per row (quadratic, and deprecated with an empty frame).
meta_info_records = []

for sample_id, data in raw_dataset:
    for aid, agent_data in data['agents'].items():
        # these two fields are stored once per agent, not per timestamp
        role = agent_data['role']
        mission = agent_data['mission']

        meta_info_records.append({
            "id": sample_id,
            "aid": aid,
            "role": role,
            "mission": mission
        })

# columns= keeps the expected schema even when no agents were found
meta_info_df = pd.DataFrame(
    meta_info_records,
    columns=["id", "aid", "role", "mission"],
)
meta_info_df.head()

Unnamed: 0,id,aid,role,mission
0,1,1,scout,Find the position of targets and send coordina...
1,1,2,rescuer,"Based on the coordinates provided by scout, sc..."
2,1,3,scout_commander,Coordinate scouts and rescuers to find and sca...


In [5]:
# List the per-agent fields available in the dataset, using agent '1' of the
# last loaded sample. The sample is read explicitly from raw_dataset rather
# than through the `data` loop variable left over from earlier cells, so this
# cell does not depend on which cell happened to run last.
agent_data_keys = raw_dataset[-1][1]['agents']['1'].keys()

print(*agent_data_keys, sep='\n')

role
mission
messages_from_agents
sended_messages
positions
mission_progress
target_in_fov
latest_agents_information
actions
global_strategy
local_strategy
special_actions
