In [62]:
import json
from pathlib import Path
import pandas as pd

PATH_TO_DATASAMPLES = '../../datasamples/'
PATH_TO_DATASET = Path('../../dataset/')

In [63]:
raw_dataset = []

for path in Path(PATH_TO_DATASAMPLES).rglob('*.json'):
    print(f'Processing {path}')
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
        raw_dataset.append((path.stem, data))

Processing ../../datasamples/0004.json
Processing ../../datasamples/0003.json
Processing ../../datasamples/0001.json
Processing ../../datasamples/0002.json


In [64]:
from typing import Dict, List, Union, Literal
from dataclasses import dataclass
import pandas as pd
@dataclass
class MessageFromAgent:
    sender_id: int
    message: str
    type: Literal['info', 'order']
    
MessagesFromAgentsType = Dict[str, List[MessageFromAgent]]


messages_from_agents_df = pd.DataFrame(columns=["id", "timestamp", "ego_id", "message_from_agent_id", "message_from_agent", "message_from_agent_type"])

"""
Messages from agents DataFrame:
    - id: sample id, e.g. 0001, 0002, etc. (comes from 0001.json)
    - timestamp: timestamp of the message
    - ego_id: agent id
    - sender_id: id of the sender
    - message: content of the message
    - type: type of the message (info or order)
"""

for sample_id, data in raw_dataset:
    for ego_id, agent_data in data['agents'].items():
        # take all messages
        all_messages_from_agents: MessagesFromAgentsType = agent_data['messages_from_agents']

        # iterate over all timestamps
        for timestamp, message_data in all_messages_from_agents.items():
            # iterate over all messages from different agents
            for message in message_data:
                new_row = pd.DataFrame([{
                    "id": sample_id,
                    "timestamp": timestamp,
                    "ego_id": ego_id,
                    "message_from_agent_id": message['sender_id'],
                    "message_from_agent": message['message'],
                    "message_from_agent_type": message['type']
                }])
                messages_from_agents_df = pd.concat([messages_from_agents_df, new_row], ignore_index=True)

messages_from_agents_df.to_csv(PATH_TO_DATASET / 'messages_from_agents.csv', index=False)

messages_from_agents_df.sample(10)

Unnamed: 0,id,timestamp,ego_id,message_from_agent_id,message_from_agent,message_from_agent_type
4,4,131,1,3,Scout 1 — redeploy to northern region. Scan fu...,order
114,2,13,2,1,"Reached designated point (32, 24). Visual conf...",info
90,1,10,3,2,No targets within operational range.,info
112,2,2,2,3,Rescuer 2 — hold position. Deploy only upon Sc...,order
92,1,40,3,2,No targets within current visual range.,info
47,3,206,2,3,mission complete. Return to base immediately. ...,order
49,3,11,3,2,"Received new target location (91, 51). Mobiliz...",info
0,4,1,1,3,Scout 1 — proceed southeast. Visual confirmati...,order
115,2,15,2,3,"Rescuer 2 — proceed to (32, 24). Secure and ex...",order
119,2,88,2,3,Rescuer 1 — maintain current position. Hold ex...,order


In [65]:
from typing import Dict, List, Union, Literal
from dataclasses import dataclass
import pandas as pd

@dataclass
class SendedMessages:
    receiver: int
    message: str
    type: Literal['info', 'order']
    
SendedMessagesT = Dict[str, List[SendedMessages]]

sended_messages_df = pd.DataFrame(columns=["id", "timestamp", "ego_id", "send_message_to_receiver_id", "sended_message", "sended_message_type"])

"""
Sendede Messages DataFrame:
    - id: sample id, e.g. 0001, 0002, etc. (comes from 0001.json)
    - timestamp: timestamp of the message
    - ego_id: agent id
    - receiver_id: id of the receiver
    - message: content of the message
    - type: type of the message (info or order)
"""

for sample_id, data in raw_dataset:
    for ego_id, agent_data in data['agents'].items():
        # take all messages
        all_sended_messages: SendedMessagesT = agent_data['sended_messages']

        # iterate over all timestamps
        for timestamp, message_data in all_sended_messages.items():
            # iterate over all messages from different agents
            for message in message_data:
                new_row = pd.DataFrame([{
                    "id": sample_id,
                    "timestamp": timestamp,
                    "ego_id": ego_id,
                    "send_message_to_receiver_id": message['receiver'],
                    "sended_message": message['message'],
                    "sended_message_type": message['type']
                }])
                sended_messages_df = pd.concat([sended_messages_df, new_row], ignore_index=True)

sended_messages_df.to_csv(PATH_TO_DATASET / 'sended_messages.csv', index=False)

sended_messages_df.head()

Unnamed: 0,id,timestamp,ego_id,send_message_to_receiver_id,sended_message,sended_message_type
0,4,2,1,2,"Moving from (36, 44) toward southeast vector.",info
1,4,2,1,3,"Moving from (36, 44) toward southeast vector.",info
2,4,33,1,2,"Visual confirmation — target located at (19, 73)",info
3,4,33,1,3,"Visual confirmation — target located at (19, 73)",info
4,4,47,1,2,Visual contact confirmed — additional target ...,info


In [66]:
positions_df = pd.DataFrame(columns=["id", "timestamp", "ego_id", "ego_id_pos_x", "ego_id_pos_y"])
for sample_id, data in raw_dataset:
    for ego_id, agent_data in data['agents'].items():
        # take all messages
        positions = agent_data['positions']

        # iterate over all timestamps
        for timestamp, position in positions.items():
            new_row = pd.DataFrame([{
                "id": sample_id,
                "timestamp": timestamp,
                "ego_id": ego_id,
                "ego_id_pos_x": position[0],
                "ego_id_pos_y": position[1],
            }])
            positions_df = pd.concat([positions_df, new_row], ignore_index=True)

positions_df.to_csv(PATH_TO_DATASET / 'positions.csv', index=False)

positions_df.head()

Unnamed: 0,id,timestamp,ego_id,ego_id_pos_x,ego_id_pos_y
0,4,0,1,36,44
1,4,1,1,36,44
2,4,2,1,36,45
3,4,3,1,36,46
4,4,8,1,31,46


In [67]:
mission_progress_df = pd.DataFrame(columns=["id", "timestamp", "ego_id", "mission_progress"])
for sample_id, data in raw_dataset:
    for ego_id, agent_data in data['agents'].items():
        # take all messages
        mission_progresses = agent_data['mission_progress']

        # iterate over all timestamps
        for timestamp, progress in mission_progresses.items():
            new_row = pd.DataFrame([{
                "id": sample_id,
                "timestamp": timestamp,
                "ego_id": ego_id,
                "mission_progress": progress
            }])
            mission_progress_df = pd.concat([mission_progress_df, new_row], ignore_index=True)

mission_progress_df.to_csv(PATH_TO_DATASET / 'mission_progress.csv', index=False)

mission_progress_df.head()

Unnamed: 0,id,timestamp,ego_id,mission_progress
0,4,2,1,No contact yet. Terrain open. Awaiting furthe...
1,4,33,1,First target identified. Sector yield validated.
2,4,47,1,Second target located. New grid priority confi...
3,4,130,1,Area secured. Transitioning to new vector.
4,4,204,1,Third objective not yet located. Sector sweep...


In [68]:
target_in_fov_df = pd.DataFrame(columns=["id", "timestamp", "ego_id", "target_pos_x", "target_pos_y"])
for sample_id, data in raw_dataset:
    for ego_id, agent_data in data['agents'].items():
        # take all messages
        all_targets_in_fov = agent_data['target_in_fov']

        # iterate over all timestamps
        for timestamp, targets_in_fov in all_targets_in_fov.items():
            # since agent can see multiple targets, we need to iterate over all targets
            for target_pos in targets_in_fov:
                target_x, target_y = target_pos
                new_row = pd.DataFrame([{
                    "id": sample_id,
                    "timestamp": timestamp,
                    "ego_id": ego_id,
                    "target_pos_x": target_x,
                    "target_pos_y": target_y
                }])
                target_in_fov_df = pd.concat([target_in_fov_df, new_row], ignore_index=True)
target_in_fov_df.to_csv(PATH_TO_DATASET / 'target_in_fov.csv', index=False)

target_in_fov_df.head()

Unnamed: 0,id,timestamp,ego_id,target_pos_x,target_pos_y
0,4,33,1,19,73
1,4,34,1,19,73
2,4,35,1,19,73
3,4,36,1,19,73
4,4,37,1,19,73


In [69]:
from typing import Tuple

@dataclass
class AgentInformation:
    postion: Tuple[int, int]
    timestamp: int
    
LatestAgentInformationType = Dict[str, Dict[str, AgentInformation]]

latest_agents_information = pd.DataFrame(columns=["id", "timestamp", "ego_id", "latest_info_agent_id", "latest_info_agent_timestamp", "latest_info_agent_pos_x", "latest_info_agent_pos_y"])
for sample_id, data in raw_dataset:
    for ego_id, agent_data in data['agents'].items():
        # take all messages
        all_agents_information: LatestAgentInformationType = agent_data['latest_agents_information']

        # iterate over all timestamps
        for timestamp, agents_information in all_agents_information.items():
            for agent_id, agent_info in agents_information.items():
                agent_timestamp = agent_info['timestamp']
                agent_position = agent_info['position']
                new_row = pd.DataFrame([{
                    "id": sample_id,
                    "timestamp": timestamp,
                    "ego_id": ego_id,
                    "latest_info_agent_id": agent_id,
                    "latest_info_agent_timestamp": agent_timestamp,
                    "latest_info_agent_pos_x": agent_position[0],
                    "latest_info_agent_pos_y": agent_position[1]
                }])
                latest_agents_information = pd.concat([latest_agents_information, new_row], ignore_index=True)

latest_agents_information.to_csv(PATH_TO_DATASET / 'latest_agents_information.csv', index=False)

latest_agents_information.head()

Unnamed: 0,id,timestamp,ego_id,latest_info_agent_id,latest_info_agent_timestamp,latest_info_agent_pos_x,latest_info_agent_pos_y
0,4,1,1,2,1,40,7
1,4,1,1,3,1,21,23
2,4,2,1,2,2,40,7
3,4,2,1,3,2,21,23
4,4,3,1,2,3,40,7


In [70]:
global_strategy_df = pd.DataFrame(columns=["id", "timestamp", "ego_id", "global_strategy"])
for sample_id, data in raw_dataset:
    for ego_id, agent_data in data['agents'].items():
        # take all messages
        global_strategies = agent_data['global_strategy']

        # iterate over all timestamps
        for timestamp, strategy in global_strategies.items():
            new_row = pd.DataFrame([{
                "id": sample_id,
                "timestamp": timestamp,
                "ego_id": ego_id,
                "global_strategy": strategy
            }])
            global_strategy_df = pd.concat([global_strategy_df, new_row], ignore_index=True)

global_strategy_df.to_csv(PATH_TO_DATASET / 'global_strategy.csv', index=False)

global_strategy_df.head()

Unnamed: 0,id,timestamp,ego_id,global_strategy
0,4,0,1,scout targets
1,4,0,2,Scan all targets
2,4,0,3,Scan all targets
3,3,0,1,scout targets
4,3,0,2,Scan all targets


In [71]:
local_strategy_df = pd.DataFrame(columns=["id", "timestamp", "ego_id", "local_strategy"])
for sample_id, data in raw_dataset:
    for ego_id, agent_data in data['agents'].items():
        # take all messages
        local_strategies = agent_data['local_strategy']

        # iterate over all timestamps
        for timestamp, strategy in local_strategies.items():
            new_row = pd.DataFrame([{
                "id": sample_id,
                "timestamp": timestamp,
                "ego_id": ego_id,
                "local_strategy": strategy
            }])
            local_strategy_df = pd.concat([local_strategy_df, new_row], ignore_index=True)

local_strategy_df.to_csv(PATH_TO_DATASET / 'local_strategy.csv', index=False)

local_strategy_df.head()

Unnamed: 0,id,timestamp,ego_id,local_strategy
0,4,2,1,Initial sweep pattern activated — scanning sec...
1,4,33,1,Continuing southeastern trajectory. Next grid ...
2,4,47,1,Proceeding southbound.
3,4,130,1,Awaiting next directive. Systems green.
4,4,204,1,Pattern extended to adjacent quadrants.


In [74]:
meta_info_df = pd.DataFrame(columns=["id", "agent_id", "role", "mission"])
for sample_id, data in raw_dataset:
    for agent_id, agent_data in data['agents'].items():
        # take all messages
        role = agent_data['role']
        mission = agent_data['mission']

        new_row = pd.DataFrame([{
            "id": sample_id,
            "agent_id": agent_id,
            "role": role,
            "mission": mission
        }])
        meta_info_df = pd.concat([meta_info_df, new_row], ignore_index=True)

meta_info_df.to_csv(PATH_TO_DATASET / 'meta_info.csv', index=False)

meta_info_df.head()

Unnamed: 0,id,agent_id,role,mission
0,4,1,scout,Find the position of targets and send coordina...
1,4,2,rescuer,"Based on the coordinates provided by scout, sc..."
2,4,3,scout_commander,Coordinate scouts and rescuers to find and sca...
3,3,1,scout,Find the position of targets and send coordina...
4,3,2,rescuer,"Based on the coordinates provided by scout, sc..."


In [73]:
agent_data_keys = data['agents']['1'].keys()

print(*agent_data_keys, sep='\n')

role
mission
messages_from_agents
sended_messages
positions
mission_progress
target_in_fov
latest_agents_information
actions
global_strategy
local_strategy
special_actions
