# More mappers!

# Mapping here simply means converting an existing value that's already in the data into another value using a dictionary as a lookup.

# Examples of this could be mapping similar AIS Codes to standard group names, or adding a dynamic string descriptor based on the speed of the agent (e.g. slow, medium, fast). This is where you can get creative.

# First, you can map metadata (static data) for the agents in your `Dataset`

In [1]:
import trackio as tio

In [27]:
#define the data_path
data_path = 'example0'

#read the existing Dataset
ds = tio.read(data_path=data_path)

#check the metadata available
ds

Type:
    <class 'trackio.Dataset.Dataset'>
Status:
    0 Unprocessed CSV Files
    0 Processed CSV Files
    0 Unsplit Agent Files
    172 Split Agent Files
Static Data Fields:
    ['AISCode', 'Agent ID', 'Length', 'MMSI', 'Name', 'Width']
Dynamic Data Fields:
    ['Code1', 'Code2', 'Code3', 'Code4', 'Coursing', 'Draft', 'Speed', 'Status', 'Time', 'X', 'Y']
Metadata:
    X: degrees
    Y: degrees
    CRS: EPSG:4326
Data Path:
    c:\code\trackio\notebooks\example0

In [3]:
#make metadata mappers
meta_mapper = ds.make_meta_mapper('Length', ncores=4)

meta_mapper

Making meta mappers: 100%|[32m██████████[0m| 172/172 [00:02<00:00, 64.11it/s]


{0.0: None,
 11.0: None,
 12.0: None,
 13.0: None,
 15.0: None,
 16.0: None,
 17.0: None,
 18.0: None,
 19.0: None,
 20.0: None,
 21.0: None,
 22.0: None,
 23.0: None,
 24.0: None,
 25.0: None,
 26.0: None,
 27.0: None,
 28.0: None,
 29.0: None,
 30.0: None,
 31.0: None,
 32.0: None,
 33.0: None,
 34.0: None,
 35.0: None,
 36.0: None,
 37.0: None,
 38.0: None,
 39.0: None,
 42.0: None,
 44.0: None,
 48.0: None,
 52.0: None,
 60.0: None,
 84.0: None,
 88.0: None,
 94.0: None,
 159.0: None,
 169.0: None,
 170.0: None,
 227.0: None,
 228.0: None,
 240.0: None,
 248.0: None,
 333.0: None,
 nan: None}

# Or make multiple at once.

In [4]:
#make metadata mappers
meta_mappers = ds.make_meta_mapper(['Length', 'Width'], ncores=4)

meta_mappers

Making meta mappers: 100%|[32m██████████[0m| 172/172 [00:02<00:00, 61.54it/s]


{'Length': {0.0: None,
  11.0: None,
  12.0: None,
  13.0: None,
  15.0: None,
  16.0: None,
  17.0: None,
  18.0: None,
  19.0: None,
  20.0: None,
  21.0: None,
  22.0: None,
  23.0: None,
  24.0: None,
  25.0: None,
  26.0: None,
  27.0: None,
  28.0: None,
  29.0: None,
  30.0: None,
  31.0: None,
  32.0: None,
  33.0: None,
  34.0: None,
  35.0: None,
  36.0: None,
  37.0: None,
  38.0: None,
  39.0: None,
  42.0: None,
  44.0: None,
  48.0: None,
  52.0: None,
  60.0: None,
  84.0: None,
  88.0: None,
  94.0: None,
  159.0: None,
  169.0: None,
  170.0: None,
  227.0: None,
  228.0: None,
  240.0: None,
  248.0: None,
  333.0: None,
  nan: None},
 'Width': {0.0: None,
  4.0: None,
  5.0: None,
  6.0: None,
  7.0: None,
  8.0: None,
  9.0: None,
  10.0: None,
  11.0: None,
  12.0: None,
  14.0: None,
  18.0: None,
  21.0: None,
  26.0: None,
  27.0: None,
  32.0: None,
  33.0: None,
  37.0: None,
  42.0: None,
  nan: None}}

# Now make up some mapped data.

In [5]:
#descriptor by width
for k in meta_mappers['Width'].keys():
      if k <= 20:
            meta_mappers['Width'][k] = 'narrow'
      else:
            meta_mappers['Width'][k] = 'wide'      
for k in  meta_mappers['Length'].keys():
      if k <= 20:
            meta_mappers['Length'][k] = 'short'
      else:
            meta_mappers['Length'][k] = 'long' 

# Now you can map these to the agents metadata.

In [6]:
#map metadata
ds = ds.map_meta('Width', 
                 'How Wide?', 
                 meta_mappers['Width'],
                 ncores=4,
                 fill='Unknown')

Mapping metadata to agent:  19%|[32m█▉        [0m| 33/172 [00:00<00:00, 298.54it/s]

Mapping metadata to agent: 100%|[32m██████████[0m| 172/172 [00:03<00:00, 54.37it/s]


In [7]:
#map multiple metadata at once
ds = ds.map_meta(['Width', 'Length'], 
                 ['How Wide?', 'How Long?'], 
                 meta_mappers,
                 ncores=4,
                 fill='Unknown')

Mapping metadata to agent: 100%|[32m██████████[0m| 172/172 [00:03<00:00, 55.24it/s]


# Now if you refresh the metadata tables, you'll see this data in the `agent.db` file by accessing `Dataset.agents`.

In [8]:
#refresh metadata
ds = ds.refresh_meta(ncores=4)

ds.agents.head()

Refreshing metadata:   0%|[32m          [0m| 0/172 [00:00<?, ?it/s]

Refreshing metadata: 100%|[32m██████████[0m| 172/172 [00:02<00:00, 62.00it/s]  


New meta/databases saved to c:\code\trackio\notebooks\example0


Unnamed: 0_level_0,MMSI,Name,AISCode,Length,Width,How Wide?,How Long?,npoints,ntracks,Xmin,Xmax,Ymin,Ymax,Start Time,End Time,File,geometry
Agent ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Vessel_367639070,367639070,NYPD 311,90.0,,,Unknown,Unknown,61,1,-74.12082,-74.02807,40.64296,40.65144,2021-01-01 17:49:18,2021-01-01 18:59:01,c:\code\trackio\notebooks\example0\Vessel_3676...,"POLYGON ((-74.02807 40.64296, -74.02807 40.651..."
Vessel_367397090,367397090,LITTLE LADY II,60.0,19.0,6.0,narrow,short,4,1,-74.0399,-74.03987,40.70972,40.70975,2021-01-01 10:05:18,2021-01-01 10:08:48,c:\code\trackio\notebooks\example0\Vessel_3673...,"POLYGON ((-74.03987 40.70972, -74.03987 40.709..."
Vessel_367061610,367061610,BARNEY TURECAMO,31.0,36.0,10.0,narrow,long,426,1,-74.12516,-74.04262,40.64375,40.66241,2021-01-01 00:18:19,2021-01-01 20:22:57,c:\code\trackio\notebooks\example0\Vessel_3670...,"POLYGON ((-74.04262 40.64375, -74.04262 40.662..."
Vessel_368025020,368025020,,,,,Unknown,Unknown,54,1,-74.04888,-73.97503,40.61863,40.70766,2021-01-01 15:41:31,2021-01-01 16:36:43,c:\code\trackio\notebooks\example0\Vessel_3680...,"POLYGON ((-73.97503 40.61863, -73.97503 40.707..."
Vessel_367798420,367798420,H206,60.0,,,Unknown,Unknown,86,1,-74.00555,-73.9483,40.70195,40.76089,2021-01-01 00:17:15,2021-01-01 02:09:23,c:\code\trackio\notebooks\example0\Vessel_3677...,"POLYGON ((-73.94830 40.70195, -73.94830 40.760..."


# You can also do the same thing with any dynamic data field.

In [9]:
#make data mapper
data_mappers = ds.make_data_mapper(['Draft','Speed'],
                                   ncores=4)

#map the draft data to something else
for k in data_mappers['Draft'].keys():
      if k<=5:
            data_mappers['Draft'][k] = 'shallow'
      else:
            data_mappers['Draft'][k] = 'deep'
            
#map the speed data to something else
for k in data_mappers['Speed'].keys():
      if k<=5:
            data_mappers['Speed'][k] = 'slow'
      else:
            data_mappers['Speed'][k] = 'fast'

data_mappers

Making data mappers: 100%|[32m██████████[0m| 172/172 [00:03<00:00, 55.60it/s]


{'Draft': {2.3: 'shallow',
  2.5: 'shallow',
  2.7: 'shallow',
  2.9: 'shallow',
  3.0: 'shallow',
  3.3: 'shallow',
  3.4: 'shallow',
  3.5: 'shallow',
  3.6: 'shallow',
  3.7: 'shallow',
  3.8: 'shallow',
  3.9: 'shallow',
  4.0: 'shallow',
  4.1: 'shallow',
  4.2: 'shallow',
  4.3: 'shallow',
  4.4: 'shallow',
  4.5: 'shallow',
  4.6: 'shallow',
  4.9: 'shallow',
  5.0: 'shallow',
  5.2: 'deep',
  5.5: 'deep',
  5.8: 'deep',
  6.1: 'deep',
  9.4: 'deep',
  9.8: 'deep',
  9.9: 'deep',
  10.1: 'deep',
  12.0: 'deep',
  14.0: 'deep',
  14.5: 'deep',
  14.9: 'deep',
  nan: 'deep'},
 'Speed': {0.0: 'slow',
  0.1: 'slow',
  0.2: 'slow',
  0.3: 'slow',
  0.4: 'slow',
  0.5: 'slow',
  0.6: 'slow',
  0.7: 'slow',
  0.8: 'slow',
  0.9: 'slow',
  1.0: 'slow',
  1.1: 'slow',
  1.2: 'slow',
  1.3: 'slow',
  1.4: 'slow',
  1.5: 'slow',
  1.6: 'slow',
  1.7: 'slow',
  1.8: 'slow',
  1.9: 'slow',
  2.0: 'slow',
  2.1: 'slow',
  2.2: 'slow',
  2.3: 'slow',
  2.4: 'slow',
  2.5: 'slow',
  2.6: 'slow'

In [10]:
#map the data
ds = ds.map_data(['Draft','Speed'],
                 ['Depth Requirement', 'How Fast?'],
                 data_mappers,
                 ncores=4)

#refresh metadata
ds = ds.refresh_meta(ncores=4)

Mapping agent dynamic data: 100%|[32m██████████[0m| 172/172 [00:02<00:00, 62.44it/s]
Refreshing metadata: 100%|[32m██████████[0m| 172/172 [00:03<00:00, 56.18it/s]  


New meta/databases saved to c:\code\trackio\notebooks\example0


# Now read a random track to view the mapped dynamic data

In [11]:
#read single track
ds.get_track(ds.tracks.index[29]).head(5)

Unnamed: 0,Time,X,Y,Draft,Status,Coursing,Speed,Depth Requirement,How Fast?
0,2021-01-01 00:17:13,-74.03402,40.6844,4.2,0.0,25.0,12.8,shallow,fast
1,2021-01-01 00:18:23,-74.03205,40.68779,4.2,0.0,25.0,10.7,shallow,fast
2,2021-01-01 00:19:33,-74.03019,40.69069,4.2,0.0,24.0,9.7,shallow,fast
3,2021-01-01 00:20:40,-74.0283,40.69311,4.2,0.0,39.0,8.2,shallow,fast
4,2021-01-01 00:21:43,-74.02608,40.69441,4.2,0.0,57.0,7.2,shallow,fast


# You can also map a dynamic data field to boolean Code columns.

# Lets map the `Coursing` data to quadrants 1, 2, 3 or 4 based on the direction.

In [12]:
#make coursing mapper
coursing_mapper = ds.make_data_mapper('Coursing', ncores=4)

#calculate the quadrant
for key in coursing_mapper.keys():
    coursing_mapper[key] = int((key%360)/90) % 4 + 1

Making data mappers: 100%|[32m██████████[0m| 172/172 [00:03<00:00, 57.31it/s]


{1, 2, 3, 4}

# Now, map these integer values to boolean Code columns.

In [13]:
#map quadrants to boolean codes
ds = ds.map_data_to_codes('Coursing',
                          coursing_mapper,
                          ncores=4)

#refresh metadata
ds = ds.refresh_meta(ncores=4)

#read single track
ds.get_track(ds.tracks.index[29]).head(10)

Mapping agent dynamic data to coded boolean arrays: 100%|[32m██████████[0m| 172/172 [00:02<00:00, 64.31it/s]
Refreshing metadata: 100%|[32m██████████[0m| 172/172 [00:02<00:00, 58.84it/s]  


New meta/databases saved to c:\code\trackio\notebooks\example0


Unnamed: 0,Time,X,Y,Draft,Status,Coursing,Speed,Depth Requirement,How Fast?,Code1,Code2,Code3,Code4
0,2021-01-01 00:17:13,-74.03402,40.6844,4.2,0.0,25.0,12.8,shallow,fast,True,False,False,False
1,2021-01-01 00:18:23,-74.03205,40.68779,4.2,0.0,25.0,10.7,shallow,fast,True,False,False,False
2,2021-01-01 00:19:33,-74.03019,40.69069,4.2,0.0,24.0,9.7,shallow,fast,True,False,False,False
3,2021-01-01 00:20:40,-74.0283,40.69311,4.2,0.0,39.0,8.2,shallow,fast,True,False,False,False
4,2021-01-01 00:21:43,-74.02608,40.69441,4.2,0.0,57.0,7.2,shallow,fast,True,False,False,False


# Now you can see these codes stored in the `agent.db` and `track.db` metadata tables, making it possible to query by these properties.

In [22]:
#code is true if ANY point along ANY track for agent was True
ds.agents.filter(like='Code')

Unnamed: 0_level_0,AISCode,Code1,Code2,Code3,Code4
Agent ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Vessel_367639070,90.0,True,True,True,True
Vessel_367397090,60.0,True,False,False,False
Vessel_367061610,31.0,True,True,True,True
Vessel_368025020,,True,True,True,True
Vessel_367798420,60.0,True,True,True,True
...,...,...,...,...,...
Vessel_367000140,60.0,True,True,True,True
Vessel_367531670,,True,True,True,True
Vessel_367770270,31.0,True,True,True,True
Vessel_366946760,31.0,True,True,True,True


In [21]:
#code is True if ANY point along track was True
ds.tracks.head(5).filter(like='Code')

Unnamed: 0_level_0,AISCode,Code1,Code2,Code3,Code4
Track ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Vessel_367639070_T0,90.0,True,True,True,True
Vessel_367397090_T0,60.0,True,False,False,False
Vessel_367061610_T0,31.0,True,True,True,True
Vessel_368025020_T0,,True,True,True,True
Vessel_367798420_T0,60.0,True,True,True,True


# Finally, you can also drop metadata and dynamic data columns from the agents/tracks in your `Dataset`. This is useful when you want to reduce the data size and delete data you are not using.

In [23]:
#drop metadata from agents
ds = ds.drop_meta(['How Wide?', 'How Long?'],
                  ncores=4)

#drop dynamic data from tracks
ds = ds.drop_data(['Depth Requirement', 'How Fast?'],
                  ncores=4)

#refresh metadata
ds = ds.refresh_meta(ncores=4)

Dropping meta from agents: 100%|[32m██████████[0m| 172/172 [00:02<00:00, 73.39it/s]
Dropping dynamic data from agents: 100%|[32m██████████[0m| 172/172 [00:02<00:00, 72.65it/s]
Refreshing metadata: 100%|[32m██████████[0m| 172/172 [00:02<00:00, 77.79it/s]


New meta/databases saved to c:\code\trackio\notebooks\example0


# Notice now that this data is not present in the metadata tables, or the actual agent/track.

In [24]:
ds.agents.columns

Index(['MMSI', 'Name', 'AISCode', 'Length', 'Width', 'npoints', 'ntracks',
       'Xmin', 'Xmax', 'Ymin', 'Ymax', 'Start Time', 'End Time', 'Code1',
       'Code2', 'Code3', 'Code4', 'File', 'geometry'],
      dtype='object')

In [25]:
ds.tracks.columns

Index(['MMSI', 'Name', 'AISCode', 'Length', 'Width', 'Agent ID', 'npoints',
       'Xmin', 'Xmax', 'Ymin', 'Ymax', 'Start Time', 'End Time', 'Code1',
       'Code2', 'Code3', 'Code4', 'File', 'Track Length', 'Duration', 'Year',
       'Month', 'Xstart', 'Ystart', 'Xend', 'Yend', 'Effective Distance',
       'Min Temporal Resolution', 'Mean Temporal Resolution',
       'Max Temporal Resolution', 'Min Spatial Resolution',
       'Mean Spatial Resolution', 'Max Spatial Resolution', 'geometry'],
      dtype='object')

In [28]:
#get random agent
agent = ds.get_agent(ds.agents.index[0])

#view agent meta
agent.meta

{'MMSI': 367639070,
 'Name': 'NYPD 311',
 'AISCode': 90.0,
 'Length': nan,
 'Width': nan,
 'Agent ID': 'Vessel_367639070'}

In [30]:
#view a single track
agent.tracks['T0'].head(5)

Unnamed: 0,Time,X,Y,Draft,Status,Coursing,Speed,Code1,Code2,Code3,Code4
0,2021-01-01 17:49:18,-74.02832,40.64433,,0.0,360.0,0.0,True,False,False,False
1,2021-01-01 17:50:28,-74.02831,40.64434,,0.0,360.0,0.1,True,False,False,False
2,2021-01-01 17:51:39,-74.0283,40.64434,,0.0,360.0,0.0,True,False,False,False
3,2021-01-01 17:52:49,-74.0283,40.64434,,0.0,360.0,0.0,True,False,False,False
4,2021-01-01 17:53:58,-74.0283,40.64433,,0.0,360.0,0.0,True,False,False,False
