# More mappers!

# Mapping here simply means converting an existing value that's already in the data into another value using a dictionary as a lookup.

# Examples of this could be mapping similar AIS Codes to standard group names, or adding a dynamic string descriptor based on the speed of the agent (e.g. slow, medium, fast). This is where you can get creative.
___

In [1]:
import trackio as tio

___
## Let's read the dataset from the last notebook.

In [2]:
#define the data_path - the folder holding the Dataset from the last notebook
#(a relative path; the summary below shows the resolved absolute path)
data_path = 'example0'

#read the existing Dataset
ds = tio.read(data_path=data_path)

#check the metadata available - evaluating ds as the last expression of the
#cell displays its summary (status, static/dynamic fields, metadata, path)
ds

Type:
    <class 'trackio.Dataset.Dataset'>
Status:
    0 Unprocessed CSV Files
    0 Processed CSV Files
    0 Unsplit Agent Files
    172 Split Agent Files
Static Data Fields:
    ['AISCode', 'Agent ID', 'Length', 'MMSI', 'Name', 'Width']
Dynamic Data Fields:
    ['Coursing', 'Draft', 'Speed', 'Status', 'Time', 'X', 'Y']
Metadata:
    X: degrees
    Y: degrees
    CRS: EPSG:4326
Data Path:
    c:\code\trackio\notebooks\example0

____
## First, you can map metadata (static data) for the agents in your `Dataset`:

In [3]:
#make a metadata mapper for one static field - as the output below shows,
#requesting a single field returns a flat {unique value: None} dict
meta_mapper = ds.make_meta_mapper(['Length'], 
                                  ncores=4)

#display the mapper - every value is None until you fill it in yourself,
#and missing lengths appear as a nan key
meta_mapper

Making meta mappers: 100%|██████████| 172/172 [00:02<00:00, 77.06it/s]


{0.0: None,
 11.0: None,
 12.0: None,
 13.0: None,
 15.0: None,
 16.0: None,
 17.0: None,
 18.0: None,
 19.0: None,
 20.0: None,
 21.0: None,
 22.0: None,
 23.0: None,
 24.0: None,
 25.0: None,
 26.0: None,
 27.0: None,
 28.0: None,
 29.0: None,
 30.0: None,
 31.0: None,
 32.0: None,
 33.0: None,
 34.0: None,
 35.0: None,
 36.0: None,
 37.0: None,
 38.0: None,
 39.0: None,
 42.0: None,
 44.0: None,
 48.0: None,
 52.0: None,
 60.0: None,
 84.0: None,
 88.0: None,
 94.0: None,
 159.0: None,
 169.0: None,
 170.0: None,
 227.0: None,
 228.0: None,
 240.0: None,
 248.0: None,
 333.0: None,
 nan: None}

___
## Or make multiple at once.

In [4]:
#make metadata mappers for several static fields at once - as the output
#below shows, the result is nested: {field name: {unique value: None}}
meta_mappers = ds.make_meta_mapper(['Length', 'Width'], 
                                   ncores=4)

#display the mappers - one inner dict per requested field
meta_mappers

Making meta mappers: 100%|██████████| 172/172 [00:02<00:00, 84.23it/s]


{'Length': {0.0: None,
  11.0: None,
  12.0: None,
  13.0: None,
  15.0: None,
  16.0: None,
  17.0: None,
  18.0: None,
  19.0: None,
  20.0: None,
  21.0: None,
  22.0: None,
  23.0: None,
  24.0: None,
  25.0: None,
  26.0: None,
  27.0: None,
  28.0: None,
  29.0: None,
  30.0: None,
  31.0: None,
  32.0: None,
  33.0: None,
  34.0: None,
  35.0: None,
  36.0: None,
  37.0: None,
  38.0: None,
  39.0: None,
  42.0: None,
  44.0: None,
  48.0: None,
  52.0: None,
  60.0: None,
  84.0: None,
  88.0: None,
  94.0: None,
  159.0: None,
  169.0: None,
  170.0: None,
  227.0: None,
  228.0: None,
  240.0: None,
  248.0: None,
  333.0: None,
  nan: None},
 'Width': {0.0: None,
  4.0: None,
  5.0: None,
  6.0: None,
  7.0: None,
  8.0: None,
  9.0: None,
  10.0: None,
  11.0: None,
  12.0: None,
  14.0: None,
  18.0: None,
  21.0: None,
  26.0: None,
  27.0: None,
  32.0: None,
  33.0: None,
  37.0: None,
  42.0: None,
  nan: None}}

___
## Now make up some mapped data.

In [5]:
#descriptor by width
#NOTE: the mapper keys include nan (agents with no width on record). Every
#comparison against nan is False, so a plain if/else would silently label
#those agents 'wide'. The explicit elif keeps nan out of both size classes.
for k in meta_mappers['Width']:
    if k <= 20:
        meta_mappers['Width'][k] = 'narrow'
    elif k > 20:
        meta_mappers['Width'][k] = 'wide'
    else:
        #only nan reaches here - use the same label as the fill value
        #passed to map_meta further down
        meta_mappers['Width'][k] = 'Unknown'

#descriptor by length (same nan handling as for width)
for k in meta_mappers['Length']:
    if k <= 20:
        meta_mappers['Length'][k] = 'short'
    elif k > 20:
        meta_mappers['Length'][k] = 'long'
    else:
        #only nan reaches here
        meta_mappers['Length'][k] = 'Unknown'

___
## Now you can map these to the agents' metadata.

In [7]:
#map metadata: read each agent's 'Width', look it up in meta_mappers['Width']
#and store the result as a new static field called 'How Wide?'
#NOTE(review): fill appears to be the value written when no mapping applies
#(agents with no Width show 'Unknown' in the table further down) - confirm
ds = ds.map_meta(['Width'], 
                 ['How Wide?'], 
                 meta_mappers,
                 ncores=4,
                 fill='Unknown')

Mapping metadata to agent: 100%|██████████| 172/172 [00:02<00:00, 82.79it/s]


In [8]:
#map multiple metadata at once - the input fields and the output field names
#are paired by position ('Width' -> 'How Wide?', 'Length' -> 'How Long?'),
#and meta_mappers holds one lookup dict per input field
ds = ds.map_meta(['Width', 'Length'], 
                 ['How Wide?', 'How Long?'], 
                 meta_mappers,
                 ncores=4,
                 fill='Unknown')

Mapping metadata to agent: 100%|██████████| 172/172 [00:02<00:00, 75.80it/s]


___
## Now if you refresh the metadata tables, you'll see this data in the `agent.db` file by accessing `Dataset.agents`:

In [9]:
#refresh metadata so the newly mapped fields show up in the metadata tables
#(the output below reports that new meta/databases are saved to the data path)
ds = ds.refresh_meta(ncores=4)

#show the first rows of the agent table, now including 'How Wide?'/'How Long?'
ds.agents.head()

Refreshing metadata:   0%|          | 0/172 [00:00<?, ?it/s]

Refreshing metadata: 100%|██████████| 172/172 [00:02<00:00, 79.83it/s]


New meta/databases saved to c:\code\trackio\notebooks\example0


Unnamed: 0_level_0,MMSI,Name,AISCode,Length,Width,How Wide?,How Long?,npoints,ntracks,Xmin,Xmax,Ymin,Ymax,Start Time,End Time,File,geometry
Agent ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Vessel_367061610,367061610,BARNEY TURECAMO,31.0,36.0,10.0,narrow,long,426,1,-74.12516,-74.04262,40.64375,40.66241,2021-01-01 00:18:19,2021-01-01 20:22:57,c:\code\trackio\notebooks\example0\Vessel_3670...,"POLYGON ((-74.04262 40.64375, -74.04262 40.662..."
Vessel_367115000,367115000,LINDA LEE BOUCHARD,31.0,39.0,11.0,narrow,long,1228,1,-73.97055,-73.97016,40.70062,40.70108,2021-01-01 00:17:30,2021-01-01 23:59:40,c:\code\trackio\notebooks\example0\Vessel_3671...,"POLYGON ((-73.97016 40.70062, -73.97016 40.701..."
Vessel_367558180,367558180,POTOMAC,31.0,30.0,10.0,narrow,long,508,1,-74.12722,-74.0084,40.64245,40.66725,2021-01-01 00:17:17,2021-01-01 23:59:58,c:\code\trackio\notebooks\example0\Vessel_3675...,"POLYGON ((-74.00840 40.64245, -74.00840 40.667..."
Vessel_367098070,367098070,GULF COAST,31.0,28.0,9.0,narrow,long,747,1,-74.11835,-74.00149,40.63882,40.66594,2021-01-01 00:17:12,2021-01-01 23:57:28,c:\code\trackio\notebooks\example0\Vessel_3670...,"POLYGON ((-74.00149 40.63882, -74.00149 40.665..."
Vessel_367522950,367522950,NEWARK FIRE BOAT 2,,,,Unknown,Unknown,5,1,-74.12276,-74.12273,40.71115,40.71118,2021-01-01 15:25:50,2021-01-01 15:30:30,c:\code\trackio\notebooks\example0\Vessel_3675...,"POLYGON ((-74.12273 40.71115, -74.12273 40.711..."


___
## You can also do the same thing with any dynamic data field.

In [10]:
#build one {unique value: None} lookup per requested dynamic field
data_mappers = ds.make_data_mapper(['Draft', 'Speed'], ncores=4)

#label every unique draft value - the mapped value doesn't have to be a
#string, it could be a number, the result of a function, etc.
#NOTE: nan <= 5 is False, so a nan key ends up labelled 'deep'
#(visible at the end of the 'Draft' dict in the output below)
for draft in data_mappers['Draft']:
    data_mappers['Draft'][draft] = 'shallow' if draft <= 5 else 'deep'

#label every unique speed value the same way
for speed in data_mappers['Speed']:
    data_mappers['Speed'][speed] = 'slow' if speed <= 5 else 'fast'

#display the filled-in mappers
data_mappers

Making data mappers: 100%|██████████| 172/172 [00:02<00:00, 74.77it/s]


{'Draft': {2.3: 'shallow',
  2.5: 'shallow',
  2.7: 'shallow',
  2.9: 'shallow',
  3.0: 'shallow',
  3.3: 'shallow',
  3.4: 'shallow',
  3.5: 'shallow',
  3.6: 'shallow',
  3.7: 'shallow',
  3.8: 'shallow',
  3.9: 'shallow',
  4.0: 'shallow',
  4.1: 'shallow',
  4.2: 'shallow',
  4.3: 'shallow',
  4.4: 'shallow',
  4.5: 'shallow',
  4.6: 'shallow',
  4.9: 'shallow',
  5.0: 'shallow',
  5.2: 'deep',
  5.5: 'deep',
  5.8: 'deep',
  6.1: 'deep',
  9.4: 'deep',
  9.8: 'deep',
  9.9: 'deep',
  10.1: 'deep',
  12.0: 'deep',
  14.0: 'deep',
  14.5: 'deep',
  14.9: 'deep',
  nan: 'deep'},
 'Speed': {0.0: 'slow',
  0.1: 'slow',
  0.2: 'slow',
  0.3: 'slow',
  0.4: 'slow',
  0.5: 'slow',
  0.6: 'slow',
  0.7: 'slow',
  0.8: 'slow',
  0.9: 'slow',
  1.0: 'slow',
  1.1: 'slow',
  1.2: 'slow',
  1.3: 'slow',
  1.4: 'slow',
  1.5: 'slow',
  1.6: 'slow',
  1.7: 'slow',
  1.8: 'slow',
  1.9: 'slow',
  2.0: 'slow',
  2.1: 'slow',
  2.2: 'slow',
  2.3: 'slow',
  2.4: 'slow',
  2.5: 'slow',
  2.6: 'slow',
  ...}}

In [11]:
#map the data: input fields and new column names are paired by position
#('Draft' -> 'Depth Requirement', 'Speed' -> 'How Fast?'), using the
#per-field lookups in data_mappers
ds = ds.map_data(['Draft','Speed'],
                 ['Depth Requirement', 'How Fast?'],
                 data_mappers,
                 ncores=4)

#refresh metadata after changing the dynamic data
ds = ds.refresh_meta(ncores=4)

Mapping agent dynamic data: 100%|██████████| 172/172 [00:02<00:00, 81.79it/s]
Refreshing metadata: 100%|██████████| 172/172 [00:02<00:00, 77.38it/s]


New meta/databases saved to c:\code\trackio\notebooks\example0


___
## Now read a single track to view the mapped dynamic data.

In [13]:
#read single track - the one at position 24 of the ds.tracks index - and
#show its first 5 rows, including the two newly mapped columns
ds.get_track(ds.tracks.index[24]).head(5)

Unnamed: 0,Time,X,Y,Draft,Status,Coursing,Speed,Depth Requirement,How Fast?
0,2021-01-01 01:12:38,-74.00965,40.76064,6.1,5.0,208.0,11.9,deep,fast
1,2021-01-01 01:13:47,-74.01178,40.75727,6.1,5.0,207.1,11.9,deep,fast
2,2021-01-01 01:14:56,-74.01371,40.75366,6.1,5.0,195.5,11.8,deep,fast
3,2021-01-01 01:15:57,-74.01458,40.75037,6.1,5.0,191.2,11.9,deep,fast
4,2021-01-01 01:17:07,-74.01556,40.74657,6.1,5.0,191.6,12.0,deep,fast


___
## You can also map a dynamic data field to boolean Code columns. See [ReadMe](https://github.com/derekeden/trackio/blob/main/README.md) for more information about what a Code is in this context.

## Let's map the `Coursing` data to quadrants 1, 2, 3 or 4 based on the direction.

In [14]:
#make coursing mapper - only one field is requested, so the result is used
#directly as a flat {coursing value: None} dict
coursing_mapper = ds.make_data_mapper(['Coursing'], ncores=4)

#calculate the quadrant: [0, 90) -> 1, [90, 180) -> 2, [180, 270) -> 3,
#[270, 360) -> 4
#NOTE(review): int() raises ValueError on nan, so this loop would fail if the
#mapper ever contained a nan key - confirm Coursing has no missing values
for heading in coursing_mapper:
    wrapped = heading % 360           #fold the angle into one full turn
    sector = int(wrapped / 90) % 4    #% 4 wraps a rounded-up 360.0 back to 0
    coursing_mapper[heading] = sector + 1

Making data mappers: 100%|██████████| 172/172 [00:02<00:00, 75.87it/s]


___
## Now, map these integer values to boolean Code columns.

In [17]:
#map quadrants to boolean codes - the mapper is passed keyed by field name,
#and each mapped integer n becomes a boolean column named Code<n>
#(Code1..Code4 in the track output below)
ds = ds.map_data_to_codes(['Coursing'],
                          {'Coursing':coursing_mapper},
                          ncores=4)

#refresh metadata so the codes are also written to the agent/track tables
ds = ds.refresh_meta(ncores=4)

Mapping agent dynamic data to coded boolean arrays: 100%|██████████| 172/172 [00:02<00:00, 72.80it/s]
Refreshing metadata: 100%|██████████| 172/172 [00:02<00:00, 74.85it/s]


New meta/databases saved to c:\code\trackio\notebooks\example0


In [20]:
#read single track - the one at position 22 of the ds.tracks index - and
#show its first 10 rows, including the new Code1..Code4 columns
ds.get_track(ds.tracks.index[22]).head(10)

Unnamed: 0,Time,X,Y,Draft,Status,Coursing,Speed,Depth Requirement,How Fast?,Code1,Code2,Code3,Code4
0,2021-01-01 21:41:57,-74.12541,40.64283,,12.0,72.9,8.6,,fast,True,False,False,False
1,2021-01-01 21:43:08,-74.12191,40.64379,,12.0,71.5,8.6,,fast,True,False,False,False
2,2021-01-01 21:44:17,-74.11843,40.6446,,12.0,71.7,8.7,,fast,True,False,False,False
3,2021-01-01 21:45:28,-74.11485,40.64542,,12.0,74.0,8.7,,fast,True,False,False,False
4,2021-01-01 21:46:38,-74.11135,40.64617,,12.0,73.1,8.4,,fast,True,False,False,False
5,2021-01-01 21:47:48,-74.10783,40.64657,,12.0,90.5,8.4,,fast,False,True,False,False
6,2021-01-01 21:48:58,-74.10425,40.64649,,12.0,91.6,8.3,,fast,False,True,False,False
7,2021-01-01 21:50:09,-74.10055,40.64635,,12.0,87.5,8.4,,fast,True,False,False,False
8,2021-01-01 21:51:18,-74.09705,40.64671,,12.0,79.5,8.3,,fast,True,False,False,False
9,2021-01-01 21:52:28,-74.09353,40.64734,,12.0,72.8,8.4,,fast,True,False,False,False


___
## Now you can see these codes stored in the `agent.db` and `track.db` metadata tables, making it possible to query by these properties without having to process them again.

In [21]:
#code is true if ANY point along ANY track for agent was True
#note: like='Code' selects every column whose name contains 'Code', which is
#why 'AISCode' also appears in the output
ds.agents.filter(like='Code')

Unnamed: 0_level_0,AISCode,Code1,Code2,Code3,Code4
Agent ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Vessel_367061610,31.0,True,True,True,True
Vessel_367115000,31.0,False,False,False,True
Vessel_367558180,31.0,True,True,True,True
Vessel_367098070,31.0,True,True,True,True
Vessel_367522950,,True,False,True,True
...,...,...,...,...,...
Vessel_366834000,31.0,True,False,True,False
Vessel_367597530,90.0,True,True,True,True
Vessel_366899000,,False,True,False,False
Vessel_367497610,31.0,True,True,True,True


In [22]:
#code is True if ANY point along track was True
#(first 5 tracks only; 'AISCode' is included because its name contains 'Code')
ds.tracks.head(5).filter(like='Code')

Unnamed: 0_level_0,AISCode,Code1,Code2,Code3,Code4
Track ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Vessel_367061610_T0,31.0,True,True,True,True
Vessel_367115000_T0,31.0,False,False,False,True
Vessel_367558180_T0,31.0,True,True,True,True
Vessel_367098070_T0,31.0,True,True,True,True
Vessel_367522950_T0,,True,False,True,True


___
## Finally, you can also drop metadata and dynamic data columns from the agents/tracks in your `Dataset`. This is useful when you want to reduce the data size and delete data you are not using anymore.

In [23]:
#drop metadata from agents - removes the static fields added by map_meta
ds = ds.drop_meta(['How Wide?', 'How Long?'],
                  ncores=4)

#drop dynamic data from tracks - removes the columns added by map_data
ds = ds.drop_data(['Depth Requirement', 'How Fast?'],
                  ncores=4)

#refresh metadata so the metadata tables no longer list the dropped fields
ds = ds.refresh_meta(ncores=4)

Dropping meta from agents: 100%|██████████| 172/172 [00:02<00:00, 74.36it/s]
Dropping dynamic data from agents: 100%|██████████| 172/172 [00:02<00:00, 74.48it/s]
Refreshing metadata: 100%|██████████| 172/172 [00:02<00:00, 79.14it/s]


New meta/databases saved to c:\code\trackio\notebooks\example0


___
## Notice that this data is no longer present in the metadata tables or in the actual agent/track data.

In [24]:
#list the agent table columns - 'How Wide?' and 'How Long?' are gone
ds.agents.columns

Index(['MMSI', 'Name', 'AISCode', 'Length', 'Width', 'npoints', 'ntracks',
       'Xmin', 'Xmax', 'Ymin', 'Ymax', 'Start Time', 'End Time', 'Code1',
       'Code2', 'Code3', 'Code4', 'File', 'geometry'],
      dtype='object')

In [25]:
#list the track table columns - the dropped fields are absent here too
ds.tracks.columns

Index(['MMSI', 'Name', 'AISCode', 'Length', 'Width', 'Agent ID', 'npoints',
       'Xmin', 'Xmax', 'Ymin', 'Ymax', 'Start Time', 'End Time', 'Code1',
       'Code2', 'Code3', 'Code4', 'File', 'Track Length', 'Duration', 'Year',
       'Month', 'Xstart', 'Ystart', 'Xend', 'Yend', 'Effective Distance',
       'Min Temporal Resolution', 'Mean Temporal Resolution',
       'Max Temporal Resolution', 'Min Spatial Resolution',
       'Mean Spatial Resolution', 'Max Spatial Resolution', 'Sinuosity',
       'geometry'],
      dtype='object')

In [26]:
#get an agent - here simply the first one in the ds.agents index
agent = ds.get_agent(ds.agents.index[0])

#view agent meta - the dropped 'How Wide?'/'How Long?' entries are not present
agent.meta

{'MMSI': 367061610,
 'Name': 'BARNEY TURECAMO',
 'AISCode': 31.0,
 'Length': 36.0,
 'Width': 10.0,
 'Agent ID': 'Vessel_367061610'}

In [27]:
#view first track for agent (key 'T0') - the dropped 'Depth Requirement' and
#'How Fast?' columns are gone, while the Code1..Code4 columns remain
agent.tracks['T0'].head(5)

Unnamed: 0,Time,X,Y,Draft,Status,Coursing,Speed,Code1,Code2,Code3,Code4
0,2021-01-01 00:18:19,-74.04371,40.65883,4.3,1.0,91.2,0.2,False,True,False,False
1,2021-01-01 00:21:13,-74.04348,40.65888,4.3,1.0,81.1,0.2,True,False,False,False
2,2021-01-01 00:24:20,-74.04331,40.65891,4.3,1.0,74.8,0.2,True,False,False,False
3,2021-01-01 00:27:16,-74.04316,40.65896,4.3,1.0,52.9,0.1,True,False,False,False
4,2021-01-01 00:30:21,-74.04307,40.65908,4.3,1.0,36.4,0.1,True,False,False,False
