# More mappers!

# Mapping here simply means converting an existing value that's already in the data into another value using a dictionary as a lookup.

# Examples of this could be mapping similar AIS Codes to standard group names, or adding a dynamic string descriptor based on the speed of the agent (e.g. slow, medium, fast). This is where you can get creative.
___

In [1]:
import trackio as tio

___
## Let's read the dataset from the last notebook.

In [2]:
#define the data_path - the folder holding the Dataset from the previous notebook
data_path = 'example0'

#read the existing Dataset from that folder
ds = tio.read(data_path=data_path)

#check the metadata available (the notebook displays the Dataset summary for the last expression)
ds

Type:
    <class 'trackio.Dataset.Dataset'>
Status:
    0 Unprocessed CSV Files
    0 Processed CSV Files
    0 Unsplit Agent Files
    172 Split Agent Files
Static Data Fields:
    ['AISCode', 'Agent ID', 'Length', 'MMSI', 'Name', 'Width']
Dynamic Data Fields:
    ['Coursing', 'Draft', 'Speed', 'Status', 'Time', 'X', 'Y']
Metadata:
    X: degrees
    Y: degrees
    CRS: EPSG:4326
Data Path:
    c:\code\trackio\notebooks\example0

____
## First, you can map metadata (static data) for the agents in your `Dataset`:

In [3]:
#make a metadata mapper for the static 'Length' field
#with a single field the result is one flat dict keyed by the values found in the data
meta_mapper = ds.make_meta_mapper(['Length'], 
                                  ncores=4)

#display the mapper - every value starts out as None until you fill it in
meta_mapper

Making meta mappers: 100%|██████████| 172/172 [00:02<00:00, 64.06it/s]


{0.0: None,
 11.0: None,
 12.0: None,
 13.0: None,
 15.0: None,
 16.0: None,
 17.0: None,
 18.0: None,
 19.0: None,
 20.0: None,
 21.0: None,
 22.0: None,
 23.0: None,
 24.0: None,
 25.0: None,
 26.0: None,
 27.0: None,
 28.0: None,
 29.0: None,
 30.0: None,
 31.0: None,
 32.0: None,
 33.0: None,
 34.0: None,
 35.0: None,
 36.0: None,
 37.0: None,
 38.0: None,
 39.0: None,
 42.0: None,
 44.0: None,
 48.0: None,
 52.0: None,
 60.0: None,
 84.0: None,
 88.0: None,
 94.0: None,
 159.0: None,
 169.0: None,
 170.0: None,
 227.0: None,
 228.0: None,
 240.0: None,
 248.0: None,
 333.0: None,
 nan: None}

___
## Or make multiple at once.

In [4]:
#make metadata mappers for several static fields in one pass
#with multiple fields the result is a dict of dicts, keyed first by field name
meta_mappers = ds.make_meta_mapper(['Length', 'Width'], 
                                   ncores=4)

#display the mappers - every value starts out as None until you fill it in
meta_mappers

Making meta mappers: 100%|██████████| 172/172 [00:02<00:00, 76.91it/s]


{'Length': {0.0: None,
  11.0: None,
  12.0: None,
  13.0: None,
  15.0: None,
  16.0: None,
  17.0: None,
  18.0: None,
  19.0: None,
  20.0: None,
  21.0: None,
  22.0: None,
  23.0: None,
  24.0: None,
  25.0: None,
  26.0: None,
  27.0: None,
  28.0: None,
  29.0: None,
  30.0: None,
  31.0: None,
  32.0: None,
  33.0: None,
  34.0: None,
  35.0: None,
  36.0: None,
  37.0: None,
  38.0: None,
  39.0: None,
  42.0: None,
  44.0: None,
  48.0: None,
  52.0: None,
  60.0: None,
  84.0: None,
  88.0: None,
  94.0: None,
  159.0: None,
  169.0: None,
  170.0: None,
  227.0: None,
  228.0: None,
  240.0: None,
  248.0: None,
  333.0: None,
  nan: None},
 'Width': {0.0: None,
  4.0: None,
  5.0: None,
  6.0: None,
  7.0: None,
  8.0: None,
  9.0: None,
  10.0: None,
  11.0: None,
  12.0: None,
  14.0: None,
  18.0: None,
  21.0: None,
  26.0: None,
  27.0: None,
  32.0: None,
  33.0: None,
  37.0: None,
  42.0: None,
  nan: None}}

___
## Now make up some mapped data.

In [5]:
import math

#fill in the empty mappers: every raw value (dict key) gets a text label (dict value)
#NaN keys are labelled 'Unknown' explicitly - NaN fails every comparison, so without
#the guard a missing value would fall into the else branch and be labelled 'wide'/'long'

#descriptor by width
for k in meta_mappers['Width']:
    if math.isnan(k):
        meta_mappers['Width'][k] = 'Unknown'
    elif k <= 20:
        meta_mappers['Width'][k] = 'narrow'
    else:
        meta_mappers['Width'][k] = 'wide'

#descriptor by length
for k in meta_mappers['Length']:
    if math.isnan(k):
        meta_mappers['Length'][k] = 'Unknown'
    elif k <= 20:
        meta_mappers['Length'][k] = 'short'
    else:
        meta_mappers['Length'][k] = 'long'

___
## Now you can map these to the agents' metadata.

In [6]:
#map metadata: read each agent's 'Width' and store the mapped label as 'How Wide?'
#NOTE(review): fill looks like the value used when no label applies - the agent table
#later shows 'Unknown' for agents with a missing Width; confirm against the trackio docs
ds = ds.map_meta(['Width'], 
                 ['How Wide?'], 
                 meta_mappers,
                 ncores=4,
                 fill='Unknown')

Mapping metadata to agent: 100%|██████████| 172/172 [00:02<00:00, 81.29it/s]


In [7]:
#map multiple metadata at once
#the first list names the input fields and the second list names the new output fields, in matching order
ds = ds.map_meta(['Width', 'Length'], 
                 ['How Wide?', 'How Long?'], 
                 meta_mappers,
                 ncores=4,
                 fill='Unknown')

Mapping metadata to agent: 100%|██████████| 172/172 [00:02<00:00, 57.79it/s]


___
## Now if you refresh the metadata tables, you'll see this data in the `agent.db` file by accessing `Dataset.agents`:

In [8]:
#refresh metadata so the tables pick up the newly mapped fields
ds = ds.refresh_meta(ncores=4)

#preview the first rows of the agent table - it now includes 'How Wide?' and 'How Long?'
ds.agents.head()

Refreshing metadata: 100%|██████████| 172/172 [00:03<00:00, 52.88it/s]


New meta/databases saved to c:\code\trackio\notebooks\example0


Unnamed: 0_level_0,MMSI,Name,AISCode,Length,Width,How Wide?,How Long?,npoints,ntracks,Xmin,Xmax,Ymin,Ymax,Start Time,End Time,File,geometry
Agent ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Vessel_367796050,367796050,SEA LION,31.0,19.0,6.0,narrow,short,1242,1,-74.07765,-74.07759,40.68621,40.68627,2021-01-01 00:17:17,2021-01-01 23:59:07,c:\code\trackio\notebooks\example0\Vessel_3677...,"POLYGON ((-74.07759 40.68621, -74.07759 40.686..."
Vessel_368111610,368111610,,,,,Unknown,Unknown,35,1,-74.07277,-74.01703,40.62842,40.71297,2021-01-01 02:21:15,2021-01-01 03:02:17,c:\code\trackio\notebooks\example0\Vessel_3681...,"POLYGON ((-74.01703 40.62842, -74.01703 40.712..."
Vessel_367069240,367069240,C F CAMPBELL,31.0,32.0,9.0,narrow,long,324,1,-74.1227,-73.95377,40.63918,40.76081,2021-01-01 00:19:43,2021-01-01 09:36:17,c:\code\trackio\notebooks\example0\Vessel_3670...,"POLYGON ((-73.95377 40.63918, -73.95377 40.760..."
Vessel_367758160,367758160,S/V NO DOUBT,36.0,16.0,4.0,narrow,short,347,1,-74.04491,-74.0448,40.71038,40.71047,2021-01-01 00:19:53,2021-01-01 23:55:56,c:\code\trackio\notebooks\example0\Vessel_3677...,"POLYGON ((-74.04480 40.71038, -74.04480 40.710..."
Vessel_338862000,338862000,FORT SCHUYLER,31.0,29.0,,Unknown,long,175,1,-74.12821,-74.02601,40.64329,40.70685,2021-01-01 00:17:13,2021-01-01 21:39:36,c:\code\trackio\notebooks\example0\Vessel_3388...,"POLYGON ((-74.02601 40.64329, -74.02601 40.706..."


___
## You can also do the same thing with any dynamic data field.

In [9]:
#build lookups for the dynamic 'Draft' and 'Speed' fields
data_mappers = ds.make_data_mapper(['Draft', 'Speed'], ncores=4)

#label every draft value - the mapped value doesn't have to be a string, it could come from a function etc.
#note: NaN fails the <= test, so a NaN key ends up with the else label
draft_lookup = data_mappers['Draft']
for draft in draft_lookup:
    draft_lookup[draft] = 'shallow' if draft <= 5 else 'deep'

#label every speed value the same way
speed_lookup = data_mappers['Speed']
for speed in speed_lookup:
    speed_lookup[speed] = 'slow' if speed <= 5 else 'fast'

#display the filled-in mappers
data_mappers

Making data mappers: 100%|██████████| 172/172 [00:03<00:00, 52.66it/s]


{'Draft': {2.3: 'shallow',
  2.5: 'shallow',
  2.7: 'shallow',
  2.9: 'shallow',
  3.0: 'shallow',
  3.3: 'shallow',
  3.4: 'shallow',
  3.5: 'shallow',
  3.6: 'shallow',
  3.7: 'shallow',
  3.8: 'shallow',
  3.9: 'shallow',
  4.0: 'shallow',
  4.1: 'shallow',
  4.2: 'shallow',
  4.3: 'shallow',
  4.4: 'shallow',
  4.5: 'shallow',
  4.6: 'shallow',
  4.9: 'shallow',
  5.0: 'shallow',
  5.2: 'deep',
  5.5: 'deep',
  5.8: 'deep',
  6.1: 'deep',
  9.4: 'deep',
  9.8: 'deep',
  9.9: 'deep',
  10.1: 'deep',
  12.0: 'deep',
  14.0: 'deep',
  14.5: 'deep',
  14.9: 'deep',
  nan: 'deep'},
 'Speed': {0.0: 'slow',
  0.1: 'slow',
  0.2: 'slow',
  0.3: 'slow',
  0.4: 'slow',
  0.5: 'slow',
  0.6: 'slow',
  0.7: 'slow',
  0.8: 'slow',
  0.9: 'slow',
  1.0: 'slow',
  1.1: 'slow',
  1.2: 'slow',
  1.3: 'slow',
  1.4: 'slow',
  1.5: 'slow',
  1.6: 'slow',
  1.7: 'slow',
  1.8: 'slow',
  1.9: 'slow',
  2.0: 'slow',
  2.1: 'slow',
  2.2: 'slow',
  2.3: 'slow',
  2.4: 'slow',
  2.5: 'slow',
  2.6: 'slow'

In [10]:
#write the mapped labels into new dynamic columns, then refresh the metadata tables
ds = (
    ds.map_data(['Draft', 'Speed'],
                ['Depth Requirement', 'How Fast?'],
                data_mappers,
                ncores=4)
      .refresh_meta(ncores=4)
)

Mapping agent dynamic data: 100%|██████████| 172/172 [00:02<00:00, 59.64it/s]
Refreshing metadata: 100%|██████████| 172/172 [00:02<00:00, 58.51it/s]


New meta/databases saved to c:\code\trackio\notebooks\example0


___
## Now read a single track to view the mapped dynamic data.

In [11]:
#read single track (the one at position 24 of the track table) and preview its first 5 rows
#the new 'Depth Requirement' and 'How Fast?' columns sit next to the original data
ds.get_track(ds.tracks.index[24]).head(5)

Unnamed: 0,Time,X,Y,Draft,Status,Coursing,Speed,Depth Requirement,How Fast?
0,2021-01-01 00:18:06,-73.97277,40.70478,,5.0,360.0,0.1,,slow
1,2021-01-01 00:21:10,-73.97275,40.7048,,5.0,360.0,0.0,,slow
2,2021-01-01 00:24:06,-73.97275,40.7048,,5.0,360.0,0.0,,slow
3,2021-01-01 00:27:10,-73.97275,40.70479,,5.0,360.0,0.0,,slow
4,2021-01-01 00:30:10,-73.97274,40.70479,,5.0,360.0,0.1,,slow


___
## You can also map a dynamic data field to boolean Code columns. See [ReadMe](https://github.com/derekeden/trackio/blob/main/README.md) for more information about what a Code is in this context.

## Let's map the `Coursing` data to quadrants 1, 2, 3 or 4 based on the direction.

In [12]:
#build a lookup for the dynamic 'Coursing' field (a single field gives one flat dict)
coursing_mapper = ds.make_data_mapper(['Coursing'], ncores=4)

#replace every coursing value with its quadrant number (1, 2, 3 or 4)
for heading in coursing_mapper:
    wrapped = heading % 360           #fold the value into one 360 cycle
    sector = int(wrapped / 90) % 4    #90-wide sector index; % 4 folds an index of 4 back to 0
    coursing_mapper[heading] = sector + 1

Making data mappers: 100%|██████████| 172/172 [00:03<00:00, 56.02it/s]


___
## Now, map these integer values to boolean Code columns.

In [13]:
#turn each quadrant number into its own boolean Code column, then refresh the metadata tables
#the mapper is passed as a dict keyed by field name
ds = (
    ds.map_data_to_codes(['Coursing'],
                         {'Coursing': coursing_mapper},
                         ncores=4)
      .refresh_meta(ncores=4)
)

Mapping agent dynamic data to coded boolean arrays: 100%|██████████| 172/172 [00:02<00:00, 61.35it/s]
Refreshing metadata: 100%|██████████| 172/172 [00:02<00:00, 57.44it/s]


New meta/databases saved to c:\code\trackio\notebooks\example0


In [14]:
#read single track (the one at position 22 of the track table) and preview its first 10 rows
#each row has exactly one of Code1-Code4 set to True, matching the quadrant of its Coursing value
ds.get_track(ds.tracks.index[22]).head(10)

Unnamed: 0,Time,X,Y,Draft,Status,Coursing,Speed,Depth Requirement,How Fast?,Code1,Code2,Code3,Code4
0,2021-01-01 00:17:19,-74.04922,40.71192,2.3,3.0,10.2,0.0,shallow,slow,True,False,False,False
1,2021-01-01 00:18:30,-74.04923,40.71187,2.3,3.0,152.7,0.0,shallow,slow,False,True,False,False
2,2021-01-01 00:19:39,-74.04921,40.71194,2.3,3.0,346.6,0.0,shallow,slow,False,False,False,True
3,2021-01-01 00:20:49,-74.04919,40.71193,2.3,3.0,10.8,0.0,shallow,slow,True,False,False,False
4,2021-01-01 00:21:59,-74.04922,40.71193,2.3,3.0,8.1,0.0,shallow,slow,True,False,False,False
5,2021-01-01 00:23:09,-74.04921,40.71187,2.3,3.0,357.3,0.0,shallow,slow,False,False,False,True
6,2021-01-01 00:24:19,-74.0492,40.71193,2.3,3.0,8.4,0.0,shallow,slow,True,False,False,False
7,2021-01-01 00:25:29,-74.04924,40.7119,2.3,3.0,356.9,0.0,shallow,slow,False,False,False,True
8,2021-01-01 00:26:39,-74.04928,40.71182,2.3,3.0,212.0,0.0,shallow,slow,False,False,True,False
9,2021-01-01 00:27:48,-74.04926,40.7119,2.3,3.0,15.3,0.0,shallow,slow,True,False,False,False


___
## Now you can see these codes stored in the `agent.db` and `track.db` metadata tables, making it possible to query by these properties without having to process them again.

In [15]:
#code is true if ANY point along ANY track for agent was True
#note: like='Code' keeps every column whose name contains 'Code', so 'AISCode' shows up as well
ds.agents.filter(like='Code')

Unnamed: 0_level_0,AISCode,Code1,Code2,Code3,Code4
Agent ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Vessel_367796050,31.0,True,True,False,True
Vessel_368111610,,True,True,True,True
Vessel_367069240,31.0,True,True,True,True
Vessel_367758160,36.0,True,True,True,True
Vessel_338862000,31.0,True,True,True,True
...,...,...,...,...,...
Vessel_367304010,31.0,True,True,True,True
Vessel_366998820,31.0,True,True,True,True
Vessel_367638140,90.0,True,True,True,True
Vessel_566634000,70.0,True,True,False,False


In [16]:
#code is True if ANY point along track was True
#note: like='Code' keeps every column whose name contains 'Code', so 'AISCode' shows up as well
ds.tracks.head(5).filter(like='Code')

Unnamed: 0_level_0,AISCode,Code1,Code2,Code3,Code4
Track ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Vessel_367796050_T0,31.0,True,True,False,True
Vessel_368111610_T0,,True,True,True,True
Vessel_367069240_T0,31.0,True,True,True,True
Vessel_367758160_T0,36.0,True,True,True,True
Vessel_338862000_T0,31.0,True,True,True,True


___
## Finally, you can also drop metadata and dynamic data columns from the agents/tracks in your `Dataset`. This is useful when you want to reduce the data size and delete data you are not using anymore.

In [17]:
#remove the mapped static fields from the agents and the mapped dynamic columns
#from the tracks, then refresh the metadata tables
ds = (
    ds.drop_meta(['How Wide?', 'How Long?'], ncores=4)
      .drop_data(['Depth Requirement', 'How Fast?'], ncores=4)
      .refresh_meta(ncores=4)
)

Dropping meta from agents: 100%|██████████| 172/172 [00:03<00:00, 48.16it/s]
Dropping dynamic data from agents: 100%|██████████| 172/172 [00:03<00:00, 47.41it/s]
Refreshing metadata: 100%|██████████| 172/172 [00:03<00:00, 54.82it/s]


New meta/databases saved to c:\code\trackio\notebooks\example0


___
## Notice now that this data is not present in the metadata tables, or the actual agent/track.

In [18]:
#list the columns left in the agent table - 'How Wide?' and 'How Long?' are gone
ds.agents.columns

Index(['MMSI', 'Name', 'AISCode', 'Length', 'Width', 'npoints', 'ntracks',
       'Xmin', 'Xmax', 'Ymin', 'Ymax', 'Start Time', 'End Time', 'Code1',
       'Code2', 'Code3', 'Code4', 'File', 'geometry'],
      dtype='object')

In [19]:
#list the columns left in the track table
ds.tracks.columns

Index(['MMSI', 'Name', 'AISCode', 'Length', 'Width', 'Agent ID', 'npoints',
       'Xmin', 'Xmax', 'Ymin', 'Ymax', 'Start Time', 'End Time', 'Code1',
       'Code2', 'Code3', 'Code4', 'File', 'Track Length', 'Duration', 'Year',
       'Month', 'Xstart', 'Ystart', 'Xend', 'Yend', 'Effective Distance',
       'Min Temporal Resolution', 'Mean Temporal Resolution',
       'Max Temporal Resolution', 'Min Spatial Resolution',
       'Mean Spatial Resolution', 'Max Spatial Resolution', 'Sinuosity',
       'geometry'],
      dtype='object')

In [20]:
#get the first agent listed in the agent table
agent = ds.get_agent(ds.agents.index[0])

#view agent meta - the dropped 'How Wide?' and 'How Long?' entries are no longer present
agent.meta

{'MMSI': 367796050,
 'Name': 'SEA LION',
 'AISCode': 31.0,
 'Length': 19.0,
 'Width': 6.0,
 'Agent ID': 'Vessel_367796050'}

In [21]:
#view first track for agent (keyed 'T0') - the dropped dynamic columns are gone, the Code columns remain
agent.tracks['T0'].head(5)

Unnamed: 0,Time,X,Y,Draft,Status,Coursing,Speed,Code1,Code2,Code3,Code4
0,2021-01-01 00:17:17,-74.07761,40.68623,,0.0,157.6,0.0,False,True,False,False
1,2021-01-01 00:18:26,-74.07761,40.68623,,0.0,157.6,0.0,False,True,False,False
2,2021-01-01 00:19:38,-74.07761,40.68624,,0.0,157.6,0.0,False,True,False,False
3,2021-01-01 00:20:46,-74.07761,40.68624,,0.0,157.6,0.0,False,True,False,False
4,2021-01-01 00:21:57,-74.07764,40.68623,,0.0,157.6,0.0,False,True,False,False
