# Currently editing on the plane.

## GPX data recorded during an early-morning Southwest flight from Fort Lauderdale to New Orleans

## [Interact with this notebook on Binder](https://mybinder.org/v2/gh/dm-wyncode/zipped-iterables-binder-notebooks/master).

### Resources

* [Map Plus iOS application](http://duweis.com/en/mapplus.html)
* [Pandas](https://pandas.pydata.org/pandas-docs/version/0.23.4/index.html)
* [Minio](https://github.com/minio/minio): a high performance object storage server compatible with Amazon S3 APIs

## Load the data from a [Minio](https://github.com/minio/minio) instance I have deployed.

In [144]:
import urllib.request
import itertools as it
from pprint import pprint
from functools import partial, reduce
import operator as op

# Define configured pprint suitable for notebooks
pprint_ = partial(pprint, indent=4)

# Where the recorded track lives, and how long (in seconds) to wait on the
# network before giving up. Without a timeout a stalled connection would
# block this cell -- and the kernel -- indefinitely.
GPX_URL = "https://minio.apps.selfip.com/mymedia/gpx/fort_lauderdale__to__new_orleans.gpx"
GPX_TIMEOUT_SECONDS = 30

# Download the whole GPX document as raw bytes; the context manager closes
# the HTTP response once the body has been read.
with urllib.request.urlopen(GPX_URL, timeout=GPX_TIMEOUT_SECONDS) as res:
    data = res.read()

# Peek at the first ten raw lines to confirm what was downloaded.
print(data.splitlines()[:10])

[b'<?xml version="1.0" encoding="UTF-8" standalone="no" ?>', b'<gpx xmlns="http://www.topografix.com/GPX/1/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:gpx_style="http://www.topografix.com/GPX/gpx_style/0/2" xsi:schemaLocation="http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/1/1/gpx.xsd http://www.topografix.com/GPX/gpx_style/0/2 http://www.topografix.com/GPX/gpx_style/0/2/gpx_style.xsd" version="1.1" creator="Map Plus 2.8.7.1">', b'  <metadata>', b'    <link href="http://www.duweis.com">', b'      <text>Map Plus</text>', b'    </link>', b'    <time>2019-10-17T12:33:03Z</time>', b'  </metadata>', b'', b'  <trk>']


## Parse the GPX file

In [145]:
from lxml import etree

In [146]:
# The GPX bytes were fetched from a remote server, so parse them with entity
# resolution and network access switched off; this keeps a crafted document
# from pulling in external entities while leaving ordinary GPX content
# unaffected. `tree` is the root element of the parsed document.
tree = etree.fromstring(
    data,
    etree.XMLParser(resolve_entities=False, no_network=True),
)

## Display set of tags

In [147]:
# Distinct tag names across the root and everything beneath it.
set(node.tag for node in tree.iter())

{'{http://www.topografix.com/GPX/1/1}cmt',
 '{http://www.topografix.com/GPX/1/1}ele',
 '{http://www.topografix.com/GPX/1/1}extensions',
 '{http://www.topografix.com/GPX/1/1}gpx',
 '{http://www.topografix.com/GPX/1/1}link',
 '{http://www.topografix.com/GPX/1/1}metadata',
 '{http://www.topografix.com/GPX/1/1}name',
 '{http://www.topografix.com/GPX/1/1}text',
 '{http://www.topografix.com/GPX/1/1}time',
 '{http://www.topografix.com/GPX/1/1}trk',
 '{http://www.topografix.com/GPX/1/1}trkpt',
 '{http://www.topografix.com/GPX/1/1}trkseg',
 '{http://www.topografix.com/GPX/gpx_style/0/2}color',
 '{http://www.topografix.com/GPX/gpx_style/0/2}line',
 '{http://www.topografix.com/GPX/gpx_style/0/2}width'}

In [149]:
# Compare the root's direct children against every element reachable from
# the root, to see whether the document is nested more than one level deep.
direct_children = set(tree.iterchildren())
every_element = set(tree.iter())
direct_children == every_element

False

In [153]:
# Unpack the root's children; this raises ValueError unless there are
# exactly two of them.
meta, trk = tree.iterchildren()

In [160]:
# Keep only the last child of `trk`; every child before it is collected
# into the throwaway list `_`.
trk_children = trk.iterchildren()
*_, trkseg = trk_children

In [162]:
# Materialise everything `trkseg.iter()` yields so it can be scanned
# more than once.
data_points = [*trkseg.iter()]

In [166]:
# Distinct tag names found among the collected data points.
tags = set()
tags.update(node.tag for node in data_points)

## There is 1 trkseg element. It may be the root of all the location points.

In [171]:
# For each distinct tag, count how many data points carry it.
[
    (tag, sum(1 for element in data_points if element.tag == tag))
    for tag in tags
]

[('{http://www.topografix.com/GPX/1/1}ele', 1824),
 ('{http://www.topografix.com/GPX/1/1}time', 1829),
 ('{http://www.topografix.com/GPX/1/1}trkpt', 1829),
 ('{http://www.topografix.com/GPX/1/1}trkseg', 1)]

In [211]:
# Direct children of the track segment, in document order.
trkpnt_children = [child for child in trkseg.iterchildren()]

In [226]:
from collections import namedtuple

In [244]:
# Lightweight record type with three positional fields.
TrackPoint = namedtuple(
    typename="TrackPoint",
    field_names=["coordinate", "ele", "time"],
)

In [273]:
# Lazily pair each track point's attribute mapping (wrapped in a 1-tuple)
# with a tuple holding the text of each of its descendants.
trkpnts_ = (
    ((point.attrib,), tuple(child.text for child in point.iterdescendants()))
    for point in trkpnt_children
)

# Flatten every (wrapped attributes, texts) pair into one tuple per point.
trkpnts = [(*wrapped_attrib, *texts) for wrapped_attrib, texts in trkpnts_]

## Not all items have all three of `('coordinate', 'ele', 'time')`

In [274]:
# Distinct tuple lengths present among the parsed track points.
set(map(len, trkpnts))

{2, 3}

## See what is missing in those with only 2 parts.

In [275]:
# Track points that came out with only two parts.
list(filter(lambda items: len(items) == 2, trkpnts))

[({'lat': '26.07364077867658', 'lon': '-80.13974719286466'},
  '2019-10-17T10:32:23Z'),
 ({'lat': '26.07330806773481', 'lon': '-80.13861468216476'},
  '2019-10-17T10:32:44Z'),
 ({'lat': '26.07329140328246', 'lon': '-80.13861683228613'},
  '2019-10-17T10:33:09Z'),
 ({'lat': '26.07358034244973', 'lon': '-80.13865072072218'},
  '2019-10-17T10:34:37Z'),
 ({'lat': '26.07370411580322', 'lon': '-80.14069088505309'},
  '2019-10-17T10:34:46Z')]

In [276]:
# Track points with all three parts; show the first five for comparison.
three_part_trkpnts = [items for items in trkpnts if len(items) == 3]
three_part_trkpnts[:5]

[({'lat': '26.07408333333334', 'lon': '-80.136275'},
  '14',
  '2019-10-17T10:16:44Z'),
 ({'lat': '26.07371', 'lon': '-80.13643666666667'},
  '5.8',
  '2019-10-17T10:16:52Z'),
 ({'lat': '26.07379666666666', 'lon': '-80.13646999999999'},
  '1.1',
  '2019-10-17T10:17:24Z'),
 ({'lat': '26.07390333333334', 'lon': '-80.13640000000001'},
  '5.3',
  '2019-10-17T10:17:47Z'),
 ({'lat': '26.07400833333334', 'lon': '-80.13633'},
  '5.9',
  '2019-10-17T10:18:32Z')]

## Rewrite the comprehensions to account for a lack of `ele` in a trkpnt.

In [293]:
def trkpnt_handler(trkpnt):
    """Normalise a track point's values to an ``(ele, datetime)`` pair.

    ``trkpnt`` must hold either two values (``ele`` then ``datetime``) or a
    single value, which is taken to be the ``datetime``; in the latter case
    ``None`` is substituted for the missing ``ele``.

    Raises AssertionError when ``trkpnt`` holds neither one nor two values.
    """
    count = len(trkpnt)
    assert count in range(1, 3), f"Length is {count}: {trkpnt}"
    if count == 2:
        elevation, timestamp = trkpnt
    else:
        # Only one value is present: treat it as the timestamp.
        (timestamp,) = trkpnt
        elevation = None
    return elevation, timestamp

In [303]:
# Build one (coordinate, ele, datetime) record per track point.
#
# The coordinate is read by attribute name rather than via
# ``attrib.values()``, so latitude always comes first regardless of the order
# in which the attributes were written in the file. A point lacking either
# attribute raises KeyError here instead of producing a short or mis-ordered
# coordinate. The coordinate stays a two-item list, wrapped in a 1-tuple so
# it can be chained with the (ele, datetime) pair below.
trkpnts_ = (
    (
        ([element.attrib["lat"], element.attrib["lon"]],),
        trkpnt_handler(tuple(e.text for e in element.iterdescendants())),
    )
    for element in trkpnt_children
)

# Flatten each (wrapped coordinate, (ele, datetime)) pair into a 3-tuple.
trkpnts = [tuple(it.chain(*item)) for item in trkpnts_]

## Length of all trkpnts is 3

In [304]:
# Distinct tuple lengths after normalising every track point.
{*map(len, trkpnts)}

{3}

In [306]:
# Flattened rows for the track points whose elevation is missing.
[
    (*coords, elevation, timestamp)
    for coords, elevation, timestamp in trkpnts
    if elevation is None
]

[('26.07364077867658', '-80.13974719286466', None, '2019-10-17T10:32:23Z'),
 ('26.07330806773481', '-80.13861468216476', None, '2019-10-17T10:32:44Z'),
 ('26.07329140328246', '-80.13861683228613', None, '2019-10-17T10:33:09Z'),
 ('26.07358034244973', '-80.13865072072218', None, '2019-10-17T10:34:37Z'),
 ('26.07370411580322', '-80.14069088505309', None, '2019-10-17T10:34:46Z')]

## Load data into a Pandas dataframe.

In [307]:
import pandas as pd

In [308]:
# Column labels for the flattened track-point rows.
columns = "lat", "lon", "ele", "datetime"

In [311]:
# Spread each coordinate into separate leading values so every row has one
# value per column label, then build the frame and preview it.
rows = [(*coords, elevation, timestamp) for coords, elevation, timestamp in trkpnts]
df = pd.DataFrame(rows, columns=columns)
df.head()

Unnamed: 0,lat,lon,ele,datetime
0,26.07408333333334,-80.136275,14.0,2019-10-17T10:16:44Z
1,26.07371,-80.13643666666667,5.8,2019-10-17T10:16:52Z
2,26.07379666666666,-80.13646999999999,1.1,2019-10-17T10:17:24Z
3,26.07390333333334,-80.13640000000001,5.3,2019-10-17T10:17:47Z
4,26.07400833333334,-80.13633,5.9,2019-10-17T10:18:32Z
