In [1]:
import datetime
import functools
import glob
import itertools
import operator
import time

import aerofiles as af # To read track files
import pandas as pd
from joblib import Parallel, delayed

In [2]:
 def readSingleTrack(trackfile: str):
    """Extract the first and last fix of one IGC track file.

    Parameters
    ----------
    trackfile : str
        Path of the IGC file to parse.

    Returns
    -------
    list or None
        ``[start time, start lon, start lat, stop time, stop lon, stop lat]``
        taken from the first and last fix record, or ``None`` when the file
        could not be opened or parsed (a message is printed in that case).
    """
    reader = af.igc.Reader()
    try:
        with open(trackfile) as f:
            data = reader.read(f)
        # NOTE(review): index 1 of 'fix_records' is presumably the list of
        # parsed fixes (index 0 holding parse errors) -- confirm against the
        # aerofiles documentation.
        start, stop = operator.itemgetter(0, -1)(data['fix_records'][1])
        # result = {'take off': start, 'landing': stop}
        result = [
            start['time'], start['lon'], start['lat'],
            stop['time'], stop['lon'], stop['lat']
        ]
    except Exception:
        # Best-effort batch read: report the bad file and carry on. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate so a long run can still be interrupted.
        print(f'Could not read file {trackfile}')
        result = None
    return result

In [3]:
def timing(f):
    """Decorator that prints how long each call of ``f`` took, in milliseconds.

    The wrapped function's return value is passed through unchanged, and its
    metadata (``__name__``, ``__doc__``, ...) is preserved on the wrapper.
    Nothing is printed when ``f`` raises; the exception simply propagates.
    """
    @functools.wraps(f)
    def wrap(*args, **kwargs):
        # perf_counter is monotonic and high-resolution, so the measurement
        # is not affected by system clock adjustments.
        started = time.perf_counter()
        ret = f(*args, **kwargs)
        elapsed_ms = (time.perf_counter() - started) * 1000.0
        print('{:s} function took {:.3f} ms'.format(f.__name__, elapsed_ms))

        return ret
    return wrap

In [4]:
@timing
def readTracks(path='./data/*.igc', nMax=1000):
    """Read track files one after another.

    Files matching the glob ``path`` are taken in reverse-sorted name order
    and at most ``nMax`` of them are read (``None`` means no limit).
    Returns one entry per file, exactly as produced by ``readSingleTrack``
    (so unreadable files show up as ``None``).
    """
    matches = sorted(glob.glob(path), reverse=True)
    return [readSingleTrack(name) for name in itertools.islice(matches, nMax)]

In [5]:
# Sequential baseline: read at most 100 track files and print the elapsed time.
data = readTracks(nMax=100)

readTracks function took 8455.912 ms


In [6]:
# Faster parallel version
@timing
def readTracksParallel(path='./data/*.igc', nMax=1000):
    """Read track files through joblib instead of a plain loop.

    Selects files the same way as the sequential reader: glob ``path``,
    reverse-sorted by name, at most ``nMax`` files (``None`` means no limit).
    Each file is handed to ``readSingleTrack`` as a joblib task with
    ``n_jobs=-1``; the collected results are returned.
    """
    matches = sorted(glob.glob(path), reverse=True)
    selected = itertools.islice(matches, nMax)
    read_one = delayed(readSingleTrack)
    workers = Parallel(n_jobs=-1)
    return workers(read_one(name) for name in selected)

In [7]:
# Same 100-file limit through the joblib-based reader, to compare against the sequential timing.
data = readTracksParallel(nMax=100)

readTracksParallel function took 3657.229 ms


In [None]:
# Took ~35 minutes on my old macbook (for 123k tracks)
# nMax=None removes the file limit, so every file matching the glob is read.
data = readTracksParallel(nMax=None)

In [8]:
# Inspect the raw records: one list per track file (None for files that could not be read).
data

[[datetime.time(10, 53, 33),
  5.8909,
  45.3104,
  datetime.time(13, 23, 4),
  5.9603166666666665,
  45.32515],
 [datetime.time(9, 47, 47),
  6.2721,
  45.69846666666667,
  datetime.time(13, 59, 18),
  6.3162,
  45.75485],
 [datetime.time(16, 53, 46),
  1.3244666666666667,
  42.22848333333334,
  datetime.time(18, 7, 18),
  1.3240333333333334,
  42.228433333333335],
 [datetime.time(14, 6, 12),
  1.3245,
  42.22848333333334,
  datetime.time(15, 34, 9),
  1.3215166666666667,
  42.22265],
 [datetime.time(16, 30, 25),
  1.3244666666666667,
  42.22846666666667,
  datetime.time(17, 13, 3),
  1.3217166666666667,
  42.22225],
 [datetime.time(17, 21, 54),
  1.3244666666666667,
  42.22846666666667,
  datetime.time(17, 53, 37),
  1.3215,
  42.22293333333333],
 [datetime.time(15, 50, 18),
  1.3245166666666668,
  42.22841666666667,
  datetime.time(16, 38, 43),
  1.3242166666666666,
  42.22848333333334],
 [datetime.time(13, 16, 31),
  1.3243833333333335,
  42.2284,
  datetime.time(13, 56, 29),
  1.3

In [9]:
# Discard the None placeholders left by unreadable files, then tabulate the rest.
data = list(filter(lambda record: record is not None, data))
df = pd.DataFrame(
    data,
    columns=[
        'start time', 'start longitude', 'start latitude',
        'stop time', 'stop longitude', 'stop latitude',
    ],
)
df

Unnamed: 0,start time,start longitude,start latitude,stop time,stop longitude,stop latitude
0,10:53:33,5.890900,45.310400,13:23:04,5.960317,45.325150
1,09:47:47,6.272100,45.698467,13:59:18,6.316200,45.754850
2,16:53:46,1.324467,42.228483,18:07:18,1.324033,42.228433
3,14:06:12,1.324500,42.228483,15:34:09,1.321517,42.222650
4,16:30:25,1.324467,42.228467,17:13:03,1.321717,42.222250
...,...,...,...,...,...,...
95,08:09:06,6.763433,45.648283,16:33:56,6.752400,45.591933
96,08:32:43,6.483250,45.879167,13:57:15,6.466300,45.872067
97,11:09:01,7.014800,47.926083,15:02:17,6.985183,48.056217
98,13:34:31,4.056850,47.873950,14:24:38,3.899467,47.905533


In [10]:
# Check column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   start time       100 non-null    object 
 1   start longitude  100 non-null    float64
 2   start latitude   100 non-null    float64
 3   stop time        100 non-null    object 
 4   stop longitude   100 non-null    float64
 5   stop latitude    100 non-null    float64
dtypes: float64(4), object(2)
memory usage: 4.8+ KB


In [11]:
# Summary statistics of the numeric (longitude/latitude) columns.
df.describe()

Unnamed: 0,start longitude,start latitude,stop longitude,stop latitude
count,100.0,100.0,100.0,100.0
mean,5.201979,45.798975,5.107323,45.823655
std,2.463812,2.02617,2.472279,2.046199
min,-0.498717,42.2284,-1.0808,42.219167
25%,4.032579,45.636058,3.409183,45.590296
50%,6.445833,45.866208,6.328508,45.842992
75%,6.763433,47.873858,6.706625,47.900858
max,12.740267,48.993283,12.645167,49.084317


In [12]:
def time_between(df, date=None):
    """Return the seconds elapsed between a row's 'start time' and 'stop time'.

    Parameters
    ----------
    df : mapping-like
        A single record (e.g. the row passed by ``DataFrame.apply(..., axis=1)``)
        with ``'start time'`` and ``'stop time'`` entries holding
        ``datetime.time`` values.
    date : datetime.date, optional
        Calendar day used to anchor both times so they can be subtracted.
        Defaults to today's date, looked up once per call. The result does
        not depend on which day is chosen.

    Returns
    -------
    float
        Duration in seconds. If the stop time is earlier than the start time
        the record is assumed to cross midnight and one day is added, so the
        result is never negative.
    """
    if date is None:
        date = datetime.date.today()
    # Anchor both times to the same day; a single date lookup avoids the two
    # values landing on different days if the clock passes midnight mid-call.
    started = datetime.datetime.combine(date, df['start time'])
    stopped = datetime.datetime.combine(date, df['stop time'])
    elapsed = stopped - started
    if elapsed < datetime.timedelta(0):
        # Stop time-of-day precedes start: the track ran past midnight.
        elapsed += datetime.timedelta(days=1)
    return elapsed.total_seconds()

# Row-wise apply: each row is passed to time_between and the result (seconds) is stored in a new 'duration' column.
df['duration'] = df.apply(time_between, axis=1)
df

Unnamed: 0,start time,start longitude,start latitude,stop time,stop longitude,stop latitude,duration
0,10:53:33,5.890900,45.310400,13:23:04,5.960317,45.325150,8971.0
1,09:47:47,6.272100,45.698467,13:59:18,6.316200,45.754850,15091.0
2,16:53:46,1.324467,42.228483,18:07:18,1.324033,42.228433,4412.0
3,14:06:12,1.324500,42.228483,15:34:09,1.321517,42.222650,5277.0
4,16:30:25,1.324467,42.228467,17:13:03,1.321717,42.222250,2558.0
...,...,...,...,...,...,...,...
95,08:09:06,6.763433,45.648283,16:33:56,6.752400,45.591933,30290.0
96,08:32:43,6.483250,45.879167,13:57:15,6.466300,45.872067,19472.0
97,11:09:01,7.014800,47.926083,15:02:17,6.985183,48.056217,13996.0
98,13:34:31,4.056850,47.873950,14:24:38,3.899467,47.905533,3007.0


In [13]:
# Persist the table as a gzip-compressed CSV, leaving out the DataFrame index.
df.to_csv('data.csv.gz', index=False, compression='gzip')