## Make Positions

The positions in the GPX file occur approximately every eight hours. To get a smoother animation that allows the progression of the weather to be seen more frequent positions are required. Additionally, to save money we turned the Yellowbrick off when in port. The last position is often out at sea and 

This notebook takes the original fixes, adds some manual positions for when we were in port and then interpolates between the points to get hourly positions. Various interpolation techniques are possible; a simple linear interpolation based on the bearing and distance between fixes will be tried first.

In [None]:
from geographiclib.geodesic import Geodesic
from geopy import distance
import gpxpy
import pandas as pd

In [2]:
gpx_file = 'yblog-1589101179046.gpx'

In [3]:
with open(gpx_file) as fh:
    gpx = gpxpy.parse(fh)
data = gpx.tracks[0].segments[0].points

In [4]:
print(data[0])

[trkpt:55.94507999999999,-4.746780000000001@8.0@2013-09-04 16:19:01+00:00]


In [5]:
df = pd.DataFrame(columns=['lat', 'lon', 'time'])

# Skip the first entry as it was just a test one week before we departed
for point in data[1:]: 
    df = df.append({'lon': point.longitude, 'lat' : point.latitude, 'time' : point.time}, ignore_index=True)

In [6]:
geod = Geodesic.WGS84

dist_dif = []
time_dif = []

# Remember that we're skipping the first entry
for index in range(1, len(data) - 1):
    start = data[index]
    stop = data[index+1]
    distance_2d = geod.Inverse(start.latitude, start.longitude,
                               stop.latitude, stop.longitude)['s12']
    dist_dif.append(distance_2d)
    time_delta = (stop.time - start.time).total_seconds()    
    time_dif.append(time_delta)
# Add the last row
dist_dif.append(0)
time_dif.append(0)
    
df['time_dif'] = time_dif
df['dis_dif'] = dist_dif

In [7]:
print(df[:10])
print(df[-3:])


        lat      lon                       time  time_dif        dis_dif
0  55.94507 -4.74655  2013-09-16 21:54:06+00:00   65160.0   45971.356742
1  55.56232 -5.02120  2013-09-17 16:00:06+00:00   28802.0  100928.014355
2  54.66902 -5.29117  2013-09-18 00:00:08+00:00   28798.0  117976.169012
3  53.64186 -4.84571  2013-09-18 08:00:06+00:00   28800.0   38121.353753
4  53.32146 -4.64268  2013-09-18 16:00:06+00:00   28800.0       5.331001
5  53.32146 -4.64260  2013-09-19 00:00:06+00:00   28800.0       6.397101
6  53.32148 -4.64269  2013-09-19 08:00:06+00:00   28800.0       0.000000
7  53.32148 -4.64269  2013-09-19 16:00:06+00:00   28831.0    2372.313017
8  53.34245 -4.64908  2013-09-20 00:00:37+00:00   28788.0  111154.812503
9  52.48326 -5.49176  2013-09-20 08:00:25+00:00   28813.0  111159.060838
          lat      lon                       time  time_dif       dis_dif
546  58.03371 -6.33969  2014-05-24 00:00:06+00:00   28800.0  19772.891561
547  58.20922 -6.39012  2014-05-24 08:00:06+00:00

In [14]:
hourly = pd.DataFrame(columns=['lat', 'lon', 'time', 'src'])
hourly['time'] = pd.date_range('2013-09-16', '2014-05-25', freq='H', tz='UTC')
print(hourly[:10])

   lat  lon                      time  src
0  NaN  NaN 2013-09-16 00:00:00+00:00  NaN
1  NaN  NaN 2013-09-16 01:00:00+00:00  NaN
2  NaN  NaN 2013-09-16 02:00:00+00:00  NaN
3  NaN  NaN 2013-09-16 03:00:00+00:00  NaN
4  NaN  NaN 2013-09-16 04:00:00+00:00  NaN
5  NaN  NaN 2013-09-16 05:00:00+00:00  NaN
6  NaN  NaN 2013-09-16 06:00:00+00:00  NaN
7  NaN  NaN 2013-09-16 07:00:00+00:00  NaN
8  NaN  NaN 2013-09-16 08:00:00+00:00  NaN
9  NaN  NaN 2013-09-16 09:00:00+00:00  NaN


In [9]:
for index, row in df.iterrows():
    print(index)
    print(row)
    break

0
lat                           55.9451
lon                          -4.74655
time        2013-09-16 21:54:06+00:00
time_dif                        65160
dis_dif                       45971.4
Name: 0, dtype: object


In [10]:
df.loc[0]

lat                           55.9451
lon                          -4.74655
time        2013-09-16 21:54:06+00:00
time_dif                        65160
dis_dif                       45971.4
Name: 0, dtype: object

In [15]:
hourly_index = 0
hourly_end_index = len(hourly.index)
seconds_in_hour = 60**2
for orig_index in range(len(df.index) - 1):
    while hourly_index < hourly_end_index:
        print(f'hourly_index {hourly_index}')
        print(hourly.loc[hourly_index]['time'])
        if (hourly.loc[hourly_index]['time'] < df.loc[orig_index]['time'] and 
                hourly.loc[hourly_index + 1]['time'] < df.loc[orig_index]['time']):
            print('Bingo!')
            hourly.loc[hourly_index]['lat'] = df.loc[orig_index]['lat']
            hourly.loc[hourly_index]['lon'] = df.loc[orig_index]['lon']
            l = geod.InverseLine(
                df.loc[orig_index]['lat'],
                df.loc[orig_index]['lon'],
                df.loc[orig_index + 1]['lat'],
                df.loc[orig_index + 1]['lon'],
            )
            end_time = (
                df.loc[orig_index + 1]['time']
                .replace(minute=0, second=0, microsecond=0)
            )
            time_delta = int((end_time - df.loc[orig_index]['time']).total_seconds() 
                             / seconds_in_hour)
            for i in range(time_delta + 1):
                g = l.Position(l.s13, Geodesic.STANDARD | Geodesic.LONG_UNROLL)
                hourly.loc[hourly_index]['lat'] = g['lat2']
                hourly.loc[hourly_index]['lon'] = g['lon2']
                hourly_index += 1
        elif orig_index == 0:
            # We haven't received our first fix yet so keep us at what will
            # be the first fix
            hourly.loc[hourly_index]['lat'] = df.loc[0]['lat']
            hourly.loc[hourly_index]['lon'] = df.loc[0]['lon']
            hourly_index += 1
        else:
            # Where are we here?
            print(f'hourly_index {hourly_index} orig_index {orig_index}')
            raise NotImplementedError
            # hourly_index += 1
        if hourly_index > 100:
            break



hourly_index 0
2013-09-16 00:00:00+00:00
Bingo!
hourly_index 19
2013-09-16 19:00:00+00:00
Bingo!
hourly_index 38
2013-09-17 14:00:00+00:00
hourly_index 39
2013-09-17 15:00:00+00:00
hourly_index 40
2013-09-17 16:00:00+00:00
hourly_index 41
2013-09-17 17:00:00+00:00
hourly_index 42
2013-09-17 18:00:00+00:00
hourly_index 43
2013-09-17 19:00:00+00:00
hourly_index 44
2013-09-17 20:00:00+00:00
hourly_index 45
2013-09-17 21:00:00+00:00
hourly_index 46
2013-09-17 22:00:00+00:00
hourly_index 47
2013-09-17 23:00:00+00:00
hourly_index 48
2013-09-18 00:00:00+00:00
hourly_index 49
2013-09-18 01:00:00+00:00
hourly_index 50
2013-09-18 02:00:00+00:00
hourly_index 51
2013-09-18 03:00:00+00:00
hourly_index 52
2013-09-18 04:00:00+00:00
hourly_index 53
2013-09-18 05:00:00+00:00
hourly_index 54
2013-09-18 06:00:00+00:00
hourly_index 55
2013-09-18 07:00:00+00:00
hourly_index 56
2013-09-18 08:00:00+00:00
hourly_index 57
2013-09-18 09:00:00+00:00
hourly_index 58
2013-09-18 10:00:00+00:00
hourly_index 59
2013-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the docu

NotImplementedError: 

In [18]:
hourly.loc[0]['lat'] = 55.
print(hourly[:24])

    lat  lon                      time  src
0   NaN  NaN 2013-09-16 00:00:00+00:00  NaN
1   NaN  NaN 2013-09-16 01:00:00+00:00  NaN
2   NaN  NaN 2013-09-16 02:00:00+00:00  NaN
3   NaN  NaN 2013-09-16 03:00:00+00:00  NaN
4   NaN  NaN 2013-09-16 04:00:00+00:00  NaN
5   NaN  NaN 2013-09-16 05:00:00+00:00  NaN
6   NaN  NaN 2013-09-16 06:00:00+00:00  NaN
7   NaN  NaN 2013-09-16 07:00:00+00:00  NaN
8   NaN  NaN 2013-09-16 08:00:00+00:00  NaN
9   NaN  NaN 2013-09-16 09:00:00+00:00  NaN
10  NaN  NaN 2013-09-16 10:00:00+00:00  NaN
11  NaN  NaN 2013-09-16 11:00:00+00:00  NaN
12  NaN  NaN 2013-09-16 12:00:00+00:00  NaN
13  NaN  NaN 2013-09-16 13:00:00+00:00  NaN
14  NaN  NaN 2013-09-16 14:00:00+00:00  NaN
15  NaN  NaN 2013-09-16 15:00:00+00:00  NaN
16  NaN  NaN 2013-09-16 16:00:00+00:00  NaN
17  NaN  NaN 2013-09-16 17:00:00+00:00  NaN
18  NaN  NaN 2013-09-16 18:00:00+00:00  NaN
19  NaN  NaN 2013-09-16 19:00:00+00:00  NaN
20  NaN  NaN 2013-09-16 20:00:00+00:00  NaN
21  NaN  NaN 2013-09-16 21:00:00

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [26]:
geod = Geodesic.WGS84
hourly = pd.DataFrame(columns=['lat', 'lon', 'time', 'src'])
# src is a character with meanings: f: fix
#                                   i: interpolated between fixes
#                                   m: manually inserted point (e.g. when 
#                                      tracker was off in port)
seconds_in_hour = 60**2
for orig_index in range(len(df.index) - 1):
    l = geod.InverseLine(
        df.loc[orig_index]['lat'],
        df.loc[orig_index]['lon'],
        df.loc[orig_index + 1]['lat'],
        df.loc[orig_index + 1]['lon'],
    )
    start_time = (
        df.iloc[orig_index]['time']
        .replace(minute=0, second=0, microsecond=0)
    )
    end_time = (
        df.iloc[orig_index + 1]['time']
        .replace(minute=0, second=0, microsecond=0)
    )
    time_delta_hours = int((end_time - start_time).total_seconds() / 
                           seconds_in_hour)
    for i in range(time_delta_hours + 1):
        g = l.Position(l.s13 * i, Geodesic.STANDARD | Geodesic.LONG_UNROLL)
        hourly = hourly.append({
            'time': start_time + pd.Timedelta(hours=i),
            'lat': g['lat2'],
            'lon': g['lon2'],
            'src': 'f' if i == 0 else 'i',
        }, ignore_index=True)

In [27]:
print(hourly[:24])

          lat       lon                       time src
0   55.945070 -4.746550  2013-09-16 21:00:00+00:00   f
1   55.562320 -5.021200  2013-09-16 22:00:00+00:00   i
2   55.178942 -5.290557  2013-09-16 23:00:00+00:00   i
3   54.794957 -5.554788  2013-09-17 00:00:00+00:00   i
4   54.410382 -5.814054  2013-09-17 01:00:00+00:00   i
5   54.025237 -6.068509  2013-09-17 02:00:00+00:00   i
6   53.639538 -6.318302  2013-09-17 03:00:00+00:00   i
7   53.253303 -6.563575  2013-09-17 04:00:00+00:00   i
8   52.866546 -6.804467  2013-09-17 05:00:00+00:00   i
9   52.479285 -7.041109  2013-09-17 06:00:00+00:00   i
10  52.091532 -7.273630  2013-09-17 07:00:00+00:00   i
11  51.703304 -7.502151  2013-09-17 08:00:00+00:00   i
12  51.314612 -7.726792  2013-09-17 09:00:00+00:00   i
13  50.925471 -7.947665  2013-09-17 10:00:00+00:00   i
14  50.535893 -8.164882  2013-09-17 11:00:00+00:00   i
15  50.145890 -8.378548  2013-09-17 12:00:00+00:00   i
16  49.755475 -8.588765  2013-09-17 13:00:00+00:00   i
17  49.364