# extract_gps_data.ipynb

* 2021-05-06 Added code to adjust coordinates using rolling averages
* 2021-05-02 First version by Aubrey Moore

Extracts geotagging data from EXIF tags stored in one or more image files.

Example usage:

    papermill extract_gps_data.ipynb \
    '../open-camera-test/home-uog/extract_gps_data_output.ipynb' \
    -p IMAGE_FILE_PATH '../open-camera-test/home-uog/*.jpg' \
    -p CSV_OUTPUT_FILE '../open-camera-test/home-uog/gps-data.csv'
    
When the above command line is executed in the directory containing **extract_gps_data.ipynb**, 
GPS data will be extracted from all **jpg** files in the **../open-camera-test/home-uog** directory
and will be saved to **../open-camera-test/home-uog/gps-data.csv**.

## References

https://developer.here.com/blog/getting-started-with-geocoding-exif-image-metadata-in-python3

http://www.50northspatial.org/using-open-camera-geotagging-photos/

In [28]:
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
import glob
import pandas as pd
import os
import plotly.express as px
import numpy as np
import logging

In [29]:
# parameters for papermill
# The values below are defaults; the example in the notebook header overrides
# IMAGE_FILE_PATH and CSV_OUTPUT_FILE with `papermill ... -p NAME VALUE`.

IMAGE_FILE_PATH = '../2021-05-14/*.jpg'         # Path to one or more image files. Can include wildcards. See https://pymotw.com/2/glob/ for pattern matching details.
CSV_OUTPUT_FILE = '../2021-05-14/gps-data.csv'  # Path to a CSV file where the GPS data will be stored. 
ADJUST_COORDINATES = True  # When True, adjust_gps_coordinates() runs before the CSV is written and adds the time_diff, longitude_adjusted and latitude_adjusted columns.
MAKE_MAPS = True  # When True, the map cells at the end of the notebook are rendered (the adjusted map also requires ADJUST_COORDINATES).

In [30]:
def get_exif(filename):
    """Return the EXIF metadata of an image file.

    The file is opened with Pillow and checked with ``Image.verify()`` before
    its EXIF data is read.

    Args:
        filename: Path of the image file to read.

    Returns:
        Whatever ``image._getexif()`` returns for the opened image. The caller
        in this notebook (get_geotagging) treats a falsy result as
        "no EXIF metadata".

    Raises:
        Any exception raised by ``Image.open()`` or ``Image.verify()`` is
        propagated unchanged.
    """
    # The context manager guarantees that the file handle is released even
    # when verify() raises on a damaged image.
    with Image.open(filename) as image:
        image.verify()
        # NOTE(review): _getexif() is a private Pillow method; it is kept so
        # that the returned structure stays the same for get_geotagging().
        return image._getexif()


def get_geotagging(exif):
    """Collect the GPS entries of an EXIF mapping under their readable names.

    Args:
        exif: Mapping of numeric EXIF tag ids to values, as returned by
            get_exif().

    Returns:
        dict mapping GPS tag names (the values of Pillow's GPSTAGS table) to
        the corresponding entries of the GPSInfo sub-mapping. Only keys that
        are present in the image are included.

    Raises:
        ValueError: If ``exif`` is empty/None, or if it has no GPSInfo entry.
    """
    if not exif:
        raise ValueError("No EXIF metadata found")

    # Numeric id(s) that Pillow's TAGS table maps to the name 'GPSInfo'.
    gps_info_ids = [tag_id for tag_id, tag_name in TAGS.items() if tag_name == 'GPSInfo']

    named_gps_values = {}
    for gps_info_id in gps_info_ids:
        if gps_info_id not in exif:
            raise ValueError("No EXIF geotagging found")
        raw_gps = exif[gps_info_id]
        # Translate the numeric GPS keys that are present into their names.
        named_gps_values.update(
            (gps_name, raw_gps[gps_key])
            for gps_key, gps_name in GPSTAGS.items()
            if gps_key in raw_gps
        )
    return named_gps_values


def _signed_decimal_degrees(gpsdata, value_key, ref_key, negative_ref):
    """Convert one degrees/minutes/seconds GPS entry to signed decimal degrees (6 decimals)."""
    degrees, minutes, seconds = gpsdata[value_key]
    decimal = degrees + minutes/60.0 + seconds/3600.0
    # The hemisphere reference decides the sign of the coordinate.
    if gpsdata[ref_key] == negative_ref:
        decimal = -decimal
    return round(decimal, 6)


def get_gps_coordinates(image_file_name):
    """Read position and GPS time stamp from the EXIF tags of one image.

    Args:
        image_file_name: Path of the image file.

    Returns:
        Tuple ``(longitude, latitude, timestamp)``. Longitude and latitude are
        decimal degrees rounded to 6 places (negative for 'W' and 'S');
        timestamp is a string formatted as 'YYYY-MM-DD HH:MM:SS' built from
        GPSDateStamp and GPSTimeStamp.
        NOTE(review): the sample output in this notebook suggests the GPS time
        stamp is UTC rather than local time - confirm.

    Raises:
        ValueError: Propagated from get_geotagging() when EXIF/GPS data is missing.
        KeyError: If one of the GPS tags read below is absent.
    """
    gpsdata = get_geotagging(get_exif(image_file_name))

    latitude = _signed_decimal_degrees(gpsdata, 'GPSLatitude', 'GPSLatitudeRef', 'S')
    longitude = _signed_decimal_degrees(gpsdata, 'GPSLongitude', 'GPSLongitudeRef', 'W')

    # EXIF stores the date as 'YYYY:MM:DD'; switch to dashes.
    date_part = gpsdata['GPSDateStamp'].replace(':', '-')
    hours, minutes, seconds = gpsdata['GPSTimeStamp']
    time_part = f'{int(hours):02}:{int(minutes):02}:{int(seconds):02}'
    return longitude, latitude, f'{date_part} {time_part}'

def create_dataframe(image_file_path=None):
    """Build a DataFrame with one row of GPS data per image file.

    Args:
        image_file_path: Glob pattern selecting the image files. When omitted,
            the notebook-level IMAGE_FILE_PATH parameter is used.

    Returns:
        pandas.DataFrame with the columns imagefile (base name of the file),
        longitude, latitude and timestamp (converted with pd.to_datetime),
        with rows in sorted file-path order. When no file matches, an empty
        frame with the same columns is returned.

    Raises:
        Exceptions raised by get_gps_coordinates() for an individual file are
        propagated unchanged.
    """
    if image_file_path is None:
        image_file_path = IMAGE_FILE_PATH

    # Get a sorted list of image files
    image_files = sorted(glob.glob(image_file_path))
    n = len(image_files)

    # Extract coordinates from each image file.
    # Rows are collected in a plain list and converted to a DataFrame once:
    # DataFrame.append() copied the whole frame on every call (quadratic for
    # thousands of images) and was removed in pandas 2.0.
    records = []
    for i, image_file in enumerate(image_files, start=1):
        longitude, latitude, timestamp = get_gps_coordinates(image_file)
        records.append({'imagefile': os.path.basename(image_file),
                        'longitude': longitude,
                        'latitude': latitude,
                        'timestamp': pd.to_datetime(timestamp)})
        if i % 100 == 0:
            logging.info(f'{i} of {n} images processed')
    return pd.DataFrame(records, columns=['imagefile', 'longitude', 'latitude', 'timestamp'])


def adjust_gps_coordinates(frame=None, max_gap_seconds=60, window=5):
    '''
    Calculates rolling averages for latitude and longitude to get better estimates for camera
    positions and saves them in new columns: longitude_adjusted and latitude_adjusted.
    This is a workaround for the low-precision GPS EXIF data saved by the OpenCamera app,
    which stores only whole degrees, minutes and seconds without decimal places.

    The images are first split into segments: a new segment begins when an image was taken
    more than max_gap_seconds after the previous one. Within each segment, every point is
    replaced by the centred rolling mean over `window` points (fewer at the segment edges),
    so positions are never averaged across a break in the recording.

    Args:
        frame: DataFrame with timestamp, longitude and latitude columns, with rows in
            chronological order. It is modified in place. Defaults to the notebook-level df.
        max_gap_seconds: Time difference (in seconds) above which a new segment starts.
        window: Number of points in the centred rolling average.

    Returns:
        None. The columns time_diff (whole seconds since the previous row, 0 for the first
        row), longitude_adjusted and latitude_adjusted are added to the frame.
    '''
    if frame is None:
        frame = df

    elapsed = frame['timestamp'].diff().apply(lambda delta: delta / np.timedelta64(1, 's'))
    frame['time_diff'] = elapsed.fillna(0).astype('int64')

    # Find location of segment breaks.
    # Row positions (not index labels) are used throughout, so the result does not rely on
    # the frame having the default 0..n-1 index.
    starts_segment = (frame['time_diff'] > max_gap_seconds).to_numpy()
    break_positions = np.flatnonzero(starts_segment).tolist()
    bounds = [0] + break_positions + [len(frame)]
    segments = [{'first_index': first, 'last_index': after_last - 1}
                for first, after_last in zip(bounds[:-1], bounds[1:])]
    logging.info(f'segments: {segments}')

    # Calculate rolling averages of the locations within each segment.
    # cumsum() over the break flags gives every row the number of its segment.
    segment_ids = starts_segment.cumsum()
    for column in ('longitude', 'latitude'):
        frame[f'{column}_adjusted'] = (
            frame[column]
            .astype('float64')
            .groupby(segment_ids)
            .transform(lambda values: values.rolling(window, center=True, min_periods=1).mean())
        )

    return

In [31]:
# MAIN
# Configure logging, extract the GPS data of all selected images, optionally
# smooth the coordinates, and write the result to CSV_OUTPUT_FILE.

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(funcName)s %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S%z",
    handlers=[logging.StreamHandler()])
# The start-up message used to name a different program ('georef.py').
logging.info('Starting extract_gps_data.ipynb')

df = create_dataframe()

if ADJUST_COORDINATES:
    logging.info('Adjusting coordinates')
    # Adds the time_diff and *_adjusted columns to df in place.
    adjust_gps_coordinates()

df.to_csv(CSV_OUTPUT_FILE, index=False)
logging.info(f'FINISHED: Data saved in {CSV_OUTPUT_FILE}')

2021-05-18T13:05:32+1000 [INFO] <module> Starting georef.py
2021-05-18T13:05:32+1000 [INFO] create_dataframe 100 of 13525 images processed
2021-05-18T13:05:33+1000 [INFO] create_dataframe 200 of 13525 images processed
2021-05-18T13:05:33+1000 [INFO] create_dataframe 300 of 13525 images processed
2021-05-18T13:05:33+1000 [INFO] create_dataframe 400 of 13525 images processed
2021-05-18T13:05:34+1000 [INFO] create_dataframe 500 of 13525 images processed
2021-05-18T13:05:34+1000 [INFO] create_dataframe 600 of 13525 images processed
2021-05-18T13:05:34+1000 [INFO] create_dataframe 700 of 13525 images processed
2021-05-18T13:05:34+1000 [INFO] create_dataframe 800 of 13525 images processed
2021-05-18T13:05:35+1000 [INFO] create_dataframe 900 of 13525 images processed
2021-05-18T13:05:35+1000 [INFO] create_dataframe 1000 of 13525 images processed
2021-05-18T13:05:35+1000 [INFO] create_dataframe 1100 of 13525 images processed
2021-05-18T13:05:36+1000 [INFO] create_dataframe 1200 of 13525 images

In [32]:
# Show the resulting DataFrame as the cell output (the rendered table is abbreviated with "..." rows).
df

Unnamed: 0,imagefile,longitude,latitude,timestamp,time_diff,longitude_adjusted,latitude_adjusted
0,IMG_20210514_093415.jpg,144.802500,13.448056,2021-05-13 23:34:15,0,144.802500,13.448056
1,IMG_20210514_093416.jpg,144.802500,13.448056,2021-05-13 23:34:16,1,144.802500,13.448056
2,IMG_20210514_093417.jpg,144.802500,13.448056,2021-05-13 23:34:17,1,144.802500,13.448056
3,IMG_20210514_093418.jpg,144.802500,13.448056,2021-05-13 23:34:18,1,144.802500,13.448056
4,IMG_20210514_093419.jpg,144.802500,13.448056,2021-05-13 23:34:19,1,144.802500,13.448056
...,...,...,...,...,...,...,...
13520,IMG_20210514_150918.jpg,144.801667,13.464167,2021-05-14 05:09:18,1,144.801667,13.464167
13521,IMG_20210514_150919.jpg,144.801667,13.464167,2021-05-14 05:09:19,1,144.801667,13.464167
13522,IMG_20210514_150920.jpg,144.801667,13.464167,2021-05-14 05:09:20,1,144.801667,13.464167
13523,IMG_20210514_150921.jpg,144.801667,13.464167,2021-05-14 05:09:21,1,144.801667,13.464167


In [33]:
# Map of the coordinates exactly as read from the EXIF tags; skipped when MAKE_MAPS is off.
if MAKE_MAPS:
    fig = px.scatter_mapbox(
        df,
        lat="latitude",
        lon="longitude",
        title='Original coordinates',
        zoom=9,
    )
    # Leave room at the top only, so the title stays visible.
    fig.update_layout(
        mapbox_style="open-street-map",
        margin=dict(r=0, t=30, l=0, b=0),
    )
    fig.show()

In [34]:
# Map of the rolling-average coordinates; these columns exist only when
# ADJUST_COORDINATES was on, so both switches are required.
if MAKE_MAPS and ADJUST_COORDINATES:
    fig = px.scatter_mapbox(
        df,
        lat="latitude_adjusted",
        lon="longitude_adjusted",
        title='Adjusted coordinates',
        zoom=9,
    )
    # Leave room at the top only, so the title stays visible.
    fig.update_layout(
        mapbox_style="open-street-map",
        margin=dict(r=0, t=30, l=0, b=0),
    )
    fig.show()