In [1]:
import sys
import platform
import os
import random as rnd
from math import sqrt, sin, cos, radians
import numpy as np
import pandas as pd
from numba import jit
import numba
import numexpr as ne

# Software Versions and Machine Hardware

In [2]:
print("Python: {}\nNumpy: {}\nPandas: {}\nNumba: {}\nNumexpr: {}".format(sys.version,
                                                                         np.__version__,
                                                                         pd.__version__,
                                                                         numba.__version__,
                                                                         ne.__version__))
print("Processor: {}\n# of Cores: {}\nMachine: {}\nArchitecture: {}".format(platform.processor(),
                                                                            os.cpu_count(),
                                                                            platform.machine(),
                                                                            platform.architecture()))
#Note:
#Intel64 Family 6 Model 78 Stepping 3 = Intel(R) Core(TM) i5-6300U CPU @ 2.40GHz

Python: 3.5.2 |Anaconda 4.1.1 (64-bit)| (default, Jul  5 2016, 11:41:13) [MSC v.1900 64 bit (AMD64)]
Numpy: 1.11.1
Pandas: 0.18.1
Numba: 0.26.0
Numexpr: 2.6.0
Processor: Intel64 Family 6 Model 78 Stepping 3, GenuineIntel
# of Cores: 4
Machine: AMD64
Architecture: ('64bit', 'WindowsPE')


# Define Sample Data

List of tuples where list is 1,000,000 long and each tuple has 4 values defining the point.

In [3]:
size = 1000000
points = []
for i in range(size):
    points.append((rnd.uniform(-85,85),rnd.uniform(-175,175),rnd.uniform(-50,12000), rnd.uniform(-50,50)))
print ('{} of {} containing {}.'.format(type(points),type(points[0]),type(points[0][0])))

<class 'list'> of <class 'tuple'> containing <class 'float'>.


Numpy arrays, 4 arrays each 1,000,000 long.

In [4]:
#instead of using np.random.uniform to define these arrays, converting the points list
#so the same values are used for all methods
lat_np = np.empty(size)
long_np = np.empty(size)
alt_np = np.empty(size)
geoid_np = np.empty(size)
for i, point in enumerate(points):
    lat_np[i] = point[0]
    long_np[i] = point[1]
    alt_np[i] = point[2]
    geoid_np[i] = point[3]
print ('{} (x4) containing {}.'.format(type(lat_np),type(lat_np[0])))

<class 'numpy.ndarray'> (x4) containing <class 'numpy.float64'>.


Pandas DataFrame of the points.

In [5]:
points_df = pd.DataFrame(points, columns=['Lat','Long','Alt','Geoid'])
print ('{} containing {}.'.format(type(points_df),type(points_df.ix[0,0])))

<class 'pandas.core.frame.DataFrame'> containing <class 'numpy.float64'>.


In [6]:
#results df
timing_results = pd.DataFrame(columns=['Loops','Repeat','Best (sec)'])
def timing_results_filler (r):
    #Loops,Repeat,Best
    return [r.loops, r.repeat, r.best]

# Constants

In [7]:
a = 6378137  # Semi-major axis 
b = 6356752.3142  # Semi-minor axis
f = (a - b) / a  # flattening
e = 0.081819191 # eccentricity
ee = e*e

# Native Python Lists

In [8]:
def LLAtoXYZ_raw (latitude, longitude, altitude, geoidSepIn = 0):
    # LLAtoXYZ converts a position (latitude, longitude, altitude) to the ECEF X, Y, Z format.
    # The function expects Lat and Long in degrees and Alt in meters and outputs X, Y, Z in meters. 
    # geoidSepIn is the height of the geoid above the WGS84 ellipsoid. It is optional and assumed 
    # to be 0 if not provided (meaning geoid-to-ellipsoid differences are ignored)
    
    latitude_rad = radians(latitude)
    longitude_rad = radians(longitude)
    height = altitude - geoidSepIn
    cos_lat = cos(latitude_rad)
     
    r = a / sqrt(1 - ee * sin(latitude_rad) * sin(latitude_rad))
    x = (r + height) * cos_lat * cos(longitude_rad)
    y = (r + height) * cos_lat * sin(longitude_rad)
    z = ((1 - ee) * r + height) * sin(latitude_rad)
        
    return x,y,z
LLAtoXYZ_raw(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(-2856867.422762463, -4132876.8004144537, -3916387.577890978)

In [9]:
results = []
r = %timeit -n 1 -r 5 -o for p in points: results.append(LLAtoXYZ_raw(p[0],p[1],p[2],p[3]))
timing_results.loc['Native Python'] = timing_results_filler(r)

1 loop, best of 5: 1.97 s per loop


# Numpy Arrays

In [10]:
#import numpy functions directly
from numpy import sqrt, sin, cos, radians

In [11]:
def LLAtoXYZ_numpy (latitude, longitude, altitude, geoidSepIn = 0, return_array = False):
    # LLAtoXYZ converts a position (latitude, longitude, altitude) to the ECEF X, Y, Z format.
    # The function expects Lat and Long in degrees and Alt in meters and outputs X, Y, Z in meters. 
    # geoidSepIn is the height of the geoid above the WGS84 ellipsoid. It is optional and assumed 
    # to be 0 if not provided (meaning geoid-to-ellipsoid differences are ignored)
    
    latitude_rad = radians(latitude)
    longitude_rad = radians(longitude)
    height = altitude - geoidSepIn
    cos_lat = cos(latitude_rad)
     
    r = a / sqrt(1 - ee * sin(latitude_rad) * sin(latitude_rad))
    x = (r + height) * cos_lat * cos(longitude_rad)
    y = (r + height) * cos_lat * sin(longitude_rad)
    z = ((1 - ee) * r + height) * sin(latitude_rad)
    
    if return_array:
        return np.array([x,y,z])
    else:
        return x,y,z
    
LLAtoXYZ_numpy(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(-2856867.4227624629, -4132876.8004144537, -3916387.5778909782)

In [12]:
r = %timeit -n 1 -r 5 -o results=LLAtoXYZ_numpy(lat_np,long_np,alt_np,geoid_np,True)
timing_results.loc['Numpy'] = timing_results_filler(r)

1 loop, best of 5: 142 ms per loop


# Pandas (Serialized)

In [13]:
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z']  = zip(*points_df.apply(lambda row: LLAtoXYZ_raw(row[0],row[1],row[2],row[3]), axis=1))
timing_results.loc['Pandas (Serialized)'] = timing_results_filler(r)

1 loop, best of 5: 59.7 s per loop


# Pandas (Vectorized)

In [14]:
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z'] = LLAtoXYZ_numpy(points_df['Lat'],points_df['Long'],points_df['Alt'],points_df['Geoid'],False)
timing_results.loc['Pandas (Vectorized)'] = timing_results_filler(r)

1 loop, best of 5: 144 ms per loop


# Numba

In [15]:
@jit
def LLAtoXYZ_jit (latitude, longitude, altitude, geoidSepIn = 0, return_array = False):
    # LLAtoXYZ converts a position (latitude, longitude, altitude) to the ECEF X, Y, Z format.
    # The function expects Lat and Long in degrees and Alt in meters and outputs X, Y, Z in meters. 
    # geoidSepIn is the height of the geoid above the WGS84 ellipsoid. It is optional and assumed 
    # to be 0 if not provided (meaning geoid-to-ellipsoid differences are ignored)
    
    latitude_rad = radians(latitude)
    longitude_rad = radians(longitude)
    height = altitude - geoidSepIn
    cos_lat = cos(latitude_rad)
     
    r = a / np.sqrt(1 - ee * sin(latitude_rad) * sin(latitude_rad))
    x = (r + height) * cos_lat * cos(longitude_rad)
    y = (r + height) * cos_lat * sin(longitude_rad)
    z = ((1 - ee) * r + height) * sin(latitude_rad)
    
    if return_array:
        return np.array([x,y,z])
    else:
        return x,y,z
    
LLAtoXYZ_jit(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(-2856867.4227624629, -4132876.8004144537, -3916387.5778909782)

In [16]:
r = %timeit -n 1 -r 5 -o results = LLAtoXYZ_jit(lat_np,long_np,alt_np,geoid_np,True)
timing_results.loc['Numba with Numpy Arrays'] = timing_results_filler(r)

1 loop, best of 5: 138 ms per loop


In [17]:
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z'] = LLAtoXYZ_jit(points_df['Lat'],points_df['Long'],points_df['Alt'],points_df['Geoid'],False)
timing_results.loc['Numba with Vectorized Pandas'] = timing_results_filler(r)

1 loop, best of 5: 147 ms per loop


# Numexpr

In [25]:
def LLAtoXYZ_ne (latitude, longitude, altitude, geoidSepIn = 0, return_array = False):
    # LLAtoXYZ converts a position (latitude, longitude, altitude) to the ECEF X, Y, Z format.
    # The function expects Lat and Long in degrees and Alt in meters and outputs X, Y, Z in meters. 
    # geoidSepIn is the height of the geoid above the WGS84 ellipsoid. It is optional and assumed 
    # to be 0 if not provided (meaning geoid-to-ellipsoid differences are ignored)
    
    latitude_rad = radians(latitude)
    longitude_rad = radians(longitude)
    height = altitude - geoidSepIn
     
    r = ne.evaluate("a / sqrt(1 - ee * sin(latitude_rad)**2)")
    x = ne.evaluate("(r + height) * cos(latitude_rad) * cos(longitude_rad)")
    y = ne.evaluate("(r + height) * cos(latitude_rad) * sin(longitude_rad)")
    z = ne.evaluate("((1 - ee) * r + height) * sin(latitude_rad)")
    
    if return_array:
        return np.array([x,y,z])
    else:
        return x,y,z
    
LLAtoXYZ_ne(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(array(-2856867.422762463),
 array(-4132876.8004144537),
 array(-3916387.577890978))

In [37]:
r = %timeit -n 1 -r 5 -o results = LLAtoXYZ_ne(lat_np,long_np,alt_np,geoid_np,True)
timing_results.loc['Numexpr with Numpy Arrays'] = timing_results_filler(r)

1 loop, best of 5: 44 ms per loop


In [38]:
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z'] = LLAtoXYZ_ne(points_df['Lat'],points_df['Long'],points_df['Alt'],points_df['Geoid'],False)
timing_results.loc['Numexpr with Vectorized Pandas'] = timing_results_filler(r)

1 loop, best of 5: 46 ms per loop


# Summary

In [39]:
timing_results['Vs Native Python'] = timing_results.loc['Native Python']['Best (sec)'] / timing_results['Best (sec)']

In [40]:
timing_results.sort_values('Best (sec)')

Unnamed: 0,Loops,Repeat,Best (sec),Vs Native Python
Numexpr with Numpy Arrays,1.0,5.0,0.044016,44.866481
Numexpr with Vectorized Pandas,1.0,5.0,0.045973,42.956875
Numba with Numpy Arrays,1.0,5.0,0.137773,14.334156
Numpy,1.0,5.0,0.141659,13.940934
Pandas (Vectorized),1.0,5.0,0.144275,13.688123
Numba with Vectorized Pandas,1.0,5.0,0.147003,13.434113
Native Python,1.0,5.0,1.974855,1.0
Pandas (Serialized),1.0,5.0,59.717804,0.03307


Note: %timeit runs the function X number of loops and then repeats this for Y runs. It reports the average loop time (time for run / X) for the best of the Y runs. X is termed "Loops" and Y is termed "Repeat". 
Example: Loops = X = 1000 & Repeat = Y = 3. Best = 20ms. This would mean that it ran 3000 total loops (3 times of running 1000 loops) and the fastest of the 3 super cycles was 20,000 ms meaning the average time was 20ms.