In [1]:
import sys
import platform
import os
import random as rnd
from math import sqrt, sin, cos, radians
import numpy as np
import pandas as pd
from numba import jit
import numba
import numexpr as ne

# Software Versions and Machine Hardware

In [2]:
print("Python: {}\nNumpy: {}\nPandas: {}\nNumba: {}\nNumexpr: {}\n{}".format(sys.version,
                                                                             np.__version__,
                                                                             pd.__version__,
                                                                             numba.__version__,
                                                                             ne.__version__,
                                                                             ne.get_vml_version()))
print("Processor: {}\n# of Cores: {}\nMachine: {}\nArchitecture: {}".format(platform.processor(),
                                                                            os.cpu_count(),
                                                                            platform.machine(),
                                                                            platform.architecture()))
#Note:
#Intel64 Family 6 Model 78 Stepping 3 = Intel(R) Core(TM) i5-6300U CPU @ 2.40GHz

Python: 3.4.4 |Anaconda 2.3.0 (x86_64)| (default, Jan  9 2016, 17:30:09) 
[GCC 4.2.1 (Apple Inc. build 5577)]
Numpy: 1.10.4
Pandas: 0.18.0
Numba: 0.19.1
Numexpr: 2.5.2
Intel(R) Math Kernel Library Version 11.3.3 Product Build 20160413 for Intel(R) 64 architecture applications
Processor: i386
# of Cores: 4
Machine: x86_64
Architecture: ('64bit', '')


# Define Sample Data

List of tuples where list is 1,000,000 long and each tuple has 4 values defining the point.

In [3]:
size = 1000000
points = []
for i in range(size):
    points.append((rnd.uniform(-85,85),
                   rnd.uniform(-175,175),
                   rnd.uniform(-50,12000), 
                   rnd.uniform(-50,50)))
print ('{} of {} containing {}.'.format(type(points),type(points[0]),type(points[0][0])))

<class 'list'> of <class 'tuple'> containing <class 'float'>.


Numpy arrays, 4 arrays each 1,000,000 long.

In [4]:
#instead of using np.random.uniform to define these arrays, converting the points list
#so the same values are used for all methods
lat_np = np.empty(size, dtype=np.double)
long_np = np.empty(size, dtype=np.double)
alt_np = np.empty(size, dtype=np.double)
geoid_np = np.empty(size, dtype=np.double)
for i, point in enumerate(points):
    lat_np[i] = point[0]
    long_np[i] = point[1]
    alt_np[i] = point[2]
    geoid_np[i] = point[3]
print ('{} (x4) containing {}.'.format(type(lat_np),type(lat_np[0])))

<class 'numpy.ndarray'> (x4) containing <class 'numpy.float64'>.


Pandas DataFrame of the points.

In [5]:
points_df = pd.DataFrame(points, columns=['Lat','Long','Alt','Geoid'])
print ('{} containing {}.'.format(type(points_df),type(points_df.ix[0,0])))

<class 'pandas.core.frame.DataFrame'> containing <class 'numpy.float64'>.


In [6]:
#results df
timing_results = pd.DataFrame(columns=['Loops','Repeat','Best (sec)'])
def timing_results_filler (r):
    #Loops,Repeat,Best
    return [r.loops, r.repeat, r.best]

# Constants

In [7]:
a = 6378137  # Semi-major axis 
b = 6356752.3142  # Semi-minor axis
f = (a - b) / a  # flattening
e = 0.081819191 # eccentricity
ee = e*e

# Native Python Lists

In [8]:
def LLAtoXYZ_raw (latitude, longitude, altitude, geoidSepIn = 0):
    # LLAtoXYZ converts a position (latitude, longitude, altitude) to the ECEF X, Y, Z format.
    # The function expects Lat and Long in degrees and Alt in meters and outputs X, Y, Z in meters. 
    # geoidSepIn is the height of the geoid above the WGS84 ellipsoid. It is optional and assumed 
    # to be 0 if not provided (meaning geoid-to-ellipsoid differences are ignored)
    
    latitude_rad = radians(latitude)
    longitude_rad = radians(longitude)
    height = altitude - geoidSepIn
    cos_lat = cos(latitude_rad)
     
    r = a / sqrt(1 - ee * sin(latitude_rad) * sin(latitude_rad))
    x = (r + height) * cos_lat * cos(longitude_rad)
    y = (r + height) * cos_lat * sin(longitude_rad)
    z = ((1 - ee) * r + height) * sin(latitude_rad)
        
    return x,y,z
LLAtoXYZ_raw(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(-2856867.422762463, -4132876.8004144537, -3916387.577890978)

In [10]:
results = []
r = %timeit -n 1 -r 5 -o for p in points: results.append(LLAtoXYZ_raw(p[0],p[1],p[2],p[3]))
timing_results.loc['Native Python'] = timing_results_filler(r)

1 loop, best of 5: 1.8 s per loop


# Numpy Arrays

In [11]:
#import numpy functions directly
from numpy import sqrt, sin, cos, radians

In [12]:
def LLAtoXYZ_numpy (latitude, longitude, altitude, geoidSepIn = 0, return_array = False):
    # LLAtoXYZ converts a position (latitude, longitude, altitude) to the ECEF X, Y, Z format.
    # The function expects Lat and Long in degrees and Alt in meters and outputs X, Y, Z in meters. 
    # geoidSepIn is the height of the geoid above the WGS84 ellipsoid. It is optional and assumed 
    # to be 0 if not provided (meaning geoid-to-ellipsoid differences are ignored)
    
    latitude_rad = radians(latitude)
    longitude_rad = radians(longitude)
    height = altitude - geoidSepIn
    cos_lat = cos(latitude_rad)
     
    r = a / sqrt(1 - ee * sin(latitude_rad) * sin(latitude_rad))
    x = (r + height) * cos_lat * cos(longitude_rad)
    y = (r + height) * cos_lat * sin(longitude_rad)
    z = ((1 - ee) * r + height) * sin(latitude_rad)
    
    if return_array:
        return np.array([x,y,z])
    else:
        return x,y,z
    
LLAtoXYZ_numpy(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(-2856867.4227624629, -4132876.8004144537, -3916387.5778909782)

In [13]:
r = %timeit -n 1 -r 5 -o results=LLAtoXYZ_numpy(lat_np,long_np,alt_np,geoid_np,True)
timing_results.loc['Numpy'] = timing_results_filler(r)

1 loop, best of 5: 189 ms per loop


# Pandas (Serialized)
added to show how slow pandas can be if not vectorized

In [14]:
#r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z'] = zip(*points_df.apply(lambda row: LLAtoXYZ_raw(row[0],row[1],row[2],row[3]), axis=1))
#timing_results.loc['Pandas (Serialized)'] = timing_results_filler(r)

# Pandas (Vectorized)

In [15]:
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z'] = LLAtoXYZ_numpy(points_df['Lat'],points_df['Long'],points_df['Alt'],points_df['Geoid'],False)
timing_results.loc['Pandas (Vectorized)'] = timing_results_filler(r)

1 loop, best of 5: 188 ms per loop


# Numba

In [16]:
@jit
def LLAtoXYZ_jit (latitude, longitude, altitude, geoidSepIn = 0, return_array = False):
    # LLAtoXYZ converts a position (latitude, longitude, altitude) to the ECEF X, Y, Z format.
    # The function expects Lat and Long in degrees and Alt in meters and outputs X, Y, Z in meters. 
    # geoidSepIn is the height of the geoid above the WGS84 ellipsoid. It is optional and assumed 
    # to be 0 if not provided (meaning geoid-to-ellipsoid differences are ignored)
    
    latitude_rad = radians(latitude)
    longitude_rad = radians(longitude)
    height = altitude - geoidSepIn
    cos_lat = cos(latitude_rad)
     
    r = a / np.sqrt(1 - ee * sin(latitude_rad) * sin(latitude_rad))
    x = (r + height) * cos_lat * cos(longitude_rad)
    y = (r + height) * cos_lat * sin(longitude_rad)
    z = ((1 - ee) * r + height) * sin(latitude_rad)
    
    if return_array:
        return np.array([x,y,z])
    else:
        return x,y,z
    
LLAtoXYZ_jit(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(-2856867.4227624629, -4132876.8004144537, -3916387.5778909782)

In [17]:
r = %timeit -n 1 -r 5 -o results = LLAtoXYZ_jit(lat_np,long_np,alt_np,geoid_np,True)
timing_results.loc['Numba with Numpy Arrays'] = timing_results_filler(r)

1 loop, best of 5: 182 ms per loop


In [18]:
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z'] = LLAtoXYZ_jit(points_df['Lat'],points_df['Long'],points_df['Alt'],points_df['Geoid'],False)
timing_results.loc['Numba with Vectorized Pandas'] = timing_results_filler(r)

1 loop, best of 5: 180 ms per loop


# Numexpr

In [19]:
def LLAtoXYZ_ne (latitude, longitude, altitude, geoidSepIn = 0, return_array = False):
    # LLAtoXYZ converts a position (latitude, longitude, altitude) to the ECEF X, Y, Z format.
    # The function expects Lat and Long in degrees and Alt in meters and outputs X, Y, Z in meters. 
    # geoidSepIn is the height of the geoid above the WGS84 ellipsoid. It is optional and assumed 
    # to be 0 if not provided (meaning geoid-to-ellipsoid differences are ignored)
    
    latitude_rad = radians(latitude)
    longitude_rad = radians(longitude)
    height = altitude - geoidSepIn
     
    r = ne.evaluate("a / sqrt(1 - ee * sin(latitude_rad)**2)")
    x = ne.evaluate("(r + height) * cos(latitude_rad) * cos(longitude_rad)")
    y = ne.evaluate("(r + height) * cos(latitude_rad) * sin(longitude_rad)")
    z = ne.evaluate("((1 - ee) * r + height) * sin(latitude_rad)")
    
    if return_array:
        return np.array([x,y,z])
    else:
        return x,y,z
    
LLAtoXYZ_ne(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(array(-2856867.422762463),
 array(-4132876.8004144537),
 array(-3916387.577890978))

In [20]:
r = %timeit -n 1 -r 5 -o results = LLAtoXYZ_ne(lat_np,long_np,alt_np,geoid_np,True)
timing_results.loc['Numexpr with Numpy Arrays'] = timing_results_filler(r)

1 loop, best of 5: 43.6 ms per loop


In [21]:
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z'] = LLAtoXYZ_ne(points_df['Lat'],points_df['Long'],points_df['Alt'],points_df['Geoid'],False)
timing_results.loc['Numexpr with Vectorized Pandas'] = timing_results_filler(r)

1 loop, best of 5: 53.6 ms per loop


# Cython

In [22]:
%load_ext Cython

In [23]:
%%cython
from cython cimport boundscheck, wraparound
from libc.math cimport sin, cos, sqrt, M_PI
import numpy as np
cimport numpy as cnp

cdef double geoidSep = -29.701  # meters 

cdef double a = 6378137       # Semi-major axis 
cdef double b = 6356752.3142  # Semi-minor axis
cdef double f = (a - b) / a   # flattening
cdef double e = 0.081819191
cdef double ecc = e*e
cdef double radians = M_PI/180.0

@boundscheck(False)
@wraparound(False)
def LLAtoXYZ_cy(double[:] latitude, 
                double[:] longitude, 
                double[:] altitude, 
                double[:] geoidSepIn):
    cdef int i
    cdef int l = len(latitude)
    
    cdef double[:] x = np.empty(l, dtype=np.double)
    cdef double[:] y = np.empty(l, dtype=np.double)
    cdef double[:] z = np.empty(l, dtype=np.double)
    
    cdef double latitude_rad
    cdef double longitude_rad
    cdef double height
    cdef double s_lat
    cdef double c_lat
    cdef double r
    
    for i in range(l):
        latitude_rad = radians*latitude[i]
        longitude_rad = radians*longitude[i]
        height = altitude[i] - geoidSepIn[i]
        s_lat  = sin(latitude_rad)
        c_lat  = cos(latitude_rad)
         
        r = a / sqrt(1 - ecc * s_lat * s_lat)
        x[i] = (r + height) * c_lat * cos(longitude_rad)
        y[i] = (r + height) * c_lat * sin(longitude_rad)
        z[i] = ((1-ecc) * r + height) * s_lat
        
    return x,y,z

  warn("get_ipython_cache_dir has moved to the IPython.paths module")


In [24]:
results_c = LLAtoXYZ_cy(np.array([-38.123456], dtype=np.double),
                     np.array([-124.65432], dtype=np.double),
                     np.array([230], dtype=np.double),
                       np.array([-20], dtype=np.double))
print (results_c[0][0], results_c[1][0], results_c[2][0])
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978)

-2856867.422762463 -4132876.8004144537 -3916387.577890978


In [25]:
r = %timeit -n 1 -r 5 -o results=LLAtoXYZ_cy(lat_np,long_np,alt_np,geoid_np)
timing_results.loc['Cython'] = timing_results_filler(r)

1 loop, best of 5: 68.8 ms per loop


# Summary

In [26]:
timing_results['Vs Native Python'] = timing_results.loc['Native Python']['Best (sec)'] / timing_results['Best (sec)']

In [27]:
timing_results.sort_values('Best (sec)')

Unnamed: 0,Loops,Repeat,Best (sec),Vs Native Python
Numexpr with Numpy Arrays,1.0,5.0,0.043557,41.261702
Numexpr with Vectorized Pandas,1.0,5.0,0.05364,33.505603
Cython,1.0,5.0,0.068755,26.139951
Numba with Vectorized Pandas,1.0,5.0,0.179867,9.99209
Numba with Numpy Arrays,1.0,5.0,0.181816,9.884963
Pandas (Vectorized),1.0,5.0,0.187702,9.575012
Numpy,1.0,5.0,0.188832,9.517707
Native Python,1.0,5.0,1.797244,1.0
