In [1]:
import sys
import platform
import os
import random as rnd
import math
import numpy as np
import pandas as pd
from numba import jit
import numba
import numexpr as ne

# Software Versions and Machine Hardware

In [2]:
# Library versions the benchmarks were run against.
version_report = "Python: {}\nNumpy: {}\nPandas: {}\nNumba: {}\nNumexpr: {}"
print(version_report.format(sys.version,
                            np.__version__,
                            pd.__version__,
                            numba.__version__,
                            ne.__version__))

# Hardware the timings were collected on.
hardware_report = "Processor: {}\n# of Cores: {}\nMachine: {}\nArchitecture: {}"
print(hardware_report.format(platform.processor(),
                             os.cpu_count(),
                             platform.machine(),
                             platform.architecture()))
#Note:
#Intel64 Family 6 Model 78 Stepping 3 = Intel(R) Core(TM) i5-6300U CPU @ 2.40GHz

Python: 3.5.2 |Anaconda 4.1.1 (64-bit)| (default, Jul  5 2016, 11:41:13) [MSC v.1900 64 bit (AMD64)]
Numpy: 1.11.1
Pandas: 0.18.1
Numba: 0.26.0
Numexpr: 2.6.0
Processor: Intel64 Family 6 Model 78 Stepping 3, GenuineIntel
# of Cores: 4
Machine: AMD64
Architecture: ('64bit', 'WindowsPE')


# Define Sample Data

A list of 1,000,000 tuples; each tuple holds the 4 values that define one point (latitude, longitude, altitude, geoid separation).

In [3]:
size=1000000
# Fixed seed so every run of the notebook benchmarks the same sample.
SEED = 42
rnd.seed(SEED)
# One tuple per point, in the order (latitude, longitude, altitude, geoid separation).
# The four uniform() calls are evaluated left to right for each point.
points = [
    (rnd.uniform(-85,85), rnd.uniform(-175,175), rnd.uniform(-50,12000), rnd.uniform(-50,50))
    for _ in range(size)
]
print ('{} of {} containing {}.'.format(type(points),type(points[0]),type(points[0][0])))

<class 'list'> of <class 'tuple'> containing <class 'float'>.


Numpy arrays: 4 arrays, each 1,000,000 long.

In [4]:
#instead of using np.random.uniform to define these arrays, converting the points list
#so the same values are used for all methods
# Convert the whole list in one call rather than assigning element by element in a
# Python loop. reshape(-1, 4) keeps the unpacking below valid even for an empty list,
# and ascontiguousarray makes each column a contiguous float64 row after the transpose.
_columns = np.ascontiguousarray(np.array(points, dtype=np.float64).reshape(-1, 4).T)
lat_np, long_np, alt_np, geoid_np = _columns
print ('{} (x4) containing {}.'.format(type(lat_np),type(lat_np[0])))

<class 'numpy.ndarray'> (x4) containing <class 'numpy.float64'>.


Pandas DataFrame that is 1,000,000 x 4

In [5]:
# Build the frame from the same list of tuples; column order follows the tuple layout.
points_df = pd.DataFrame(points, columns=['Lat','Long','Alt','Geoid'])
# .iloc is the positional indexer; the deprecated .ix indexer was removed in pandas 1.0.
print ('{} containing {}.'.format(type(points_df),type(points_df.iloc[0,0])))

<class 'pandas.core.frame.DataFrame'> containing <class 'numpy.float64'>.


In [6]:
# Results table: one row per benchmarked method, filled from the objects returned by
# `%timeit -o`. The timing column (seconds) is named 'Best' because the summary cells
# at the end of the notebook index and sort the table by that exact name.
timing_results = pd.DataFrame(columns=['Loops','Repeat','Best'])
def timing_results_filler (r):
    """Turn a %timeit result into one row of ``timing_results``.

    Parameters
    ----------
    r : object exposing ``loops``, ``repeat`` and ``best`` attributes
        (the value returned by ``%timeit -o``).

    Returns
    -------
    list
        ``[loops, repeat, best]``, in the same order as the table's columns.
    """
    return [r.loops, r.repeat, r.best]

# Native Python Lists

In [7]:
# Ellipsoid parameters read as module-level names by every LLAtoXYZ_* implementation.
a = 6378137  # Semi-major axis
b = 6356752.3142  # Semi-minor axis
f = (a - b) / a  # flattening
e = 0.081819191  # eccentricity term used as e**2 below (hard-coded, not derived from a/b here)

def LLAtoXYZ_raw (latitude, longitude, altitude, geoidSepIn = 0):
    """Convert one (latitude, longitude, altitude) position to ECEF X, Y, Z.

    Uses only the ``math`` module, so it works on one scalar point at a time.

    Parameters
    ----------
    latitude, longitude : float
        Position in degrees.
    altitude : float
        Altitude in meters.
    geoidSepIn : float, optional
        Height of the geoid above the WGS84 ellipsoid. Defaults to 0, which
        ignores geoid-to-ellipsoid differences.

    Returns
    -------
    tuple
        ``(x, y, z)`` in meters.
    """
    lat = math.radians(latitude)
    lon = math.radians(longitude)
    ellipsoid_height = altitude - geoidSepIn

    # Prime-vertical radius of curvature at this latitude.
    radius = a / math.sqrt(1 - math.pow(e, 2) * math.pow(math.sin(lat), 2))
    reach = radius + ellipsoid_height

    return (
        reach * math.cos(lat) * math.cos(lon),
        reach * math.cos(lat) * math.sin(lon),
        ((1 - math.pow(e, 2)) * radius + ellipsoid_height) * math.sin(lat),
    )
LLAtoXYZ_raw(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(-2856867.422762463, -4132876.8004144537, -3916387.577890978)

In [8]:
# Baseline: call the scalar LLAtoXYZ_raw once per point from a plain Python loop.
# -n 1 -r 5 runs the statement once per repeat for 5 repeats; -o returns the timing object.
# The statement appends to the same `results` list on every repeat, so the list keeps
# growing across the 5 repeats.
results = []
r = %timeit -n 1 -r 5 -o for p in points: results.append(LLAtoXYZ_raw(p[0],p[1],p[2],p[3]))
timing_results.loc['Native Python'] = timing_results_filler(r)

1 loop, best of 5: 3.1 s per loop


# Numpy Arrays

In [9]:
def LLAtoXYZ_numpy (latitude, longitude, altitude, geoidSepIn = 0, return_array = False):
    """Convert (latitude, longitude, altitude) positions to ECEF X, Y, Z with NumPy ufuncs.

    Same formula as ``LLAtoXYZ_raw``, but every step uses a ``np.*`` function, so the
    inputs may be scalars or whole arrays.

    Parameters
    ----------
    latitude, longitude
        Position(s) in degrees.
    altitude
        Altitude(s) in meters.
    geoidSepIn : optional
        Height of the geoid above the WGS84 ellipsoid. Defaults to 0, which
        ignores geoid-to-ellipsoid differences.
    return_array : bool, optional
        When true, return ``np.array([x, y, z])``; otherwise return the tuple
        ``(x, y, z)``.

    Returns
    -------
    X, Y, Z in meters, packaged as selected by ``return_array``.
    """
    lat = np.radians(latitude)
    lon = np.radians(longitude)
    ellipsoid_height = altitude - geoidSepIn

    # Prime-vertical radius of curvature; reads the module-level a and e.
    radius = a / np.sqrt(1 - np.power(e, 2) * np.power(np.sin(lat), 2))
    reach = radius + ellipsoid_height

    x = reach * np.cos(lat) * np.cos(lon)
    y = reach * np.cos(lat) * np.sin(lon)
    z = ((1 - np.power(e, 2)) * radius + ellipsoid_height) * np.sin(lat)

    return np.array([x,y,z]) if return_array else (x,y,z)

LLAtoXYZ_numpy(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(-2856867.4227624629, -4132876.8004144537, -3916387.5778909782)

In [10]:
# Time a single whole-array call on the four NumPy columns; return_array=True makes the
# function stack x, y, z into one array.
# NOTE(review): the `results=` assignment sits inside the %timeit statement and probably
# does not update the notebook-level `results` - confirm if the value is needed later.
r = %timeit -n 1 -r 5 -o results=LLAtoXYZ_numpy(lat_np,long_np,alt_np,geoid_np,True)
timing_results.loc['Numpy'] = timing_results_filler(r)

1 loop, best of 5: 186 ms per loop


# Pandas (Serialized)

In [11]:
# Row-by-row DataFrame.apply that calls the scalar LLAtoXYZ_raw for each row, reading the
# inputs positionally (row[0]..row[3]); zip(*...) splits the returned (x, y, z) tuples
# into the three new columns.
# The statement adds X, Y, Z columns to points_df, so repeats after the first iterate
# over wider rows (only the first four values are used).
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z']  = zip(*points_df.apply(lambda row: LLAtoXYZ_raw(row[0],row[1],row[2],row[3]), axis=1))
timing_results.loc['Pandas (Serialized)'] = timing_results_filler(r)

1 loop, best of 5: 52.2 s per loop


# Pandas (Vectorized)

In [12]:
# Pass whole DataFrame columns to the NumPy implementation in one call and assign the
# returned (x, y, z) tuple to the X, Y, Z columns of points_df.
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z'] = LLAtoXYZ_numpy(points_df['Lat'],points_df['Long'],points_df['Alt'],points_df['Geoid'],False)
timing_results.loc['Pandas (Vectorized)'] = timing_results_filler(r)

1 loop, best of 5: 185 ms per loop


# Numba

In [13]:
# Numba-decorated copy of LLAtoXYZ_numpy: the function body is the same, only the
# @jit decorator is added. It reads the module-level a and e.
# NOTE(review): the two return branches produce different types (ndarray vs. tuple);
# check whether numba compiles this in nopython mode or falls back to object mode -
# the recorded timings are essentially the same as the plain NumPy version.
@jit
def LLAtoXYZ_jit (latitude, longitude, altitude, geoidSepIn = 0, return_array = False):
    # LLAtoXYZ converts a position (latitude, longitude, altitude) to the ECEF X, Y, Z format.
    # The function expects Lat and Long in degrees and Alt in meters and outputs X, Y, Z in meters. 
    # geoidSepIn is the height of the geoid above the WGS84 ellipsoid. It is optional and assumed 
    # to be 0 if not provided (meaning geoid-to-ellipsoid differences are ignored)
    # return_array selects the packaging of the result: np.array([x, y, z]) when true,
    # the tuple (x, y, z) otherwise.
    
    latitude_rad = np.radians(latitude)
    longitude_rad = np.radians(longitude)
    height = altitude - geoidSepIn
     
    # r is recomputed from latitude only; x, y, z then add the height term.
    r = a / np.sqrt(1 - np.power(e, 2) * np.power(np.sin(latitude_rad), 2))
    x = (r + height) * np.cos(latitude_rad) * np.cos(longitude_rad)
    y = (r + height) * np.cos(latitude_rad) * np.sin(longitude_rad)
    z = ((1 - np.power(e, 2)) * r + height) * np.sin(latitude_rad)
    
    if return_array:
        return np.array([x,y,z])
    else:
        return x,y,z
    
# Scalar sanity check; expected values are listed below.
LLAtoXYZ_jit(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(-2856867.4227624629, -4132876.8004144537, -3916387.5778909782)

In [14]:
# Time the @jit-decorated function on the four NumPy columns (array result requested).
# The scalar call made when the function was defined has already invoked it once.
r = %timeit -n 1 -r 5 -o results = LLAtoXYZ_jit(lat_np,long_np,alt_np,geoid_np,True)
timing_results.loc['Numba with Numpy Arrays'] = timing_results_filler(r)

1 loop, best of 5: 183 ms per loop


In [15]:
# Same @jit-decorated function, fed DataFrame columns; the returned (x, y, z) tuple is
# assigned to the X, Y, Z columns of points_df.
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z'] = LLAtoXYZ_jit(points_df['Lat'],points_df['Long'],points_df['Alt'],points_df['Geoid'],False)
timing_results.loc['Numba with Vectorized Pandas'] = timing_results_filler(r)

1 loop, best of 5: 183 ms per loop


# Numexpr

In [16]:
# Numexpr variant: the degree-to-radian conversion and the height subtraction stay in
# NumPy, while the four formula lines are evaluated by ne.evaluate from strings.
def LLAtoXYZ_ne (latitude, longitude, altitude, geoidSepIn = 0, return_array = False):
    # LLAtoXYZ converts a position (latitude, longitude, altitude) to the ECEF X, Y, Z format.
    # The function expects Lat and Long in degrees and Alt in meters and outputs X, Y, Z in meters. 
    # geoidSepIn is the height of the geoid above the WGS84 ellipsoid. It is optional and assumed 
    # to be 0 if not provided (meaning geoid-to-ellipsoid differences are ignored)
    # return_array selects the packaging of the result: np.array([x, y, z]) when true,
    # the tuple (x, y, z) otherwise.
    
    latitude_rad = np.radians(latitude)
    longitude_rad = np.radians(longitude)
    height = altitude - geoidSepIn
     
    # The expression strings name this function's local variables (latitude_rad,
    # longitude_rad, height, r) and the module-level a and e; no variable dictionary is
    # passed, so those names must stay in sync with the strings.
    r = ne.evaluate("a / sqrt(1 - e**2 * sin(latitude_rad)**2)")
    x = ne.evaluate("(r + height) * cos(latitude_rad) * cos(longitude_rad)")
    y = ne.evaluate("(r + height) * cos(latitude_rad) * sin(longitude_rad)")
    z = ne.evaluate("((1 - e**2) * r + height) * sin(latitude_rad)")
    
    if return_array:
        return np.array([x,y,z])
    else:
        return x,y,z
    
# Scalar sanity check; with scalar inputs the recorded output shows each component as a
# 0-d array rather than a plain float.
LLAtoXYZ_ne(-38.123456,-124.65432,230,-20)
#X= -2856867.422762463
#Y= -4132876.8004144537
#Z= -3916387.577890978

(array(-2856867.422762463),
 array(-4132876.8004144537),
 array(-3916387.577890978))

In [17]:
# Time the numexpr implementation on the four NumPy columns (array result requested).
r = %timeit -n 1 -r 5 -o results = LLAtoXYZ_ne(lat_np,long_np,alt_np,geoid_np,True)
timing_results.loc['Numexpr with Numpy Arrays'] = timing_results_filler(r)

1 loop, best of 5: 39.9 ms per loop


In [18]:
# Numexpr implementation fed DataFrame columns; the returned (x, y, z) tuple is assigned
# to the X, Y, Z columns of points_df.
r = %timeit -n 1 -r 5 -o points_df['X'],points_df['Y'],points_df['Z'] = LLAtoXYZ_ne(points_df['Lat'],points_df['Long'],points_df['Alt'],points_df['Geoid'],False)
timing_results.loc['Numexpr with Vectorized Pandas'] = timing_results_filler(r)

1 loop, best of 5: 46.5 ms per loop


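# Summary

Note: the figures in this table do not match the 1,000,000-point timings computed in the cells below (for example, Numexpr with Numpy Arrays is 19.8X faster here but about 77.7X faster there). They appear to come from an earlier run and are reported in µs rather than seconds.

Method|Timing (µs)|Vs Native Python
---|---:|---: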
Native Python|2990|N/A
Numpy|193|15.5X faster
Pandas (Serialized)|61400|20.5X *slower*
Pandas (Vectorized)|2570|1.2X faster
Numba with Numpy Arrays|187|16.0X faster
Numba with Vectorized Pandas|2190|1.4X faster
Numexpr with Numpy Arrays|151|19.8X faster
Numexpr with Vectorized Pandas|960|3.1X faster

In [24]:
# Speed-up of each method relative to the 'Native Python' row: values above 1 are
# faster than the baseline, values below 1 are slower.
native_best = timing_results.loc['Native Python']['Best']
timing_results['Vs Native Python'] = native_best / timing_results['Best']

In [25]:
# Show the table ordered by best time, fastest method first.
timing_results.sort_values(by='Best')

Unnamed: 0,Loops,Repeat,Best,Vs Native Python
Numexpr with Numpy Arrays,1.0,5.0,0.039934,77.726683
Numexpr with Vectorized Pandas,1.0,5.0,0.046478,66.783515
Numba with Vectorized Pandas,1.0,5.0,0.18267,16.992103
Numba with Numpy Arrays,1.0,5.0,0.182699,16.989322
Pandas (Vectorized),1.0,5.0,0.184662,16.808798
Numpy,1.0,5.0,0.186104,16.678508
Native Python,1.0,5.0,3.10394,1.0
Pandas (Serialized),1.0,5.0,52.20279,0.059459


Note: %timeit executes the statement X times per run and repeats that run Y times. It reports the average time per loop (run time / X) for the fastest of the Y runs. X is called "Loops" and Y is called "Repeat".
Example: Loops = X = 1000, Repeat = Y = 3, Best = 20 ms. The statement ran 3,000 times in total (3 runs of 1,000 loops each), and the fastest of the 3 runs took 20,000 ms, i.e. an average of 20 ms per loop.