In [6]:
# Import dependencies
import pandas as pd
import numpy as np

In [7]:
# Universal Functions
# Computation on NumPy arrays can be very fast, or it can be very slow.
# The key to making it fast is to use vectorized operations, which are generally
# implemented through NumPy's universal functions (ufuncs).

# Python's default implementation, CPython, performs some operations very slowly.
# Other projects have attempted to address this, such as PyPy (a JIT-compiled
# implementation of Python), Cython, and Numba.

In [8]:
# Timing Code
import numpy as np
rng = np.random.default_rng(seed=1000)

def compute_reciprocals(values):
    """Return the element-wise reciprocals of ``values`` as a float array.

    Deliberately written as an explicit Python loop so that it can be timed
    against the vectorized ``1.0 / values`` expression.

    Parameters
    ----------
    values : sequence
        Any object supporting ``len()`` and integer indexing.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``len(values)`` holding ``1.0 / values[i]``.
    """
    count = len(values)
    reciprocals = np.empty(count)
    # One Python-level division per element: this is the slow path on purpose.
    for idx in range(count):
        reciprocals[idx] = 1.0 / values[idx]
    return reciprocals

values = rng.integers(1, 10, size=5)
compute_reciprocals(values)

array([0.5       , 0.2       , 0.125     , 0.16666667, 0.125     ])

In [15]:
# The loop-based implementation above probably feels fairly natural,
# but timing it on a large array shows how slow it is.
bigArray = rng.integers(1, 100, size=1_000_000)
%timeit compute_reciprocals(bigArray)

3.72 s ± 83.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
compute_reciprocals(bigArray)

array([0.01587302, 0.02439024, 0.01111111, ..., 0.01470588, 0.0212766 ,
       0.01470588])

In [10]:
compute_reciprocals(values)

array([0.5       , 0.2       , 0.125     , 0.16666667, 0.125     ])

In [11]:
print(1.0/values)

[0.5        0.2        0.125      0.16666667 0.125     ]


In [13]:
%timeit (1.0/bigArray)

6.08 ms ± 129 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
# Looking at the execution time for our big array, we see that the vectorized
# operation (~6 ms) completes orders of magnitude faster than the Python loop (~3.7 s).

# Vectorized operations in NumPy are implemented via ufuncs, whose main purpose
# is to quickly execute operations on values in NumPy arrays.

In [21]:
# Ufuncs also operate element-wise between two arrays of the same length
np.arange(5) / np.arange(1, 6)

array([0.        , 0.5       , 0.66666667, 0.75      , 0.8       ])

In [25]:
# Ufuncs are not limited to 1-D arrays: the same element-wise operation
# is applied to every entry of this 3x3 array
x = np.arange(9).reshape((3, 3))
2**x

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]])

In [28]:
# Use the `out` argument to have the ufunc write its result directly into an
# existing array (y) rather than returning a newly allocated one
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 10, out=y)
print(y)

[ 0. 10. 20. 30. 40.]


In [34]:
# `out` also accepts an array view: store 2**x in every other element of y.
# x is reused from the previous cell (np.arange(5)).
y = np.zeros(10)
np.power(2, x, out=y[::2])
y

array([ 1.,  0.,  2.,  0.,  4.,  0.,  8.,  0., 16.,  0.])

In [32]:
# Same values as the `out=` form, but 2**x is evaluated first and its result
# is then assigned into the strided slice of y
y[::2] = 2**x

In [33]:
y

array([ 1.,  0.,  2.,  0.,  4.,  0.,  8.,  0., 16.,  0.])

In [None]:
# Pandas inherits element-wise operations (such as addition, subtraction, multiplication, etc.)
# and unary operations (such as negation, trigonometry, exponentials, etc.) from NumPy.

# For unary operations, Pandas preserves index and column labels.
# For binary operations, Pandas will automatically align indices when passing
# the objects to the ufunc.

In [35]:
# Ufuncs: Index Preservation
# Rebind rng to a generator seeded with 42 so the example values are reproducible
rng = np.random.default_rng(42)
ser = pd.Series(rng.integers(0, 10, 4))
ser

0    0
1    7
2    6
3    4
dtype: int64

In [36]:
# 3x4 DataFrame of random integers drawn from the same generator,
# with explicit column labels A-D
df = pd.DataFrame(rng.integers(0, 10, (3, 4)),
                  columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
0,4,8,0,6
1,2,0,5,9
2,7,7,7,7


In [37]:
# Applying a NumPy ufunc to a Series: the result is another Series
# carrying the same index
np.exp(ser)

0       1.000000
1    1096.633158
2     403.428793
3      54.598150
dtype: float64

In [40]:
# The same holds for a DataFrame: the result keeps the row index and column labels
np.sin(df*np.pi / 4)

Unnamed: 0,A,B,C,D
0,1.224647e-16,-2.449294e-16,0.0,-1.0
1,1.0,0.0,-0.707107,0.707107
2,-0.7071068,-0.7071068,-0.707107,-0.707107


In [41]:
# Index alignment
# For binary operations, Pandas aligns the indices of its inputs while performing the operation.
# This is convenient when working with incomplete data.

# General Elections 1996
# Total votes per label; note that there is no 'Bushenyi' entry here.
votes = pd.Series(
    data=[169_583, 214_680, 53_819, 129_200],
    index=['Apac', 'Arua', 'Bundibugyo', 'Gulu'],
)

# Valid votes per label, including 'Bushenyi'.
validVotes = pd.Series(
    data=[159_332, 201_446, 52_463, 248_730, 123_134],
    index=['Apac', 'Arua', 'Bundibugyo', 'Bushenyi', 'Gulu'],
)

# The subtraction is aligned on the index labels. A label present in only one
# operand ('Bushenyi') yields NaN, which is why the result is float64.
invalidVotes = votes - validVotes
invalidVotes

Apac          10251.0
Arua          13234.0
Bundibugyo     1356.0
Bushenyi          NaN
Gulu           6066.0
dtype: float64