In [1]:
from IPython.core.display import HTML
css_file = '../../../style/style03my.css'
HTML(open(css_file, "r").read())

> Content under [Creative Commons Attribution license CC-BY 4.0](http://creativecommons.org/licenses/by/4.0/), [code under MIT license (c)](http://en.wikipedia.org/wiki/MIT_License) 2016-2017 Sergio Rojas (srojas@usb.ve). 

<b> <center> 
# NumPy and its functionality
<font color='red'>
# Avoiding for loops
</font>
</center></b>

Performing repetitive tasks is at the heart of computer programs. An example of a repetitive task could be the execution of the operations for computing the [United Nations Human Development Index and its components](http://hdr.undp.org/en/composite/HDI) [http://hdr.undp.org/en/composite/HDI] for each country.

Such repetitive tasks are executed using loops. Two common ones in any modern computer language are ** *for loops* ** and ** *while loops* **.

The problem with user implementation of these loops using standard language instructions is that the computational task could be slow depending on the number of times the loop is repeated.

In this sense, NumPy provides ways to avoid computing using Python loops in many operations. In here we are going to see some of such ways that are useful for efficient computing with large data sets.


In [2]:
import numpy as np

In [3]:
x=np.array([0.0, 1.5, 3.0, 4.5, 6.0, ]);
y=np.array([0.0, 0.86, 1.71, 2.57, 3.43, 4.29, 5.14, 6.0]);

In [4]:
def xy_for(x,y):
    nrows = len(x); ncols = len(y)
    temp=np.empty((nrows,ncols))
    for i in range(nrows):
        for j in range(ncols):
            temp[i,j]=x[i]*y[j]
    return temp
print(xy_for(x,y))

[[  0.      0.      0.      0.      0.      0.      0.      0.   ]
 [  0.      1.29    2.565   3.855   5.145   6.435   7.71    9.   ]
 [  0.      2.58    5.13    7.71   10.29   12.87   15.42   18.   ]
 [  0.      3.87    7.695  11.565  15.435  19.305  23.13   27.   ]
 [  0.      5.16   10.26   15.42   20.58   25.74   30.84   36.   ]]


In [5]:
def xy_mat(x,y):
    X, Y  = np.meshgrid(y,x);
    return X*Y
print(xy_mat(x,y))

[[  0.      0.      0.      0.      0.      0.      0.      0.   ]
 [  0.      1.29    2.565   3.855   5.145   6.435   7.71    9.   ]
 [  0.      2.58    5.13    7.71   10.29   12.87   15.42   18.   ]
 [  0.      3.87    7.695  11.565  15.435  19.305  23.13   27.   ]
 [  0.      5.16   10.26   15.42   20.58   25.74   30.84   36.   ]]


## Timing execution using *timeit*
[https://docs.python.org/3/library/timeit.html](https://docs.python.org/3/library/timeit.html)

In [6]:
%timeit xy_mat(x,y)

The slowest run took 7.96 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 12.1 µs per loop


In [7]:
%timeit xy_for(x,y)

100000 loops, best of 3: 14.9 µs per loop


In [8]:
xx=np.append(x,x); yy=np.append(y,y);
xx=np.append(xx,x); yy=np.append(yy,y)
print('xlen = {0}; ylen = {1}'.format(len(xx),len(yy)))

xlen = 15; ylen = 24


In [9]:
%timeit xy_mat(xx,yy)

The slowest run took 4.43 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 14.4 µs per loop


In [10]:
%timeit xy_for(xx,yy)

10000 loops, best of 3: 116 µs per loop


## Avoid loops via matrix operations

In [11]:
temp=xy_mat(x,y)
print(temp)

[[  0.      0.      0.      0.      0.      0.      0.      0.   ]
 [  0.      1.29    2.565   3.855   5.145   6.435   7.71    9.   ]
 [  0.      2.58    5.13    7.71   10.29   12.87   15.42   18.   ]
 [  0.      3.87    7.695  11.565  15.435  19.305  23.13   27.   ]
 [  0.      5.16   10.26   15.42   20.58   25.74   30.84   36.   ]]


In [12]:
Z=temp.T
print(Z)

[[  0.      0.      0.      0.      0.   ]
 [  0.      1.29    2.58    3.87    5.16 ]
 [  0.      2.565   5.13    7.695  10.26 ]
 [  0.      3.855   7.71   11.565  15.42 ]
 [  0.      5.145  10.29   15.435  20.58 ]
 [  0.      6.435  12.87   19.305  25.74 ]
 [  0.      7.71   15.42   23.13   30.84 ]
 [  0.      9.     18.     27.     36.   ]]


In [13]:
Z[0]

array([ 0.,  0.,  0.,  0.,  0.])

In [14]:
Z[:,0]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [15]:
Z[0:3,
  1:3]

array([[ 0.   ,  0.   ],
       [ 1.29 ,  2.58 ],
       [ 2.565,  5.13 ]])

In [16]:
Zlr=np.fliplr(Z)
print(Zlr)

[[  0.      0.      0.      0.      0.   ]
 [  5.16    3.87    2.58    1.29    0.   ]
 [ 10.26    7.695   5.13    2.565   0.   ]
 [ 15.42   11.565   7.71    3.855   0.   ]
 [ 20.58   15.435  10.29    5.145   0.   ]
 [ 25.74   19.305  12.87    6.435   0.   ]
 [ 30.84   23.13   15.42    7.71    0.   ]
 [ 36.     27.     18.      9.      0.   ]]


In [17]:
Zud=np.flipud(Z)
print(Zud)

[[  0.      9.     18.     27.     36.   ]
 [  0.      7.71   15.42   23.13   30.84 ]
 [  0.      6.435  12.87   19.305  25.74 ]
 [  0.      5.145  10.29   15.435  20.58 ]
 [  0.      3.855   7.71   11.565  15.42 ]
 [  0.      2.565   5.13    7.695  10.26 ]
 [  0.      1.29    2.58    3.87    5.16 ]
 [  0.      0.      0.      0.      0.   ]]


In [18]:
XY = np.dot(Z.T,Z) # matrix multiplication
print(XY)

[[    0.         0.         0.         0.         0.    ]
 [    0.       231.4287   462.8574   694.2861   925.7148]
 [    0.       462.8574   925.7148  1388.5722  1851.4296]
 [    0.       694.2861  1388.5722  2082.8583  2777.1444]
 [    0.       925.7148  1851.4296  2777.1444  3702.8592]]


In [19]:
Z = 4*Z**2  + Z # element wise operations
print(Z)

[[    0.         0.         0.         0.         0.    ]
 [    0.         7.9464    29.2056    63.7776   111.6624]
 [    0.        28.8819   110.3976   244.5471   431.3304]
 [    0.        63.2991   245.4864   546.5619   966.5256]
 [    0.       111.0291   433.8264   968.3919  1714.7256]
 [    0.       172.0719   675.4176  1510.0371  2675.9304]
 [    0.       245.4864   966.5256  2163.1176  3835.2624]
 [    0.       333.      1314.      2943.      5220.    ]]


In [20]:
np.sum(Z)

28131.444

In [21]:
columnsum = np.sum(Z, axis = 0) #column wise sum
print(columnsum)

[     0.        961.7148   3774.8592   8439.4332  14955.4368]


In [22]:
rowsum = np.sum(Z, axis = 1) #row wise sum
print(rowsum)

[    0.      212.592   815.157  1821.873  3227.973  5033.457  7210.392
  9810.   ]


In [23]:
print(np.sum(columnsum))
print(np.sum(rowsum))

28131.444
28131.444


In [24]:
print(np.min(Z, axis = 0)); print(np.max(Z, axis = 0)); 

[ 0.  0.  0.  0.  0.]
[    0.   333.  1314.  2943.  5220.]


In [25]:
print(np.sum(Z, axis = 0)); print(np.mean(Z, axis = 0)); 

[     0.        961.7148   3774.8592   8439.4332  14955.4368]
[    0.        120.21435   471.8574   1054.92915  1869.4296 ]


In [26]:
print(np.var(Z, axis = 0)); print(np.std(Z, axis = 0))

[       0.            12797.61728942   199658.86718076  1002239.79394322
  3154134.37149216]
[    0.           113.12655431   446.83203464  1001.11927059  1775.98828022]


## Doing operations with Nan and Inf values

In [27]:
rowsum = np.sum(Z, axis = 1) #row wise sum
print(rowsum)

[    0.      212.592   815.157  1821.873  3227.973  5033.457  7210.392
  9810.   ]


In [28]:
tempvals = [Z[2,3], Z[4,3]]
print(Z[2,3]); print(Z[4,3])

244.5471
968.3919


In [29]:
Z[2,3] = np.nan ; Z[4,4] = np.inf
print(Z[2,3]); print(Z[4,4])

nan
inf


In [30]:
print(np.min(Z, axis = 0)); print(np.max(Z, axis = 0)); 

[  0.   0.   0.  nan   0.]
[    0.   333.  1314.    nan    inf]


In [31]:
print(np.sum(Z, axis = 0)); print(np.mean(Z, axis = 0)); 

[    0.       961.7148  3774.8592        nan        inf]
[   0.       120.21435  471.8574         nan        inf]


In [32]:
Z = np.ma.masked_array(Z, np.isnan(Z));
Z = np.ma.masked_array(Z, np.isinf(Z));
print(Z)

[[0.0 0.0 0.0 0.0 0.0]
 [0.0 7.946400000000001 29.205600000000004 63.7776 111.6624]
 [0.0 28.8819 110.3976 -- 431.3304]
 [0.0 63.29909999999998 245.48639999999995 546.5619 966.5255999999997]
 [0.0 111.02910000000001 433.8264000000001 968.3919 --]
 [0.0 172.07190000000003 675.4176000000001 1510.0371 2675.9304]
 [0.0 245.48639999999995 966.5255999999997 2163.1176 3835.262399999999]
 [0.0 333.0 1314.0 2943.0 5220.0]]


In [33]:
print(np.min(Z, axis = 0)); print(np.max(Z, axis = 0)); 

[0.0 0.0 0.0 0.0 0.0]
[0.0 333.0 1314.0 2943.0 5220.0]


In [34]:
print(np.sum(Z, axis = 0)); print(np.mean(Z, axis = 0)); 

[0.0 961.7148000000001 3774.8592 8194.8861 13240.7112]
[0.0 120.21435000000001 471.8574 1170.6980142857142 1891.5301714285713]


In [35]:
print(np.var(Z, axis = 0)); print(np.std(Z, axis = 0))

[       0.            12797.61728942   199658.86718076  1038197.46785965
  3600817.51393128]
[0.0 113.1265543072116 446.8320346402661 1018.9197553584138
 1897.582017708664]


To work with NaN values, an alternative to the presented method is that some functions can be preceded by "nan" like: ** *nanmedian, nanmean, nanstd,* ** and ** *nanvar* ** (other functions are listed in http://docs.scipy.org/doc/numpy-dev/reference/routines.statistics.html)

## Replacing Nan and Inf values

In [36]:
Z[2,3] = np.nan ; Z[4,4] = np.inf
print(Z[2,3]); print(Z[4,4])

nan
inf


In [37]:
print(np.where(np.isnan(Z))); print(np.where(np.isinf(Z)))

(array([2]), array([3]))
(array([4]), array([4]))


In [38]:
Z[np.where(np.isnan(Z))] = tempvals[0]
Z[np.where(np.isinf(Z))] = tempvals[1]
print(Z)

[[0.0 0.0 0.0 0.0 0.0]
 [0.0 7.946400000000001 29.205600000000004 63.7776 111.6624]
 [0.0 28.8819 110.3976 244.5471 431.3304]
 [0.0 63.29909999999998 245.48639999999995 546.5619 966.5255999999997]
 [0.0 111.02910000000001 433.8264000000001 968.3919 968.3919]
 [0.0 172.07190000000003 675.4176000000001 1510.0371 2675.9304]
 [0.0 245.48639999999995 966.5255999999997 2163.1176 3835.262399999999]
 [0.0 333.0 1314.0 2943.0 5220.0]]


In [39]:
#from IPython.display import HTML
from IPython.core.display import HTML, display
from IPython.display import IFrame


<b> References </b>

* ** Array manipulation routines **<br>
<http://docs.scipy.org/doc/numpy/reference/routines.array-manipulation.html>

In [40]:
HTML('<iframe src=http://docs.scipy.org/doc/numpy/reference/routines.array-manipulation.html width=700 height=350></iframe>')

* **Numpy Universal functions (ufunc)**<br>
<http://docs.scipy.org/doc/numpy/reference/ufuncs.html>

In [41]:
HTML('<iframe src=http://docs.scipy.org/doc/numpy/reference/ufuncs.html width=700 height=350></iframe>')

* ** NumPy Reference **<br>
<http://docs.scipy.org/doc/numpy/reference/index.html>

In [42]:
HTML('<iframe src=http://docs.scipy.org/doc/numpy/reference/index.html width=700 height=350></iframe>')

* ** The numpy.ma module **<br>
<http://docs.scipy.org/doc/numpy/reference/maskedarray.generic.html>

In [43]:
HTML('<iframe src=http://docs.scipy.org/doc/numpy/reference/maskedarray.generic.html width=700 height=350></iframe>')

> Content under [Creative Commons Attribution license CC-BY 4.0](http://creativecommons.org/licenses/by/4.0/), [code under MIT license (c)](http://en.wikipedia.org/wiki/MIT_License) 2016-2017 Sergio Rojas (srojas@usb.ve) 