In [1]:
# Notebook Package Importing
import numpy as np

# NUMPY

 - Numpy provides python with its numerical muscle. 
 - This is your go-to package. 
 - The package is written in C and made to deal with N-dimensional arrays, all basic mathematical operations, linear algebra operations, et cetera. 
 - We will not cover everything this module can do. For more, see the NumPy reference documentation: 
 https://numpy.org/doc/stable/reference/index.html

Numpy arrays are the base object containing a variety of powerful methods. <br>
Making a Numpy array is easy:


In [2]:
array1 = np.array([1,2,3])

Note that it is np.array([1,2,3]) <br>
not np.array[1, 2, 3]

In [3]:
# You can convert Lists to NP Arrays
# and believe me, you will convert lots of lists to NP arrays
list1 = [1.0, 2.0, 3.0]

In [4]:
array2 = np.array(list1)

All data in a Numpy array must be of a single data type (dtype). <br>
Numpy has a large number of possible data types:

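- np.str_ ==> string
- np.bool_ ==> boolean (i.e., True|False)
- np.int_ ==> integer
- np.float64 ==> floating point
- np.complex128 ==> complex (i.e., 1+1j)

(The unsuffixed aliases np.str, np.int, np.float and np.complex were deprecated in NumPy 1.20 and removed in NumPy 1.24, so prefer the names above or the Python built-ins str, bool, int, float and complex.)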

In [5]:
# currently, array1 is an integer array
# if we want a float version of that integer array, we use astype(),
# which returns a new array with the requested dtype and leaves array1 unchanged.
# (np.float_ is not used here because it was removed in NumPy 2.0.)
array3 = array1.astype(np.float64)

In [6]:
print(array1)
print(array3)

[1 2 3]
[1. 2. 3.]


## Multidimensionality

Numpy arrays can be N-dimensional, which is of particular use with tables of data (i.e. 2-D)

In [7]:
# Creating a 3x2 Array:
array4 = np.array( [[1, 2], [3, 4], [5, 6]])
print(array4)

[[1 2]
 [3 4]
 [5 6]]


In [8]:
array4.shape

(3, 2)

In [9]:
array4.size

6

In [10]:
array4[:,1]

array([2, 4, 6])

In [11]:
array4[1,:]

array([3, 4])

In [12]:
array4.flatten()

array([1, 2, 3, 4, 5, 6])

In [13]:
array4.reshape((2,3))

array([[1, 2, 3],
       [4, 5, 6]])

In [14]:
array4.reshape((-1,1))

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6]])

# Special Array Creation Functions

In [15]:
array5 = np.arange(10)
print(array5)

[0 1 2 3 4 5 6 7 8 9]


In [16]:
array6 = np.arange(0,51,5)
print(array6)

[ 0  5 10 15 20 25 30 35 40 45 50]


In [17]:
array7 = np.ones(10)
print(array7)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [18]:
array8 = np.zeros((3,5))
print(array8)

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [19]:
array9 = np.identity(6)
print(array9)

[[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]]


## Some Built-in Numpy Functionality

In [20]:
np.arange(9).reshape((3,3))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [21]:
array6

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50])

In [22]:
array6.min(), array6.max()

(0, 50)

In [23]:
array1 = np.arange(1,10,1.0)
array1.mean(), array1.sum(), array1.prod()

(5.0, 45.0, 362880.0)

In [24]:
# 12 random integers from 1 to 9 (the upper bound 10 is excluded), arranged as 4 rows x 3 columns.
# No seed is set, so the values change every time this cell runs.
array10 = np.random.randint(1,10,12).reshape((4,3))
print(array10)

[[7 1 3]
 [2 4 5]
 [1 5 1]
 [3 3 5]]


In [25]:
array10.mean()

3.3333333333333335

In [26]:
# average of each column
array10.mean(axis=0)

array([3.25, 3.25, 3.5 ])

In [27]:
# average of each row
array10.mean(axis=1)

array([3.66666667, 3.66666667, 2.33333333, 3.66666667])

In [28]:
# Average of column and rows.
array10.mean(axis=(0,1))

3.3333333333333335

In [29]:
# sum of columns
array10.sum(axis=0)

array([13, 13, 14])

In [30]:
# sum of rows
array10.sum(axis=1)

array([11, 11,  7, 11])

In [31]:
print(array10)

[[7 1 3]
 [2 4 5]
 [1 5 1]
 [3 3 5]]


In [32]:
array10.transpose()

array([[7, 2, 1, 3],
       [1, 4, 5, 3],
       [3, 5, 1, 5]])

In [33]:
# another method of transposing
array10.T

array([[7, 2, 1, 3],
       [1, 4, 5, 3],
       [3, 5, 1, 5]])

In [34]:
# and yet another method of transposing
array10.swapaxes(0, 1)

array([[7, 2, 1, 3],
       [1, 4, 5, 3],
       [3, 5, 1, 5]])

In [35]:
# # RESHAPING

In [36]:
np.arange(90).reshape((9, 10))

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89]])

In [37]:
np.arange(90).reshape((-1, 10))
# here -1 means "hey NumPy, you determine the length along this axis"
# we fix the number of columns at 10, and NumPy works out the number of rows (90 / 10 = 9)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89]])

In [38]:
array1 = np.arange(90).reshape((-1, 10))
array1.shape == (9, 10)

True

In [39]:
# Let's change some elements and observe where they are

In [40]:
array1[0,0]= 1000
print(array1)

[[1000    1    2    3    4    5    6    7    8    9]
 [  10   11   12   13   14   15   16   17   18   19]
 [  20   21   22   23   24   25   26   27   28   29]
 [  30   31   32   33   34   35   36   37   38   39]
 [  40   41   42   43   44   45   46   47   48   49]
 [  50   51   52   53   54   55   56   57   58   59]
 [  60   61   62   63   64   65   66   67   68   69]
 [  70   71   72   73   74   75   76   77   78   79]
 [  80   81   82   83   84   85   86   87   88   89]]


In [41]:
array1[5,:]= np.ones(10)
print(array1)

[[1000    1    2    3    4    5    6    7    8    9]
 [  10   11   12   13   14   15   16   17   18   19]
 [  20   21   22   23   24   25   26   27   28   29]
 [  30   31   32   33   34   35   36   37   38   39]
 [  40   41   42   43   44   45   46   47   48   49]
 [   1    1    1    1    1    1    1    1    1    1]
 [  60   61   62   63   64   65   66   67   68   69]
 [  70   71   72   73   74   75   76   77   78   79]
 [  80   81   82   83   84   85   86   87   88   89]]


In [42]:
array1[:,4]= np.zeros(9)
print(array1)

[[1000    1    2    3    0    5    6    7    8    9]
 [  10   11   12   13    0   15   16   17   18   19]
 [  20   21   22   23    0   25   26   27   28   29]
 [  30   31   32   33    0   35   36   37   38   39]
 [  40   41   42   43    0   45   46   47   48   49]
 [   1    1    1    1    0    1    1    1    1    1]
 [  60   61   62   63    0   65   66   67   68   69]
 [  70   71   72   73    0   75   76   77   78   79]
 [  80   81   82   83    0   85   86   87   88   89]]


In [43]:
array1[3,1:8] = 2*np.ones(7)
print(array1)

[[1000    1    2    3    0    5    6    7    8    9]
 [  10   11   12   13    0   15   16   17   18   19]
 [  20   21   22   23    0   25   26   27   28   29]
 [  30    2    2    2    2    2    2    2   38   39]
 [  40   41   42   43    0   45   46   47   48   49]
 [   1    1    1    1    0    1    1    1    1    1]
 [  60   61   62   63    0   65   66   67   68   69]
 [  70   71   72   73    0   75   76   77   78   79]
 [  80   81   82   83    0   85   86   87   88   89]]


In [44]:
# In a slice, -1 refers to the last index, and the stop of a slice is excluded:
# 5:-1 selects rows 5, 6 and 7 (not the last row, 8); 1:3 selects columns 1 and 2
array1[5:-1,1:3] = [[3,3],[3,3],[3,3]]
print(array1)

[[1000    1    2    3    0    5    6    7    8    9]
 [  10   11   12   13    0   15   16   17   18   19]
 [  20   21   22   23    0   25   26   27   28   29]
 [  30    2    2    2    2    2    2    2   38   39]
 [  40   41   42   43    0   45   46   47   48   49]
 [   1    3    3    1    0    1    1    1    1    1]
 [  60    3    3   63    0   65   66   67   68   69]
 [  70    3    3   73    0   75   76   77   78   79]
 [  80   81   82   83    0   85   86   87   88   89]]


In [45]:
# Non-sequential indexing!
# The two tuples are paired element by element (row tuple, column tuple):
# this sets array1[5, 6] and array1[7, 8]
array1[(5, 7), (6, 8)] = [-20,-20]
print(array1)

[[1000    1    2    3    0    5    6    7    8    9]
 [  10   11   12   13    0   15   16   17   18   19]
 [  20   21   22   23    0   25   26   27   28   29]
 [  30    2    2    2    2    2    2    2   38   39]
 [  40   41   42   43    0   45   46   47   48   49]
 [   1    3    3    1    0    1  -20    1    1    1]
 [  60    3    3   63    0   65   66   67   68   69]
 [  70    3    3   73    0   75   76   77  -20   79]
 [  80   81   82   83    0   85   86   87   88   89]]


In [46]:
# a = b vs a = copy(b)
array1 = np.array([1, 2, 3]) 
array2 = array1
print('Before______')
print(array2)
array1[0] = 5
print('After_______')
print(array2)

Before______
[1 2 3]
After_______
[5 2 3]


In [47]:
# Plain assignment (array2 = array1) does not copy any data: both names then
# refer to the same array object, which saves memory.
# This is why when we made a change in array1 above, we also changed array2
#
# To get an independent array, make an explicit copy with np.copy:
array1 = np.array([1, 2, 3]) 
array2 = np.copy(array1)
print('Before______')
print(array2)
array1[0] = 5
print('After_______')
print(array2)

Before______
[1 2 3]
After_______
[1 2 3]


### Numpy Load Text Files: np.loadtxt
There are lots of options on this function, so check the docs, but some of the most used: <br> 
array2 = np.loadtxt(filename, dtype=dtype, comments='#', delimiter=',', skiprows=5,
 usecols=(0, 1, 2)) <br>
 - This skips all comments (designated with a #) and the first 5 rows. 
 - It then reads in columns 0, 1, and 2, delimited by a comma

When in doubt about arguments and what form they should be, check the docs:

In [48]:
np.loadtxt?

In [49]:
# Alternatively remember using the Shift+Tab to access the documentation.

Loadtxt can read in gzipped (.gz) and Bzip2 (.bz2) files without them being unzipped.

### Mathematical Operations
Mathematical operations proceed element-wise, as follows

In [50]:
array1 = np.array([0.5, 1.0, 1.5, 2.0])
print(array1+5)

[5.5 6.  6.5 7. ]


In [51]:
print(array1*2)

[1. 2. 3. 4.]


In [52]:
print(array1**2)

[0.25 1.   2.25 4.  ]


In [53]:
array2 = np.copy(array1)
print(array1 + array2)

[1. 2. 3. 4.]


In [54]:
print(array1*array2)

[0.25 1.   2.25 4.  ]


In [55]:
np.log10(array1)

array([-0.30103   ,  0.        ,  0.17609126,  0.30103   ])

In [56]:
np.exp(array1)

array([1.64872127, 2.71828183, 4.48168907, 7.3890561 ])

In [57]:
np.sin(array1)

array([0.47942554, 0.84147098, 0.99749499, 0.90929743])

In [58]:
np.cosh(array1)

array([1.12762597, 1.54308063, 2.35240962, 3.76219569])

## Matrix Math

In [59]:
# For 2-D (and higher) matrices, you can do standard matrix math:
arr1 = np.array([[0,1],[2,3]])
arr2 = np.array([[4,5],[6,7]])
print(arr1)
print(arr2)

[[0 1]
 [2 3]]
[[4 5]
 [6 7]]


In [60]:
# Doing standard matrix math: 
# for 2-D arrays np.dot is the matrix product (rows of arr1 times columns of arr2),
# not an element-by-element product
np.dot(arr1, arr2)  # dot product

array([[ 6,  7],
       [26, 31]])

In [61]:
# Cross product
# arr1 and arr2 each hold two 2-component row vectors, so the result has one value per pair of rows.
# NOTE(review): NumPy 2.0 appears to deprecate np.cross on 2-component vectors
# (3-component vectors are expected) - confirm against the installed NumPy version.
np.cross(arr1, arr2) 

array([-4, -4])

In [62]:
# Eigenvalues and eigenvectors
np.linalg.eig(arr1) 

(array([-0.56155281,  3.56155281]),
 array([[-0.87192821, -0.27032301],
        [ 0.48963374, -0.96276969]]))

In [63]:
# calculating inverses:
np.linalg.inv(arr1)

array([[-1.5,  0.5],
       [ 1. ,  0. ]])

In [64]:
# determinant
np.linalg.det(arr1)

-2.0

### Searching Arrays

In [65]:
arr1 = np.arange(6).reshape((2,3))
print(arr1)

[[0 1 2]
 [3 4 5]]


In [66]:
# np.where called with only a condition returns the indices where arr1 > 1.
# Those positions are [0,2], [1,0], [1,1], [1,2],
# but they come back as a tuple of two arrays: (row indices, column indices)
print ('Indices of elements >1')
np.where(arr1 > 1)

Indices of elements >1


(array([0, 1, 1, 1], dtype=int64), array([2, 0, 1, 2], dtype=int64))

In [67]:
print(arr1[0, 2])

2


In [68]:
print(arr1[1, 0])

3


In [69]:
print("Elements which are >1")
indices = np.where(arr1 > 1)
print(arr1[indices])

Elements which are >1
[2 3 4 5]


In [70]:
print("Elements which are >1")
print(arr1[np.where(arr1 > 1)])

Elements which are >1
[2 3 4 5]


In [71]:
# you can combine more than one criterion with & (and) or | (or);
# each comparison needs its own ()s because & binds more tightly than > and <
np.where((arr1 > 1) & (arr1<4))

(array([0, 1], dtype=int64), array([2, 0], dtype=int64))

## Vectorizing Functions

In [72]:
# Sometimes, you'll want to write more complex functions that don't 
# automatically work with numpy arrays.
# Solution: vectorization.
# Let's see how vectorization works with an example

In [73]:
def funct1(val):
    """Return sin(val) when val is below pi/2, otherwise cos(val).

    Accepts a single value only: the comparison has to yield one True/False,
    so an array with more than one element raises ValueError.
    """
    half_pi = np.pi/2
    return np.sin(val) if val < half_pi else np.cos(val)

In [74]:
# this will work
funct1(np.pi/4)

0.7071067811865476

In [75]:
z = np.linspace(0,np.pi,7)
print(z)

[0.         0.52359878 1.04719755 1.57079633 2.0943951  2.61799388
 3.14159265]


In [76]:
# Calling funct1 with the array "z" fails: the `if` inside funct1 needs a single
# True/False, but comparing an array gives one value per element.
# The error is caught and printed (instead of left uncaught) so that
# "Restart Kernel -> Run All" can continue past this cell.
try:
    funct1(z)
except ValueError as err:
    print("ValueError:", err)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [77]:
# Now it will work
vfunct1 = np.vectorize(funct1)
vfunct1(z)
# because the vectorize function makes functions like this work for arrays

array([ 0.00000000e+00,  5.00000000e-01,  8.66025404e-01,  6.12323400e-17,
       -5.00000000e-01, -8.66025404e-01, -1.00000000e+00])

In [78]:
# Another option is to loop over the elements of the array.

# Using a list comprehension, build a placeholder list of zeros with one slot per element of z.
z_vect = [0 for a in range(len(z))]

# Could also copy the array z.
#z_vect = z.copy()
# Note: z_vect = z would also run, but it makes z_vect another name for the same array,
# so the loop below would overwrite the values in z itself.

# Call funct1 on one element at a time and store each result.
for element in range(len(z)):
    z_vect[element] = funct1(z[element])
print(z_vect)

[0.0, 0.49999999999999994, 0.8660254037844386, 6.123233995736766e-17, -0.4999999999999998, -0.8660254037844385, -1.0]


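While this is fine to do for functions you don't need high performance on, it is slow(ish): both np.vectorize and the explicit loop call funct1 once per element in Python. For speed, consider rewriting the function with array operations, for example `np.where(z < np.pi/2, np.sin(z), np.cos(z))`.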

## Saving your output

In [79]:
# For individual numpy arrays, there are some quick and dirty methods to save your data:

In [80]:
# Quick and Dirty in Text: 
x = np.arange(100).reshape((25,4))
np.savetxt('test.dat', x)

In [81]:
# Numpy also has its own binary formats (.npy, .npz) that allow for quick reading of data
# Saving a single array: 
np.save('test.npy', arr1)

# Saving multiple arrays: the keyword names (a1, a2) are the names used to look the arrays up after loading
x2 = np.arange(20).reshape((5,4))
np.savez('test', a1=x, a2=x2)   # the .npz suffix is added to 'test' automatically

# Output file extensions are based on how many arrays you have in the save file:
# .npy is for a single array and 
# .npz is for multiple

### Loading Saved Output
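To list the files in a directory from Python, use os.listdir() from the os module. You can also type "ls" in a cell: "ls" is a Unix command, but IPython makes it available inside notebooks.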

In [82]:
import os

In [83]:
os.listdir()

['.ipynb_checkpoints',
 '1_DATA601_Additional_Material_Review.pdf',
 '1_DATA601_Additional_Material_Review.pptx',
 '2_DATA601_Numpy_Tutorial.ipynb',
 '3_Numpy_Exercise.ipynb',
 'README',
 'test.dat',
 'test.npy',
 'test.npz']

In [84]:
ls

 Volume in drive C is OS
 Volume Serial Number is 0A49-61E1

 Directory of C:\Felix_ASUS_Docs\1A_Python_Projects\DATA601-Fall 2022\Lecture03_Numpy

09/25/2022  09:38 PM    <DIR>          .
09/21/2022  10:25 PM    <DIR>          ..
09/25/2022  09:24 PM    <DIR>          .ipynb_checkpoints
09/20/2022  04:03 PM           259,116 1_DATA601_Additional_Material_Review.pdf
09/20/2022  02:51 PM           313,169 1_DATA601_Additional_Material_Review.pptx
09/25/2022  09:38 PM            80,630 2_DATA601_Numpy_Tutorial.ipynb
09/20/2022  02:46 PM            10,634 3_Numpy_Exercise.ipynb
08/06/2022  11:30 PM                27 README
09/25/2022  09:39 PM             2,525 test.dat
09/25/2022  09:39 PM               152 test.npy
09/25/2022  09:39 PM               974 test.npz
               8 File(s)        667,227 bytes
               3 Dir(s)  828,895,522,816 bytes free


In [85]:
# To load a single numpy array (.npy file): 
arr1a = np.load('test.npy')
print(arr1a)

[[0 1 2]
 [3 4 5]]


In [86]:
# to list current variables used so far in the Jupyter Notebook.
%who

arr1	 arr1a	 arr2	 array1	 array10	 array2	 array3	 array4	 array5	 
array6	 array7	 array8	 array9	 element	 funct1	 indices	 list1	 np	 
os	 vfunct1	 x	 x2	 z	 z_vect	 


In [87]:
# To load a multiple numpy arrays (.npz file): 
alldata = np.load('test.npz')
# to learn what arrays are in your NPZ file do this
alldata.files

['a1', 'a2']

In [88]:
# The object returned by np.load for an .npz file is dictionary-like:
# look up each array by the name it was saved under
var1 = alldata['a1']
var2 = alldata['a2']
print(var1)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]
 [32 33 34 35]
 [36 37 38 39]
 [40 41 42 43]
 [44 45 46 47]
 [48 49 50 51]
 [52 53 54 55]
 [56 57 58 59]
 [60 61 62 63]
 [64 65 66 67]
 [68 69 70 71]
 [72 73 74 75]
 [76 77 78 79]
 [80 81 82 83]
 [84 85 86 87]
 [88 89 90 91]
 [92 93 94 95]
 [96 97 98 99]]


## LAMBDA FUNCTIONS

In [89]:
# Sometimes you want to define a simple function without the full function syntax. 
# Lambda functions exist for this exact reason: 
# Defining the Function:
# NOTE: this reuses the name funct1, so the sin/cos funct1 defined with def earlier
# in the notebook is no longer reachable under that name after this cell runs.
funct1 = lambda x: x**2 # Returns the square of x

In [90]:
# Using the Function:
tmpvar1 = funct1(5)
print(tmpvar1)

25


In [91]:
# Can use multiple variables:
funct2 = lambda x,y: x + y

In [92]:
# Using the Function:
tmpvar2 = funct2(5, 6) 
print(tmpvar2)

11


In [93]:
# LAMBDA FUNCTIONS ARE VERY USEFUL IN E.D.A

# Pipe
Pipe is a module that enables shell like input. For more information: https://pypi.org/project/pipe/.

If you need to install "pipe" uncomment the cell below. Just run once. The same works with Anaconda Prompt or Terminal.

In [94]:
#%pip install pipe

In [95]:
from pipe import select, where

In [96]:
arr = [1, 2, 3, 4, 5, 6, 10]
# Builds a list from arr: keep only the elements divisible by 2, then square each selected element.
# This uses Pipe's "where" (filter) and "select" (map) functions.
list(arr| where(lambda x:x%2==0) | select(lambda x:x**2)) 

[4, 16, 36, 100]

<b>Exercise:</b>
1. Create a 6x4 array, which includes random integers from 1 to 10, name this array "A". print the array A.
2. Find all the numbers bigger than 5 and multiply them with 10, name this new array B.
3. Replace the element on the 2nd row, 3rd column of B, with -10 

In [97]:
# step-1
A = np.random.randint(1,11,24).reshape(6,4)
print(A)

[[10  5  1  4]
 [ 2  4  1  4]
 [ 2  5  9  3]
 [10  6  8  4]
 [10  5  3  1]
 [ 2  6  2  8]]


In [98]:
# step-2: Method-1: Old-fashion
# Walk over every (row, column) position of A and, in a copy of A,
# replace each entry greater than 5 by ten times its value.
B = np.copy(A)
for i in range(A.shape[0]):
    for j in range(A.shape[1]):
        if A[i, j] > 5:
            B[i, j] = A[i, j] * 10
print(B)

[[100   5   1   4]
 [  2   4   1   4]
 [  2   5  90   3]
 [100  60  80   4]
 [100   5   3   1]
 [  2  60   2  80]]


In [99]:
# step-2: Method-2: One For Loop
# np.where gives (row indices, column indices); as an array that is 2 x (number of hits),
# so column i of inda holds the position of the i-th element greater than 5.
inda = np.asarray(np.where(A > 5))
B = np.copy(A)
for i in range(inda.shape[1]):
    B[inda[0, i], inda[1, i]] = A[inda[0, i], inda[1, i]] * 10
print(B)

[[100   5   1   4]
 [  2   4   1   4]
 [  2   5  90   3]
 [100  60  80   4]
 [100   5   3   1]
 [  2  60   2  80]]


In [100]:
# step-2: Method-3: Proper use of "where"
# np.where returns the row and column indices of every element above 5;
# indexing with both index arrays updates all of those elements in one statement.
B = A.copy()
ind1, ind2 = np.where(A > 5.0)
B[ind1, ind2] *= 10
print(B)

[[100   5   1   4]
 [  2   4   1   4]
 [  2   5  90   3]
 [100  60  80   4]
 [100   5   3   1]
 [  2  60   2  80]]


In [101]:
# step-2 Method-4 (Pipe + Lambda)
# Flatten A to a plain list, pass every element through the lambda with Pipe's select
# (times 10 when it is greater than 5, unchanged otherwise), then restore A's own shape.
# A.shape is used instead of a hard-coded (6, 4) so the cell keeps working if A changes size.
B = np.array(list(A.flatten().tolist() | select(lambda x: 10*x if x > 5 else x))).reshape(A.shape)
print(B)

[[100   5   1   4]
 [  2   4   1   4]
 [  2   5  90   3]
 [100  60  80   4]
 [100   5   3   1]
 [  2  60   2  80]]


In [102]:
# step-2: Method-5: zip-method
# Pair each element of A with its "greater than 5" flag, multiply the flagged ones by 10,
# and reshape back to A's own shape rather than a hard-coded (6, 4).
B = np.array([value*10 if is_big else value for is_big, value in
              zip((A > 5).flatten(), A.flatten())]).reshape(A.shape)
print(B)

[[100   5   1   4]
 [  2   4   1   4]
 [  2   5  90   3]
 [100  60  80   4]
 [100   5   3   1]
 [  2  60   2  80]]


In [103]:
A.tolist()

[[10, 5, 1, 4],
 [2, 4, 1, 4],
 [2, 5, 9, 3],
 [10, 6, 8, 4],
 [10, 5, 3, 1],
 [2, 6, 2, 8]]

In [104]:
# step-2: Method-6: np.select
# For each element, np.select takes the value from the choice whose condition is true:
# A itself where A<=5, and A*10 where A>5
B = np.select([A<=5, A>5], [A, A*10])
print(B)

[[100   5   1   4]
 [  2   4   1   4]
 [  2   5  90   3]
 [100  60  80   4]
 [100   5   3   1]
 [  2  60   2  80]]


In [105]:
# step-3
# indices are zero-based, so the 2nd row, 3rd column is B[1,2]
B[1,2] = -10
print(B)

[[100   5   1   4]
 [  2   4 -10   4]
 [  2   5  90   3]
 [100  60  80   4]
 [100   5   3   1]
 [  2  60   2  80]]


# Notebook End