### Indexing, Slicing and Iterating

In [2]:
import numpy as np

In [3]:
#indexing
a = np.arange(10,16)
print(a)
print(a[4])
print(a[-1])
print(a[-6])

[10 11 12 13 14 15]
14
15
10


In [4]:
a[[1, 3, 4]]

array([11, 13, 14])

In [5]:
A = np.arange(10,19).reshape((3,3))
print(A)
print(A[1,2]) #A[row,column]

[[10 11 12]
 [13 14 15]
 [16 17 18]]
15


In [6]:
#slicing

a = np.arange(10,16)
print(a)
print(a[1:5])
#remember that the first number is inclusive and 2nd is exclusive

[10 11 12 13 14 15]
[11 12 13 14]


In [7]:
#if you want extract an item from a previous portion and skip a
#specific number of following items, then extract the next and skip
#again, you can use a 3rd number that defines the gap in the 
#sequence of elements. For, ex: a value of 2 will skip every OTHER
#element in the array --> 2nd number is STILL exclusive
a[1:5:2] 

array([11, 13])

In [8]:
#if we omit the first number we begin at the initial index: 0
#if we omit the second it takes that as the maximum value in the narray
print(a[::2]) #this one does first to last with gaps of 2
print(a[:5:2]) #this does the same, just explicitly
print(a[:5:]) 
print(a[::]) #takes beginning to end

[10 12 14]
[10 12 14]
[10 11 12 13 14]
[10 11 12 13 14 15]


In [9]:
A = np.arange(10,19).reshape((3,3)) #gives 3x3 matrix of the numbers
print(A)
print(A[0,:]) #A[row,col] so first row, all cols
print(A[:,0]) #All rows, first col
print(A[1,2]) #selects 1 element from 2nd row 3rd col

[[10 11 12]
 [13 14 15]
 [16 17 18]]
[10 11 12]
[10 13 16]
15


In [10]:
A[0:2, 0:2] #can also use slicing to extract a smaller matrix

array([[10, 11],
       [13, 14]])

In [11]:
A[[0,2], 0:2] #we can also specify an array of indexes

array([[10, 11],
       [16, 17]])

In [12]:
for i in a:
    print(i)

10
11
12
13
14
15


In [13]:
for row in A:
    print(row)

[10 11 12]
[13 14 15]
[16 17 18]


In [14]:
for item in A.flat: #does iteration element by element
    print(item)

10
11
12
13
14
15
16
17
18


In [15]:
np.apply_along_axis(np.mean, axis=0, arr=A) #axis is 0 for col by col

array([ 13.,  14.,  15.])

In [16]:
np.apply_along_axis(np.mean, axis=1, arr=A) #axis is 1 for row by row

array([ 11.,  14.,  17.])

In [17]:
def foo(x):
    return x/2

np.apply_along_axis(foo, axis=1, arr=A) #can even use our own defined functions in apply_along_axis

array([[ 5. ,  5.5,  6. ],
       [ 6.5,  7. ,  7.5],
       [ 8. ,  8.5,  9. ]])

In [18]:
np.apply_along_axis(foo, axis=0, arr=A)

array([[ 5. ,  5.5,  6. ],
       [ 6.5,  7. ,  7.5],
       [ 8. ,  8.5,  9. ]])

### Conditions and Boolean Arrays

In [19]:
A = np.random.random((4,4))
A

array([[ 0.59585235,  0.78624123,  0.74885126,  0.61940285],
       [ 0.61078107,  0.93629677,  0.14511127,  0.87493872],
       [ 0.99345743,  0.67712697,  0.67822064,  0.97316775],
       [ 0.32880271,  0.43868504,  0.34330718,  0.66347493]])

In [20]:
A < 0.5 #returns the array as a boolean where the operation was tested

array([[False, False, False, False],
       [False, False,  True, False],
       [False, False, False, False],
       [ True,  True,  True, False]], dtype=bool)

In [21]:
A[A < 0.5] #inserting this conition will allow you to extract those
            #that satisfy the condition

array([ 0.14511127,  0.32880271,  0.43868504,  0.34330718])

### Shape manipulation

In [22]:
a = np.random.random(12) #gives 1-D array
print(a)
A = a.reshape(3,4) #reshapes to 3 rows, 4 cols
A

[ 0.91463193  0.77300451  0.02386254  0.45327451  0.14558055  0.18436907
  0.04846919  0.93462937  0.28051781  0.77101069  0.70140839  0.22224186]


array([[ 0.91463193,  0.77300451,  0.02386254,  0.45327451],
       [ 0.14558055,  0.18436907,  0.04846919,  0.93462937],
       [ 0.28051781,  0.77101069,  0.70140839,  0.22224186]])

In [23]:
#if you want to modify the object by modifying the shape, you have
#to assign a tuple containing the new dimensions directly to its shape
#attribute
a.shape = (3,4)
a

array([[ 0.91463193,  0.77300451,  0.02386254,  0.45327451],
       [ 0.14558055,  0.18436907,  0.04846919,  0.93462937],
       [ 0.28051781,  0.77101069,  0.70140839,  0.22224186]])

In [24]:
#this method converts the shape back to a 1-D array
a = a.ravel()
a

array([ 0.91463193,  0.77300451,  0.02386254,  0.45327451,  0.14558055,
        0.18436907,  0.04846919,  0.93462937,  0.28051781,  0.77101069,
        0.70140839,  0.22224186])

In [25]:
#this acts directly on the shape attribute of array itself
a.shape = (12)
a

array([ 0.91463193,  0.77300451,  0.02386254,  0.45327451,  0.14558055,
        0.18436907,  0.04846919,  0.93462937,  0.28051781,  0.77101069,
        0.70140839,  0.22224186])

In [31]:
#mathematically this is (i,j) -> (j,i)
print(A)
A.transpose()

[[ 0.91463193  0.77300451  0.02386254  0.45327451]
 [ 0.14558055  0.18436907  0.04846919  0.93462937]
 [ 0.28051781  0.77101069  0.70140839  0.22224186]]


array([[ 0.91463193,  0.14558055,  0.28051781],
       [ 0.77300451,  0.18436907,  0.77101069],
       [ 0.02386254,  0.04846919,  0.70140839],
       [ 0.45327451,  0.93462937,  0.22224186]])

In [41]:
#Array Manipulation

A = np.ones((3,3))
B = np.zeros((3,3))
print("A =","\n", A)
print("B=","\n",  B)
vstack = np.vstack((A,B)) #vertical stacking
print("vstack A,B=","\n", vstack)


A = 
 [[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]]
B= 
 [[ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]
vstack A,B= 
 [[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]


In [43]:
hstack = np.hstack((A,B))
print("this is horizontal=" , "\n", hstack)

this is horizontal= 
 [[ 1.  1.  1.  0.  0.  0.]
 [ 1.  1.  1.  0.  0.  0.]
 [ 1.  1.  1.  0.  0.  0.]]


In [46]:
a = np.array([0,1,2])
print("a = ", "\n", a)
b = np.array([3,4,5])
print("b = ", "\n", b)
c = np.array([6,7,8])
print("c = ", "\n", c)

col_stack = np.column_stack((a,b,c))
print("col_stack =", '\n', col_stack)
#generally these are made from 1-D arrays that are stacked as cols or
#rows in order to form a new n-D array, as seen by ##.shape
col_stack.shape

a =  
 [0 1 2]
b =  
 [3 4 5]
c =  
 [6 7 8]
col_stack = 
 [[0 3 6]
 [1 4 7]
 [2 5 8]]


(3, 3)

In [47]:
row_stack = np.row_stack((a,b,c))
print("row stack = ", '\n', row_stack)
row_stack.shape


row stack =  
 [[0 1 2]
 [3 4 5]
 [6 7 8]]


(3, 3)

### Splitting arrays

In [51]:
A = np.arange(16).reshape((4,4))
print('A=','\n',A)
[B,C] = np.hsplit(A,2)
print('B = ','\n',B)
print('C = ','\n',C)
print(A.shape)
print(B.shape)
print(C.shape)
#obviously this is an horizontal split, where the width of the array
#is divided into two parts so 4x4 is split into 2x4

A= 
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
B =  
 [[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
C =  
 [[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]
(4, 4)
(4, 2)
(4, 2)


In [54]:
[B,C] = np.vsplit(A,2)
print('new B','\n',B)
print('new C','\n',C)
print(B.shape)
print(C.shape)
#this is split into 2 4x2 matrices

new B 
 [[0 1 2 3]
 [4 5 6 7]]
new C 
 [[ 8  9 10 11]
 [12 13 14 15]]
(2, 4)
(2, 4)


In [59]:
print(A)
[A1,A2,A3] = np.split(A, [1,3], axis=1)
#is axis = 1, then the indexes will be cols
print(A1)
print(A2)
print(A3)
print(A1.shape)
print(A2.shape)
print(A3.shape)
#the split function allows asymmetrical parts

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[[ 0]
 [ 4]
 [ 8]
 [12]]
[[ 1  2]
 [ 5  6]
 [ 9 10]
 [13 14]]
[[ 3]
 [ 7]
 [11]
 [15]]
(4, 1)
(4, 2)
(4, 1)


In [60]:
print(A)
[A1,A2,A3] = np.split(A, [1,3], axis=0)
#is axis = 0 then indexes will be row indexes
print(A1)
print(A2)
print(A3)
print(A1.shape)
print(A2.shape)
print(A3.shape)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[[0 1 2 3]]
[[ 4  5  6  7]
 [ 8  9 10 11]]
[[12 13 14 15]]
(1, 4)
(2, 4)
(1, 4)


### General Concepts

In [61]:
#none of the numpy assignments produces copies of arrays, nor
#any element contained in them
a = np.array([1,2,3,4])
b = a
print(b)
a[2] = 0
print(b)
#if we assign one array "a" to "b" we aren't copying it but rather 
#just creating another way of calling 'a'
#by changing a (3rd) value in 'a' you also change the (3rd) value
#in 'b'

[1 2 3 4]
[1 2 0 4]


In [62]:
c = a[0:2]
print(c)
a[0] = 0
print(c)
#even when slicing, you are actually pointing to the same object
#if you want to generate a complete and distinct array use copy()

[1 2]
[0 2]


In [65]:
a = np.array([1,2,3,4])
print(a)
c = a.copy()
print(c)
a[0] = 0
print(c)
print(a)

[1 2 3 4]
[1 2 3 4]
[1 2 3 4]
[0 2 3 4]


### Vectorization/Broadcasting

In [77]:
#this is the basis of internal implementation of NumPy
#vectorization is the absence of an explicit loop during development
#of code
#allows for 'Pythonic' code
#allows for more mathematical expression of operations

#broadcasting allows an operator or a function
#toact on two or more arrays to operate even if the two arrays don't
#have the same shape

#although there are rules:
#the two arrays must be compatible i.e., the length of each dimension
#must be equal or one of them must be equal to 1.

#shift+cmd+c shows underlying code in jupyter notebook
A = np.arange(16).reshape(4,4)
b = np.arange(4)
print('A=','\n',A)
print('b=','\n',b)
print('A+b= \n',A + b) #2nd rule
print('A*b= \n', A*b)

#1st we must add a 1 to each missing dimension
#2nd assumes the missing elements (size, length 1) are filled with
#replicas of the values contanied in extrended sizes
#

A= 
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
b= 
 [0 1 2 3]
A+b= 
 [[ 0  2  4  6]
 [ 4  6  8 10]
 [ 8 10 12 14]
 [12 14 16 18]]
A*b= 
 [[ 0  1  4  9]
 [ 0  5 12 21]
 [ 0  9 20 33]
 [ 0 13 28 45]]


In [75]:
#whereas other languages require a nested loop such as:
#for(i = 0; i<rows; i++){
#    c[i] = a[i]*b[i];
#}

#for matrix multiplication we have:

#for(i=0; i<rows; i++){
#    for(j=0; j< columns; j++){
#        c[i][j] = a[i][j]*b[i][j];
#    }
#}

In [78]:
m = np.arange(6).reshape(3,1,2)
print(m.shape)
n = np.arange(6).reshape(3,2,1)
print(n.shape)
#these two are still compatible, and both arrays undergo extension
print(m)
print(n)
m + n

(3, 1, 2)
(3, 2, 1)
[[[0 1]]

 [[2 3]]

 [[4 5]]]
[[[0]
  [1]]

 [[2]
  [3]]

 [[4]
  [5]]]


array([[[ 0,  1],
        [ 1,  2]],

       [[ 4,  5],
        [ 5,  6]],

       [[ 8,  9],
        [ 9, 10]]])

### Structured Arrays

In [79]:
#these arrays contain structs or records instead of individual
#items, for example, dtype option we can specify a list of
#comma seperated specifiers to indicate the elements
#then we can have a struct that has a integer, long strings and boolean
#values but we must specify what we want

structured = np.array([(1, 'First', 0.5, i+2j),(2, 'Second', 1.3, 2-2j),(3, 'Third', 0.8, 1+3j)], dtype=('i2, a6, f4, c8'))
structured

#data types:
#
#b1 = bytes
#i1,i2,i4,i8 = int
#u1,u2,u4,u8 = unsigned ints
#f2,f4,f8 = floats
#c8,c16 = complex
#a<n> = fixed length strings

array([(1, b'First',  0.5       ,  15.+2.j),
       (2, b'Second',  1.29999995,   2.-2.j),
       (3, b'Third',  0.80000001,   1.+3.j)],
      dtype=[('f0', '<i2'), ('f1', 'S6'), ('f2', '<f4'), ('f3', '<c8')])

In [80]:
structured = np.array([(1, 'First', 0.5, i+2j),(2, 'Second', 1.3, 2-2j),(3, 'Third', 0.8, 1+3j)], dtype=('int16, a6, float32, complex64'))
structured
#different way to create the struct, which is more explicit in 
#using the data structures

array([(1, b'First',  0.5       ,  15.+2.j),
       (2, b'Second',  1.29999995,   2.-2.j),
       (3, b'Third',  0.80000001,   1.+3.j)],
      dtype=[('f0', '<i2'), ('f1', 'S6'), ('f2', '<f4'), ('f3', '<c8')])

In [81]:
structured[1] #reference index points to 2nd entry

(2, b'Second',  1.29999995,  2.-2.j)

In [82]:
structured['f1'] #can refer to all elements of same type or the same
                 #columns, here the names are automatically assigned
                 #with an f(which stands for field) and a progressive
                 # integer that indicated the position in the sequence

array([b'First', b'Second', b'Third'],
      dtype='|S6')

In [83]:
structured = np.array([(1, 'First', 0.5, i+2j),(2, 'Second', 1.3, 2-2j),(3, 'Third', 0.8, 1+3j)],\
                      dtype=[('id','i2'),('position','a6'),('value','f4'),('complex','c8')])
structured
#providing an array declaration
#we give names to int, string, and float with appropriate names

array([(1, b'First',  0.5       ,  15.+2.j),
       (2, b'Second',  1.29999995,   2.-2.j),
       (3, b'Third',  0.80000001,   1.+3.j)],
      dtype=[('id', '<i2'), ('position', 'S6'), ('value', '<f4'), ('complex', '<c8')])

In [87]:
#or can do it at a later time like in this case:
structured.dtype.names = ('id', 'order', 'value', 'complex')
structured['order']

array([b'First', b'Second', b'Third'],
      dtype='|S6')

### Reading and Writing Array Data on Files

In [88]:
data = np.random.random((4,3))
data

array([[ 0.20981048,  0.00372728,  0.04812263],
       [ 0.56284731,  0.16479679,  0.60948204],
       [ 0.79644153,  0.17662346,  0.18212944],
       [ 0.98639314,  0.01720923,  0.8661358 ]])

In [89]:
np.save('saved_data', data) #automatically gets .npy extension

In [90]:
loaded_data = np.load('saved_data.npy')
loaded_data

array([[ 0.20981048,  0.00372728,  0.04812263],
       [ 0.56284731,  0.16479679,  0.60948204],
       [ 0.79644153,  0.17662346,  0.18212944],
       [ 0.98639314,  0.01720923,  0.8661358 ]])

### Reading File with Tabular Data

In [91]:
#this function implicitly performs 2 loops:
    #one to read each line
        #next to separate and convert the values contained in it
data = np.genfromtxt('data.csv', delimiter=',', names=True)
data

array([( 1.,  123.,  1.4,  23.), ( 2.,  110.,  0.5,  18.),
       ( 3.,  164.,  2.1,  19.)],
      dtype=[('id', '<f8'), ('value1', '<f8'), ('value2', '<f8'), ('value3', '<f8')])

In [92]:
data2 = np.genfromtxt('data2.csv', delimiter=',', names=True)
data2
#replaces the empty spaces with nan values

array([( 1.,  123.,  1.4,  23.), ( 2.,  110.,  0.5,  18.),
       ( 3.,   nan,  2.1,  19.)],
      dtype=[('id', '<f8'), ('value1', '<f8'), ('value2', '<f8'), ('value3', '<f8')])

In [93]:
data2['id']

array([ 1.,  2.,  3.])

In [94]:
data2[0]

( 1.,  123.,  1.4,  23.)