In [2]:
import numpy as np

# Manipulating Arrays

Let's look at some ways to manipulate arrays, e.g. changing their shape, combining them, and splitting them.

## Adding / Removing Elements

### Appends values to end

In [None]:
# Example inputs so that this cell (and the insert/delete cells below) run on a
# fresh kernel: 4 rows x 5 columns is large enough for row index 3 and column index 4.
arr = np.arange(20).reshape(4, 5)
values = np.array([100, 200])

# Without `axis`, both inputs are flattened and a new 1-D array is returned;
# `arr` itself is left unchanged.
np.append(arr, values)

### Inserts values into index

In [None]:
# Place `values` in front of position 2.
# No axis is given, so arr is flattened first and a new 1-D array comes back.
np.insert(arr, obj=2, values=values)

### Deletes row on index

In [None]:
# Drop the row at index 3 (axis=0 selects rows); the result is a new array
# and arr keeps all of its rows.
np.delete(arr, obj=3, axis=0)

### Deletes column on index

In [None]:
# Drop the column at index 4 (axis=1 selects columns); the result is a new array
# and arr keeps all of its columns.
np.delete(arr, obj=4, axis=1)

## Reshaping Arrays

Reshaping is done using the ```reshape()``` function.

In [3]:
import numpy as np

# Lay the integers 0..11 out as 3 rows x 4 columns
some_array = np.reshape(np.arange(12), (3, 4))
print(some_array)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [2]:
# The same 12 elements can be laid out as 2 rows x 6 columns;
# some_array itself keeps its (3, 4) shape.
np.reshape(some_array, (2, 6))

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

In [3]:
# Passing -1 for one dimension tells NumPy to infer it from the total size:
# 12 elements over 4 rows -> 3 columns.
some_array.reshape((4, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [18]:
# Collapse a 2 x 3 array into a single dimension, row by row
b = np.array([[1.5, 2, 3], [4, 5, 6]], dtype=float)
np.ravel(b)

array([1.5, 2. , 3. , 4. , 5. , 6. ])

## Array Concatenation

In [None]:
# First 2 x 3 operand for the concatenation examples
matrix1 = np.array([[1, 2, 3], [4, 5, 6]])
print("matrix 1")
matrix1

In [None]:
# Second 2 x 3 operand for the concatenation examples
matrix2 = np.array([[7, 8, 9], [10, 11, 12]])
print("matrix 2")
matrix2

### Concatenate Row wise

In [None]:
# axis=0 places the rows of matrix2 underneath the rows of matrix1
matrix_1_2_combineRowwise = np.concatenate((matrix1, matrix2), axis=0)
print("Combined on Row (axis=0)")
matrix_1_2_combineRowwise

In [None]:
# Two 1-D arrays joined end to end
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate((x, y))

In [None]:
# The same call works for 2-D input; with no axis argument the arrays are
# joined along the first axis (rows)
grid = np.array([[1, 2, 3], [4, 5, 6]])
np.concatenate((grid, grid))

### Concatenate Column wise

In [None]:
# axis=1 places the columns of matrix2 to the right of the columns of matrix1
matrix_1_2_combineColwise = np.concatenate((matrix1, matrix2), axis=1)
print("Combined on Col (axis=1)")
matrix_1_2_combineColwise

In [None]:
# Join grid with itself side by side (axis=1 is the column axis)
np.concatenate((grid, grid), axis=1)

## Stacking Arrays
Stacking is done using the ```np.hstack()``` and ```np.vstack()``` functions. For horizontal stacking, the arrays must have the same number of rows, while for vertical stacking, they must have the same number of columns.

In [5]:
# Two arrays with the same number of columns (4) but different numbers of rows
array_1 = np.arange(12).reshape((3, 4))
array_2 = np.arange(20).reshape((5, 4))

# Same text as three separate prints: array_1, a blank gap, then array_2
print(array_1, "\n", array_2, sep="\n")

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]]


### `vstack`

In [6]:
# vstack
# Note that np.vstack(array_1, array_2) throws an error - the arrays must be
# passed together as one sequence (a tuple here; a list works as well)
np.vstack((array_1, array_2))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [3]:
# 2 x 3 block of integer ones, stacked on top of the same block doubled
p = np.ones((2, 3), dtype=int)
np.vstack((p, 2 * p))

array([[1, 1, 1],
       [1, 1, 1],
       [2, 2, 2],
       [2, 2, 2]])

### `r_`
Stack arrays vertically (row-wise)

In [4]:
# np.r_ is used with square brackets (indexing syntax), not called like a function
np.r_[p,2*p]

array([[1, 1, 1],
       [1, 1, 1],
       [2, 2, 2],
       [2, 2, 2]])

### `hstack`
Similarly, two arrays having the same number of rows can be horizontally stacked using ```np.hstack((a, b))```.

Use `hstack` to stack arrays in sequence horizontally (column wise).

In [5]:
# Place 2*p to the right of p; both have 2 rows
np.hstack((p, 2 * p))

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

### `column_stack`
Create stacked column-wise arrays

In [6]:
# For 2-D inputs such as p this gives the same result as np.hstack;
# the two differ for 1-D inputs, which column_stack turns into columns first
np.column_stack([p, 2*p])

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

### `c_`
Create stacked column-wise arrays

In [7]:
# np.c_ is used with square brackets (indexing syntax), not called like a function
np.c_[p,2*p]

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

## Splitting Arrays
Notice that N split points lead to N + 1 subarrays.

In [9]:
# Inputs for the split examples: a 1-D range of 12 integers and a
# 2 x 6 table of strings (company names in row 0, locations in row 1)
p = np.arange(12)
array = np.array([
    ['IBM', 'Apple Inc.', 'Intel', 'Sony', 'Dell', 'Akamai'],
    ['New York', 'California', 'California', 'Texas', 'Wshington', 'massachusetts'],
])

### `split`

In [10]:
print('Split the array to produce 4 equal-size subarrays')
# An integer asks for that many equal parts; the length (12) must divide evenly
np.split(p, indices_or_sections=4)

Split the array to produce 4 equal-size subarrays


[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8]), array([ 9, 10, 11])]

In [11]:
print('Split the array at positions indicated 1-D array : ')
# A list gives the cut positions: p[:3], p[3:8] and p[8:]
np.split(p, indices_or_sections=[3, 8])

Split the array at positions indicated 1-D array : 


[array([0, 1, 2]), array([3, 4, 5, 6, 7]), array([ 8,  9, 10, 11])]

### `hsplit`
Horizontal Split - columnwise

In [12]:
# Cut the 6 columns of `array` into 2 equal blocks of 3 columns each
x1, x2 = np.hsplit(array, 2)

In [13]:
x1

array([['IBM', 'Apple Inc.', 'Intel'],
       ['New York', 'California', 'California']], dtype='<U13')

In [14]:
x2

array([['Sony', 'Dell', 'Akamai'],
       ['Texas', 'Wshington', 'massachusetts']], dtype='<U13')

### `vsplit`

In [15]:
# Cut the 2 rows of `array` into 2 blocks of 1 row each
y1, y2 = np.vsplit(array, 2)

In [16]:
y1

array([['IBM', 'Apple Inc.', 'Intel', 'Sony', 'Dell', 'Akamai']],
      dtype='<U13')

In [17]:
y2

array([['New York', 'California', 'California', 'Texas', 'Wshington',
        'massachusetts']], dtype='<U13')

## Sorting Arrays

In [5]:
# Sort in place along the last axis (the default, spelled out here).
# ndarray.sort returns None, so this cell shows no output.
some_array.sort(axis=-1)

In [9]:
# Sort a 2 x 2 x 3 array in place along its first axis: each position is
# ordered across the two outer blocks
c = np.array(
    [
        [[1.5, 2, 3], [4, 5, 6]],
        [[3, 2, 1], [4, 5, 6]],
    ],
    dtype=float,
)
c.sort(0)

## Transposing Array

In [14]:
# 2 x 3 float array used in the transpose examples
b = np.array([[1.5, 2, 3], [4, 5, 6]], dtype=float)
b

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

In [17]:
# Reverse the order of the axes: the (2, 3) array becomes (3, 2)
i = b.transpose()
i

array([[1.5, 4. ],
       [2. , 5. ],
       [3. , 6. ]])

```array.T``` returns the transpose of an array. The number of rows has swapped with the number of columns.

In [16]:
i.T

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

In [4]:
# Transposing an array
some_array.T

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [18]:
# 2 x 5 table of strings (names in row 0, locations in row 1);
# transposing gives one (name, location) pair per row
tech_companies = np.array([
    ['IMB', 'Apple Inc.', 'Intel', 'Dell', 'Microsoft'],
    ['New York', 'California', 'California', 'Texas', 'Washington'],
])
tech_companies.T

array([['IMB', 'New York'],
       ['Apple Inc.', 'California'],
       ['Intel', 'California'],
       ['Dell', 'Texas'],
       ['Microsoft', 'Washington']], dtype='<U10')

In [19]:
np.transpose(tech_companies)

array([['IMB', 'New York'],
       ['Apple Inc.', 'California'],
       ['Intel', 'California'],
       ['Dell', 'Texas'],
       ['Microsoft', 'Washington']], dtype='<U10')

# Comparison

In [None]:
# A 1-D integer array and a 2 x 3 float array for the comparison examples
a = np.array([1, 2, 3])
b = np.array([[1.5, 2, 3], [4, 5, 6]], dtype=float)

## Element-wise comparison

In [None]:
# a has shape (3,) and b has shape (2, 3): a is broadcast against each row of b,
# so the result is a (2, 3) Boolean array
a == b

In [None]:
a < 2

## Array-wise comparison

In [None]:
# Returns a single bool: True only if both arrays have the same shape and the
# same elements. a is (3,) and b is (2, 3), so this is False.
np.array_equal(a, b)

# resize
`resize` changes the shape and size of an array in place (the method returns `None`).

In [20]:
# 9 evenly spaced values from 0 to 4 inclusive (step 0.5)
o = np.linspace(start=0, stop=4, num=9)
o

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. ])

In [21]:
# Change o itself to 3 x 3; nothing is returned, so display o afterwards
o.resize((3, 3))
o

array([[0. , 0.5, 1. ],
       [1.5, 2. , 2.5],
       [3. , 3.5, 4. ]])

# Iterating Over Arrays

In [22]:
# 4 x 3 matrix of random integers in [0, 10).
# No seed is set, so the values change on every run.
test = np.random.randint(low=0, high=10, size=(4, 3))
test

array([[8, 8, 5],
       [7, 4, 2],
       [3, 7, 2],
       [2, 2, 9]])

## Iterate by row

In [23]:
# Iterating over a 2-D array yields one row (a 1-D array) per step
for row in test:
    print(row)

[8 8 5]
[7 4 2]
[3 7 2]
[2 2 9]


## Iterate by index

In [24]:
# Walk the row indices explicitly; shape[0] is the number of rows
for i in range(test.shape[0]):
    print(test[i])

[8 8 5]
[7 4 2]
[3 7 2]
[2 2 9]


## Iterate by row and index

In [25]:
# enumerate yields (row index, row) pairs
for i, row in enumerate(test):
    print('row', i, 'is', row)

row 0 is [8 8 5]
row 1 is [7 4 2]
row 2 is [3 7 2]
row 3 is [2 2 9]


## Use `zip` to iterate over multiple iterables

In [26]:
# Element-wise square of test, used as the second iterable for zip
test2 = np.square(test)
test2

array([[64, 64, 25],
       [49, 16,  4],
       [ 9, 49,  4],
       [ 4,  4, 81]])

In [27]:
# zip pairs each row of test with the row of test2 at the same position;
# i + j is the element-wise sum of the two rows
for i, j in zip(test, test2):
    print(i,'+',j,'=',i+j)

[8 8 5] + [64 64 25] = [72 72 30]
[7 4 2] + [49 16  4] = [56 20  6]
[3 7 2] + [ 9 49  4] = [12 56  6]
[2 2 9] + [ 4  4 81] = [ 6  6 90]


In [28]:
# order='F' flattens column by column (column-major), so the first column's
# values come out first
for data in test.flatten(order = 'F'):
    print(data)

8
7
3
2
8
4
7
2
5
2
2
9


In [29]:
# np.nditer visits the elements of the array one at a time
for i in np.nditer(test):
    print(i)

8
8
5
7
4
2
3
7
2
2
2
9


In [30]:
# order='F' makes nditer visit the elements column by column
for i in np.nditer(test, order='F'):
    print(i)

8
7
3
2
8
4
7
2
5
2
2
9


In [31]:
# With the 'external_loop' flag nditer yields 1-D chunks instead of single
# elements; combined with order='F', each chunk here is one column of test
for i in np.nditer(test, order='F', flags = ['external_loop']):
    print(i)

[8 7 3 2]
[8 4 7 2]
[5 2 2 9]


In [32]:
# Modify elements while iterating: op_flags=['readwrite'] makes each yielded
# element writable, and assigning through [...] writes back into `test`.
# The loop variable is named `element` so it does not overwrite the `array`
# variable created for the split examples.
# NOTE: this squares `test` in place, so re-running the cell squares it again.
for element in np.nditer(test, op_flags=['readwrite']):
    element[...] = element * element
test

array([[64, 64, 25],
       [49, 16,  4],
       [ 9, 49,  4],
       [ 4,  4, 81]])

# Searching

## `where()`
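Called with only a condition, the `where()` function returns the indices of the elements that satisfy that condition (as a tuple with one index array per dimension), not the elements themselves.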

In [33]:
grades = np.array([1, 3, 4, 2, 5, 5])
# One-argument form: a tuple of index arrays marking where the condition holds
np.where(np.greater(grades, 3))

(array([2, 4, 5], dtype=int64),)

You can also provide two additional parameters:
* the first one supplies the value used wherever the condition is satisfied
* the second one supplies the value used wherever the condition is not satisfied

In [34]:
# Three-argument form: 'gt3' where grades > 3, otherwise 'lt3'
# (so a grade of exactly 3 is also labelled 'lt3')
np.where(grades > 3, 'gt3', 'lt3')

array(['lt3', 'lt3', 'gt3', 'lt3', 'gt3', 'gt3'], dtype='<U3')

## intersect1d()
The `intersect1d()` function returns the intersection of two arrays: the sorted, unique values that appear in both.

In [35]:
# Duplicates in either input do not matter: only the values common to both
# arrays are reported
arr1, arr2 = np.array([3, 1, 2, 5, 5]), np.array([3, 4, 4, 2, 4])
np.intersect1d(arr1, arr2)

array([2, 3])

## np.nonzero()
**Goal**: find elements that meet a certain condition in a NumPy array

For a 2-D array, the result is a tuple of two NumPy arrays (in general, one array per dimension). The first array gives the row indices of the non-zero elements. The second array gives their column indices.

In [1]:
# np.nonzero on a small 3 x 3 matrix with four non-zero entries
import numpy as np
X = np.array([[1, 0, 0], [0, 2, 2], [3, 0, 0]])
print(X.nonzero())


(array([0, 1, 1, 2], dtype=int64), array([0, 1, 2, 0], dtype=int64))


Use np.nonzero() and broadcasting to find elements

In [2]:
import numpy as np

# Air quality index (AQI) readings: one row per city, one column per measurement
X = np.array([
    [42, 40, 41, 43, 44, 43],  # Hong Kong
    [30, 31, 29, 29, 29, 30],  # New York
    [8, 13, 31, 11, 11, 9],    # Berlin
    [11, 11, 12, 13, 11, 12],  # Montreal
])
cities = np.array(["Hong Kong", "New York", "Berlin", "Montreal"])

# Cities with at least one reading above the overall average.
# A row index appears once per qualifying reading, so set() removes the repeats.
above_average = X > np.average(X)
row_idx = np.nonzero(above_average)[0]
polluted = set(cities[row_idx])
print(polluted)


{'New York', 'Hong Kong', 'Berlin'}


The Boolean expression `X > np.average(X)` uses broadcasting to compare every element of `X` with the scalar average. The element-wise comparison yields a Boolean array that contains `True` wherever a measurement is above the average AQI value. The function `np.average()` computes the average AQI value over all NumPy array elements.
`np.nonzero()` then returns the row and column indices of the `True` entries. The row indices (element `[0]` of the result) are used to index `cities` (integer-array indexing), and `set()` removes the duplicates, leaving every city that has at least one above-average measurement.