# Intro to NumPy: my own experimentation

In [3]:
import numpy as np

In [4]:
# Rank-1 (one-dimensional) array holding the integers 1 through 5.
x = np.array(range(1, 6))

In [8]:
# Show the array, then its shape, its Python type and its element dtype, one per line.
print(x, x.shape, type(x), x.dtype, sep='\n')

[1 2 3 4 5]
(5,)
<class 'numpy.ndarray'>
int32


In [7]:
# Rank-2 array: each inner list becomes one row of the 3x3 result.
y = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [9]:
# Show the array, then its shape, its Python type and its element dtype, one per line.
print(y, y.shape, type(y), y.dtype, sep='\n')

[[1 2 3]
 [4 5 6]
 [7 8 9]]
(3, 3)
<class 'numpy.ndarray'>
int32


In [10]:
# 6x9 array filled with ones; no dtype is given, so the entries are floats.
np.ones(shape=(6, 9))

array([[1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [11]:
# Same 6x9 array of ones, but with integer entries.
np.ones(shape=(6, 9), dtype=int)

array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1]])

In [13]:
# 2x5 array filled with zeros; no dtype is given, so the entries are floats.
np.zeros(shape=(2, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [14]:
# Same 2x5 array of zeros, but with integer entries.
np.zeros(shape=(2, 5), dtype=int)

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [15]:
# 4x6 array in which every entry is the constant 7.
np.full(shape=(4, 6), fill_value=7)

array([[7, 7, 7, 7, 7, 7],
       [7, 7, 7, 7, 7, 7],
       [7, 7, 7, 7, 7, 7],
       [7, 7, 7, 7, 7, 7]])

# - Matrix

A **matrix** is a **2** dimensional array, with size = row x column.

## Example
> `np.array( [ [1,2,3], [4,5,6], [7,8,9] ] )`

# - Identity matrix (famous in linear algebra)

## Definition
> An identity matrix is a square-shaped 2-dimensional matrix, with only 1s along its main diagonal and 0s everywhere else.

### Below, see an example of identity matrix: a 5x5 identity matrix


In [18]:
# np.eye(N) builds the N x N identity matrix: ones on the main diagonal, zeros elsewhere.
I = np.eye(N=5)
print(I)

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


# - Diagonal Matrix

## Definition
> Square-shaped 2-dimensional matrix with numbers along its main diagonal and 0s everywhere else.

### Below, see an example of a 5x5 (2D) diagonal matrix

In [19]:
# Given a rank-1 sequence, np.diag places its values on the main diagonal of a square array.
D = np.diag(v=[10, 20, 30, 40, 50])
print(D)

[[10  0  0  0  0]
 [ 0 20  0  0  0]
 [ 0  0 30  0  0]
 [ 0  0  0 40  0]
 [ 0  0  0  0 50]]


# - Numpy's arange()

> Arguments
`np.arange(start, stop, step)`

## Note: 
- stop is exclusive.

- you can specify 1 argument (will be the stop arg, with default start = 0, default step = 1)
- you can specify 2 arguments (will be start and stop, with default step = 1)
- you can specify all 3 arguments. 


In [22]:
# All three arguments given: from 1 up to (but not including) 10, in steps of 2.
c = np.arange(start=1, stop=10, step=2)
print(c)

[1 3 5 7 9]


In [24]:
# Two arguments given: start = 2, stop = 19 (exclusive); the step defaults to 1.
d = np.arange(start=2, stop=19)
print(d)

[ 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]


In [25]:
# Equivalent to the one-argument form np.arange(10): start defaults to 0, step to 1.
e = np.arange(0, 10)
print(e)

[0 1 2 3 4 5 6 7 8 9]


# - np.linspace()

> Arguments
`np.linspace(start, stop, n)`

# n
> n is the number of evenly spaced values wanted in the range from start to stop
>> n is NOT the step: the step is the distance between 2 consecutive values (e.g. the third argument of np.arange())

## Note:

- start AND stop are inclusive, *unlike np.arange() where stop was exclusive*.

- requires AT LEAST **2** arguments, *unlike np.arange() where one was enough*.

- if not specified, n = 50


In [32]:
# 10 evenly spaced numbers covering 0 to 25, with BOTH endpoints included.
z = np.linspace(start=0, stop=25, num=10)
print(z)

[ 0.          2.77777778  5.55555556  8.33333333 11.11111111 13.88888889
 16.66666667 19.44444444 22.22222222 25.        ]


## Additional argument to np.linspace()

If you set the argument `endpoint = False` in np.linspace(), the stop value is **excluded**.

### Below, see an example

In [33]:
# endpoint=False leaves the stop value (25) out of the 10 returned numbers.
u = np.linspace(start=0, stop=25, num=10, endpoint=False)
print(u)

[ 0.   2.5  5.   7.5 10.  12.5 15.  17.5 20.  22.5]


> Above, we have the 10 evenly spaced numbers in the range from 0 (inclusive) to 25 (**exclusive**).

> Because the 10 values now cover the interval from 0 up to, but not including, 25, the spacing between the values changed: it is 25/10 = 2.5 instead of 25/9 ≈ 2.78.

# - np.reshape()

In [35]:
# Rank-1 array of the 20 integers 0..19 (start defaults to 0).
x = np.arange(20)
print(x)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


**Above**, we have a 1D array of shape (20,)

In [38]:
# The new shape must account for exactly the number of elements in x (4 * 5 = 20).
x = np.reshape(x, (4, 5))
print(x)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


**NOW, above**, we have a 2D array of shape (4,5)

> other shapes of 2D arrays compatible with a 1D 20-elements array:

- (5,4)
- (2,10)
- (10,2)

# - NumPy Array Methods


In [40]:
# Method form: create the 20 values and reshape them to 4x5 in one chained expression.
x = np.arange(20).reshape(4, 5)
print(x)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


In [41]:
# 12 evenly spaced values from 0 to 20 (both included), arranged as 4 rows of 3.
y = np.linspace(0, 20, num=12).reshape(4, 3)
print(y)

[[ 0.          1.81818182  3.63636364]
 [ 5.45454545  7.27272727  9.09090909]
 [10.90909091 12.72727273 14.54545455]
 [16.36363636 18.18181818 20.        ]]


# - NumPy arrays containing RANDOM numbers

## Example below: the random function from the random module of NumPy
>  argument = shape of the array

> the array of the specified shape contains random numbers between 0 (inclusive) and 1 (exclusive).

In [44]:
# Seed the global generator first so this cell prints the same numbers on every run.
np.random.seed(0)
# 3x3 array of random floats in the half-open interval [0, 1).
x = np.random.random((3, 3))
print(x)

[[0.36387396 0.33132398 0.43117825]
 [0.54073685 0.84581968 0.42326099]
 [0.86684129 0.79395085 0.79357527]]


## Example below: the randint function from the random module of NumPy
> arguments: lower bound (inclusive), upper bound (exclusive), shape

> the array of the specified shape contains random integers in the specified range.

In [45]:
# Seed the global generator first so this cell prints the same numbers on every run.
np.random.seed(0)
# 6x5 array of random integers from 0 (inclusive) to 20 (exclusive).
y = np.random.randint(0, 20, (6, 5))
print(y)

[[ 4  1  2 10 19]
 [18  4 16 15  4]
 [ 2  3  1  2  7]
 [ 0  1  4  1 13]
 [19 10 18 15 10]
 [15 18  0 12  8]]


## Example: random numbers drawn from probability distributions

## example below: np.random.normal()

> arguments: mean, sd, size

In [46]:
# Seed the global generator first so this cell prints the same numbers on every run.
np.random.seed(0)
# 10x10 array of draws from a normal distribution with mean 0 and standard deviation 0.1.
x = np.random.normal(0, 0.1, size=(10, 10))
print(x)

[[ 0.17322112  0.01164643 -0.0026265   0.08681715  0.11890425  0.08787981
   0.00775729 -0.01916029 -0.00084569  0.24323747]
 [ 0.07355259 -0.002948    0.06348196 -0.11852829 -0.08291985 -0.11611011
   0.00411207 -0.09170368 -0.22326352  0.00989628]
 [-0.04251899 -0.13731673 -0.13696643  0.11538785 -0.0325887   0.11366428
  -0.08886433  0.1064712   0.0524944  -0.09032784]
 [-0.11002988  0.0576105   0.03044997  0.12475972 -0.03217446  0.07890123
   0.11137951 -0.02308261  0.11941518 -0.04069002]
 [-0.14833514  0.28370439  0.03371187  0.01324893  0.02360053  0.08247595
  -0.0851846   0.13069349  0.18005494  0.04766699]
 [ 0.06121967  0.07533053  0.08080712  0.05443705 -0.03234217  0.10117898
   0.01741659  0.01522982 -0.0003409  -0.11828191]
 [-0.01454749 -0.00415636 -0.00391867 -0.13329557 -0.06534528  0.09391278
   0.012861    0.0133971   0.04214782  0.05930479]
 [-0.10804982  0.23170703 -0.00051726 -0.08307304 -0.17277291  0.02547599
  -0.11778344 -0.14922735  0.05004801 -0.15172855]


In [48]:
# The sample statistics should land close to the arguments passed to np.random.normal() above.
print(np.mean(x))  # expected to be very close to the requested mean, 0
print(np.std(x))   # expected to be very close to the requested standard deviation, 0.1
# max and min sit roughly symmetrically about the mean of x, another property of the normal distribution.
print(np.max(x))
print(np.min(x))

0.012563806779865077
0.09662513191437738
0.28370438538714593
-0.2232635190558755


# - Indexing, accessing, changing elements in ndarrays

In [49]:
# Rank-1 array of the integers 1..5, used for the indexing examples that follow.
x = np.arange(1, 6)
print(x)

[1 2 3 4 5]


In [50]:
# ndarrays are 0-indexed (like plain Python lists), so index 3 is the FOURTH element of x.
print(x[3])

4


**Note: ndarrays are MUTABLE.**
> See below:

In [52]:
# Assigning through an index changes the array in place: the THIRD element goes from 3 to 31.
x[2] = 31
print(x)

[ 1  2 31  4  5]


In [56]:
# Even integers 2, 4, ..., 20 arranged as 5 rows of 2.
# np.arange states an integer progression directly; np.linspace with dtype=int
# computes floats first and then truncates them, which is fragile.
y = np.arange(2, 21, 2).reshape(5, 2)
print(y)

[[ 2  4]
 [ 6  8]
 [10 12]
 [14 16]
 [18 20]]


In [57]:
# Rank-2 indexing is [row, column]: row index 2 and column index 1 is the 3rd row, 2nd column (= 12).
print(y[2, 1])

12


In [58]:
# Overwrite the element in the 3rd row and 2nd column: 12 becomes 34.
y[2, 1] = 34
print(y)

[[ 2  4]
 [ 6  8]
 [10 34]
 [14 16]
 [18 20]]


# - Delete elements from ndarrays

## for rank 2 arrays:

**additional argument** to specify: **axis**
> axis = 0 for selecting rows 
> axis = 1 for selecting columns

In [59]:
# Rank-1 array of the integers 1..5, used for the deletion examples that follow.
x = np.arange(1, 6)
print(x)

[1 2 3 4 5]


In [60]:
# np.delete returns a new array without the listed indices: here the FIRST (0) and FIFTH/LAST (4) elements.
x = np.delete(x, obj=[0, 4])
print(x)

[2 3 4]


In [61]:
# 3x3 array holding 1..9, filled row by row.
Y = np.arange(1, 10).reshape(3, 3)
print(Y)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [62]:
# axis=0 addresses rows, so index 0 removes the FIRST ROW of Y.
W = np.delete(Y, obj=0, axis=0)
print(W)

[[4 5 6]
 [7 8 9]]


In [63]:
# axis=1 addresses columns, so index 1 removes the SECOND COLUMN of Y.
V = np.delete(Y, obj=1, axis=1)
print(V)

[[1 3]
 [4 6]
 [7 9]]


In [64]:
# A list of indices removes several columns at once: here the first AND second COLUMNS of Y.
B = np.delete(Y, obj=[0, 1], axis=1)
print(B)

[[3]
 [6]
 [9]]


# - Add values to ndarrays

## The append function

### in rank 1 arrays:

In [65]:
# Rank-1 array of the integers 1..5, used for the append examples that follow.
x = np.arange(1, 6)
print(x)

[1 2 3 4 5]


In [66]:
# np.append returns a new array with the single value 6 added at the end of rank-1 x.
x = np.append(x, values=6)
print(x)

[1 2 3 4 5 6]


In [67]:
# Passing a list appends several values at once: 7 AND 8 go to the end of rank-1 x.
x = np.append(x, values=[7, 8])
print(x)

[1 2 3 4 5 6 7 8]


### in rank 2 arrays:

In [71]:
# 3x3 array holding 1..9, filled row by row.
y = np.arange(1, 10).reshape((3, 3))
print(y)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [74]:
# Appending along axis 0 (rows): the nested list [[10, 11, 12]] is added as a new last row of y.
y = np.append(y, values=[[10, 11, 12]], axis=0)
print(y)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [77]:
# Appending along axis 1 (columns): the values are given as a column, one single-item list per row.
y = np.append(
    y,
    [[-1], [-2], [-3], [-4]],
    axis=1,
)
print(y)

[[ 1  2  3 -1]
 [ 4  5  6 -2]
 [ 7  8  9 -3]
 [10 11 12 -4]]


# - Insert values to ndarrays (in between other values, not to the end)

## in rank 1 arrays:

In [80]:
# Rank-1 array with a gap between 2 and 5, to be filled by np.insert.
x = np.array([1, 2, 5, 6])
print(x)

[1 2 5 6]


In [81]:
# Insert 3 and 4 so that they start at index 2, i.e. between the existing 2 and 5.
x = np.insert(x, 2, values=[3, 4])
print(x)

[1 2 3 4 5 6]


## in rank 2 arrays: (inserting an entire row or column)

In [82]:
# 2x3 array that lacks the middle row [4, 5, 6].
y = np.array([
    [1, 2, 3],
    [7, 8, 9],
])
print(y)

[[1 2 3]
 [7 8 9]]


In [84]:
# Insert [4, 5, 6] as the second row (index = 1) along axis 0 (rows).
# The 2x3 base array is rebuilt here rather than read from y: rebinding
# y = np.insert(y, ...) adds one more [4, 5, 6] row every time the cell is re-run.
y = np.insert(np.array([[1, 2, 3], [7, 8, 9]]), 1, [[4, 5, 6]], axis=0)
print(y)

[[1 2 3]
 [4 5 6]
 [4 5 6]
 [7 8 9]]


In [85]:
# Along axis 1 (columns), the scalar 5 is inserted as a new third column (index = 2) in every row of y.
y = np.insert(y, 2, values=5, axis=1)
print(y)

[[1 2 5 3]
 [4 5 5 6]
 [4 5 5 6]
 [7 8 5 9]]


# - Stack NumPy arrays

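**condition: the arrays we want to stack MUST have matching sizes along every axis except the one they are stacked along.** (For example, a rank 1 array of 3 values can be stacked vertically on a 2x3 array, because both have 3 columns.)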

## On top of each other (vertical stacking):

In [87]:
# Two rank-1 arrays of equal length become the two rows of a rank-2 array.
a = np.arange(1, 6)
b = np.arange(6, 11)

I = np.vstack([a, b])
print(I)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]


In [90]:
# A rank-1 array of 3 values is stacked on top of a 2x3 array, giving a 3x3 result.
c = np.array([2, 7, 8])
d = np.arange(6).reshape(2, 3)

J = np.vstack([c, d])
print(J)

[[2 7 8]
 [0 1 2]
 [3 4 5]]


## Side by side (horizontal stacking):

In [93]:
# For rank-1 inputs, horizontal stacking joins them end to end into one longer rank-1 array.
M = np.hstack([a, b])
print(M)

[ 1  2  3  4  5  6  7  8  9 10]


In [94]:
# Two 3x3 arrays placed side by side give a 3x6 array.
e = np.arange(9).reshape(3, 3)
f = np.arange(10, 19).reshape(3, 3)

N = np.hstack([e, f])
print(N)

[[ 0  1  2 10 11 12]
 [ 3  4  5 13 14 15]
 [ 6  7  8 16 17 18]]


# - Slicing ndarrays

> Access subsets of NumPy arrays with **slicing**, instead of accessing one element at a time.

> 3 ways of slicing:

| Syntax               | Meaning                                                                      |
|----------------------|------------------------------------------------------------------------------|
| `ndarray[start:end]` | specified start and specified end indexes                                    |
| `ndarray[start:]`    | no specified end, so the slice runs through the last value of the array      |
| `ndarray[:end]`      | no specified start, so the slice starts at the first value of the array      |

> ending index is always exclusive.
          

In [95]:
# 4x5 array holding 1..20, filled row by row.
x = np.arange(1, 21).reshape((4, 5))
print(x)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]


In [96]:
# x[rows, columns]: rows from index 2 to the end, columns 1 up to (not including) 4.
z = x[2:, 1:4]
print(z)

[[12 13 14]
 [17 18 19]]


In [99]:
# Row index 1 with a bare ':' for the columns selects the entire second row of x.
w = x[1, :]
print(w)

[ 6  7  8  9 10]


# IMPORTANT:

## - z and w are now "views of x": they aren't new arrays holding their own copy of one part of x; they refer to the same data as x.
## - SO making changes to w or z will ALSO change the corresponding parts in x !

In [102]:
# z is the slice x[2:, 1:4] taken earlier.
# Writing to z[1, 1] replaces 18 with 529 in z.
z[1, 1] = 529
print(z, '\n')

# x is printed as well, to compare it with z after the assignment.
print(x, '\n')

[[ 12  13  14]
 [ 17 529  19]] 

[[  1   2   3   4   5]
 [  6   7   8   9  10]
 [ 11  12  13  14  15]
 [ 16  17 529  19  20]] 



## Above:

You can see that the change in z also occurred in x.

## > Creating NEW arrays (not views) that are slices of another array
### Using NumPy's copy function/method

In [103]:
# Rebuild the 4x5 array holding 1..20 so the copy example starts from clean data.
x = np.arange(1, 21).reshape((4, 5))
print(x)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]


In [107]:
# Copy as a METHOD of the slice; the function form z = np.copy(x[2:, 1:4]) can be written instead.
z = x[2:, 1:4].copy()
print(z)

[[12 13 14]
 [17 18 19]]


In [108]:
# Change one element of the copy z.
z[1, 2] = 281
print(z, '\n')

# x IS NOT AFFECTED BY THE CHANGE IN Z NOW
print(x, '\n')

[[ 12  13  14]
 [ 17  18 281]] 

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]] 



> Using np.copy(), we have created an array 'z' completely independent of x.

# - Extracting values along the main diagonal of an ndarray

In [109]:
# 4x5 array holding 1..20, used for the diagonal examples that follow.
x = np.arange(1, 21).reshape((4, 5))
print(x)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]


In [111]:
# np.diag(x) extracts the main-diagonal values of x; .copy() makes z a new array independent of x.
z = np.diag(x).copy()
print(z)

[ 1  7 13 19]


# - Extracting values ABOVE the main diagonal of an ndarray

print(x, '\n')
print('Diagonal of x is:', np.diag(x), '\n')

# k = 1 selects the diagonal one step ABOVE the main one (the default k = 0 gives the main diagonal).
j = np.diag(x, 1)
print(j, '\n')

# - Extracting the values BELOW the main diagonal of an ndarray

In [116]:
print(x, '\n')
print('Diagonal of x is:', np.diag(x), '\n')

# k = -1 selects the diagonal one step BELOW the main one (the default k = 0 gives the main diagonal).
f = np.diag(x, -1)
print(f, '\n')

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]] 

Diagonal of x is: [ 1  7 13 19] 

[ 6 12 18] 



# - Extracting the unique elements in an ndarray

> Using NumPy's unique function

In [117]:
# 3x3 array in which 2, 3 and 9 each occur more than once.
x = np.array([
    [1, 2, 3],
    [3, 2, 9],
    [2, 7, 9],
])
print(x)

[[1 2 3]
 [3 2 9]
 [2 7 9]]


In [119]:
# np.unique returns each distinct value of the rank-2 3x3 array x once, as a sorted rank-1 array.
print(np.unique(x))

[1 2 3 7 9]


# - Boolean Indexing

## Why:
> for selecting elements in an array using logical arguments (if element satisfies a condition) instead of explicit indices.
> useful when we don't know the indices of the elements we want (e.g. in 1000x1000 arrays) and we want a specific subset of elements that satisfy a specific condition (e.g. elements larger than 10)


In [120]:
# 5x5 array holding 0..24, filled row by row.
x = np.arange(25).reshape((5, 5))
print(x)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


Select elements greater than 10:

In [122]:
# Instead of indices, the boolean expression x > 10 selects the subset of elements of x that satisfy it.
print(x[x > 10])

[11 12 13 14 15 16 17 18 19 20 21 22 23 24]


Other example:

In [126]:
# Combine conditions with '&' (not 'and'), and keep the parentheses around each comparison
# so the code works as expected.
print(x[(x > 10) & (x < 17)])

[11 12 13 14 15 16]


Changing the values of a specific subset of elements of the array x using boolean indexing:

In [129]:
# Assigning through a boolean index overwrites, in place, every element of x greater than 7.
x[x > 7] = -15
print(x)

[[  0   1   2   3   4]
 [  5   6   7 -15 -15]
 [-15 -15 -15 -15 -15]
 [-15 -15 -15 -15 -15]
 [-15 -15 -15 -15 -15]]


# - Set operations