<a href="https://colab.research.google.com/github/araldi/Python_for_biomedical_data_analysis/blob/main/04_Intro_to_NumPy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NumPy

NumPy’s main object is the homogeneous multidimensional array. It is a table of elements (usually numbers), all of the same type, indexed by a tuple of non-negative integers. In NumPy, dimensions are called axes.

## Why NumPy?



*   **More speed**: NumPy uses algorithms written in C that complete in nanoseconds rather than seconds. 
*   **Fewer loops**: NumPy helps you to reduce loops and keep from getting tangled up in iteration indices.
* **Clearer code**: Without loops, your code will look more like the equations you’re trying to calculate.

* **Better quality**: There are thousands of contributors working to keep NumPy fast, friendly, and bug free.

In [None]:
# Always initialize the environment by importing the libraries you need
import numpy as np

### Manually create NumPy arrays

In [None]:
# create a NumPy array
a = np.array([6, 7, 8])
a

In [None]:
type(a)

In [None]:
a.ndim # dimensions of the array

In [None]:
a.shape # shape of the array

In [None]:
a.dtype # datatype of the array

In [None]:
a.size  # number of elements in the array

In [None]:
# change an element of the array
a[0] = 0

In [None]:
b = np.array([[1,2,3],[4,5,6]])   # Create a rank 2 array
print(b)

In [None]:
b.ndim

In [None]:
b.shape

In [None]:
# manually create a 3-dimensional array: 2 blocks, each with 3 rows and 4 columns
c = np.array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [None]:
c.shape

In [None]:
c.ndim

In [None]:
# read (or change) individual elements of the array
print(b[0, 0], b[0, 1], b[1, 0])  
# the coordinates are given as [row_index, column_index]

# assigning through the same [row_index, column_index] overwrites the element in place
b[0, 0] = 5
b[0, 1] = 3
b[1, 0] = 0

# print the same three positions again to show the new values
print(b[0, 0], b[0, 1], b[1, 0])  

### Use NumPy functions to create arrays

In [None]:
# one-dimensional array with the integers from 0 up to n - 1 (n itself is excluded)
n = 6
np.arange(n)

In [None]:
# evenly spaced numbers over a specified interval (both endpoints are included by default)
np.linspace(3,20,6) # start, stop, number of evenly spaced elements in the interval

In [None]:
# change the shape of an array 
# in this case we make a 2,3 array from a linear array
np.linspace(3,20,6).reshape(2,3)


In [None]:
# change the type of number in the array with dtype
np.linspace(3,20,6, dtype=int).reshape(2,3)


In [None]:
# numbers spaced evenly on a log scale

np.logspace(1, 10, 10, base=10)

In [None]:
a = np.zeros((2,2))  # Create an array of all zeros
a

In [None]:
# build a 3-dimensional array of zeros; the shape tuple lists the
# length of each axis in order: (axis 0, axis 1, axis 2)
i, j, k = 2, 3, 4
b = np.zeros((i, j, k))

In [None]:
b

In [None]:
c = np.ones((1,2))   # Create an array of all ones
c

In [None]:
c.shape

In [None]:
d = np.full((2,2), 7) # Create a constant array
d


In [None]:
# Create a 2x2 array filled with uniform random floats in the half-open interval [0.0, 1.0)
e = np.random.random((2,2)) 
e

In [None]:
# Create a 7-value long array filled with uniform random floats in [0.0, 1.0)
e = np.random.random(7) 
e

In [None]:
# Create an array with random uniform float numbers  from a specific range

np.random.uniform(-2,2, 7)

In [None]:
# standard normal distributed random numbers
np.random.randn(5,5)

In [None]:
# Create a 5x4 array filled with random integers from the range [50, 100):
# the lower bound 50 can be drawn, the upper bound 100 cannot
f = np.random.randint(50, 100, (5,4))

f

### Array indexing

#### Integer indexing

In [None]:
a = np.array([[1,2,3], [4,5,6], [7,8,9]])

a

In [None]:
# access the middle row of the array
a[[1],:]

In [None]:
# access the last row of the array
a[a.shape[0]-1,:]

In [None]:
# access the last column of the array
a[:,a.shape[1]-1]

#### Integer array indexing

In [None]:
# pick the elements at (row, column) = (0, 0), (1, 1) and (2, 0)
print(a[[0, 1, 2], [0, 1, 0]]) # row coordinates first, then the column coordinates

# which is equivalent to:
print(np.array([a[0, 0], a[1, 1], a[2, 0]]))

In [None]:
# take the elements on the diagonal of a by pairing row index i with column index i
# (the two index arrays must have the same length, so this form needs a square array)
print(a[np.arange(a.shape[0]), np.arange(a.shape[1])])

#### Boolean array indexing

In [None]:
a

In [None]:

bool_idx = (a > 5)  # Find the elements of a that are bigger than 5;
                    # this returns a numpy array of Booleans of the same
                    # shape as a, where each slot of bool_idx tells
                    # whether that element of a is > 5.

print(bool_idx)

In [None]:
a[bool_idx]

In [None]:
a[a>5]

### Mathematical operations on arrays

In [None]:
x = np.array([[1,2],[3,4]], dtype=np.float64)

x

In [None]:
# Sum of a constant

y = x + 2 

y

In [None]:
# Elementwise sum

z = np.add(x, y)

z

In [None]:
# Difference of a constant
z - 4


In [None]:
# Elementwise difference

np.subtract(z, x)

In [None]:
# Elementwise product
print(x * y)
print(np.multiply(x, y))

In [None]:
# Elementwise division
print(x / y)
print(np.divide(x, y))

In [None]:
# Elementwise square root
print(np.sqrt(x))

In [None]:
# Elementwise power (this is like x^y for every element of x and y)

np.power(x, y)

In [None]:
np.power(x, 3)

In [None]:
# Calculate the sum of every element in the array, or by axis
a = np.array([[1,2,3], [4,5,6], [7,8,9]])


print(np.sum(a))  # Compute sum of all elements; 
print(np.sum(a, axis=0))  # Compute sum of each column
print(np.sum(a, axis=1))  # Compute sum of each row


In [None]:
# find the maximum value

print(np.max(a))  # Compute maximum of all elements; 
print(np.max(a, axis=0))  # Compute maximum of each column
print(np.max(a, axis=1))  # Compute maximum of each row


In [None]:
# find the mean 
print(np.mean(a))
print(np.mean(a, axis=0))
print(np.mean(a, axis=1))

In [None]:
# find the standard deviation 
print(np.std(a))
print(np.std(a, axis=0))
print(np.std(a, axis=1))

In [None]:
# np.around() rounds to the given number of decimals (0 when omitted);
# values exactly halfway between two candidates are rounded to the nearest even one
f = np.random.uniform(-2,2, 7)

print(f)
print(np.around(f))

In [None]:
print(np.around(f, 2))

In [None]:
# np.floor() returns the floor of the input, element-wise.

print(np.floor(f))

In [None]:
# np.ceil() returns the ceiling of the input, element-wise.
print(np.ceil(f))

In [None]:
# Clip (limit) the values in an array to the defined min and max
g = np.random.uniform(-1,2, 7)
print(g)

In [None]:
# Bounds for np.clip: values of g below `lower` become `lower`, and values
# above `upper` become `upper`.
# The bounds are deliberately NOT called `min` / `max`: assigning to those
# names would shadow Python's built-in min() and max() functions for the
# rest of the session, and later calls such as min(a.shape) would fail with
# "TypeError: 'int' object is not callable".
lower = 0
upper = 1
print(np.clip(g, lower, upper))

#### Operations on specific positions of the array

In [None]:
# select the positions (with array indexing/slicing/etc), then perform the operation
a[np.arange(a.shape[0]), np.arange(a.shape[1])] += 10

# which is equivalent to:
# a[np.arange(a.shape[0]), np.arange(a.shape[1])] = a[np.arange(a.shape[0]), np.arange(a.shape[1])] + 10

In [None]:
a

In [None]:
a.shape

In [None]:
a.shape[0]

In [None]:
np.arange(a.shape[0])

In [None]:
np.arange(a.shape[1])

In [None]:
# NOTE: this applies the same in-place update to the diagonal a second time,
# so each diagonal element grows by another 10 every time this cell is run
a[np.arange(a.shape[0]), np.arange(a.shape[1])] += 10


In [None]:
a


## Pandas columns as NumPy arrays


In [None]:
import pandas as pd

#import the file
diamonds = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv')

In [None]:
diamonds

In [None]:
# calculate the mean of a column of the dataframe, in this case, price
np.mean(diamonds['price'])

In [None]:
np.mean(diamonds['depth'])

In [None]:
diamonds.describe()

In [None]:
diamonds.dtypes

In [None]:
# create a new column holding the row-wise sum of the x, y and z columns
# (axis=1 adds across the three columns, giving one value per row)

diamonds['new'] = np.sum(diamonds[['x','y','z']], axis =1)

diamonds['new']

In [None]:
np.sum(diamonds[['x','y','z']], axis =0)

# Exercises

#### Exercise 1

Raise each number from 1 to 10 to the power of 4

#### Exercise 2

Replace all odd numbers in the given array with 0

In [None]:
# starting array for the exercise: the integers 0 through 9
exercise_2 = np.arange(10)


#### Exercise 3


With NumPy, find which numbers between 10000 and 10000000 are multiples of 77.

Do the same without NumPy

When finished, rerun the cells adding

```
%%time
```
as the first line of each cell. You should see that NumPy is considerably faster.


#### Exercise 4

Create a one-dimensional array with 100000 random floats drawn from a standard normal distribution, and calculate its mean and standard deviation

#### Exercise 5

Create a two-dimensional array of random shape (each axis size between 1 and 10), populated by random floats from 0 to 1.
