# Numpy

### np.array vs. List

NumPy is one of the fundamental packages for scientific computing in Python. It facilitates mathematical operations on large amounts of data and, compared with built-in sequences (lists), it is more efficient and requires less code.

For a more detailed comparison, see [Python lists vs. NumPy arrays](https://www.geeksforgeeks.org/python-lists-vs-numpy-arrays/).

In [1]:
import numpy as np

In [2]:
a = np.array([1,4,8])

In [3]:
a[1]

4

In [4]:
a[::2]

array([1, 8])

In [5]:
a

array([1, 4, 8])

In [7]:
b = np.array([[1,23],[1,4]])

In [8]:
b

array([[ 1, 23],
       [ 1,  4]])

In [9]:
c = np.array([(1,23),(1,4)])

In [10]:
c

array([[ 1, 23],
       [ 1,  4]])

In [11]:
b == c

array([[ True,  True],
       [ True,  True]])

In [13]:
np.zeros((4,1)) #shape is given as (rows, columns)

array([[0.],
       [0.],
       [0.],
       [0.]])

In [14]:
np.zeros((1,4))

array([[0., 0., 0., 0.]])

In [35]:
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [21]:
d = np.ones((2,3,4)) #shape (2,3,4): 2 blocks, each with 3 rows and 4 columns
d

In [28]:
d[1][1][1] = 2 #index order: block, row, column

In [30]:
d[0][0][0] = 3

In [33]:
d[0][1][0] = 45

In [36]:
d[0][2][3] = 8

In [48]:
d[0,2,3]

8.0

In [38]:
d.shape

(2, 3, 4)

In [39]:
d.max()

45.0

**WARNING**: `ndim` returns the number of dimensions (axes) of the array, i.e. the length of `shape`; it has nothing to do with the number of columns.

In [42]:
d.ndim 

3

In [43]:
len(d.shape)

3

In [45]:
d.size == 2*3*4

True

In [46]:
d.dtype

dtype('float64')

In [47]:
d.astype('int')

array([[[ 3,  1,  1,  1],
        [45,  1,  1,  1],
        [ 1,  1,  1,  8]],

       [[ 1,  1,  1,  1],
        [ 1,  2,  1,  1],
        [ 1,  1,  1,  1]]])

## Mathematical operations

In [51]:
#subtraction
a = np.array([(1,2,3),(8,30,14)])
b = np.array([(3,4,1),(2,10,3)])
print(a-b)
np.subtract(a,b)

[[-2 -2  2]
 [ 6 20 11]]


array([[-2, -2,  2],
       [ 6, 20, 11]])

In [52]:
#addition
print(a+b)
np.add(a,b)

[[ 4  6  4]
 [10 40 17]]


array([[ 4,  6,  4],
       [10, 40, 17]])

In [53]:
#division
print(a/b)
np.divide(a,b)

[[0.33333333 0.5        3.        ]
 [4.         3.         4.66666667]]


array([[0.33333333, 0.5       , 3.        ],
       [4.        , 3.        , 4.66666667]])

In [62]:
#element-wise multiplication
print(a*b)
np.multiply(a,b)

#proper matrix multiplication (dot product)
a_2 = a.reshape((3,2)) #reshape returns a new array; b.resize(3,2) would instead change b itself in place
b_2 = b.reshape((3,2))
a.dot(b_2) #(2,3) x (3,2) -> [[35, 17], [194, 134]]; the matrix product is not commutative, so this differs from
b.dot(a_2) #(2,3) x (3,2) -> [[45, 52], [122, 126]]

[[  3   8   3]
 [ 16 300  42]]


array([[ 35,  17],
       [194, 134]])

In [76]:
## Aggregation functions
a.sum() #sum of all elements
a.min() #smallest element
a.max(axis = 0) #axis = 0: maximum of each column; axis = 1 would give the maximum of each row
print(b)
b.cumsum() #cumulative sum over the flattened array
a.mean()
np.median(b)
np.corrcoef(a) #correlation coefficient matrix
np.std(a) #last expression of the cell, so only this value is displayed

[[ 3  4  1]
 [ 2 10  3]]


10.110500592068734

## Manipulating Arrays

In [94]:
i = np.transpose(b)
print(i)
z = b.reshape(3,2)
z

[[ 3  2]
 [ 4 10]
 [ 1  3]]


array([[ 3,  4],
       [ 1,  2],
       [10,  3]])

In [95]:
np.reshape(b,(3,2)) #change shape (as commented before, watch it when using resize)

array([[ 3,  4],
       [ 1,  2],
       [10,  3]])

In [101]:
print(np.ravel(b))
b.ravel().reshape((3,2))

[ 3  4  1  2 10  3]


array([[ 3,  4],
       [ 1,  2],
       [10,  3]])

In [105]:
np.append(a,b).reshape(4,3)

array([[ 1,  2,  3],
       [ 8, 30, 14],
       [ 3,  4,  1],
       [ 2, 10,  3]])

In [108]:
np.concatenate((a,b),axis = 0) #row-wise

array([[ 1,  2,  3],
       [ 8, 30, 14],
       [ 3,  4,  1],
       [ 2, 10,  3]])

In [115]:
np.vstack((a,b)) #vertically 

array([[ 1,  2,  3],
       [ 8, 30, 14],
       [ 3,  4,  1],
       [ 2, 10,  3]])

In [119]:
np.r_[a,b]

array([[ 1,  2,  3],
       [ 8, 30, 14],
       [ 3,  4,  1],
       [ 2, 10,  3]])

In [112]:
print(np.insert(a,3,25)) #no axis given, so a is flattened first; 25 is then inserted before index 3
np.delete(np.insert(a,3,25),[3]) #delete the element at index 3 again (result stays flattened)

[ 1  2  3 25  8 30 14]


array([ 1,  2,  3,  8, 30, 14])

In [118]:
np.concatenate((a,b),axis = 1) #column-wise

array([[ 1,  2,  3,  3,  4,  1],
       [ 8, 30, 14,  2, 10,  3]])

In [117]:
np.hstack((a,b)) #horizontally

array([[ 1,  2,  3,  3,  4,  1],
       [ 8, 30, 14,  2, 10,  3]])

## Useful functions
TBC (to be completed)

In [120]:
np.where(a < 5, 99999, a-8) #element-wise if/else (like ifelse in R): 99999 where a < 5, a-8 elsewhere

array([[99999, 99999, 99999],
       [    0,    22,     6]])

In [124]:
np.linspace(start = 1,stop = 10,num = 11) #11 evenly spaced values from 1 to 10, both endpoints included

array([ 1. ,  1.9,  2.8,  3.7,  4.6,  5.5,  6.4,  7.3,  8.2,  9.1, 10. ])

In [128]:
np.random.seed(20) #to be able to reproduce results

x = np.random.randint(low = 0,high = 10,size = 5) #high is exclusive
print(x)

bins = np.array([2,4,6,8,10]) #bins created

np.digitize(x,bins)#which bin it belongs to

[3 9 4 6 7]


array([1, 4, 2, 3, 3], dtype=int64)

In [129]:
np.repeat('6',6)

array(['6', '6', '6', '6', '6', '6'], dtype='<U1')

In [142]:
np.random.choice([0,1], size = 4, p = [0.8,0.2]) #p: probability of each entry; size defaults to None, which returns a single value

array([0, 0, 0, 0])

In [149]:
np.random.seed(4)
np.array(np.random.randint(low=0,high=100,size=20)).reshape((4,5)) 

array([[46, 55, 69,  1, 87],
       [72, 50,  9, 58, 94],
       [55, 55, 57, 36, 50],
       [44, 38, 52,  3,  0]])

In [152]:
np.random.seed(4) #same seed as the previous cell, so the same 4x5 array is generated
np.argmax(np.array(np.random.randint(low=0,high=100,size=20)).reshape((4,5)), axis = 0) #axis = 0: for each column, the row index of its maximum

array([1, 0, 0, 1, 1], dtype=int64)