In [1]:
# numpy and scipy for scientific computations

# perform mathematical computations in a grid (matrix)
# numpy has many builtin functions to manipulate data

import numpy as np
import scipy

In [2]:
# Array examples:
# a 1D array behaves much like a Python list, but is stored as a numpy.ndarray
array_1d = np.array([1, 2, 3, 4, 5, 6])

# contents first, then the container type, one per line
print(array_1d, type(array_1d), sep="\n")

[1 2 3 4 5 6]
<class 'numpy.ndarray'>


In [3]:
# 2D array: built from a list of rows
# an ndarray holds a single dtype, so mixing strings and integers makes NumPy
# store every element as a string (1, 2, 3 become '1', '2', '3')
array_2d = np.array([
    ['a', 'b', 'c'],
    [1, 2, 3],
])

# contents first, then the container type
print(array_2d, type(array_2d), sep="\n")

[['a' 'b' 'c']
 ['1' '2' '3']]
<class 'numpy.ndarray'>


In [4]:
# shape - a tuple with one entry per dimension:
# (no of rows, no of columns) for a 2D array, (length,) for a 1D array
print(array_1d.shape, array_2d.shape, sep="\n")

(6,)
(2, 3)


In [5]:
# indexing - positions start at 0, just like Python lists
print(array_1d[0])

# for multi-dimensional arrays, one index per axis picks out any element:
# array[i, j] is row i, column j (equivalent to array[i][j])

# a single index selects a whole row - here the first one
print(array_2d[0])

# the element in row 0, column 1
print(array_2d[0, 1])

# the element in row 1, column 2
print(array_2d[1, 2])

1
['a' 'b' 'c']
b
3


In [6]:
# as with lists, ':' slices select a subset; a 2D array takes one slice per axis
print(array_2d)

# every row from index 1 onwards, all columns
print(array_2d[1:, :])

# all rows, columns from index 1 onwards
print(array_2d[:, 1:])

# rows from index 1 onwards and columns from index 2 onwards
print(array_2d[1:, 2:])

[['a' 'b' 'c']
 ['1' '2' '3']]
[['1' '2' '3']]
[['b' 'c']
 ['2' '3']]
[['3']]


In [7]:
# integer arrays
# the numbers 1..9 laid out row by row in a 3x3 grid
array1_2d = np.arange(1, 10).reshape(3, 3)
print(array1_2d)

# to pick the elements at [0,0], [1,1] and [2,2] in one go, pass a list of row
# indices and a matching list of column indices instead of three separate lookups
subarray = array1_2d[[0, 1, 2], [0, 1, 2]]
print(subarray)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[1 5 9]


In [8]:
# boolean indexing
# a comparison yields a True/False mask with the same shape as the array;
# indexing with that mask keeps only the elements where it is True
above_four = subarray > 4

# the mask is printed wrapped in a one-element list
print([above_four])
subarray[above_four]

[array([False,  True,  True], dtype=bool)]


array([5, 9])

In [9]:
# creating standard arrays using builtin functions

# zeros: floating point unless a dtype is given
array_zeros = np.zeros((2, 2))
array1_zeros = np.zeros((2, 2), dtype=np.int64)

# array of ones
array_ones = np.ones((3, 5), dtype=np.int64)

# constant-valued array built from an explicit shape
array_constantvalue_2 = np.full(array_ones.shape, 2)

# constant-valued array that copies the shape and dtype of an existing array
array_constantvalue_3 = np.full_like(array_ones, 3)

# identity matrix
i_matrix = np.identity(5)

# the separator puts a line break between consecutive arrays
print(
    array_zeros, array1_zeros, array_ones,
    array_constantvalue_2, array_constantvalue_3, i_matrix,
    sep=" \n ",
)

[[ 0.  0.]
 [ 0.  0.]] 
 [[0 0]
 [0 0]] 
 [[1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]] 
 [[2 2 2 2 2]
 [2 2 2 2 2]
 [2 2 2 2 2]] 
 [[3 3 3 3 3]
 [3 3 3 3 3]
 [3 3 3 3 3]] 
 [[ 1.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  1.]]


In [10]:
# creating an array with random numbers
# 5x5 floats drawn uniformly from the half-open interval [0.0, 1.0)
array_random = np.random.random_sample(size=(5, 5))
print(array_random)

[[ 0.18987992  0.05281291  0.38546632  0.39751724  0.53867534]
 [ 0.23382933  0.53777605  0.26527885  0.58409977  0.05441301]
 [ 0.06665216  0.64614621  0.96538645  0.28617158  0.38090234]
 [ 0.32646778  0.01200539  0.32878002  0.97399995  0.364183  ]
 [ 0.73932982  0.92823681  0.94851038  0.55811902  0.85602916]]


In [11]:
# changing the size and shape of the array
# transposing swaps rows and columns
array_t = array_2d.T
print(array_2d, array_t, sep=" \n ")
print(array_2d.shape, "=>", array_t.shape)

[['a' 'b' 'c']
 ['1' '2' '3']] 
 [['a' '1']
 ['b' '2']
 ['c' '3']]
(2, 3) => (3, 2)


In [12]:
# lay the 5x5 random array out as a single row of 25 values
array_reshape = array_random.reshape(1, 25)
print(array_reshape.shape)

(1, 25)


In [13]:
# lay the 5x5 random array out as a single column of 25 values
array_reshape = array_random.reshape(25, 1)
print(array_reshape.shape)

(25, 1)


In [14]:
# Mathematical operations can be performed element wise

# two 5x5 integer arrays; the upper bound passed to randint is exclusive
array1 = np.random.randint(low=1, high=10, size=(5, 5))
array2 = np.random.randint(low=1, high=17, size=(5, 5))
print(array1, array2, sep=" \n ")

[[9 1 2 4 2]
 [1 6 6 8 7]
 [8 7 7 7 4]
 [5 6 6 9 1]
 [7 2 3 8 1]] 
 [[ 5  4  9 15  2]
 [ 3  6 16 12  9]
 [11  1  7 11 13]
 [ 1  6 12 11  7]
 [ 6 10  9 14 12]]


In [15]:
# element wise addition with the + operator
print(array1 + array2)

# the same operations are also available as functions
print(np.add(array1, array2), np.subtract(array1, array2), sep="\n")

[[14  5 11 19  4]
 [ 4 12 22 20 16]
 [19  8 14 18 17]
 [ 6 12 18 20  8]
 [13 12 12 22 13]]
[[14  5 11 19  4]
 [ 4 12 22 20 16]
 [19  8 14 18 17]
 [ 6 12 18 20  8]
 [13 12 12 22 13]]
[[  4  -3  -7 -11   0]
 [ -2   0 -10  -4  -2]
 [ -3   6   0  -4  -9]
 [  4   0  -6  -2  -6]
 [  1  -8  -6  -6 -11]]


In [16]:
# broadcasting - NumPy stretches the smaller array so that it can be combined
# element wise with the larger one
# the shapes must be compatible: compared from the last dimension backwards,
# each pair of dimensions has to be equal, or one of them has to be 1

array3 = np.array([1,1,1,1,1])
print(array1)

# shape (5,) lines up with the columns, so array3 is added to every row
print(array1 + array3)

# .T does nothing to a 1D array, so array3 is given shape (5, 1) instead;
# element i of array3 is then added to every entry of row i
print(array1 + array3[:, np.newaxis])

[[9 1 2 4 2]
 [1 6 6 8 7]
 [8 7 7 7 4]
 [5 6 6 9 1]
 [7 2 3 8 1]]
[[10  2  3  5  3]
 [ 2  7  7  9  8]
 [ 9  8  8  8  5]
 [ 6  7  7 10  2]
 [ 8  3  4  9  2]]
[[10  2  3  5  3]
 [ 2  7  7  9  8]
 [ 9  8  8  8  5]
 [ 6  7  7 10  2]
 [ 8  3  4  9  2]]


In [17]:
# dot product / matrix multiplication
# the inner dimensions must agree: (m x n) times (n x p) gives an (m x p) result
array1 @ array2

array([[ 86,  88, 177, 241, 105],
       [139, 164, 306, 339, 274],
       [169, 163, 353, 414, 267],
       [124, 126, 300, 326, 217],
       [ 88, 101, 221, 264, 139]])

In [18]:
# merge arrays by stacking them
# vertical stacking appends the rows of array2 below those of array1;
# the arrays are passed together as one tuple
np.concatenate((array1, array2), axis=0)

array([[ 9,  1,  2,  4,  2],
       [ 1,  6,  6,  8,  7],
       [ 8,  7,  7,  7,  4],
       [ 5,  6,  6,  9,  1],
       [ 7,  2,  3,  8,  1],
       [ 5,  4,  9, 15,  2],
       [ 3,  6, 16, 12,  9],
       [11,  1,  7, 11, 13],
       [ 1,  6, 12, 11,  7],
       [ 6, 10,  9, 14, 12]])

In [19]:
# horizontal stacking places the columns of array2 to the right of those of array1
np.concatenate((array1, array2), axis=1)

array([[ 9,  1,  2,  4,  2,  5,  4,  9, 15,  2],
       [ 1,  6,  6,  8,  7,  3,  6, 16, 12,  9],
       [ 8,  7,  7,  7,  4, 11,  1,  7, 11, 13],
       [ 5,  6,  6,  9,  1,  1,  6, 12, 11,  7],
       [ 7,  2,  3,  8,  1,  6, 10,  9, 14, 12]])

In [22]:
# scipy modules for mathematical computations
# the spatial module measures the distance between 2 points, e.g. euclidean, cosine, correlation etc
# pdist - pairwise distance between the rows in a numpy array

from scipy.spatial.distance import cosine, pdist, correlation, squareform
array1 = np.random.randint(1,5, [1,3])
array2 = np.random.randint(1,7, [1,3])

# correlation() only accepts 1-D vectors, so the (1, 3) row arrays are flattened
# at the call site (current SciPy raises "Input vector should be 1-D." otherwise);
# array1 and array2 themselves keep their 2D shape
correlation(array1.ravel(), array2.ravel())

1.9819805060619657

In [21]:
# stack the two arrays so that each row is one point
points = np.vstack((array1, array2))

# pdist returns the pairwise distances in condensed form; squareform expands
# them into a symmetric matrix with zeros on the diagonal
points_eu = squareform(pdist(points, metric='euclidean'))
points_cos = squareform(pdist(points, metric='cosine'))

print(points_eu, points_cos, sep=" \n ")

[[ 0.          2.23606798]
 [ 2.23606798  0.        ]] 
 [[ 0.         0.0741799]
 [ 0.0741799  0.       ]]
