In [1]:
# numpy and scipy for scientific computations

# perform mathematical computations in a grid (matrix)
# numpy has many builtin functions to manipulate data

import numpy as np
import scipy

In [2]:
# Array examples:
# a 1D array looks and indexes much like a Python list
array_1d = np.array([1, 2, 3, 4, 5, 6])

# show the values, then the container type, one per line
print(array_1d, type(array_1d), sep="\n")

[1 2 3 4 5 6]
<class 'numpy.ndarray'>


In [3]:
# 2D array - one inner list per row
# (the integers in the second row end up stored as strings, as the printout shows)
array_2d = np.array([
    ['a', 'b', 'c'],
    [1, 2, 3],
])

# contents first, then the type
print(array_2d, type(array_2d), sep="\n")

[['a' 'b' 'c']
 ['1' '2' '3']]
<class 'numpy.ndarray'>


In [4]:
# shape - a tuple with one entry per dimension
# for a 2D array that is (number of rows, number of columns)
print(array_1d.shape, array_2d.shape, sep="\n")

(6,)
(2, 3)


In [5]:
# indexing - positions start at 0, just as with lists
print(array_1d[0])

# a multi-dimensional array takes one index per axis: array[i, j]
# (array[i][j] selects the same element)

# a single index selects a whole row - here the first one
print(array_2d[0])

# row 0, column 1
print(array_2d[0, 1])

# row 1, column 2
print(array_2d[1, 2])

1
['a' 'b' 'c']
b
3


In [6]:
# as with lists, ':' selects a range - here along each axis of the array
# the whole array, for reference
print(array_2d)

# every row from index 1 onwards, all columns
print(array_2d[1:, :])

# all rows, columns from index 1 onwards
print(array_2d[:, 1:])

# rows from index 1 onwards and columns from index 2 onwards
print(array_2d[1:, 2:])

[['a' 'b' 'c']
 ['1' '2' '3']]
[['1' '2' '3']]
[['b' 'c']
 ['2' '3']]
[['3']]


In [7]:
# integer arrays
array1_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print(array1_2d)
# to pick the elements [0,0], [1,1] and [2,2] in one go, pass one array of
# row indices and one array of column indices instead of three separate lookups
subarray = array1_2d[np.arange(3), np.arange(3)]
print(subarray)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[1 5 9]


In [8]:
# boolean indexing
# a comparison yields a True/False mask; indexing with that mask keeps
# only the elements where the condition holds
print([np.greater(subarray, 4)])
subarray[np.greater(subarray, 4)]

[array([False,  True,  True], dtype=bool)]


array([5, 9])

In [9]:
# creating standard arrays using builtin functions

# zeros - float unless a dtype is given, here also as 64-bit integers
array_zeros = np.zeros((2, 2))
array1_zeros = np.zeros((2, 2), dtype=np.int64)

# ones
array_ones = np.ones((3, 5), dtype=np.int64)

# constant value 2, with the shape passed explicitly
array_constantvalue_2 = np.full(array_ones.shape, 2)

# constant value 3, modelled on an existing array
array_constantvalue_3 = np.full_like(array_ones, 3)

# identity matrix
i_matrix = np.identity(5)

# print every array; the separator matches passing "\n" between the arguments
print(
    array_zeros, array1_zeros, array_ones,
    array_constantvalue_2, array_constantvalue_3, i_matrix,
    sep=" \n ",
)

[[ 0.  0.]
 [ 0.  0.]] 
 [[0 0]
 [0 0]] 
 [[1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]] 
 [[2 2 2 2 2]
 [2 2 2 2 2]
 [2 2 2 2 2]] 
 [[3 3 3 3 3]
 [3 3 3 3 3]
 [3 3 3 3 3]] 
 [[ 1.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  1.]]


In [10]:
# a 5x5 array filled with random floats
# (no seed is set here, so the values differ from run to run)
array_random = np.random.random_sample((5, 5))
print(array_random)

array([[ 0.78550114,  0.8074743 ,  0.33712011,  0.07878866,  0.99486834],
       [ 0.69812488,  0.74716389,  0.73052002,  0.11337596,  0.05882218],
       [ 0.72172251,  0.89124892,  0.28828999,  0.39405262,  0.45046979],
       [ 0.75579718,  0.84014197,  0.19294201,  0.34553626,  0.42178699],
       [ 0.09073129,  0.74970314,  0.94062487,  0.35106124,  0.61508429]])

In [23]:
# changing the size and shape of the array
# transpose: rows become columns
array_t = array_2d.T
print(array_2d, array_t, sep=" \n ")
print(array_2d.shape, array_t.shape, sep=" => ")

[['a' 'b' 'c']
 ['1' '2' '3']] 
 [['a' '1']
 ['b' '2']
 ['c' '3']]
(2, 3) => (3, 2)


In [25]:
# lay the random array out as a single row of 25 values
array_reshape = array_random.reshape(1, 25)
print(array_reshape.shape)

(1, 25)


In [26]:
# lay the random array out as a single column of 25 values
array_reshape = array_random.reshape(25, 1)
print(array_reshape.shape)

(25, 1)


In [30]:
# Mathematical operations can be performed element wise

# two 5x5 arrays of random integers to operate on
array1 = np.random.randint(1, 10, size=(5, 5))
array2 = np.random.randint(1, 17, size=(5, 5))
print(array1, array2, sep=" \n ")

[[4 8 8 3 2]
 [9 3 4 9 9]
 [3 6 5 4 1]
 [4 6 4 9 8]
 [5 4 4 9 6]] 
 [[ 5  2 16 15 15]
 [ 5  9 11  8 13]
 [ 2  3 14 12 16]
 [ 4 11  5 14  5]
 [10 15  9 13 12]]


In [35]:
# element-wise addition with the + operator
print(array1 + array2)

# the same operations are also available as functions:
# np.add for the sum, np.subtract for the difference
print(np.add(array1, array2), np.subtract(array1, array2), sep="\n")

[[ 9 10 24 18 17]
 [14 12 15 17 22]
 [ 5  9 19 16 17]
 [ 8 17  9 23 13]
 [15 19 13 22 18]]
[[ 9 10 24 18 17]
 [14 12 15 17 22]
 [ 5  9 19 16 17]
 [ 8 17  9 23 13]
 [15 19 13 22 18]]
[[ -1   6  -8 -12 -13]
 [  4  -6  -7   1  -4]
 [  1   3  -9  -8 -15]
 [  0  -5  -1  -5   3]
 [ -5 -11  -5  -4  -6]]


In [46]:
# broadcasting - a 1D array can be added to every row of a matrix
# the arrays need to line up in at least one dimension

array3 = np.ones(5, dtype=int)
print(array1)
print(array1 + array3)
# transposing a 1D array leaves it unchanged, so this prints the same result
print(array1 + array3.T)

[[4 8 8 3 2]
 [9 3 4 9 9]
 [3 6 5 4 1]
 [4 6 4 9 8]
 [5 4 4 9 6]]
[[ 5  9  9  4  3]
 [10  4  5 10 10]
 [ 4  7  6  5  2]
 [ 5  7  5 10  9]
 [ 6  5  5 10  7]]
[[ 5  9  9  4  3]
 [10  4  5 10 10]
 [ 4  7  6  5  2]
 [ 5  7  5 10  9]
 [ 6  5  5 10  7]]


In [47]:
# dot product / matrix multiplication
# valid when the inner dimensions agree: (m x n) . (n x p) -> (m x p)
array1.dot(array2)

array([[108, 167, 297, 288, 331],
       [194, 291, 359, 450, 391],
       [ 81, 134, 213, 222, 235],
       [174, 293, 303, 386, 343],
       [149, 247, 279, 359, 308]])

In [49]:
# merging arrays by stacking them on top of each other (row-wise)
# the arrays are passed together as one sequence, not as separate arguments
np.vstack([array1, array2])

array([[ 4,  8,  8,  3,  2],
       [ 9,  3,  4,  9,  9],
       [ 3,  6,  5,  4,  1],
       [ 4,  6,  4,  9,  8],
       [ 5,  4,  4,  9,  6],
       [ 5,  2, 16, 15, 15],
       [ 5,  9, 11,  8, 13],
       [ 2,  3, 14, 12, 16],
       [ 4, 11,  5, 14,  5],
       [10, 15,  9, 13, 12]])

In [50]:
# stacking side by side (column-wise); again a single sequence of arrays
np.hstack([array1, array2])

array([[ 4,  8,  8,  3,  2,  5,  2, 16, 15, 15],
       [ 9,  3,  4,  9,  9,  5,  9, 11,  8, 13],
       [ 3,  6,  5,  4,  1,  2,  3, 14, 12, 16],
       [ 4,  6,  4,  9,  8,  4, 11,  5, 14,  5],
       [ 5,  4,  4,  9,  6, 10, 15,  9, 13, 12]])

In [61]:
# scipy modules for mathematical computations
# the spatial module measures the distance between 2 points, e.g. euclidean, cosine, correlation etc
# pdist - pairwise distance between the rows in a numpy array

from scipy.spatial.distance import cosine, pdist, correlation, squareform
array1 = np.random.randint(1,5, [1,3])
array2 = np.random.randint(1,7, [1,3])

# correlation() is documented for 1-D vectors, while array1/array2 are (1, 3)
# rows; flatten them for the call so it does not depend on 2-D input being
# squeezed implicitly. The arrays themselves keep their (1, 3) shape.
correlation(array1.ravel(), array2.ravel())

0.50000000000000011

In [63]:
# stack the two vectors as rows, then turn the condensed pairwise distances
# from pdist into square matrices
points = np.vstack((array1, array2))
points_eu = squareform(pdist(points, 'euclidean'))
points_cos = squareform(pdist(points, 'cosine'))

# print the euclidean matrix computed above (the name is points_eu;
# points_sq is not defined anywhere in this notebook)
print(points_eu, "\n", points_cos)

[[ 0.          3.16227766]
 [ 3.16227766  0.        ]] 
 [[ 0.          0.08915993]
 [ 0.08915993  0.        ]]
