# **Data Analysis with Python - 2  (25 Apr 22)**

## **Pre-Class**

### **NumPy Array Indexing and Selection**

In [68]:
import numpy as np
from scipy import stats

In [3]:
p = np.array([5, 7, 9])
p[0:2]

array([5, 7])

In [4]:
p[-1]

9

In [5]:
g = np.array([[6, 7, 8], [1, 2, 3], [9, 3, 2]])
g

array([[6, 7, 8],
       [1, 2, 3],
       [9, 3, 2]])

In [6]:
g[1, 2] # row 1, column 2

3

In [7]:
g[2, 0]

9

In [9]:
g[1:3, 1] # from the rows at index 1 and 2, return the values in column index 1

array([2, 3])

In [10]:
g[0:2, 2 ] # from the first two rows, return the values in column index 2 (the third column)

array([8, 3])

In [11]:
g[-1] # a single negative index on a 2-D array returns the whole last row

array([9, 3, 2])

In [12]:
g[-2]

array([1, 2, 3])

In [87]:
g[-1, 0:2]

array([9, 3])

In [13]:
g[-2, 1:3]

array([2, 3])

In [14]:
g[:, 1:]

array([[7, 8],
       [2, 3],
       [3, 2]])

In [16]:
g[:, -1]

array([8, 3, 2])

In [17]:
g[:, :]

array([[6, 7, 8],
       [1, 2, 3],
       [9, 3, 2]])

### **Iterate through arrays**

In [19]:
g = np.array([[6, 7, 8], [1, 2, 3], [9, 3, 2]])
g

array([[6, 7, 8],
       [1, 2, 3],
       [9, 3, 2]])

In [20]:
for row in g:
    print(row)

[6 7 8]
[1 2 3]
[9 3 2]


In [21]:
for cell in g.flat: # g.flat is an attribute (a 1-D iterator), not a method; it visits every cell row by row
    print(cell)

6
7
8
1
2
3
9
3
2


### **Stacking two arrays together**

In [23]:
s = np.arange(6).reshape(3,2)
t = np.arange(6,12).reshape(3,2)

In [24]:
s

array([[0, 1],
       [2, 3],
       [4, 5]])

In [25]:
t

array([[ 6,  7],
       [ 8,  9],
       [10, 11]])

In [26]:
np.vstack((s,t)) # vertical stacking (rows of t go below rows of s); the arrays are passed together as one tuple

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11]])

In [28]:
np.hstack((s,t)) # horizontal stacking

array([[ 0,  1,  6,  7],
       [ 2,  3,  8,  9],
       [ 4,  5, 10, 11]])

In [29]:
v = np.arange(30).reshape(2, 15)
v

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])

In [31]:
result = np.hsplit(v, 5) # split horizontally (column-wise) into 5 equal parts; returns a list of arrays
result

[array([[ 0,  1,  2],
        [15, 16, 17]]),
 array([[ 3,  4,  5],
        [18, 19, 20]]),
 array([[ 6,  7,  8],
        [21, 22, 23]]),
 array([[ 9, 10, 11],
        [24, 25, 26]]),
 array([[12, 13, 14],
        [27, 28, 29]])]

In [32]:
result[0]

array([[ 0,  1,  2],
       [15, 16, 17]])

In [33]:
result[1]

array([[ 3,  4,  5],
       [18, 19, 20]])

In [34]:
result2 = np.vsplit(v, 2)
result2

[array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14]]),
 array([[15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])]

In [35]:
result2[0]

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14]])

In [37]:
result2[1]

array([[15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])

In [36]:
np.hsplit(result2[0], 5)

[array([[0, 1, 2]]),
 array([[3, 4, 5]]),
 array([[6, 7, 8]]),
 array([[ 9, 10, 11]]),
 array([[12, 13, 14]])]

### **Indexing with Boolean Arrays**

In [38]:
a = np.arange(12).reshape(3,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [40]:
b = a > 4 # creates another array with bool results
b

array([[False, False, False, False],
       [False,  True,  True,  True],
       [ True,  True,  True,  True]])

In [42]:
a[b] # boolean mask: keeps the elements of 'a' where 'b' is True and drops the rest; the result is 1-D

array([ 5,  6,  7,  8,  9, 10, 11])

In [44]:
a[a<6]  # extract all elements less than 6

array([0, 1, 2, 3, 4, 5])

In [46]:
a[b] = -1 # assign -1 at every position where the mask b is True (i.e. where a > 4); modifies 'a' in place
a

array([[ 0,  1,  2,  3],
       [ 4, -1, -1, -1],
       [-1, -1, -1, -1]])

### **NumPy Operations**

In [47]:
np.random.seed(17)
arr1 = np.random.randint(1,9,9).reshape(3,3)
arr2 = np.random.randint(1,9,9).reshape(3,3)

In [48]:
arr1

array([[8, 2, 8],
       [7, 7, 2],
       [1, 6, 7]])

In [49]:
arr2

array([[8, 5, 8],
       [5, 5, 8],
       [2, 7, 2]])

`addition` returns the element-wise sum of arr1 and arr2

In [51]:
arr1 + arr2

array([[16,  7, 16],
       [12, 12, 10],
       [ 3, 13,  9]])

`subtraction` returns the element-wise difference of arr1 and arr2

In [52]:
arr1 - arr2

array([[ 0, -3,  0],
       [ 2,  2, -6],
       [-1, -1,  5]])

`division` returns an array in which each element of the first array is divided by the corresponding element of the second array

In [53]:
arr1/arr2

array([[1.        , 0.4       , 1.        ],
       [1.4       , 1.4       , 0.25      ],
       [0.5       , 0.85714286, 3.5       ]])

`multiplication` returns the element-wise product of arr1 and arr2 (this is not the matrix product)

In [54]:
arr1*arr2

array([[64, 10, 64],
       [35, 35, 16],
       [ 2, 42, 14]])

`square root` returns the square root of the number in an array.

In [55]:
np.sqrt(arr1)

array([[2.82842712, 1.41421356, 2.82842712],
       [2.64575131, 2.64575131, 1.41421356],
       [1.        , 2.44948974, 2.64575131]])

`exponential` returns an array with the exponential (e raised to the power x) of every element x of the input array

In [56]:
np.exp(arr2)

array([[2980.95798704,  148.4131591 , 2980.95798704],
       [ 148.4131591 ,  148.4131591 , 2980.95798704],
       [   7.3890561 , 1096.63315843,    7.3890561 ]])

`max` returns the maximum along the given axis (axis=0: one value per column, axis=1: one value per row).

In [59]:
arr1

array([[8, 2, 8],
       [7, 7, 2],
       [1, 6, 7]])

In [57]:
arr1.max(axis=0)

array([8, 7, 8])

In [58]:
arr1.max(axis=1)

array([8, 7, 7])

`min` returns the minimum along the given axis (axis=0: one value per column, axis=1: one value per row).

In [60]:
arr2

array([[8, 5, 8],
       [5, 5, 8],
       [2, 7, 2]])

In [61]:
arr2.min(axis=0)

array([2, 5, 2])

In [62]:
arr2.min(axis=1)

array([5, 5, 2])

`mean` returns the arithmetic mean along the specified axis

In [63]:
arr1.mean(axis=0)

array([5.33333333, 5.        , 5.66666667])

In [64]:
arr2.mean(axis=1)

array([7.        , 6.        , 3.66666667])

`median` computes the median along the specified axis

In [65]:
np.median(arr1, axis=0)

array([7., 6., 7.])

`std` returns the standard deviation along the specified axis (by default the population standard deviation, i.e. dividing by N)

In [66]:
arr1.std(axis=1)

array([2.82842712, 2.3570226 , 2.62466929])

`sum` returns the sum of the array elements along the specified axis

In [67]:
arr1.sum(axis=0)

array([16, 15, 17])

`mode` returns the most frequent value along the specified axis; when every value occurs equally often (as in each column here), the smallest one is returned.

In [69]:
# [0] picks the modal values out of the result; its second field holds their counts.
stats.mode(arr1, axis=0)[0]

array([[1, 2, 2]])

`sin` returns the sine of each element of the input, which is interpreted as an angle in radians (2π radians equal 360 degrees); a scalar input gives a scalar result.

In [70]:
np.sin(arr1)

array([[ 0.98935825,  0.90929743,  0.98935825],
       [ 0.6569866 ,  0.6569866 ,  0.90929743],
       [ 0.84147098, -0.2794155 ,  0.6569866 ]])

## **In-Class (25 Apr 22)**