## Data Analysis With Numpy
### (Numerical Python)

In [1]:
arr = [1, 2, 3]
print(arr)

[1, 2, 3]


In [2]:
print(arr*5)

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]


In [3]:
# ndarray - main object of numpy
# all element has same data type inside ndarray
# faster mathematical operations

In [4]:
import numpy as np
arr1 = np.array(arr)
print(arr1)
arr1 = arr1*5
print(arr1)

[1 2 3]
[ 5 10 15]


In [5]:
arr2 = np.array([1, 1.9, 'gdgk',])
arr2

array(['1', '1.9', 'gdgk'], dtype='<U32')

## Ndarray

In [6]:
# 1D array
arr1 = np.array([2, 6, 7])
print(arr1)

# 2D array
arr2 = np.array([
    [1, 2, 3],  # row1
    [4, 5, 6]   # row2
])
print(arr2)

# 3D array
arr3 = np.array([
    [[1, 2, 3], [4, 5, 6]],  # first floor
    [[1, 2, 3], [4, 5, 6]],  # 2nd floor
    [[1, 2, 3], [4, 5, 6]],  # 3rd floor
])
print(arr3)


[2 6 7]
[[1 2 3]
 [4 5 6]]
[[[1 2 3]
  [4 5 6]]

 [[1 2 3]
  [4 5 6]]

 [[1 2 3]
  [4 5 6]]]


#### Array Attribute

In [7]:
# dimension
print(arr1.ndim)
print(arr2.ndim)
print(arr3.ndim)

# shape
print(arr1.shape)
print(arr2.shape)
print(arr3.shape)

# data type
print(arr1.dtype)
print(arr2.dtype)
print(arr3.dtype)

# size
print(arr1.size)
print(arr2.size)
print(arr3.size)

1
2
3
(3,)
(2, 3)
(3, 2, 3)
int64
int64
int64
3
6
18


#### Data type

In [8]:
arr = np.array([1,2,3.2])
# upcasted to floatData
print(arr.dtype)

arr = np.array([1,1.3,'hello'])
# upcasted to string
print(arr.dtype)

arr = np.array([1,1.3,'hello',True])
print(arr.dtype)

float64
<U32
<U32


### Selecting a data type for an array

In [9]:
arr = np.array([1,2,300], dtype = np.int16)
print(arr.dtype)
print(arr)

# alternative way
arr = arr.astype(np.int32)
print(arr.dtype)

# error
arr = np.array([1,1.2,'hello',True], dtype = np.float64)
print(arr.dtype)

int16
[  1   2 300]
int32


ValueError: could not convert string to float: 'hello'

## Ndarray creation from existing data

In [10]:
# list

lst = [10, 20, 30, 40, 40.5]
arr = np.array(lst, dtype=np.int32)
print(arr)
print(type(arr))
print(arr.dtype)

mixed_lst = [10, True, 'Hello']
arr = np.array(mixed_lst)
print(arr)
print(type(arr))
print(arr.dtype)

matrix = [
    [1, 2, 3],
    [4, 5, 6]
]
arr = np.array(matrix)
print(arr)
print(type(arr))
print(arr.dtype)

[10 20 30 40 40]
<class 'numpy.ndarray'>
int32
['10' 'True' 'Hello']
<class 'numpy.ndarray'>
<U21
[[1 2 3]
 [4 5 6]]
<class 'numpy.ndarray'>
int64


In [11]:
# tuple

tpl = (10, 20, 30)
arr = np.array(tpl, dtype=np.int32)
print(arr)
print(type(arr))
print(arr.dtype)

arr = arr.astype(np.int16)
print(arr)
print(type(arr))
print(arr.dtype)

print(arr.shape)
print(arr.ndim)

[10 20 30]
<class 'numpy.ndarray'>
int32
[10 20 30]
<class 'numpy.ndarray'>
int16
(3,)
1


In [12]:
# set -> directly pass make as a object

st = {1, 2, 3}
arr = np.array(st)
print(arr)
print(type(arr))
print(arr.dtype)

{1, 2, 3}
<class 'numpy.ndarray'>
object


In [13]:
arr = np.array(list(st))
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)

[1 2 3]
<class 'numpy.ndarray'>
int64
(3,)


In [14]:
# dictionary

dc = {'a': 10, 'b': 20, 'c': 30}

keys = dc.keys()
values = dc.values()
items = dc.items()

print(keys)
print(type(keys))

print(values)
print(type(values))

print(items)
print(type(items))

dict_keys(['a', 'b', 'c'])
<class 'dict_keys'>
dict_values([10, 20, 30])
<class 'dict_values'>
dict_items([('a', 10), ('b', 20), ('c', 30)])
<class 'dict_items'>


In [15]:
# keys to ndarray
arr = np.array(list(keys))
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

['a' 'b' 'c']
<class 'numpy.ndarray'>
<U1
(3,)
1
3


In [16]:
# values to ndarray
arr = np.array(list(values))
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

[10 20 30]
<class 'numpy.ndarray'>
int64
(3,)
1
3


In [17]:
# items to ndarray
arr = np.array(list(items))
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

[['a' '10']
 ['b' '20']
 ['c' '30']]
<class 'numpy.ndarray'>
<U21
(3, 2)
2
6


## Creating ndarray from scratch

In [18]:
# np.zeros

arr = np.zeros(3)
print(arr)

arr = np.zeros(3, dtype=np.int16)
print(arr)

arr = np.zeros((2, 3), dtype=np.int8)
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

arr = np.zeros_like(arr3)
print(arr)
print(arr.shape)

[0. 0. 0.]
[0 0 0]
[[0 0 0]
 [0 0 0]]
<class 'numpy.ndarray'>
int8
(2, 3)
2
6
[[[0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]]]
(3, 2, 3)


In [19]:
# np.ones

arr = np.ones((2, 3), dtype=np.int8)
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

arr = np.ones_like(arr3)
print(arr)
print(arr.shape)

[[1 1 1]
 [1 1 1]]
<class 'numpy.ndarray'>
int8
(2, 3)
2
6
[[[1 1 1]
  [1 1 1]]

 [[1 1 1]
  [1 1 1]]

 [[1 1 1]
  [1 1 1]]]
(3, 2, 3)


In [20]:
# np.empty -> random value

arr = np.empty((2, 3), dtype=np.int8)
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

arr = np.empty_like(arr3)
print(arr)
print(arr.shape)

[[1 1 1]
 [1 1 1]]
<class 'numpy.ndarray'>
int8
(2, 3)
2
6
[[[1 1 1]
  [1 1 1]]

 [[1 1 1]
  [1 1 1]]

 [[1 1 1]
  [1 1 1]]]
(3, 2, 3)


In [21]:
# np.full

arr = np.full((2, 3), np.inf)
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

arr = np.full_like(arr3, 20, dtype=np.float32)
print(arr)
print(arr.shape)

[[inf inf inf]
 [inf inf inf]]
<class 'numpy.ndarray'>
float64
(2, 3)
2
6
[[[20. 20. 20.]
  [20. 20. 20.]]

 [[20. 20. 20.]
  [20. 20. 20.]]

 [[20. 20. 20.]
  [20. 20. 20.]]]
(3, 2, 3)


## Array creation with random values

In [22]:
# np.random.rand(shape) : 0->1

arr = np.random.rand(2, 3)
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

[[0.11922713 0.2641116  0.09961173]
 [0.98515186 0.91217977 0.62306934]]
<class 'numpy.ndarray'>
float64
(2, 3)
2
6


In [23]:
# np.random.randint(start, end, shape)

arr = np.random.randint(1, 10, (2, 3))
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

[[5 5 1]
 [2 7 9]]
<class 'numpy.ndarray'>
int64
(2, 3)
2
6


In [24]:
# np.random.uniform(start, end, shape)

arr = np.random.uniform(1, 10, (2, 3))
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

[[3.59383549 2.72139036 8.93121172]
 [8.01594871 9.76276509 3.39582791]]
<class 'numpy.ndarray'>
float64
(2, 3)
2
6


### Array creation with range functions

In [25]:
# np.arange(first, end, step) -> similar to range function

arr = np.arange(1, 10, 1)
print(arr)

mat = arr.reshape(3, 2)
print(mat)
print(type(mat))
print(mat.dtype)
print(mat.shape)
print(mat.ndim)
print(mat.size)

[1 2 3 4 5 6 7 8 9]


ValueError: cannot reshape array of size 9 into shape (3,2)

In [26]:
mat = arr.reshape(3, 3)
print(mat)
print(type(mat))
print(mat.dtype)
print(mat.shape)
print(mat.ndim)
print(mat.size)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
<class 'numpy.ndarray'>
int64
(3, 3)
2
9


In [27]:
# np.linspace(start, end(included), how much)

arr = np.linspace(0, 4, 6)
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

[0.  0.8 1.6 2.4 3.2 4. ]
<class 'numpy.ndarray'>
float64
(6,)
1
6


In [28]:
# np.logspace(start, end(included), how much)

arr = np.logspace(0, 4, 6)
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

[1.00000000e+00 6.30957344e+00 3.98107171e+01 2.51188643e+02
 1.58489319e+03 1.00000000e+04]
<class 'numpy.ndarray'>
float64
(6,)
1
6


In [29]:
arr = np.logspace(0, 4, 6, base=2)
print(arr)
print(type(arr))
print(arr.dtype)
print(arr.shape)
print(arr.ndim)
print(arr.size)

[ 1.          1.74110113  3.03143313  5.27803164  9.18958684 16.        ]
<class 'numpy.ndarray'>
float64
(6,)
1
6


## Creating matrix for linear algebra

In [30]:
# diagonal matrix

diagonal_matrix = np.diag([1, 2, 3, 4])
print(diagonal_matrix)
print(diagonal_matrix.shape)

diagonal_matrix = np.diag([1, 2, 3, 4], 1)
print(diagonal_matrix)
print(diagonal_matrix.shape)

[[1 0 0 0]
 [0 2 0 0]
 [0 0 3 0]
 [0 0 0 4]]
(4, 4)
[[0 1 0 0 0]
 [0 0 2 0 0]
 [0 0 0 3 0]
 [0 0 0 0 4]
 [0 0 0 0 0]]
(5, 5)


In [31]:
# Identity matrix

identity_mat = np.eye(4)
print(identity_mat)
print(identity_mat.shape)

identity_mat = np.eye(4, 5)  # row, col
print(identity_mat)
print(identity_mat.shape)

identity_mat = np.eye(4, 5, -1)  # row, col, k
print(identity_mat)
print(identity_mat.shape)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
(4, 4)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]
(4, 5)
[[0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]]
(4, 5)


## Indexing & Slicing

In [32]:
# indexing

print(arr1)
print(arr2)

arr1[2] = 100
print(arr1)

arr2[0, 1] = 200
print(arr2)

[2 6 7]
[[1 2 3]
 [4 5 6]]
[  2   6 100]
[[  1 200   3]
 [  4   5   6]]


In [33]:
# slicing

arr1 = np.array([1, 2, 3, 4, 5, 6, 7])
print(arr1)

mod_arr1 = arr1[0: 7: 2]  # start: end: step
print(arr1)
print(mod_arr1)

arr1[2] = 100
print(arr1)
print(mod_arr1)  # automaticly change 

arr1 = np.array([1, 2, 3, 4, 5, 6, 7])
mod_arr1 = arr1[0: 7: 2].copy()  # start: end: step
print(arr1)
print(mod_arr1)
arr1[2] = 100
print(mod_arr1)

[1 2 3 4 5 6 7]
[1 2 3 4 5 6 7]
[1 3 5 7]
[  1   2 100   4   5   6   7]
[  1 100   5   7]
[1 2 3 4 5 6 7]
[1 3 5 7]
[1 3 5 7]


In [34]:
# 2D slicing
# arr[row_start: row_end: step, col_start: col_end: step]  -> end(excluded)

print(arr2)

# getting a row
row_0 = arr2[0:1,]
print(row_0)

row_0 = arr2[:1,]
print(row_0)

arr2[0][1] = 2
print(row_0)

print(arr2)

# getting a column
col_0 = arr2[:, :1]
print(col_0)
col_1 = arr2[::, 1:2]
print(col_1)
col_1 = arr2[::, 1:]
print(col_1)

[[  1 200   3]
 [  4   5   6]]
[[  1 200   3]]
[[  1 200   3]]
[[1 2 3]]
[[1 2 3]
 [4 5 6]]
[[1]
 [4]]
[[2]
 [5]]
[[2 3]
 [5 6]]


In [35]:
# getting a portion

print(arr2)

portion = arr2[::, 0:2]
print(portion)

[[1 2 3]
 [4 5 6]]
[[1 2]
 [4 5]]


## Advanced Indexing

In [36]:
arr = np.array([10, 20, 30, 40])
print(arr)

values = arr[[0, 3, 1]]
print(values)

[10 20 30 40]
[10 40 20]


In [37]:
print(arr2)

print(arr2[[0, 1], [1, 2]])

# using something boolean
arr1 = np.array([1, 2, 3, 4, 5, 6])
print(arr1)
print(arr1 > 1)
print(arr2)
print(arr2[arr1 > 1])

[[1 2 3]
 [4 5 6]]
[2 6]
[1 2 3 4 5 6]
[False  True  True  True  True  True]
[[1 2 3]
 [4 5 6]]


IndexError: boolean index did not match indexed array along axis 0; size of axis is 2 but size of corresponding boolean axis is 6

In [38]:
# using something boolean
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
print(arr1)
print(arr1 > 1)
print(arr2)
print(arr2[arr1 > 1])

[[1 2 3]
 [4 5 6]]
[[False  True  True]
 [ True  True  True]]
[[1 2 3]
 [4 5 6]]
[2 3 4 5 6]


In [39]:
print(arr2)

arr2[arr2 > 2] = 0
print(arr2)

[[1 2 3]
 [4 5 6]]
[[1 2 0]
 [0 0 0]]


### Iterating over arrays

In [40]:
arr3 = np.array([
    [
        [1, 2, 3],
        [4, 5, 6]
    ],
    [
        [7, 8, 9],
        [10, 11, 12]
    ]
])
print(arr3)

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]


In [41]:
# For a 3D array
for matrix in arr3:
    for row in matrix:
        for element in row:
            print(element)

1
2
3
4
5
6
7
8
9
10
11
12


In [42]:
for i in np.nditer(arr3):
    print(i)

1
2
3
4
5
6
7
8
9
10
11
12


### Practice Problems

1: You have a list of temperatures recorded in Celsius.
   
Your task is to:\
Create a NumPy array from the given list.\
Convert all temperatures from Celsius to Fahrenheit. The formula is F = C* 1.8 + 32.\
Print the new Fahrenheit array.


In [43]:
given_temp = [20, 21, 23, 15, 10, 35, 40, 52, 28, 19]
temp = np.array(given_temp)
print(temp)

temp = temp*1.8+32
print(temp)

[20 21 23 15 10 35 40 52 28 19]
[ 68.   69.8  73.4  59.   50.   95.  104.  125.6  82.4  66.2]


2:  You have a Python list containing the lap times (in seconds) for an athlete.\
Task:

Convert the list into a NumPy array.\
Print the array's shape, size, and data type.\
The athlete wants to see their times in minutes. Create a new array by dividing all the lap times by 60.\
Print the new array of lap times in minutes.

In [44]:
lst = [10, 20, 30, 40, 50]
arr = np.array(lst)
print(arr.shape)
print(arr.size)
print(arr.dtype)

(5,)
5
int64


In [45]:
lap_time = np.array([2000, 78899, 88999, 4444, 443323, 23444])  # given in seconds
lap_time = lap_time//60  # convert to minutes
print(lap_time)

[  33 1314 1483   74 7388  390]


## Data Analysis With Numpy Part-02

In [46]:
arr = np.random.randint(1, 100, size=(10,5))
print(arr)

[[44 80 70 53 90]
 [41 88 16 41  2]
 [29 76 74 95 77]
 [12 25 71 76  1]
 [ 6 86 83 56 89]
 [29 18 59 47 69]
 [75 78 55 70 64]
 [81 41 41 63  5]
 [29 90 41 23 32]
 [50 83 89 38 75]]


In [51]:
arr.shape

# reshape()

b = arr.reshape(5, 10)
print(b)

[[44 80 70 53 90 41 88 16 41  2]
 [29 76 74 95 77 12 25 71 76  1]
 [ 6 86 83 56 89 29 18 59 47 69]
 [75 78 55 70 64 81 41 41 63  5]
 [29 90 41 23 32 50 83 89 38 75]]


In [57]:
# flattening
print(b)

flatten = b.flatten()  # default by row-wise
print(flatten)
print(flatten.ndim)

col_wise_flatten = np.ravel(b, order='C')  # row major
print(col_wise_flatten)

col_wise_flatten = np.ravel(b, order='F')  # col major
print(col_wise_flatten)

[[44 80 70 53 90 41 88 16 41  2]
 [29 76 74 95 77 12 25 71 76  1]
 [ 6 86 83 56 89 29 18 59 47 69]
 [75 78 55 70 64 81 41 41 63  5]
 [29 90 41 23 32 50 83 89 38 75]]
[44 80 70 53 90 41 88 16 41  2 29 76 74 95 77 12 25 71 76  1  6 86 83 56
 89 29 18 59 47 69 75 78 55 70 64 81 41 41 63  5 29 90 41 23 32 50 83 89
 38 75]
1
[44 80 70 53 90 41 88 16 41  2 29 76 74 95 77 12 25 71 76  1  6 86 83 56
 89 29 18 59 47 69 75 78 55 70 64 81 41 41 63  5 29 90 41 23 32 50 83 89
 38 75]
[44 29  6 75 29 80 76 86 78 90 70 74 83 55 41 53 95 56 70 23 90 77 89 64
 32 41 12 29 81 50 88 25 18 41 83 16 71 59 41 89 41 76 47 63 38  2  1 69
  5 75]


In [102]:
a = np.random.randint(1, 10, size=(2, 3))
b = np.random.randint(20, 30, size=(2, 3))

print(a)
print(b)

[[4 1 2]
 [2 2 8]]
[[29 21 27]
 [29 29 20]]


## Concatenate

In [104]:
con_row = np.concatenate((a, b), axis=0)  # row-wise
print(con_row)

con_col = np.concatenate((a, b), axis=1)  # col-wise
print(con_col)

[[ 4  1  2]
 [ 2  2  8]
 [29 21 27]
 [29 29 20]]
[[ 4  1  2 29 21 27]
 [ 2  2  8 29 29 20]]


In [107]:
# error

# column must be same when concatenate by row
# row must be same when concatenate by column

x = np.random.randint(1, 10, size=(2, 3))
y = np.random.randint(20, 30, size=(2, 4))

print(x)
print(y)

r2 = np.concatenate((x, y), axis=1)
print(r2)

r1 = np.concatenate((x, y), axis=0)
print(r1)

[[3 5 9]
 [6 3 3]]
[[23 29 20 26]
 [28 20 25 23]]
[[ 3  5  9 23 29 20 26]
 [ 6  3  3 28 20 25 23]]


ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 3 and the array at index 1 has size 4

### Transpose

In [111]:
mat = np.array([
    [1, 2, 3],
    [4, 5, 6]
])
print(mat)

transpose = mat.T
print(transpose)

[[1 2 3]
 [4 5 6]]
[[1 4]
 [2 5]
 [3 6]]


### Array spliting

In [116]:
a = np.random.randint(1, 10, size=(10,))
print(a)

splitted_array = np.array_split(a, 3)
print(splitted_array)

# split -> equal division
b = np.random.randint(1, 10, size=(9,))
splitted_array = np.split(b, 3)
print(splitted_array)

splitted_array = np.split(a, 3)
print(splitted_array)


[4 2 6 9 5 2 3 8 3 8]
[array([4, 2, 6, 9]), array([5, 2, 3]), array([8, 3, 8])]
[array([8, 8, 2]), array([6, 7, 3]), array([3, 9, 5])]


ValueError: array split does not result in an equal division

### Arithmetic operator & mathematical functions

In [119]:
# faster for vectorization 

x = np.array([10, 8, 30, 100])
y = np.array([2, 3, 4, 5])

print(x)
print(y)

add = np.add(x, y)
print(add)

sub = x - y
print(sub)

mul = x*y
print(mul)

div = x/y
print(div)

remainder = x % y
print(remainder)

[ 10   8  30 100]
[2 3 4 5]
[ 12  11  34 105]
[ 8  5 26 95]
[ 20  24 120 500]
[ 5.          2.66666667  7.5        20.        ]
[0 2 2 0]


In [123]:
# mathematical function

# trigonometry
# sin, cos, tan takes values in radian
print(x)

sin_val = np.sin(x)
print(sin_val)

cos_val = np.cos(x)
print(cos_val)

deg_conversion = np.rad2deg(sin_val)
print(deg_conversion)

[ 10   8  30 100]
[-0.54402111  0.98935825 -0.98803162 -0.50636564]
[-0.83907153 -0.14550003  0.15425145  0.86231887]
[-31.17011362  56.68605196 -56.61004209 -29.01261413]


In [127]:
# logarithmic

print(x)
base10_log_val = np.log10(x)
print(base10_log_val)

base2_log_val = np.log2(x)  # how much divide by 2
print(base2_log_val)

# sqrt
sqrt = np.sqrt(x)
print(sqrt)

[ 10   8  30 100]
[1.         0.90308999 1.47712125 2.        ]
[3.32192809 3.         4.9068906  6.64385619]
[ 3.16227766  2.82842712  5.47722558 10.        ]


In [129]:
# different
print(x)

s = np.sum(x)
# prefix sum
cum_sum = np.cumulative_sum(x)
print(cum_sum)

[ 10   8  30 100]
[ 10  18  48 148]


In [133]:
# broadcasting

x = np.array([10, 8, 16, 100])
result = x + 2
print(x)
print(result)

mat = np.array([
    [10, 20, 30],
    [30, 40, 50]
])

result = mat + 2
print(mat)
print(result)

vector = np.array([1, 2, 3])
result = mat + vector
print(vector)
print(result)

vector = np.array([1, 2, 3, 4])
result = mat + vector  # size of a vector must be same to column size of a matrix 

print(result)

[ 10   8  16 100]
[ 12  10  18 102]
[[10 20 30]
 [30 40 50]]
[[12 22 32]
 [32 42 52]]
[1 2 3]
[[11 22 33]
 [31 42 53]]


ValueError: operands could not be broadcast together with shapes (2,3) (4,) 

### Logical function

In [141]:
# comparison >, <, >=, <=, ==, !=

x = np.array([10, 8, 16, 100])
y = np.array([2, 3, 16, 5])

print(x)
print(y)

greater_than = x > y
print(greater_than)

equal = x == y
print(equal)

# all()
print(np.all(greater_than))

# any
print(np.any(equal))

[ 10   8  16 100]
[ 2  3 16  5]
[ True  True False  True]
[False False  True False]
False
True


### Sorting

In [145]:
# inplace
x = np.array([10, 8, 16, 100])
z = x.copy()
print(z)

z.sort()
print(x)
print(z)

[ 10   8  16 100]
[ 10   8  16 100]
[  8  10  16 100]


In [146]:
# copy sorting
print(x)
sort_arr = np.sort(x)
print(sort_arr)
print(x)

[ 10   8  16 100]
[  8  10  16 100]
[ 10   8  16 100]


In [148]:
# 2D array sorting
mat = np.array([[10, 3, 5], [8, 4, 9]])

print(mat)
hor_sort = np.sort(mat, axis=1)  # horizontal axis = 1
print(hor_sort)

vert_sort = np.sort(mat, axis=0)
print(vert_sort)

[[10  3  5]
 [ 8  4  9]]
[[ 3  5 10]
 [ 4  8  9]]
[[ 8  3  5]
 [10  4  9]]


### Searching

In [150]:
print(x)

# np.where(condition, x, y) if true then x otherwise replace with y
index = np.where(x == 8)  # return index
print(index)

arr = np.where(x > 8, x, 0)  # return array
print(arr)

[ 10   8  16 100]
(array([1]),)
[ 10   0  16 100]


In [151]:
print(mat)

arr = np.where(mat > 8, mat, 0)
print(arr)

[[10  3  5]
 [ 8  4  9]]
[[10  0  0]
 [ 0  0  9]]


In [164]:
# maximum, minimum value index
print(x)
maximum_value_index = np.argmax(x)
minimum_value_index = np.argmin(x)

print(maximum_value_index)
print(minimum_value_index)

[ 10   8  16 100]
3
1


### Counting

In [165]:
a = np.random.randint(1, 100, size=(100,))
print(a)

[94 66 12 99 45  8 92 58 82 45 33 21 65 59 27  8 79 89 46 35 95  9 99  2
 54 80 66  4 12 46 75 89 90 67 45 57 96 67 66 27 10  2 21 96 78  3 29 89
 62 15  9 66 57 78 51 84 30 78 88  2 74 65 39 84 14 44 95 53 68 42 45 27
 58 27 51 11 19 74 59 78 74  6 30 42 18 67 91 91 36 14  3 20 91 54 63 84
 58 12 20 68]


In [166]:
value_greater_than_60 = np.count_nonzero(a > 60)
print(value_greater_than_60)

43


In [169]:
unique_value, count = np.unique(a, return_counts=True)
print(unique_value)
print(count)
print(np.sum(count))

[ 2  3  4  6  8  9 10 11 12 14 15 18 19 20 21 27 29 30 33 35 36 39 42 44
 45 46 51 53 54 57 58 59 62 63 65 66 67 68 74 75 78 79 80 82 84 88 89 90
 91 92 94 95 96 99]
[3 2 1 1 2 2 1 1 3 2 1 1 1 2 2 4 1 2 1 1 1 1 2 1 4 2 2 1 2 2 3 2 1 1 2 4 3
 2 3 1 4 1 1 1 3 1 3 1 3 1 1 2 2 2]
100


## Statistical Function

In [176]:
# data input
data = np.genfromtxt('student_scores.csv', delimiter=',', skip_header=1)
print(data)
print(type(data))

[[78. 85. 82.]
 [56. 67. 72.]
 [89. 92. 88.]
 [45. 52. 58.]
 [70. 75. 80.]
 [92. 90. 91.]
 [61. 60. 62.]
 [55. 57. 54.]
 [88. 89. 87.]
 [74. 70. 76.]
 [66. 69. 68.]
 [80. 82. 81.]
 [59. 64. 60.]
 [73. 78. 74.]
 [91. 93. 90.]
 [68. 71. 69.]
 [77. 79. 78.]
 [84. 86. 85.]
 [62. 63. 65.]
 [95. 97. 96.]]
<class 'numpy.ndarray'>


In [178]:
# statistics
math_marks = data[::, :1]
print(math_marks.T)

max_math_marks = np.max(math_marks)
print(max_math_marks)

min_math_marks = np.min(math_marks)
print(min_math_marks)

avg_math_marks = np.mean(math_marks)
print(avg_math_marks)

median_math_marks = np.median(math_marks)
print(median_math_marks)

# standard deviation
std_math_marks = np.std(math_marks)
print(std_math_marks)

[[78. 56. 89. 45. 70. 92. 61. 55. 88. 74. 66. 80. 59. 73. 91. 68. 77. 84.
  62. 95.]]
95.0
45.0
73.15
73.5
13.788672887555204


In [180]:
# Correlation matrix

study_hours = np.array([2, 4, 5, 7, 8])
exam_scores = np.array([65, 75, 78, 88, 92])

data = np.array([study_hours, exam_scores])
print(data)

correlation = np.corrcoef(data)
print(correlation)

[[ 2  4  5  7  8]
 [65 75 78 88 92]]
[[1.         0.99859154]
 [0.99859154 1.        ]]


## Linear algebra

In [182]:
# dot product

A = np.array([
    [1, 2, 3],
    [4, 5, 6]
])

B = np.array([
    [7, 8],
    [9, 10],
    [11, 12]
])
dot_product = np.dot(A, B)  # col of A and row of B length must be same
print(A)
print(B)
print(dot_product)

# trace
print(np.trace(B))

[[1 2 3]
 [4 5 6]]
[[ 7  8]
 [ 9 10]
 [11 12]]
[[ 58  64]
 [139 154]]
17


In [184]:
# determinant

sq_mat = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])
det_of_sq = np.linalg.det(sq_mat)
print(sq_mat)
print(det_of_sq)

rank_of_sq = np.linalg.matrix_rank(sq_mat)
print(rank_of_sq)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
-9.51619735392994e-16
2
