### Numpy

- Fast
    - complied using C, C++, Fortran
    - vectorized code
- Easy to perform mathematical operation (Linear Algebra)
- Express data as Matrix

#### 1. Matrix
- 1.1) Generate matrix: np.array, np.zeros, np.ones, np.arange
- 1.2) Shape
- 1.3) Indexing

#### 2. Linspace / Logspace
- Linspace: returns the value of location or interval which was linearly divided
- Logspace: returns the value of location or interval which was divided on a log scale

#### 3. Random
- 3.1) rand : generate random value with uniform distribution
- 3.2) randn :  generate random value with normal distribution
- 3.3) randint : generate random integer value with uniform distribution
- 3.4) suffle :  suffle the data of matrix
- 3.5) choice : select the data with specific probability

#### 4. Transpose / Concatenation

#### 5.  Add / Delete

#### 6. Statistics
- 6.1) Basic stats
- 6.2) Variance
- 6.3) Covariance
- 6.4) Correlation Coefficient

In [1]:
import numpy as np

In [2]:
# 1. Array
# np.array(object, dtype = None)

array = np.array([100, 200, 300])
print("type: ",type(array))
print(array)

intArray = np.array([1,2,3], dtype = int)
print(intArray)

floatArray = np.array([4,5,6], dtype = float)
print(floatArray)

complexArray = np.array([7,8,9], dtype = complex)
print(complexArray)

type:  <class 'numpy.ndarray'>
[100 200 300]
[1 2 3]
[4. 5. 6.]
[7.+0.j 8.+0.j 9.+0.j]


In [3]:
# np.zeros(size of matrix)

z1 = np.zeros((2,3)) # row = 2, col = 3
print(z1)

z2 = np.zeros(4)     # row = 1, col = 4
print(z2)

z3 = np.ones(4)
print(z3)

[[0. 0. 0.]
 [0. 0. 0.]]
[0. 0. 0. 0.]
[1. 1. 1. 1.]


In [4]:
# arrange
# np.arange(start, end, step, dtype)

ar1 = np.arange(5) 
print(ar1)

ar2 = np.arange(1,10,2)
print(ar2)


[0 1 2 3 4]
[1 3 5 7 9]


In [5]:
# other function

a1 = np.arange(12)
print(a1)
# ndarray.ndim -> number of dimension
print("ndim: ",a1.ndim)

[ 0  1  2  3  4  5  6  7  8  9 10 11]
ndim:  1


In [6]:
# ndarray.shape -> (row, col)
# ndarray.reshape(row, col) -> new array

a2 = a1.reshape(3,4)
print(a2)
print(a2.shape)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
(3, 4)


In [7]:
# indexing 
ls = np.array([[1,2,3], [4,5,6], [7,8,9]])

# select data
print(ls[0])
print(ls[0][1])
print(ls[0,1])

[1 2 3]
2
2


In [8]:
# modify data
ls[0][1] = 99
print(ls)
# broadcasting
ls[1] = 100
print(ls)

[[ 1 99  3]
 [ 4  5  6]
 [ 7  8  9]]
[[  1  99   3]
 [100 100 100]
 [  7   8   9]]


In [9]:
# slicing
ls2 = ls[1:]
print(ls2)

ls3 = ls[...,1:]
print(ls3)

[[100 100 100]
 [  7   8   9]]
[[ 99   3]
 [100 100]
 [  8   9]]


In [10]:
condition = ls%2 == 0
condition

array([[False, False, False],
       [ True,  True,  True],
       [False,  True, False]])

In [11]:
ls[condition] = 999
ls

array([[  1,  99,   3],
       [999, 999, 999],
       [  7, 999,   9]])

In [12]:
# 2. Linspace

np.linspace(0, 100, 5) # linearly divide in 5 sections

array([  0.,  25.,  50.,  75., 100.])

In [13]:
# Logspace
# log(x1) = start,..... , log(x) = end

np.logspace(2,4,3)
# log(x1) = 2 -> x1 = 100
# log(x2) = 3 -> x2 = 1000
# log(x3) = 4 -> x3 = 10000

array([  100.,  1000., 10000.])

In [16]:
# Let annual salary of n year old sn
# 1) s20 = $100,000 and s50 = $1,000,000
# 2) there are two cases: annual salary increases linearly and exponentially.
# 3) print out s30 and s40 respectively


linear = np.linspace(100000, 1000000, 4)
print("linspace: ",linear[1], linear[2])

logarithm = np.logspace(np.log10(100000), np.log(1000000), 4)
print("logspace: ",logarithm[1], logarithm[2])

linspace:  400000.0 700000.0
logspace:  86796761.08715706 75336777352.2102


In [17]:
# 3) Random

In [20]:
# 3.1) rand: Random values in a given shape.
np.random.rand(10)

array([0.20976093, 0.01100239, 0.80715667, 0.71563352, 0.91365517,
       0.91263237, 0.7288488 , 0.8618104 , 0.01084628, 0.55017416])

In [16]:
# 3.2) randn: Return a sample (or samples) from the "standard normal" distribution
np.random.randn(10)

array([ 0.83503717, -0.88921958,  0.32419434,  0.54435311,  1.48736103,
       -1.28357575,  0.68202543, -0.43902499,  0.11488764, -1.11415689])

In [21]:
# 3.3) randint(low, high=None, size=None, dtype=int)

np.random.randint(-3, 15, (2,3))

array([[14, 10, -2],
       [ 4, 11, -3]])

In [22]:
# 3.4) Suffle: Modify a sequence in-place by shuffling its contents.

prev = np.random.randint(10, size = (3,3))
print("Before shuffle")
print(prev)

print("\nAfter shuffle")
np.random.shuffle(prev)
print(prev)

Before shuffle
[[1 5 8]
 [4 6 6]
 [3 7 1]]

After shuffle
[[3 7 1]
 [1 5 8]
 [4 6 6]]


In [23]:
# 3.5) choice
# choice(a, n, p)
# a = candidate, n = number of num, p = probability
np.random.choice(4, 10, p =[0.1, 0.2, 0.3, 0.4])

array([3, 3, 2, 3, 2, 3, 3, 2, 3, 2])

In [24]:
# 4. Transpose
ls = np.arange(0, 90, 6)
ls = ls.reshape(3,5)
print("before transpose")
print(ls)

print("\nafter transpose")
ls2 = ls.T
print(ls2)

before transpose
[[ 0  6 12 18 24]
 [30 36 42 48 54]
 [60 66 72 78 84]]

after transpose
[[ 0 30 60]
 [ 6 36 66]
 [12 42 72]
 [18 48 78]
 [24 54 84]]


In [25]:
# Concatenation

matrix1 = np.random.randint(10, size = (3,3))
matrix2 = np.random.randint(10, size = (3,3))

print(matrix1)
print("\n",matrix2)

[[4 9 1]
 [5 6 8]
 [7 3 2]]

 [[1 0 7]
 [0 3 8]
 [0 4 7]]


In [26]:
np.concatenate((matrix1, matrix2))

array([[4, 9, 1],
       [5, 6, 8],
       [7, 3, 2],
       [1, 0, 7],
       [0, 3, 8],
       [0, 4, 7]])

In [27]:
np.concatenate((matrix1, matrix2), axis = 1)

array([[4, 9, 1, 1, 0, 7],
       [5, 6, 8, 0, 3, 8],
       [7, 3, 2, 0, 4, 7]])

In [28]:
# version 2

# column concatenate
print(np.c_[np.array([1,2,3]), np.array([4,5,6])])

# row concatenate
print(np.r_[np.array([1,2,3]), np.array([4,5,6])])

[[1 4]
 [2 5]
 [3 6]]
[1 2 3 4 5 6]


In [29]:
# 5. Add / Delete

arr = np.arange(10,16)
arr = arr.reshape(2,3)
arr

array([[10, 11, 12],
       [13, 14, 15]])

In [90]:
np.append(arr, [16,17,18,19])

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [91]:
arr = np.arange(1,7)
arr = arr.reshape(2,3)

np.append(arr, [[7,8,9]], axis = 0)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [93]:
arr = np.arange(1,7)
arr = arr.reshape(2,3)

np.append(arr, [[7,8,9],[10,11,12]], axis = 1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [95]:
# insert
arr = np.arange(1,7)
arr = arr.reshape(2,3)

np.insert(arr, 2, [7,8,9])

array([1, 2, 7, 8, 9, 3, 4, 5, 6])

In [96]:
arr = np.arange(1,7)
arr = arr.reshape(2,3)

np.insert(arr, 2, [7,8,9], axis = 0)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [99]:
arr = np.arange(1,7)
arr = arr.reshape(2,3)

np.insert(arr, 2, [10, 11], axis = 1)

array([[ 1,  2, 10,  3],
       [ 4,  5, 11,  6]])

In [102]:
# Delete
arr = np.arange(1,7)
arr = arr.reshape(2,3)
print(arr)

# flattended
np.delete(arr,3)

[[1 2 3]
 [4 5 6]]


array([1, 2, 3, 4, 6])

In [105]:
arr = np.arange(1,7)
arr = arr.reshape(2,3)
print(arr)

np.delete(arr, 1, axis = 1)

[[1 2 3]
 [4 5 6]]


array([[1, 3],
       [4, 6]])

In [115]:
# 6 Statistics

# 6.1) Basic stats: Average, Median, Standard Deviation

data = np.arange(12)
data = data.reshape(3,4)
print(data, "\n")
print("Average: ",np.mean(data))
print("Median: ",np.median(data))
print("Standard Deciation: ",np.std(data))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]] 

Average:  5.5
Median:  5.5
Standard Deciation:  3.452052529534663


- 6.2) Variance
- the average of the squared differences from the mean
     

In [130]:
%%time
print("Numpy function: ",np.var(data))

Numpy function:  89.96000000000001
CPU times: user 1.03 ms, sys: 160 µs, total: 1.19 ms
Wall time: 1.13 ms


- 6.3) Covariance
- used to determine the relationship between the movements of two random variables

- Advantage: able to show the relationship 
- Disadvantage
    - unable to show the strenght of relationship
    - larger the value of sample, the more difference the result

In [139]:
data1 = np.array([10, 50, 22, 99, 34])
data2 = np.array([77, 64, 22, 122, 4])
np.cov(data1, data2)[0,1]

1060.25

In [144]:
# Disadvantage

data1 = data1 * 100
data2 = data2 ** 2

np.cov(data1, data2)[0,1]

2.83958550375e+17

- 6.4) Correlation coefficient
- statistical measure of the strength of the relationship btw the relative movements of two variables
- range: (-1 ~ 1), close to 0 : less related
        


In [146]:
data1 = np.array([10, 3, 22, 99, 34])
data2 = np.array([77, 64, 22, 122, 84])
data3 = np.array([100, 91, 83, 68, 46])

np.corrcoef(data1, data2)[0,1], np.corrcoef(data1, data3)[0,1],np.corrcoef(data2, data3)[0,1]

(0.7173617704561531, -0.5034101603009543, -0.36680713446840096)