### Topics ###

![](2023-08-04-20-39-03.png)

In [4]:
import numpy as np
print(np.__version__)

a = np.array([1,2,4])
print(f'a: {a}')
print(f'shape: {a.shape}')
print(f'dtype: {a.dtype}')
print(f'no. of dim: {a.ndim}')
print(f'no. of elements: {a.size}')
print(f'size of each item: {a.itemsize}')

1.22.3
a: [1 2 4]
shape: (3,)
dtype: int32
no. of dim: 1
no. of elements: 3
size of each item: 4


## Arrays vs Lists ##

### Data Type: ###
* Lists: A list in Python is a collection of heterogeneous data types. This means you can have elements of different types (e.g., integers, strings, floats) within the same list.
* Arrays: In Python, arrays are implemented through the NumPy library. Arrays are homogeneous, meaning all elements in the array must have the same data type (e.g., int, float, etc.).

### Functionality and Operations: ###
* Lists: Python lists come with a wide range of built-in methods for manipulation, such as `append(), pop(), extend()`, etc. They are versatile and flexible but may not be as efficient for numerical computations.
* Arrays: Numpy arrays offer a variety of numerical operations that can be performed on the entire array efficiently due to their homogeneous nature. These operations include element-wise arithmetic, broadcasting, matrix multiplication, and more.

### Performance: ###
* Lists: Python lists are generally slower for numerical computations and large-scale data manipulation due to their dynamic nature and lack of vectorization.
* Arrays: Numpy arrays are optimized for numerical computations and are significantly faster for large datasets, especially when using vectorized operations.

### Memory Overhead: ###
* Lists: Python lists have more memory overhead compared to arrays due to additional information they store, such as the type of each element and the reference count.
* Arrays: Numpy arrays have less memory overhead because they store data more compactly in contiguous blocks of memory.

In [5]:
# DOT PRODUCT #
a = np.array([1,2,3])
b = np.array([4,9,8])

# method 1
dot = (a * b).sum()
print(dot)

# method 2
print(np.dot(a,b))

# method 3
dot1 = a @ b
print(dot1)

46
46
46


In [6]:
# MULTIDIMENSIONAL ARRAYS #

mArr = np.array([[1,3], [4,7]])
print(mArr)
print(mArr.shape)
print(mArr[0,1])

# retrieve all the rows in column 0
print(f'retrieve all the rows in column 0:\n {mArr[:,0]}')
# retrieve all the coulmn in row 0
print(f'retrieve all the coulmn in row 0:\n {mArr[0,:]}')

# transpose
print(f'transpose:\n{mArr.T}')

# determinant
print(f'determinant:\n{np.linalg.det(mArr)}')

# inverse
print(f'inverse:\n{np.linalg.inv(mArr)}')   # the array should be a square one

# bool indexing
a1 = np.array([[1,4], [3,2], [6,9], [5,0]])
bool_idx = a1>4
print(f'bool array: {bool_idx}')
print(f'value array: {a1[a1>4]}')

# modifying array based on certain condition
b1 = np.where(a1>2, a1, -1)
print(f'modified array: {b1}')

# fancy indexing
c1 = np.array([32,54,23,74,34,87])
c2 = np.argwhere(c1%2 == 0).flatten()    # index array  # .flatten() - suppresses the array to 1D
print(f'fancy indexing: {c1[c2]}')



[[1 3]
 [4 7]]
(2, 2)
3
retrieve all the rows in column 0:
 [1 4]
retrieve all the coulmn in row 0:
 [1 3]
transpose:
[[1 4]
 [3 7]]
determinant:
-4.999999999999999
inverse:
[[-1.4  0.6]
 [ 0.8 -0.2]]
bool array: [[False False]
 [False False]
 [ True  True]
 [ True False]]
value array: [6 9 5]
modified array: [[-1  4]
 [ 3 -1]
 [ 6  9]
 [ 5 -1]]
fancy indexing: [32 54 74 34]


In [7]:
# RESHAPING #

a2 = np.array([2,6,3,7,5,9])
a2.reshape(3,2)
#a2.reshape(2,4) # ERROR

array([[2, 6],
       [3, 7],
       [5, 9]])

In [10]:
# adding a dimension to the array (columnwise)
print('adding a dimension to the array (columnwise)')
b2 = a2[np.newaxis,:]
print(b2)
print(b2.shape)

# adding a dimension to the array
b2 = a2[:,np.newaxis]
print('adding a dimension to the array (rowwise)')
print(b2)
print(b2.shape)

adding a dimension to the array (columnwise)
[[2 6 3 7 5 9]]
(1, 6)
adding a dimension to the array (rowwise)
[[2]
 [6]
 [3]
 [7]
 [5]
 [9]]
(6, 1)


In [30]:
# CONCATENATION #

a3 = np.array([[1,3],[5,7]])
b3 = np.array([[9,11]])

# as it is
concatArr = np.concatenate((a3,b3))
print(concatArr)

# in diff axis
concatArr = np.concatenate((a3,b3.T), axis = 1)
print(concatArr)

# hstack
a4 = np.array([5,3,6,1,8])
b4 = np.array([4,8,4,7,2,3])
print('--- hstack ---')
hstackArr = np.hstack((a4, b4))
print(hstackArr)

# vstack
print('--- vstack ---')
vstackArr = np.vstack((a3,b3))
print(vstackArr)

[[ 1  3]
 [ 5  7]
 [ 9 11]]
[[ 1  3  9]
 [ 5  7 11]]
--- hstack ---
[5 3 6 1 8 4 8 4 7 2]
--- vstack ---
[[ 1  3]
 [ 5  7]
 [ 9 11]]


In [28]:
# BROADCASTING #

a5 = np.array([[2,4,6], [1,9,7], [2,4,6], [1,9,7]])
b5 = np.array([5,7,1])

broadcastedArr = a5 + b5
print(broadcastedArr)

[[ 7 11  7]
 [ 6 16  8]
 [ 7 11  7]
 [ 6 16  8]]


In [47]:
# FUNCTIONS AND AXIS

a6 = np.array([8,4,5,7,3,2,8])
b6 = np.array([5,7,4,2,7,8,1])

# SUM
print('\n--- SUM ---')
# axis = none
sum = np.sum((a6, b6), axis = None)
print(sum)
# axis = 0
sum = np.sum((a6, b6), axis = 0)
print(sum)
# axis = 1
sum = np.sum((a6, b6), axis = 1)
print(sum)

# MEAN
print('\n--- MEAN ---')
# axis = none
sum = np.mean((a6, b6), axis = None)
print(sum)
# axis = 0
sum = np.mean((a6, b6), axis = 0)
print(sum)
# axis = 1
sum = np.mean((a6, b6), axis = 1)
print(sum)

print('\n--- STD DEVIATION ---')
# axis = none
sum = np.std((a6, b6), axis = None)
print(sum)
# axis = 0
sum = np.std((a6, b6), axis = 0)
print(sum)
# axis = 1
sum = np.std((a6, b6), axis = 1)
print(sum)

print('\n--- VARIANCE ---')
# axis = none
sum = np.var((a6, b6), axis = None)
print(sum)
# axis = 0
sum = np.var((a6, b6), axis = 0)
print(sum)
# axis = 1
sum = np.var((a6, b6), axis = 1)
print(sum)


--- SUM ---
71
[13 11  9  9 10 10  9]
[37 34]

--- MEAN ---
5.071428571428571
[6.5 5.5 4.5 4.5 5.  5.  4.5]
[5.28571429 4.85714286]

--- STD DEVIATION ---
2.3743957340849517
[1.5 1.5 0.5 2.5 2.  3.  3.5]
[2.24971654 2.4743583 ]

--- VARIANCE ---
5.637755102040818
[ 2.25  2.25  0.25  6.25  4.    9.   12.25]
[5.06122449 6.12244898]


In [46]:
print('\n--- MINIMUM ---')
# axis = none
sum = np.min((a6, b6), axis = None)
print(sum)
# axis = 0
sum = np.min((a6, b6), axis = 0)
print(sum)
# axis = 1
sum = np.min((a6, b6), axis = 1)
print(sum)

print('\n--- MAXIMUM ---')
# axis = none
sum = np.max((a6, b6), axis = None)
print(sum)
# axis = 0
sum = np.max((a6, b6), axis = 0)
print(sum)
# axis = 1
sum = np.max((a6, b6), axis = 1)
print(sum)


--- MINIMUM ---
1
[5 4 4 2 3 2 1]
[2 1]

--- MAXIMUM ---
8
[8 7 5 7 7 8 8]
[8 8]


In [51]:
# DATATTYPES #
a7 = np.array([2,4,6])
print(a7.dtype)
b7 = np.array([3,6,4], dtype=np.float64)
print(b7.dtype)

int32
float64


In [62]:
# GENERATING ARRAYS #
a8 = np.zeros((3,4))
print(a8)

a8 = np.ones((3,4))
print(a8)

a8 = np.full((3,4), 7.2)
print(a8)

a8 = np.eye(3)
print(a8)

a8 = np.arange(5)
print(a8)

a8 = np.linspace(0,20,5) # divides 0 to 20 into 5 elements
print(a8)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[7.2 7.2 7.2 7.2]
 [7.2 7.2 7.2 7.2]
 [7.2 7.2 7.2 7.2]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[0 1 2 3 4]
[ 0.  5. 10. 15. 20.]


In [63]:
# RANDOM NUMBERS #
a = np.random.random((3,2)) # uniform 0-1 distribution
print(a)
b = np.random.randn(3,2) # normal/Gaussian distribution, mean 0 and unit variance
# no tuple as shape here! each dimension one argument
print(b)

R = np.random.randn(10000)
print(R.mean(), R.var(), R.std())

R = np.random.randn(10, 3)
print(R.mean()) # mean of whole array

# random integer, low,high,size; high is exclusive
R = np.random.randint(3,10,size=(3,3)) # if we only pass one parameter, then from 0-x
print(R)

# with integer between 0 up to certain value
c = np.random.choice(7, size=10)
print(c)
# random values from a array
d = np.random.choice([1,2,3,4], size=8)
print(d)

[[0.02970255 0.44655226]
 [0.55915055 0.42102371]
 [0.60418856 0.58185864]]
[[ 0.61365753 -0.75597651]
 [-0.56912048  0.15421942]
 [ 0.97273552 -1.33767759]]
-0.0021739215575731267 0.9981761088141822 0.9990876382050686
-0.013761477551262688
[[7 4 8]
 [5 4 9]
 [8 3 9]]
[1 2 6 4 4 0 6 0 4 1]
[2 1 3 3 2 1 2 3]


In [68]:
# eigenvalues
a = np.array([[1,2], [3,4]])
eigenvalues, eigenvectors = np.linalg.eig(a)
# Note: use eigh if your matrix is symmetric (faster)
print(eigenvalues)
print(eigenvectors) # column vectors
print(eigenvectors[:,0]) # column 0 corresponding to eigenvalue[0]

# verify: e-vec * e-val = A * e-vec
d = eigenvectors[:,0] * eigenvalues[0]
e = a @ eigenvectors[:, 0]
print(d, e)
print(d == e) # numerical issues

# correct way to compare matrix
print(np.allclose(d,e))

[-0.37228132  5.37228132]
[[-0.82456484 -0.41597356]
 [ 0.56576746 -0.90937671]]
[-0.82456484  0.56576746]
[ 0.30697009 -0.21062466] [ 0.30697009 -0.21062466]
[ True False]
True


### LINEAR EQUATIONS ###
![](2023-08-09-21-12-53.png)

![](2023-08-09-21-14-12.png)

In [69]:
# solve linear system
# x1+x2=2200
# 1.5 x1 + 4 x2 = 5050
# 2 equations and 2 unknowns
A = np.array([[1, 1], [1.5, 4]])
b = np.array([2200,5050])

# Ax = b <=> x = b/A

# But: inverse is slow and less accurate
x = np.linalg.inv(A).dot(b) # not recommended
print(x)
x = np.linalg.solve(A,b) # good
print(x)

[1500.  700.]
[1500.  700.]


In [71]:
# LOADING CSV #
# https://www.python-engineer.com/videos/how-to-load-data/

# 1) load with np.loadtxt()
data = np.loadtxt('spambase.csv', delimiter=",",dtype=np.float32, skiprows=1)
print(data.shape, data.dtype)

# 2) load with np.genfromtxt()
data = np.genfromtxt('spambase.csv', delimiter=",", dtype=np.float32)
print(data.shape)

(4601, 58) float32
(4602, 58)
