### Python Numpy Arrays: Extensive Documentation

In [1]:
import numpy as np

In [2]:
lst = [1,2,3,4]
arr = np.array(lst)

In [3]:
arr

array([1, 2, 3, 4])

In [4]:
print(arr)

[1 2 3 4]


In [5]:
type(arr)

numpy.ndarray

In [6]:
arr.shape

(4,)

In [7]:
arr.ndim

1

In [8]:
lst1 = [1,2,3,4,5]
lst2 = [2,3,4,5,6]
lst3 = [3,4,5,6,7]

In [9]:
arr1 = np.array([lst1, lst2, lst3])
arr1

array([[1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [3, 4, 5, 6, 7]])

In [10]:
## Count the opening '[' brackets at the start of the printed array to identify its number of dimensions

In [11]:
arr1.shape

(3, 5)

In [12]:
arr1.ndim

2

In [13]:
print(arr1)

[[1 2 3 4 5]
 [2 3 4 5 6]
 [3 4 5 6 7]]


## Array Indexing & Slicing

In [14]:
arr

array([1, 2, 3, 4])

In [15]:
arr[3]

4

In [16]:
## In-place assignment: this mutates arr itself, so every later cell that uses arr sees [1, 2, 3, 5]
arr[3] = 5
arr

array([1, 2, 3, 5])

In [17]:
arr[1:]

array([2, 3, 5])

In [18]:
arr1

array([[1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [3, 4, 5, 6, 7]])

In [19]:
## The index before the comma selects rows; the index after it selects columns
## Here: every row (:), column 1
arr1[:,1]

array([2, 3, 4])

In [20]:
arr1[1:,1:3]

array([[3, 4],
       [4, 5]])

In [21]:
arr1[:,3:]

array([[4, 5],
       [5, 6],
       [6, 7]])

In [22]:
arr1[:,3:].shape

(3, 2)

## Element insertion and deletion in NumPy Arrays

In [23]:
x = np.array([1,2,3,4])

In [24]:
## Returns a new array with 6 added at the end; x itself is not modified
np.append(x, 6)

array([1, 2, 3, 4, 6])

In [25]:
## Returns a new array with 9 inserted before index 3; x itself is not modified
np.insert(x, 3, 9)

array([1, 2, 3, 9, 4])

In [26]:
## Returns a new array without the element at index 2; x itself is not modified
np.delete(x, 2)

array([1, 2, 4])

## Sort, Filter and Search operations

In [27]:
ar_1d = np.array([6,2,9,5,4,1,7])

In [28]:
np.sort(ar_1d)

array([1, 2, 4, 5, 6, 7, 9])

In [29]:
ar_2d = np.array([[5,2,8,7],[9,2,3,7]])

In [30]:
## For a 2-D array np.sort sorts each row independently (default axis=-1, i.e. the last axis)
np.sort(ar_2d)

array([[2, 5, 7, 8],
       [2, 3, 7, 9]])

In [31]:
## np.where(condition) returns a tuple of index arrays (one per dimension) marking where the condition is True
np.where(ar_1d == 9)

(array([2], dtype=int64),)

In [32]:
## arr was modified in place earlier and is [1, 2, 3, 5] here, so the only even value sits at index 1
np.where(arr % 2 == 0)

(array([1], dtype=int64),)

In [33]:
ar_1d[ar_1d>5]

array([6, 9, 7])

### Concatenation

In [34]:
np.concatenate((arr1[:1,:3], arr1[2:,2:]))

array([[1, 2, 3],
       [5, 6, 7]])

In [35]:
np.concatenate((arr1[:1,:3], arr1[2:,2:]), axis=0)

array([[1, 2, 3],
       [5, 6, 7]])

In [36]:
np.concatenate((arr1[:1,:3], arr1[2:,2:]), axis = 1)

array([[1, 2, 3, 5, 6, 7]])

In [37]:
a = np.array([[10,20],[30,40]])
b = np.array([[1,2],[3,4]])

In [38]:
np.concatenate((a,b))  ## --> same as np.concatenate((a,b), axis=0)

array([[10, 20],
       [30, 40],
       [ 1,  2],
       [ 3,  4]])

In [39]:
np.vstack((a, b))  ## --> vertical concatenation (same as axis=0)

array([[10, 20],
       [30, 40],
       [ 1,  2],
       [ 3,  4]])

In [40]:
np.concatenate((a,b), axis=1)

array([[10, 20,  1,  2],
       [30, 40,  3,  4]])

In [41]:
np.hstack((a,b))  ## --> horizontal concatenation (same as axis=1)

array([[10, 20,  1,  2],
       [30, 40,  3,  4]])

In [42]:
print(np.hstack((a, b)))

[[10 20  1  2]
 [30 40  3  4]]


### Aggregate functions in NumPy

In [43]:
x

array([1, 2, 3, 4])

In [44]:
np.sum(x)
np.min(x)
np.max(x)
np.size(x)
np.mean(x)
np.cumsum(x)
## Only the last expression of a cell is displayed; the results of the earlier lines are computed and discarded

array([ 1,  3,  6, 10])

In [45]:
## Calling print() on each aggregate shows every result, not just the last expression of the cell
for aggregate in (np.sum, np.min, np.max, np.size, np.mean, np.cumsum, np.cumprod):
    print(aggregate(x))

10
1
4
4
2.5
[ 1  3  6 10]
[ 1  2  6 24]


In [46]:
y = np.array([3,6,3,1])

In [47]:
## The tuple (x, y) is treated as a 2x4 array; axis=1 takes the cumulative product along each row
np.cumprod((x,y), axis = 1)

array([[ 1,  2,  6, 24],
       [ 3, 18, 54, 54]])

### Statistical functions

In [48]:
import statistics as stats

In [49]:
stat = np.array([320, 231, 540, 494, 190, 325, 270])

In [50]:
print(np.mean(stat))
print(np.median(stat))
print(stats.mode(stat))  ## --> every value in stat is distinct, so there is no real mode; statistics.mode (Python 3.8+) then returns the first value
print(np.std(stat))  ## --> standard deviation: describes the spread of the data (population std, ddof=0 by default)
print(np.var(stat))  ## --> same as std**2

338.57142857142856
320.0
320
121.65977988150533
14801.102040816328


In [51]:
## Correlation coefficient
## Measures how strongly two quantities vary together (between -1 and +1)

tobacco_consumption = [200, 350, 225, 190, 400, 300]
deaths = [30, 42, 32, 29, 50, 37]
print(np.corrcoef(tobacco_consumption, deaths))

[[1.         0.98570065]
 [0.98570065 1.        ]]


##### A positive correlation means that when one quantity increases, the other tends to increase as well

In [52]:
product_price = [200, 170, 100, 210, 300, 230]
sales = [20, 25, 34, 22, 11, 17]
np.corrcoef(product_price, sales)

array([[ 1.        , -0.98691004],
       [-0.98691004,  1.        ]])

##### A negative correlation means that when one quantity increases, the other tends to decrease

### -----------

### Broadcasting in NumPy

In [53]:
var1 = np.array([1,2,3])       ## --> shape (3,), broadcasts like a 1x3 row
var2 = np.array([4,5,6])       ## --> shape (3,), broadcasts like a 1x3 row
var3 = np.array([[1],[2],[3]]) ## --> shape (3, 1), i.e. 3x1
var4 = np.array([1,2,3,4])     ## --> shape (4,): cannot broadcast with var1/var2 (3 vs 4), but it can with the 3x1 var3

In [54]:
print(var1+var2)              ## --> same shapes: plain element-wise addition
print(np.sum((var1, var2)))   ## --> no axis given: sums every element of both arrays into one scalar
print('\n', var1+var3)        ## --> (3,) + (3, 1) broadcasts to 3x3; print inserts a space after '\n', hence the indented first row

[5 7 9]
21

 [[2 3 4]
 [3 4 5]
 [4 5 6]]


In [55]:
var5 = np.array([[3],[7]])         ## --> 2x1
var6 = np.array([[1,2,3],[4,5,6]]) ## --> 2x3 (broadcast possible)

In [56]:
print(var5.shape)
print(var6.shape)

(2, 1)
(2, 3)


In [57]:
(var5+var6)

array([[ 4,  5,  6],
       [11, 12, 13]])

### Iterating NumPy arrays

In [58]:
ar_1 = np.array([1,2,3,4])
ar_2 = np.array([[1,2],[3,4]])
ar_3 = np.array([[[1,2],[3,4]]])

In [59]:
print(ar_1.ndim)
print(ar_2.ndim)
print(ar_3.ndim)

1
2
3


In [60]:
for i in ar_1:
    print(i)

1
2
3
4


In [61]:
for i in ar_2:
    for j in i:
        print(j)

1
2
3
4


In [62]:
for i in ar_3:
    for j in i:
        for k in j:
            print(k)

1
2
3
4


In [63]:
for i in np.nditer(ar_3):
    print(i)

1
2
3
4


In [64]:
for i in np.ndenumerate(ar_3):
    print(i)

((0, 0, 0), 1)
((0, 0, 1), 2)
((0, 1, 0), 3)
((0, 1, 1), 4)


In [65]:
for i in np.ndenumerate(ar_2):
    print(i)

((0, 0), 1)
((0, 1), 2)
((1, 0), 3)
((1, 1), 4)


### Copy vs View in NumPy

In [66]:
array1 = np.array([1,2,3,4])
co = array1.copy()

# Creating copies is necessary when you need to ensure that the original data remains unchanged when you modify the new array.

In [67]:
array1[2] = 9

In [68]:
print(array1)
print(co)

[1 2 9 4]
[1 2 3 4]


In [69]:
array2 = np.array([5,6,7,8])
vi = array2.view()

# Views are beneficial because they avoid copying data, making operations faster and using less memory. 
# However, modifications to the view affect the original array, which is crucial to keep in mind during data manipulation.

In [70]:
array2[2] = 3

In [71]:
print(array2)
print(vi)

[5 6 3 8]
[5 6 3 8]


### Matrix Multiplication

In [72]:
## NOTE: NumPy's documentation no longer recommends np.matrix (the class may be removed in the future);
## the cells below contrast its * operator with that of regular ndarrays
mat1 = np.matrix([[1,2],[3,4]])
mat2 = np.matrix([[5,6],[7,8]])
print(type(mat1))

<class 'numpy.matrix'>


In [73]:
p = np.array([[1,2],[3,4]])
q = np.array([[5,6],[7,8]])
type(p)

numpy.ndarray

In [74]:
print(mat1)
print(p)

[[1 2]
 [3 4]]
[[1 2]
 [3 4]]


In [75]:
mat1 + mat2

matrix([[ 6,  8],
        [10, 12]])

In [76]:
p + q   ## --> result same as mat1 + mat2

array([[ 6,  8],
       [10, 12]])

In [77]:
# matrix multiplication
mat1 * mat2   

matrix([[19, 22],
        [43, 50]])

In [78]:
# normal element to element multiplication
p * q  ## --> result different than mat1 * mat2

array([[ 5, 12],
       [21, 32]])

In [79]:
# matrix-style multiplication of regular arrays (for 2-D arrays the @ operator, p @ q, gives the same product)
np.dot(p, q)   ## --> result same as mat1 * mat2

array([[19, 22],
       [43, 50]])

In [80]:
# Matrix Functions in NumPy

# np.transpose(mat1)  or  mat1.T  --> transpose of mat1
# np.swapaxes(mat1, 0, 1)  --> same result as transpose
# np.linalg.inv(mat1)      --> inverse of mat1
# np.linalg.matrix_power(mat1, 2)  --> power of mat1
# np.linalg.det(mat1)      --> determinant of mat1

## -------------------------------------

# NumPy for EDA

In [81]:
arr

array([1, 2, 3, 5])

In [82]:
arr<=2

array([ True,  True, False, False])

In [83]:
arr[arr<=2]

array([1, 2])

In [84]:
arr[arr<2]

array([1])

In [85]:
arr1

array([[1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [3, 4, 5, 6, 7]])

In [86]:
## reshape gives a 5x3 arrangement of the same 15 elements (read row by row); arr1 itself stays 3x5
arr1.reshape(5, 3)

array([[1, 2, 3],
       [4, 5, 2],
       [3, 4, 5],
       [6, 3, 4],
       [5, 6, 7]])

#### Another method to create an Array

In [87]:
arr2 = np.arange(1, 11, 1)  ## np.arange(start, stop, step); stop is exclusive, so this yields 1..10

In [88]:
arr2

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [89]:
arr2.reshape(1, 10)

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]])

In [90]:
arr2.reshape(2,5)

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [91]:
arr2.reshape(10, 1)

array([[ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10]])

In [92]:
## 3-dimensional array (shape 1 x 10 x 1)
arr2.reshape(1, 10, 1)

array([[[ 1],
        [ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 8],
        [ 9],
        [10]]])

In [93]:
arr1

array([[1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [3, 4, 5, 6, 7]])

In [94]:
arr1 * 2

array([[ 2,  4,  6,  8, 10],
       [ 4,  6,  8, 10, 12],
       [ 6,  8, 10, 12, 14]])

In [95]:
arr1 * arr1

array([[ 1,  4,  9, 16, 25],
       [ 4,  9, 16, 25, 36],
       [ 9, 16, 25, 36, 49]])

In [96]:
np.ones((3, 4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [97]:
np.ones((3, 4), dtype = int)

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])

In [98]:
np.zeros((2,3), dtype = int)

array([[0, 0, 0],
       [0, 0, 0]])

### Random function

In [99]:
np.random.randint(10, 30)

26

In [100]:
arr3 = np.random.randint(10, 30, 5)

In [101]:
## np.random.randint(10, 30, size = 5, dtype = float)  ---> wrong: randint only generates integer dtypes
np.random.randint(10, 30, size=5).astype(float)     ## ---> correct: draw integers, then convert them to float

array([14., 29., 12., 13., 18.])

In [102]:
arr3.shape

(5,)

In [103]:
arr3.reshape(5, 1)

array([[17],
       [20],
       [19],
       [24],
       [24]])

In [104]:
## To generate random floats in the half-open interval [0.0, 1.0)
np.random.random_sample()

0.9603401577276368

In [105]:
## Samples from the standard normal (Gaussian) distribution (mean = 0, std. deviation = 1)
np.random.randn(4, 5)

array([[-0.63491937,  0.98960443, -0.10333125,  0.3259723 , -0.78105999],
       [ 0.21975272, -1.57382472,  0.06865768,  0.26739419,  1.58514403],
       [ 1.45080913,  0.71019319,  0.56758942, -0.33391013,  0.25539348],
       [-2.16926531,  0.62545168, -2.83300031, -0.79105988,  0.45343845]])

In [106]:
np.random.rand(4, 5)

array([[0.3034166 , 0.40339465, 0.74527615, 0.13483701, 0.74883319],
       [0.7955167 , 0.00675517, 0.8430602 , 0.06564982, 0.20615624],
       [0.22522888, 0.04075725, 0.40015242, 0.36100975, 0.39025758],
       [0.51388704, 0.9140424 , 0.07375525, 0.86119322, 0.74776954]])

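#### .rand() and .random_sample() can be used interchangeably: both draw uniform floats from [0.0, 1.0); only the way the shape is passed differs (separate arguments vs. a tuple). .randn() instead samples from the standard normal distribution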

In [107]:
np.random.random_sample((4, 5))

array([[0.86217117, 0.67589771, 0.81686343, 0.81338834, 0.38476733],
       [0.03712028, 0.94157471, 0.15797453, 0.51052826, 0.90577003],
       [0.82136879, 0.90333045, 0.08266492, 0.7473348 , 0.41754707],
       [0.8985955 , 0.97231748, 0.9834825 , 0.93519291, 0.00954598]])