In [1]:
import numpy as np

###  Creating Arrays

In [2]:
a = np.array([1, 2, 3])
print(a)

[1 2 3]


In [3]:
b = np.array([(1, 2), 
              (3, 4)], 
             dtype=float)
print(b)

[[1. 2.]
 [3. 4.]]


In [4]:
c = np.array([[[1, 2, 3, 4], [5, 6, 7, 8]], 
              [[1, 2, 3, 4], [9, 10, 11, 12]], 
              [[13, 14, 15, 16], [17, 18, 19, 20]]], 
             dtype=np.float64)
print(c)

[[[ 1.  2.  3.  4.]
  [ 5.  6.  7.  8.]]

 [[ 1.  2.  3.  4.]
  [ 9. 10. 11. 12.]]

 [[13. 14. 15. 16.]
  [17. 18. 19. 20.]]]


In [5]:
print(type(c),    # <class 'numpy.ndarray'>
      c.dtype,    # float64
      c.ndim,     # 3
      c.shape,    # (3, 2, 4)
      c.size,     # total elements = 24
      sep='\n')

<class 'numpy.ndarray'>
float64
3
(3, 2, 4)
24


### Initial Placeholders 

In [6]:
d = np.zeros(shape=(3, 4), dtype=np.int64)
print(d)

[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]


In [7]:
e = np.ones(shape=(2, 3, 4), dtype=np.int16)
print(e)

[[[1 1 1 1]
  [1 1 1 1]
  [1 1 1 1]]

 [[1 1 1 1]
  [1 1 1 1]
  [1 1 1 1]]]


In [8]:
f = np.arange(start=10, stop=25, step=5)
print(f)

[10 15 20]


In [9]:
g = np.linspace(start=0, stop=2, num=5, endpoint=True)
print(g)

g1 = np.linspace(start=0, stop=2, num=5, endpoint=False)
print(g1)

[0.  0.5 1.  1.5 2. ]
[0.  0.4 0.8 1.2 1.6]


In [10]:
h = np.full(shape=(2, 3), fill_value=5.7, dtype=float)
print(h)

[[5.7 5.7 5.7]
 [5.7 5.7 5.7]]


In [11]:
i = np.eye(N=10,  # rows
           M=5,   # cols
           k=-2,  # diagonal (-ve => lower diag)
           dtype=np.int64)
print(i)

[[0 0 0 0 0]
 [0 0 0 0 0]
 [1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]


In [12]:
i = np.eye(N=10,  # rows
           M=5,   # cols
           k=2,   # diagonal (+ve => upper diag)
           dtype=np.int64)
print(i)

[[0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]


In [13]:
i = np.eye(N=10,  # rows
           M=5,   # cols
#            k=0, # default
           dtype=np.int64)
print(i)

[[1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]


In [14]:
j = np.random.random(size=(3, 3)) # values between 0 and 1
print(j)

[[0.38057024 0.44229351 0.40759088]
 [0.18566708 0.6804758  0.88817317]
 [0.20869172 0.44411826 0.19203114]]


In [15]:
j = np.random.random(size=(3, 3))*10 # values between 1 and 10
print(j)

[[6.25347375 1.71873143 6.15480457]
 [8.44761173 4.45694121 3.68365244]
 [2.1926234  6.94010842 8.56439297]]


In [16]:
k = np.empty(shape=(3, 3), dtype=np.int64) # garbage
print(k)

[[4618726803943714822 4610419297421240007 4618615712323414914]
 [4620945200685995779 4616704088119508733 4615477266670957078]
 [4612119767753471143 4619499885849584405 4621010942679788865]]


### Input/Output

#### arrays

In [17]:
# array to numpy files
a = np.array([1, 2, 3])

np.save("my_array.npy", a)
loaded_array = np.load("my_array.npy")

print(loaded_array)

[1 2 3]


In [18]:
a = np.array([1, 2, 3])
b = np.array([(1, 2), (3, 4)], dtype=float)
c = np.array([[[1, 2, 3, 4], [5, 6, 7, 8]], [[1, 2, 3, 4], [9, 10, 11, 12]], [[13, 14, 15, 16], [17, 18, 19, 20]]], dtype=np.float64)

np.savez("my_arrays.npz", 
         my_array1=a, 
         my_array2=b, 
         my_array3=c)

loaded_data = np.load("my_arrays.npz")
loaded_array1 = loaded_data["my_array1"]
loaded_array2 = loaded_data["my_array2"]
loaded_array3 = loaded_data["my_array3"]

print(loaded_array1,'\n') # a
print(loaded_array2,'\n') # b
print(loaded_array3) # c

[1 2 3] 

[[1. 2.]
 [3. 4.]] 

[[[ 1.  2.  3.  4.]
  [ 5.  6.  7.  8.]]

 [[ 1.  2.  3.  4.]
  [ 9. 10. 11. 12.]]

 [[13. 14. 15. 16.]
  [17. 18. 19. 20.]]]


#### text

In [19]:
# array to normal files
np.savetxt('my_array.txt', a, delimiter=',')
np.savetxt('my_array.csv', a, delimiter=',') # comma seperated values
np.savetxt('my_array.out', a, delimiter=',') # assembler output

In [20]:
# create and add data into data.txt
with open('data.txt', 'w+') as f:
    f.write('''Value1  Value2  Value3
0.2536  0.1008  0.3857
0.4839  0.4536  0.3561
0.1292  0.6875  0.5929
0.1781  0.3049  0.8928
0.6253  0.3486  0.8791''')

In [21]:
# create and add data into data2.txt (contains missing values)
with open('data2.txt', 'w+') as f:
    f.write('''Value1  Value2  Value3
0.4839  0.4536  0.3561
0.1292  0.6875  MISSING
0.1781  0.3049  0.8928
MISSING 0.5801  0.2038
0.5993  0.4357  0.7410''')

In [22]:
# data from file to array
arr1 = np.genfromtxt('data.txt')
arr2 = np.genfromtxt('data2.txt', skip_header=1, filling_values=-99)

In [23]:
print('arr1:\n',arr1)
print('arr2:\n',arr2)

arr1:
 [[   nan    nan    nan]
 [0.2536 0.1008 0.3857]
 [0.4839 0.4536 0.3561]
 [0.1292 0.6875 0.5929]
 [0.1781 0.3049 0.8928]
 [0.6253 0.3486 0.8791]]
arr2:
 [[  0.4839   0.4536   0.3561]
 [  0.1292   0.6875 -99.    ]
 [  0.1781   0.3049   0.8928]
 [-99.       0.5801   0.2038]
 [  0.5993   0.4357   0.741 ]]


In [24]:
# text format data from file to array
arr3 = np.loadtxt('data.txt', dtype=str) 
arr4 = np.loadtxt('data.txt', dtype=str, delimiter='\n') # \n delimiter => each row a str

In [25]:
print('arr3:\n',arr3)
print('arr4:\n',arr4)

arr3:
 [['Value1' 'Value2' 'Value3']
 ['0.2536' '0.1008' '0.3857']
 ['0.4839' '0.4536' '0.3561']
 ['0.1292' '0.6875' '0.5929']
 ['0.1781' '0.3049' '0.8928']
 ['0.6253' '0.3486' '0.8791']]
arr4:
 ['Value1  Value2  Value3' '0.2536  0.1008  0.3857'
 '0.4839  0.4536  0.3561' '0.1292  0.6875  0.5929'
 '0.1781  0.3049  0.8928' '0.6253  0.3486  0.8791']


In [26]:
# unpack each column
x, y, z = np.loadtxt('data.txt', skiprows=1, unpack=True)
print(x,y,z, sep='\n') # each col in file is stored as array

[0.2536 0.4839 0.1292 0.1781 0.6253]
[0.1008 0.4536 0.6875 0.3049 0.3486]
[0.3857 0.3561 0.5929 0.8928 0.8791]


In [27]:
# x, y, z = np.loadtxt('data2.txt', skiprows=1, unpack=True)
# ValueError: could not convert string to float: 'MISSING'

### Inspecting array

In [28]:
print(a)

[1 2 3]


In [29]:
print(a.ndim,         # 1
      len(a),         # 3
      a.shape,        # (3,)
      a.size,         # 3
      a.itemsize,     # 4
      a.nbytes,       # 3*4 = 12
      a.dtype,        # int32
      a.dtype.name,   # int32
      a[0].dtype,     # int32
      sep='  ')

1  3  (3,)  3  4  12  int32  int32  int32


In [30]:
print(b)

[[1. 2.]
 [3. 4.]]


In [31]:
print(b.ndim,         # 2
      len(b),         # 2 rows
      b.shape,        # (2,2)
      b.size,         # 4
      b.itemsize,     # 8
      b.nbytes,       # 4*8 = 32
      b.dtype,        # float64
      b.dtype.name,   # float64
      b[0][0].dtype,  # float64
      sep='\t')

2	2	(2, 2)	4	8	32	float64	float64	float64


In [32]:
print(c)

[[[ 1.  2.  3.  4.]
  [ 5.  6.  7.  8.]]

 [[ 1.  2.  3.  4.]
  [ 9. 10. 11. 12.]]

 [[13. 14. 15. 16.]
  [17. 18. 19. 20.]]]


In [33]:
print(c.ndim,         # 3
      len(c),         # 3 rows
      c.shape,        # (3,2,4)
      c.size,         # 24
      c.itemsize,     # 8
      c.nbytes,       # 24*8 = 192
      c.dtype,        # float64
      type(c),        # numpy.ndarray
      type(c[0]),     # numpy.ndarray
      type(c[0][0]),  # numpy.ndarray
      sep='  ')

3  3  (3, 2, 4)  24  8  192  float64  <class 'numpy.ndarray'>  <class 'numpy.ndarray'>  <class 'numpy.ndarray'>


In [34]:
# string array
s = np.array([['a','an','the'],['python','is','easy']])
print(s.ndim,         # 2
      len(s),         # 2 rows
      s.shape,        # (2,3)
      s.size,         # 6 (largest string length)
      s.itemsize,     # 24
      s.nbytes,       # 24*6 = 144
      s.dtype,        # <U6
      type(s),        # numpy.ndarray
      type(s[0]),     # numpy.ndarray
      type(s[0][0]),  # numpy.str_
      sep='  ')

2  2  (2, 3)  6  24  144  <U6  <class 'numpy.ndarray'>  <class 'numpy.ndarray'>  <class 'numpy.str_'>


### Data Types

***numpy data types***

- np.int64 # Signed 64-bit integer types
- np.float32 # 8 bits exponent, 23 bits mantissa
- np.double # 11 bits exponent, 52 bits mantissa
- complex # Complex numbers represented by 128 floats
- bool # Boolean type storing TRUE and FALSE values
- object # Python object type
- np.string_ # Fixed-length string type
- np.unicode_ # Fixed-length unicode 

***referring datatypes with 1 character:***

- i # integer
- b # boolean
- u # unsigned integer
- f # float
- c # complex float
- m # timedelta
- M # datetime
- O # object
- S # string
- U # unicode string
- V # fixed chunk of memory for other type ( void )

In [35]:
a = np.power(100, 8, dtype=np.int64)     # 10^16
a2 = np.power(100, 8, dtype=np.int32)    # garbage as size exceeds
b = np.array([1, 2, 3], dtype='f')
c = np.array([1, 2, 3], dtype=np.float32)
d = np.array([1, 2, 3], dtype=np.str_)

In [36]:
print(a,a2,b,c,d)
print(a.dtype,b.dtype,c.dtype,d.dtype)

10000000000000000 1874919424 [1. 2. 3.] [1. 2. 3.] ['1' '2' '3']
int64 float32 float32 <U1


#### Casting

In [37]:
b_str = b.astype(np.str_)

print(b, b.dtype)
print(b_str, b_str.dtype)

[1. 2. 3.] float32
['1.0' '2.0' '3.0'] <U32


### Math Operations

In [38]:
# shape should match or one array should be a scalar
a = np.array([1, 2, 3])
b = np.array([1, 2, 3])
c = np.array([[10,20],[30,40]])
s = 5

In [39]:
# a and b shape matches
print(np.add(a, b),           # [2 4 6]
      np.subtract(a, b),      # [0 0 0]
      np.diff(a),             # [1 1] consecutive elements difference
      np.multiply(a, b),      # [1 4 9]
      np.divide(b, a),        # [1. 1. 1.]
      np.exp(a),              # e^a_elements [ 2.71828183  7.3890561  20.08553692]
      np.sqrt(b),             # [1.         1.41421356 1.73205081]
      np.dot(a,b),            # matrix dot product: 14
      np.trace(c),            # 10+40 = 50
      
      # Comparison Operations
      a == b,                 # array operation (elementwise) [ True  True  True]
      a < 2,                  # [ True False False]
      np.array_equal(a, b),   # arrray content comparision True
      a is b,                 # array object with content comparision False
     sep='  ')

[2 4 6]  [0 0 0]  [1 1]  [1 4 9]  [1. 1. 1.]  [ 2.71828183  7.3890561  20.08553692]  [1.         1.41421356 1.73205081]  14  50  [ True  True  True]  [ True False False]  True  False


In [40]:
# s is sclare and a is array
print(np.add(a, s),           # [6 7 8]
      np.subtract(a, s),      # [-4 -3 -2]
      np.multiply(a, s),      # [ 5 10 15]
      np.divide(b, s),        # [0.2 0.4 0.6]
      np.exp(s),              # e^5
      np.sqrt(s),             # sqrt(5)=2.23606797749979
      np.dot(a,s),            # element wise multiplycation: [ 5 10 15]
      
      # Comparison Operations
      a == s,                 # elementwise comparision [False False False]
      a < s,                  # [ True  True  True]
      np.array_equal(a, s),   # False
      a is s,                 # False
      sep=' ')

[6 7 8] [-4 -3 -2] [ 5 10 15] [0.2 0.4 0.6] 148.4131591025766 2.23606797749979 [ 5 10 15] [False False False] [ True  True  True] False False


In [41]:
# columns should be equal. compatible in all dimensions

x = np.ones((2,3))
y = np.full((5,1,3),4)

z = x + y
print(z.shape)

(5, 2, 3)


In [42]:
print(x)
print(y)

[[1. 1. 1.]
 [1. 1. 1.]]
[[[4 4 4]]

 [[4 4 4]]

 [[4 4 4]]

 [[4 4 4]]

 [[4 4 4]]]


In [43]:
print(z)

[[[5. 5. 5.]
  [5. 5. 5.]]

 [[5. 5. 5.]
  [5. 5. 5.]]

 [[5. 5. 5.]
  [5. 5. 5.]]

 [[5. 5. 5.]
  [5. 5. 5.]]

 [[5. 5. 5.]
  [5. 5. 5.]]]


In [44]:
# Error if cols doesnot match

b = np.array([(1, 2), (3, 4)], dtype=float)
print(b.shape, x.shape)

# x + b # ValueError: operands could not be broadcast together with shapes (2,3) (2,2) 

(2, 2) (2, 3)


In [45]:
# division by 0

print(np.divide(a, 0))    # [inf, inf, inf]
print(np.divide(-a, 0))   # [-inf, -inf, -inf]

[inf inf inf]
[-inf -inf -inf]


  print(np.divide(a, 0))    # [inf, inf, inf]
  print(np.divide(-a, 0))   # [-inf, -inf, -inf]


In [46]:
print(np.sin(a), 
      np.cos(a), 
      np.tan(a), 
      np.log(a), sep='\n')

[0.84147098 0.90929743 0.14112001]
[ 0.54030231 -0.41614684 -0.9899925 ]
[ 1.55740772 -2.18503986 -0.14254654]
[0.         0.69314718 1.09861229]


In [47]:
a = np.array([1.6, 2.2, 3.09])
print(np.floor(a), 
      np.ceil(a), sep='\n')

[1. 2. 3.]
[2. 3. 4.]


### Aggregate Functions

In [48]:
a = np.array([1, 2, 3])
print(a.sum(), 
      a.min(), 
      a.max(), 
      a.cumsum(), sep='\n')

6
1
3
[1 3 6]


In [49]:
a = np.array([[1, 2, 2],[4, 5, 6]])
print(a.sum(), 
      a.min(), 
      a.max(), 
      a.cumsum(), sep='\n')

20
1
6
[ 1  3  5  9 14 20]


### Statistical functions

In [50]:
a = np.array([10, 5, 2, 4])
b = a.reshape(2,-1)

print(np.mean(a),             # 5.25
      np.std(a),              # 2.947456530637899
      np.var(a),              # 8.6875
      np.median(a),           # 4.5
      np.percentile(a, 50),   # 4.5
      np.percentile(a, 75),   # 6.25
      np.percentile(a, 100),  # 10.0
      sep='  ')

5.25  2.947456530637899  8.6875  4.5  4.5  6.25  10.0


In [51]:
a = np.array([[1, 2, 2],[4, 5, 6]])
corr = np.corrcoef(a)
print(corr) 

[[1.        0.8660254]
 [0.8660254 1.       ]]


- corr[0][0]: 1. is correlation between [1, 2, 2] and itself
- corr[0,1] = corr[1,0]: 0.8660254 is correlation between [1, 2, 2] and [4, 5, 6]
- corr[1][1]: 1. is correlation between [4, 5, 6] and itself

In [52]:
a = np.array([[10, 5, 2],[4, 5, 6]])
corr = np.corrcoef(a)
print(corr)

[[ 1.         -0.98974332]
 [-0.98974332  1.        ]]


- corr[0][0]: 1. is correlation between [10, 5, 2] and itself
- corr[0,1] = corr[1,0]: -0.98974332 is correlation between [10, 5, 2] and [4, 5, 6]
- corr[1][1]: 1. is correlation between [4, 5, 6] and itself

### copying

In [53]:
# no copy
a = np.array([1, 2, 3])
b = np.array([1, 2, 3])
c = a
print(a is b, a is c, b is c)

False True False


In [54]:
d = a[:]          # shallow copy
e = np.copy(a)    # deep copy
f = a.copy()      # deep copy
d[0] = 10
e[1] = 20
f[2] = 30

In [55]:
print(a)          # affected due to changes in d, but not from e and f
print(d)
print(e)
print(f)

[10  2  3]
[10  2  3]
[ 1 20  3]
[ 1  2 30]


### views

Ex: Image processing
1. Capture the image using the camera and convert it into a NumPy array.
2. Create a view of the NumPy array to work with a specific region of the image (e.g., a rectangular portion).
3. Apply the sepia filter to the view of the image, which modifies the pixel values directly in the view without copying the entire image data.
4. Display the modified portion of the image in real-time.

In [56]:
# use of view

arr = np.array([1, 2, 3, 4, 5])
v1 = arr[1:4]
v2 = arr.view()

print("original arr:", arr)
print("v2:", v1)
print("v2:", v2)

original arr: [1 2 3 4 5]
v2: [2 3 4]
v2: [1 2 3 4 5]


In [57]:
# Modify the view array
v1[:] = 100
print("Modified v1:", v1)

v2[1:2] = 200 
print("Modified v2:", v2) # changes from above view are also there

Modified v1: [100 100 100]
Modified v2: [  1 200 100 100   5]


In [58]:
print("Modified Original Array:", arr) # changes from all views affects the original array

Modified Original Array: [  1 200 100 100   5]


### Transpose

In [59]:
a = np.array([[1,2,3],[4,5,6]]) # 2-d
b = np.transpose(a)
c = a.T
print(a.shape, b.shape, c.shape)

(2, 3) (3, 2) (3, 2)


In [60]:
a = np.array([[1,2,3,4,5,6]]) # 2-d
b = np.transpose(a)
c = a.T
print(a.shape, b.shape, c.shape)

(1, 6) (6, 1) (6, 1)


In [61]:
a = np.array([1,2,3,4,5,6]) # 1-d
b = np.transpose(a)
c = a.T
print(a.shape, b.shape, c.shape)

(6,) (6,) (6,)


### Indexing, Subsetting, Slicing

In [62]:
a = np.array([[1,2,3],[4,5,6]])
print(a[0],       # positive indexing
      a[-1],      # negative indexing
      a[a > 4],   # boolean indexing
      a[-1][-1],  
      a[-1, -1],  # subsetting
      sep='\n')

[1 2 3]
[4 5 6]
[5 6]
6
6


In [63]:
print(a[1:],         # slicing
      a[-1, :], 
      a[:, 2], 
      a[:2, 1], 
      a[:1,1:], 
      a[1, ...],     # a[1,:]
      
      a[[0, 0, -1, 1], [0, 1, -1, 2]], # a[0,0] a[0,1] a[-1][-1] a[1][2]
      
      a[1,-2:-4:-1],
     sep='\n')

[[4 5 6]]
[4 5 6]
[3 6]
[2 5]
[[2 3]]
[4 5 6]
[1 2 6 6]
[5 4]


In [64]:
# fancy indexing
print(a[[1,0,1,0]],              # select rows
      a[[1,0,1,0]][:],           # select rows and all columns 
      a[[1,0,1,0]][:,[0,1,2,1]], # select rows and specified columns
      sep="\n")

[[4 5 6]
 [1 2 3]
 [4 5 6]
 [1 2 3]]
[[4 5 6]
 [1 2 3]
 [4 5 6]
 [1 2 3]]
[[4 5 6 5]
 [1 2 3 2]
 [4 5 6 5]
 [1 2 3 2]]


### array manipulation

In [65]:
a = np.array([[1, 2, 3], [4, 5, 6], [7,8,9],[10,11,12]])
print(a.shape)
print(a)

(4, 3)
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [66]:
# Reshape array 'a' into a 3x3 matrix
reshaped_a = a.reshape((3, 4))
print('reshaped_a',reshaped_a.shape)
print(reshaped_a)

reshaped_a (3, 4)
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [67]:
# -1 evaluates the last value in reshape according to the total number of elements in the array

reshaped_a = a.reshape((1,-1))        # -1 is evaluated to 12
print('reshaped_a',reshaped_a.shape)
print(reshaped_a)

reshaped_a (1, 12)
[[ 1  2  3  4  5  6  7  8  9 10 11 12]]


In [68]:
reshaped_a = a.reshape((2,-1))        # -1 is evaluated to 6
print('reshaped_a',reshaped_a.shape)
print(reshaped_a)

reshaped_a (2, 6)
[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]


In [69]:
reshaped_a = a.reshape((1,3,-1))      # -1 is evaluated to 4 
print('reshaped_a',reshaped_a.shape)
print(reshaped_a)

reshaped_a (1, 3, 4)
[[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]]


In [70]:
# Resize array 'a' to a new shape (2x4), potentially repeating elements

resized_a = np.resize(a, (2, 4)) # data loss possible
print('resized_a',resized_a.shape)
print(resized_a)

resized_a (2, 4)
[[1 2 3 4]
 [5 6 7 8]]


In [71]:
# Flatten array 'a' (convert multi-dimensional array to a 1D array)
raveled_a = np.ravel(a)
print('raveled_a',raveled_a)

# Flatten array 'a' using the default row-major order (equivalent to np.ravel(a))
flattened_a = a.flatten()
print('flattened_a',flattened_a)

raveled_a [ 1  2  3  4  5  6  7  8  9 10 11 12]
flattened_a [ 1  2  3  4  5  6  7  8  9 10 11 12]


In [72]:
# Append value 7 to the end of array 'a'
appended_a = np.append(a, 7)
print('appended_a',appended_a)

# Insert value 3 into array 'a' at index 2
inserted_a = np.insert(a, 2, 300)
print('inserted_a',inserted_a)

# Delete element at index 2from array 'a'
deleted_a = np.delete(a, 11)
print('deleted_a',deleted_a)

appended_a [ 1  2  3  4  5  6  7  8  9 10 11 12  7]
inserted_a [  1   2 300   3   4   5   6   7   8   9  10  11  12]
deleted_a [ 1  2  3  4  5  6  7  8  9 10 11]


In [73]:
print(a) # original array is not affected after all above array manipulation operations

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


### Sorting Arrays

- **axis = 0 => vertical axis**
- **axis = 1 => horizontal axis**

In [74]:
# demonstrating axis
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr)

print()
print(arr.sum(axis=0), 
      arr.sum(axis=1), 
      arr.sum(axis=None) # default axis = None for sum()
     ) 

[[1 2 3]
 [4 5 6]
 [7 8 9]]

[12 15 18] [ 6 15 24] 45


- default axis may differ from function to function

In [75]:
a = np.array([[30,10,1], [6,50,3]])
print(a)

[[30 10  1]
 [ 6 50  3]]


In [76]:
a.sort(axis=1)   # horizontal axis
print(a)         # original is affected

[[ 1 10 30]
 [ 3  6 50]]


In [77]:
a.sort(axis=0)   # vertical axis
print(a)         # original is affected

[[ 1  6 30]
 [ 3 10 50]]


In [78]:
a = np.array([[30,10,1], 
              [6,50,3]])

a.sort(axis=-1)  # last axis
print(a)

[[ 1 10 30]
 [ 3  6 50]]


In [79]:
a = np.array([[30,10,1], [6,50,3]])

b = np.sort(a) # original is not affected
print(b)
print(a)

[[ 1 10 30]
 [ 3  6 50]]
[[30 10  1]
 [ 6 50  3]]


### Combining arrays

In [80]:
a = np.array([(1, 2), (3, 4)], dtype=float)
b = np.array([(5, 6), (7, 8)], dtype=float)
print(a)
print(b)

[[1. 2.]
 [3. 4.]]
[[5. 6.]
 [7. 8.]]


In [81]:
np.vstack((a, b))

array([[1., 2.],
       [3., 4.],
       [5., 6.],
       [7., 8.]])

In [82]:
np.hstack((a, b))

array([[1., 2., 5., 6.],
       [3., 4., 7., 8.]])

In [83]:
np.concatenate((a, b), axis=0) # vstack

array([[1., 2.],
       [3., 4.],
       [5., 6.],
       [7., 8.]])

In [84]:
np.concatenate((a, b), axis=1) # hstack

array([[1., 2., 5., 6.],
       [3., 4., 7., 8.]])

### Splitting arrays

In [85]:
a = np.array([(1, 2), (3, 4), (5,6)], dtype=float)
print(a)

[[1. 2.]
 [3. 4.]
 [5. 6.]]


In [86]:
a1, a2, a3 = np.vsplit(a, 3) # split along row axis: each row is splitted
print(a1)
print(a2)
print(a3)

[[1. 2.]]
[[3. 4.]]
[[5. 6.]]


In [87]:
a1, a2 = np.hsplit(a, 2)    # split along column axis: each column is splitted
print(a1)
print(a2)

[[1.]
 [3.]
 [5.]]
[[2.]
 [4.]
 [6.]]


### Data analysis functions

In [88]:
a = np.array([(1, 2), (8, 4), (8,6)], dtype=int)
f = a.ravel()
t = np.array([np.inf, 5, np.nan])
print(a)
print(f)
print(t)

[[1 2]
 [8 4]
 [8 6]]
[1 2 8 4 8 6]
[inf  5. nan]


In [89]:
# indices
print(np.argsort(f),     # returns indices of elements when array is sorted 
      np.argmax(a),      # returns index of maximum element
      np.argmin(a),      # returns index of minimum element
      np.where(a > 5),   # returns indices of elements satisfying the condition: (1,0) (2,0) (2,1)
      sep='\n'
     )

[0 1 3 5 2 4]
2
0
(array([1, 2, 2], dtype=int64), array([0, 0, 1], dtype=int64))


In [90]:
print(
      np.bincount(f),          # count of each element across the bins: [0 1 2 3 4 5 6 7 8 ]
      np.histogram(a, bins=4), # count of values across 4 bins: [1.  , 2.75, 4.5 , 6.25, 8.  ]
      np.isinf(t),             # element wise opeation to check for np.inf
      np.isnan(t),             # element wise opeation to check for np.nan
      sep='\n')

[0 1 1 0 1 0 1 0 2]
(array([2, 1, 1, 2], dtype=int64), array([1.  , 2.75, 4.5 , 6.25, 8.  ]))
[ True False False]
[False False  True]


In [91]:
print(np.random.choice(f, size=3))                 # take a sample of 3 numbers with replacment from f

[8 2 2]


In [92]:
unq_ele, counts = np.unique(a, return_counts=True) # retuns unique elements with their count in array
print(unq_ele, counts)

[1 2 4 6 8] [1 1 1 1 2]


### masked arrays

In [93]:
# NumPy array with some missing values (e.g., represented by -999)
data = np.array([10, 20, -999, 30, -999, 40, 50, -999])

# Create a masked array by specifying the condition for missing values
masked_data = np.ma.masked_where(data == -999, data)

print("Original Data:", data)
print("Masked Array:", masked_data)

Original Data: [  10   20 -999   30 -999   40   50 -999]
Masked Array: [10 20 -- 30 -- 40 50 --]


In [94]:
# Perform calculations on the masked array (missing values will be ignored)
mean = np.mean(masked_data)
sum = np.sum(masked_data)

print("Mean:", mean)
print("Sum:", sum)

Mean: 30.0
Sum: 150
