### Handling Challenges in Numerical Computations with NumPy

In [1]:
marks = [78,0,89,67,98]
# marks = [78,None,89,67,98]
summation = 0
for i in marks:
    summation += i               # TypeError: unsupported operand type(s) for +=: 'int' and 'NoneType'
print(summation/len(marks))    

66.4


In [2]:
math_marks = [78,89,67,98]
science_marks = [56,34,98,44]
total_marks_of_each_student = math_marks + science_marks
print(total_marks_of_each_student)            # marks of each student are not added

[78, 89, 67, 98, 56, 34, 98, 44]


**Importing numpy**

In [3]:
import numpy as np

In [4]:
math_marks = np.array([78,89,67,98])
science_marks = np.array([56,34,98,44])
total_marks_of_each_student = math_marks + science_marks
print(total_marks_of_each_student)

[134 123 165 142]


In [5]:
math_marks = np.array([np.nan,89,67,98])
science_marks = np.array([56,34,98,44])
total_marks_of_each_student = math_marks + science_marks
print(total_marks_of_each_student)

[ nan 123. 165. 142.]


In [6]:
np.nan * 0, np.nan + 5, np.nan / 5

(nan, nan, nan)

###  Creating Arrays

In [7]:
a = np.array([1, 2, 3])
print(a)
print("shape:",a.shape)
print("dimension:",a.ndim)

[1 2 3]
shape: (3,)
dimension: 1


In [8]:
b = np.array([(1, 2), 
              (3, 4)], 
             dtype=float)
print("shape:",b.shape)
print("dimension:",b.ndim)
print(b)

shape: (2, 2)
dimension: 2
[[1. 2.]
 [3. 4.]]


In [9]:
c = np.array([[[1, 2, 3, 4], [5, 6, 7, 8]], 
              [[1, 2, 3, 4], [9, 10, 11, 12]], 
              [[13, 14, 15, 16], [17, 18, 19, 20]]], 
             dtype=np.float64)
print("shape:",c.shape)
print("dimension:",c.ndim)
print(c)

shape: (3, 2, 4)
dimension: 3
[[[ 1.  2.  3.  4.]
  [ 5.  6.  7.  8.]]

 [[ 1.  2.  3.  4.]
  [ 9. 10. 11. 12.]]

 [[13. 14. 15. 16.]
  [17. 18. 19. 20.]]]


In [10]:
np.set_printoptions(formatter={'float': '{: 5.1f}'.format})

In [11]:
print(c)

[[[  1.0   2.0   3.0   4.0]
  [  5.0   6.0   7.0   8.0]]

 [[  1.0   2.0   3.0   4.0]
  [  9.0  10.0  11.0  12.0]]

 [[ 13.0  14.0  15.0  16.0]
  [ 17.0  18.0  19.0  20.0]]]


In [12]:
print(c.shape,    # (3, 2, 4)
      c.size,     # total elements = 3*2*4 = 24
      c.ndim,     # 3
      type(c),    # <class 'numpy.ndarray'>
      c.dtype,    # array element datatype: float64
      sep='\n')

(3, 2, 4)
24
3
<class 'numpy.ndarray'>
float64


### Initial Placeholders 

In [13]:
d = np.zeros(shape=(3, 4), dtype=np.int64)
print(d)

[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]


In [14]:
e = np.ones(shape=(2, 3, 4), dtype=np.int16)
print(e)

[[[1 1 1 1]
  [1 1 1 1]
  [1 1 1 1]]

 [[1 1 1 1]
  [1 1 1 1]
  [1 1 1 1]]]


In [15]:
f = np.arange(start=10, stop=25, step=5)     # 1-d
print(f)

[10 15 20]


In [16]:
g = np.linspace(start=0, stop=2, num=5, endpoint=True)    # 1-d
print(g)

g1 = np.linspace(start=0, stop=2, num=5, endpoint=False)  # 1-d
print(g1)

[  0.0   0.5   1.0   1.5   2.0]
[  0.0   0.4   0.8   1.2   1.6]


In [17]:
h = np.full(shape=(2, 3), fill_value=5.7, dtype=float)
print(h)

[[  5.7   5.7   5.7]
 [  5.7   5.7   5.7]]


In [18]:
i = np.eye(N=10,  # rows
           M=5,   # cols
           k=-2,  # diagonal (-ve => lower diag)
           dtype=np.int64)
print(i)                   # shape = NxM, dim = 2

[[0 0 0 0 0]
 [0 0 0 0 0]
 [1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]


In [19]:
i = np.eye(N=10,  # rows
           M=5,   # cols
           k=2,   # diagonal (+ve => upper diag)
           dtype=np.int64)
print(i)

[[0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]


In [20]:
i = np.eye(N=10,  # rows
           M=5,   # cols
#          k=0,   # default
           dtype=np.int64)
print(i)

[[1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]


In [21]:
# random values between 0 and 1
# different random values for each execution

j = np.random.random(size=(3, 3)) 
print(j)

j = np.random.random(size=(3, 3)) # other random values
print(j)

[[  0.4   0.5   0.7]
 [  0.7   0.6   0.4]
 [  0.1   0.5   0.6]]
[[  0.9   0.9   0.9]
 [  0.5   0.2   0.1]
 [  0.6   0.5   0.4]]


In [22]:
# same random values for each execution
np.random.seed(0)

j = np.random.random(size=(3, 3))
print(j)

j = np.random.random(size=(3, 3)) # other random values
print(j)

[[  0.5   0.7   0.6]
 [  0.5   0.4   0.6]
 [  0.4   0.9   1.0]]
[[  0.4   0.8   0.5]
 [  0.6   0.9   0.1]
 [  0.1   0.0   0.8]]


In [23]:
# same random values for each execution
np.random.seed(1)

j = np.random.random(size=(3, 3))
print(j)

j = np.random.random(size=(3, 3)) # other random values
print(j)

[[  0.4   0.7   0.0]
 [  0.3   0.1   0.1]
 [  0.2   0.3   0.4]]
[[  0.5   0.4   0.7]
 [  0.2   0.9   0.0]
 [  0.7   0.4   0.6]]


In [24]:
j = np.random.random(size=(3, 3))*10 # random values between 0 and 10
print(j)

[[  1.4   2.0   8.0]
 [  9.7   3.1   6.9]
 [  8.8   8.9   0.9]]


In [25]:
j = np.random.random(size=(3, 3))*100 # random values between 0 and 100
print(j)

[[  3.9  17.0  87.8]
 [  9.8  42.1  95.8]
 [ 53.3  69.2  31.6]]


In [26]:
k = np.empty(shape=(3, 3), dtype=np.int64) # garbage
print(k)

[[4609001284816098707 4611600517096888917 4620697409231141019]
 [4621640446144267210 4614240103859851758 4619480878112259129]
 [4621123250379398379 4621225805850257262 4605835321117975406]]


### Input/Output

#### arrays

In [27]:
# array to numpy files
a = np.array([1, 2, 3])

np.save("my_array.npy", a)
loaded_array = np.load("my_array.npy")

print(loaded_array)

[1 2 3]


In [28]:
a = np.array([1, 2, 3])
b = np.array([(1, 2), (3, 4)], dtype=float)
c = np.array([[[1, 2, 3, 4], [5, 6, 7, 8]], [[1, 2, 3, 4], [9, 10, 11, 12]], [[13, 14, 15, 16], [17, 18, 19, 20]]], dtype=np.float64)

np.savez("my_arrays.npz", 
         my_array1=a, 
         my_array2=b, 
         my_array3=c)

loaded_data = np.load("my_arrays.npz")
loaded_array1 = loaded_data["my_array1"]
loaded_array2 = loaded_data["my_array2"]
loaded_array3 = loaded_data["my_array3"]

print(loaded_array1,'\n') # a
print(loaded_array2,'\n') # b
print(loaded_array3) # c

[1 2 3] 

[[  1.0   2.0]
 [  3.0   4.0]] 

[[[  1.0   2.0   3.0   4.0]
  [  5.0   6.0   7.0   8.0]]

 [[  1.0   2.0   3.0   4.0]
  [  9.0  10.0  11.0  12.0]]

 [[ 13.0  14.0  15.0  16.0]
  [ 17.0  18.0  19.0  20.0]]]


#### text

In [29]:
# array to normal files
np.savetxt('my_array.txt', a, delimiter=',')
np.savetxt('my_array.csv', a, delimiter=',') # comma seperated values
np.savetxt('my_array.out', a, delimiter=',') # assembler output

In [30]:
# create and add data into data.txt
with open('data.txt', 'w+') as f:
    f.write('''Value1  Value2  Value3
0.2536  0.1008  0.3857
0.4839  0.4536  0.3561
0.1292  0.6875  0.5929
0.1781  0.3049  0.8928
0.6253  0.3486  0.8791''')

In [31]:
# create and add data into data2.txt (contains missing values)
with open('data2.txt', 'w+') as f:
    f.write('''Value1  Value2  Value3
0.4839  0.4536  0.3561
0.1292  0.6875  MISSING
0.1781  0.3049  0.8928
MISSING 0.5801  0.2038
0.5993  0.4357  0.7410''')

In [32]:
# data from file to array
arr1 = np.genfromtxt('data.txt')                                      # non-numeric is filled with np.nan
arr2 = np.genfromtxt('data.txt', skip_header=1)                       # header is skipped
arr3 = np.genfromtxt('data2.txt', skip_header=1, filling_values=-99)  # non-numeric is filled with filling_values

In [33]:
print('\narr1:\n',arr1)
print('\narr2:\n',arr2)
print('\narr3:\n',arr3)


arr1:
 [[  nan   nan   nan]
 [  0.3   0.1   0.4]
 [  0.5   0.5   0.4]
 [  0.1   0.7   0.6]
 [  0.2   0.3   0.9]
 [  0.6   0.3   0.9]]

arr2:
 [[  0.3   0.1   0.4]
 [  0.5   0.5   0.4]
 [  0.1   0.7   0.6]
 [  0.2   0.3   0.9]
 [  0.6   0.3   0.9]]

arr3:
 [[  0.5   0.5   0.4]
 [  0.1   0.7 -99.0]
 [  0.2   0.3   0.9]
 [-99.0   0.6   0.2]
 [  0.6   0.4   0.7]]


In [34]:
# text format data from file to array
arr3 = np.loadtxt('data.txt', dtype=str)                 # each element is str, each row in file is a row in array
arr4 = np.loadtxt('data.txt', dtype=str, delimiter='\n') # each row in file is an element in array, each element is str 

In [35]:
print('\narr3:\n',arr3)
print('\narr4:\n',arr4)


arr3:
 [['Value1' 'Value2' 'Value3']
 ['0.2536' '0.1008' '0.3857']
 ['0.4839' '0.4536' '0.3561']
 ['0.1292' '0.6875' '0.5929']
 ['0.1781' '0.3049' '0.8928']
 ['0.6253' '0.3486' '0.8791']]

arr4:
 ['Value1  Value2  Value3' '0.2536  0.1008  0.3857'
 '0.4839  0.4536  0.3561' '0.1292  0.6875  0.5929'
 '0.1781  0.3049  0.8928' '0.6253  0.3486  0.8791']


In [36]:
# unpack each column
x, y, z = np.loadtxt('data.txt', skiprows=1, unpack=True)
print('x: ',x,'\ny: ',y,'\nz: ',z) # each col in file is stored as array

x:  [  0.3   0.5   0.1   0.2   0.6] 
y:  [  0.1   0.5   0.7   0.3   0.3] 
z:  [  0.4   0.4   0.6   0.9   0.9]


In [37]:
# x, y, z = np.loadtxt('data2.txt', skiprows=1, unpack=True)
# ValueError: could not convert string to float: 'MISSING'

### Inspecting array

In [38]:
print(a)

[1 2 3]


In [39]:
print(a.ndim,         # 1
      len(a),         # 3
      a.shape,        # (3,)
      a.size,         # 3
      a.itemsize,     # 4
      a.nbytes,       # 3*4 = 12
      a.dtype,        # int32
      a.dtype.name,   # int32
      a[0].dtype,     # int32
      sep='  ')

1  3  (3,)  3  4  12  int32  int32  int32


In [40]:
print(b)

[[  1.0   2.0]
 [  3.0   4.0]]


In [41]:
print(b.ndim,         # 2
      len(b),         # 2 rows
      b.shape,        # (2,2)
      b.size,         # 4
      b.itemsize,     # 8
      b.nbytes,       # 4*8 = 32
      b.dtype,        # float64
      b.dtype.name,   # float64
      b[0][0].dtype,  # float64
      sep='    ')

2    2    (2, 2)    4    8    32    float64    float64    float64


In [42]:
print(c)

[[[  1.0   2.0   3.0   4.0]
  [  5.0   6.0   7.0   8.0]]

 [[  1.0   2.0   3.0   4.0]
  [  9.0  10.0  11.0  12.0]]

 [[ 13.0  14.0  15.0  16.0]
  [ 17.0  18.0  19.0  20.0]]]


In [43]:
print(c.ndim,         # 3
      len(c),         # 3 rows
      c.shape,        # (3,2,4)
      c.size,         # 24
      c.itemsize,     # 8
      c.nbytes,       # 24*8 = 192
      c.dtype,        # float64
      type(c),        # numpy.ndarray
      type(c[0]),     # numpy.ndarray
      type(c[0][0]),  # numpy.ndarray
      sep='    ')

3    3    (3, 2, 4)    24    8    192    float64    <class 'numpy.ndarray'>    <class 'numpy.ndarray'>    <class 'numpy.ndarray'>


In [44]:
print(np.array([1]).itemsize)             # 4 bytes
print(np.array(['a']).itemsize)           # 4 bytes
print(np.array([10]).itemsize)            # 4 bytes
print(np.array(['ab']).itemsize)          # 8 bytes
print(np.array(['10']).itemsize)          # 8 bytes
print(np.array(['datascience']).itemsize) # 11*4 bytes

4
4
4
8
8
44


In [45]:
# string array
s = np.array([['a','an','the'],['datascience','is','easy']])
print(s.ndim,         # 2
      len(s),         # 2 rows
      s.shape,        # (2,3)
      s.size,         # 2*3 = 6 
      s.itemsize,     # largest string size 11*4 = 44
      s.nbytes,       # 44*6 = 264
      s.dtype,        # <U11
      type(s),        # numpy.ndarray
      type(s[0]),     # numpy.ndarray
      type(s[0][0]),  # numpy.str_
      sep='    ')

2    2    (2, 3)    6    44    264    <U11    <class 'numpy.ndarray'>    <class 'numpy.ndarray'>    <class 'numpy.str_'>


### Data Types

***numpy data types***

- np.int64 # Signed 64-bit integer types
- np.float32 # 8 bits exponent, 23 bits mantissa
- np.double # 11 bits exponent, 52 bits mantissa
- complex # Complex numbers represented by 128 floats
- bool # Boolean type storing TRUE and FALSE values
- object # Python object type
- np.string_ # Fixed-length string type
- np.unicode_ # Fixed-length unicode 

***referring datatypes with 1 character:***

- i # integer
- b # boolean
- u # unsigned integer
- f # float
- c # complex float
- m # timedelta
- M # datetime
- O # object
- S # string
- U # unicode string
- V # fixed chunk of memory for other type ( void )

In [46]:
a = np.power(100, 8, dtype=np.int64)     # 10^16
a2 = np.power(100, 8, dtype=np.int32)    # garbage as size exceeds
b = np.array([1, 2, 3], dtype='f')
c = np.array([1, 2, 3], dtype=np.float32)
d = np.array([1, 2, 3], dtype=np.str_)

In [47]:
print(a,a2,b,c,d)
print(a.dtype,b.dtype,c.dtype,d.dtype)

10000000000000000 1874919424 [  1.0   2.0   3.0] [  1.0   2.0   3.0] ['1' '2' '3']
int64 float32 float32 <U1


#### Casting

In [48]:
b_str = b.astype(np.str_)

print('b:', b, b.dtype)
print('b_str:', b_str, b_str.dtype)

b: [  1.0   2.0   3.0] float32
b_str: ['1.0' '2.0' '3.0'] <U32


### Math Operations

In [49]:
# shape should match or one array should be a scalar
a = np.array([1, 2, 3])
b = np.array([1, 2, 3])
c = np.array([[10,20],
              [30,40]])
s = 5

In [50]:
# s is sclare and a is array
print(np.add(a, s),           # [6 7 8]
      np.subtract(a, s),      # [-4 -3 -2]
      np.divide(b, s),        # [0.2 0.4 0.6]
      np.multiply(a, s),      # [ 5 10 15]
      np.dot(a,s),            # element wise multiplication: [ 5 10 15]
      np.exp(s),              # e^5
      np.sqrt(s),             # sqrt(5)=2.23606797749979
      
      # Comparison Operations
      a == s,                 # elementwise comparision [False False False]
      a < s,                  # elementwise comparision [ True  True  True]
      np.array_equal(a, s),   # array-wise content comparision False
      a is s,                 # array-wise reference comparision False
      sep='\n')

[6 7 8]
[-4 -3 -2]
[  0.2   0.4   0.6]
[ 5 10 15]
[ 5 10 15]
148.4131591025766
2.23606797749979
[False False False]
[ True  True  True]
False
False


In [51]:
# a and b shape matches
print(np.add(a, b),           # [2 4 6]
      np.subtract(a, b),      # [0 0 0]
      np.divide(b, a),        # [1. 1. 1.]
      np.multiply(a, b),      # [1 4 9]
      np.dot(a,b),            # matrix dot product: (1*1 + 2*2 + 3*3) = 14
      np.trace(c),            # 10+40 = 50
      np.exp(a),              # e^a_elements [ 2.71828183  7.3890561  20.08553692]
      np.sqrt(b),             # [1.         1.41421356 1.73205081]
      
      # Comparison Operations
      a == b,                 # element wise comparision [ True  True  True]
      a < 2,                  # element wise comparision [ True False False]
      np.array_equal(a, b),   # arrray content comparision True
      a is b,                 # array object comparision False
     sep='\n')

[2 4 6]
[0 0 0]
[  1.0   1.0   1.0]
[1 4 9]
14
50
[  2.7   7.4  20.1]
[  1.0   1.4   1.7]
[ True  True  True]
[ True False False]
True
False


In [52]:
# columns should be equal. operations are compatible in all dimensions

x = np.ones((3,2))       #   3,2
y = np.full((2,1,2),4)   # 2,1,2

z = x + y
print(z.shape)           # 2,3,2
print(f"x:\n{x}", f"y:\n{y}", f"z:\n{z}", sep="\n---------\n")

(2, 3, 2)
x:
[[  1.0   1.0]
 [  1.0   1.0]
 [  1.0   1.0]]
---------
y:
[[[4 4]]

 [[4 4]]]
---------
z:
[[[  5.0   5.0]
  [  5.0   5.0]
  [  5.0   5.0]]

 [[  5.0   5.0]
  [  5.0   5.0]
  [  5.0   5.0]]]


In [53]:
# Error if cols doesnot match

b1 = np.ones((3,3))                            # 3,3
b2 = np.array([(1, 2), (3, 4)], dtype=float)   # 2,2
print(b1.shape[-1] == b2.shape[-1])            # cols are not equal

# b1 + b2 # ValueError: operands could not be broadcast together with shapes (3,3) (2,2) 

False


In [54]:
# division by 0

print(np.divide(5, 0))    # [inf, inf, inf]
print(np.divide(-5, 0))   # [-inf, -inf, -inf]
print(np.inf, -np.inf)

inf
-inf
inf -inf


  print(np.divide(5, 0))    # [inf, inf, inf]
  print(np.divide(-5, 0))   # [-inf, -inf, -inf]


In [55]:
print(np.sin(a), 
      np.cos(a), 
      np.tan(a), 
      np.log(a), sep='\n')

[  0.8   0.9   0.1]
[  0.5  -0.4  -1.0]
[  1.6  -2.2  -0.1]
[  0.0   0.7   1.1]


In [56]:
a = np.array([1.6, 2.2, 3.09])
print(np.floor(a),                  # less/left discrete number in number line
      np.ceil(a),                   # right/next discrete number in number line
      sep='\n')

[  1.0   2.0   3.0]
[  2.0   3.0   4.0]


### Aggregate Functions

In [57]:
a = np.array([1, 2, 3])
print(a.sum(), 
      a.min(), 
      a.max(), 
      a.cumsum(),      # consecutive elements summation (n elements)
      np.diff(a),      # consecutive elements difference (n-1 elements)
      sep='\n')

6
1
3
[1 3 6]
[1 1]


In [58]:
a = np.array([[1, 2, 2],[4, 5, 6]])
print(a.sum(),                       # scalar
      a.min(),                       # scalar
      a.max(),                       # scalar
      a.cumsum(),                    # 1-d
      np.diff(a),                    # array dimension is maintained
      sep='\n')

20
1
6
[ 1  3  5  9 14 20]
[[1 0]
 [1 1]]


### Statistical functions

In [59]:
a = np.array([10, 5, 2, 4])
b = np.array([[10, 5],[2, 4]])
 
print(np.mean(a),             # 5.25
      np.median(a),           # 4.5
      np.percentile(a, 50),   # 4.5
      np.percentile(a, 75),   # 6.25
      np.percentile(a, 100),  # 10.0
      sep='  ')

5.25  4.5  4.5  6.25  10.0


In [60]:
# standard deviation or spread of data
a = np.array([10, 50, 70, 20, 80, 100])

# std doesnot depend on shape of data
mean_a = np.mean(a)
std_dev_a = np.std(a)

# Standard deviation ranges
first_std_dev_range = (mean_a - std_dev_a, mean_a + std_dev_a)
second_std_dev_range = (mean_a - 2 * std_dev_a, mean_a + 2 * std_dev_a)
third_std_dev_range = (mean_a - 3 * std_dev_a, mean_a + 3 * std_dev_a)

# Printing the standard deviation ranges
print("1st standard deviation range:", first_std_dev_range)
print("2nd standard deviation range:", second_std_dev_range)
print("3rd standard deviation range:", third_std_dev_range)

1st standard deviation range: (22.984378812835757, 87.01562118716424)
2nd standard deviation range: (-9.031242374328485, 119.03124237432849)
3rd standard deviation range: (-41.04686356149273, 151.04686356149273)


**Empirical rule:**
- Approximately 68%, 95%, and 99.7% of the data points fall within 1, 2, and 3 standard deviations from the mean of a normal distribution, respectively

In [61]:
# variance = sqrt(standard deviation)
print(np.var(a))

1025.0


In [62]:
# correlation coefficient

# 1-d
a = np.array([1, 2, 3, 4, 5])
positive_linear_relationship_with_a = np.array([2, 3, 4, 5, 6])
negative_linear_relationship_with_a = np.array([5, 4, 3, 2, 1])
no_linear_relationship_with_a = np.array([5, 4, 6, 4, 5])
small_linear_relationship_with_a = np.array([5, 4, 6, 4, 4])

correlation_matrix_1 = np.corrcoef(a)
correlation_matrix_2 = np.corrcoef(a, positive_linear_relationship_with_a)
correlation_matrix_3 = np.corrcoef(a, negative_linear_relationship_with_a)
correlation_matrix_4 = np.corrcoef(a, no_linear_relationship_with_a)
correlation_matrix_5 = np.corrcoef(a, small_linear_relationship_with_a)

print(correlation_matrix_1)
print(correlation_matrix_2)
print(correlation_matrix_3)
print(correlation_matrix_4)
print(correlation_matrix_5)

1.0
[[  1.0   1.0]
 [  1.0   1.0]]
[[  1.0  -1.0]
 [ -1.0   1.0]]
[[  1.0   0.0]
 [  0.0   1.0]]
[[  1.0  -0.4]
 [ -0.4   1.0]]


In [63]:
# 2-d array corrcoef
matrix = np.array([[1, 2, 3, 4, 5],       # Positive linear relationship (corr coef ≈ 1)
                   [5, 4, 3, 2, 1],       # Negative linear relationship (corr coef ≈ -1)
                   [1, 3, 2, 4, 5],       # Moderate positive linear relationship (corr coef ≈ 0.5)
                   [1, 1, 2, 1, 1],       # No linear relationship (corr coef ≈ 0)
                   [1, 2, 2, 2, 2]])      # Weak positive linear relationship (corr coef ≈ 0.5)


row_wise_correlation_matrix = np.corrcoef(matrix, rowvar = True)
col_wise_correlation_matrix = np.corrcoef(matrix, rowvar = False)
print(row_wise_correlation_matrix)
print(col_wise_correlation_matrix)

[[  1.0  -1.0   0.9   0.0   0.7]
 [ -1.0   1.0  -0.9   0.0  -0.7]
 [  0.9  -0.9   1.0  -0.4   0.7]
 [  0.0   0.0  -0.4   1.0   0.2]
 [  0.7  -0.7   0.7   0.2   1.0]]
[[  1.0   0.8   0.6  -0.2  -0.5]
 [  0.8   1.0   0.5   0.3   0.0]
 [  0.6   0.5   1.0   0.3   0.1]
 [ -0.2   0.3   0.3   1.0   1.0]
 [ -0.5   0.0   0.1   1.0   1.0]]


### copying

In [64]:
# no copy
a = np.array([1, 2, 3])
b = np.array([1, 2, 3])
c = a
print(a is b, a is c, b is c)

False True False


**Shallow copy in list**

In [65]:
a = [[1,2],            # mutable: object reference is stored
     [3,4],
     5,                # immutable: value is stored directly 
     (7,8),            # immutable
     'abc']            # immutable

print(a)

d = a[:]             
d[0] = 10              # changing mutable object reference
d[1][0]=30             # changing value inside the mutable object
d[2]=50                # changing immutable value

# d[3][0] = 70         # TypeError: 'tuple' object does not support item assignment
# d[4][0] = 'p'        # TypeError: 'str' object does not support item assignment

d[3] = 12

print(a)
print(d)

[[1, 2], [3, 4], 5, (7, 8), 'abc']
[[1, 2], [30, 4], 5, (7, 8), 'abc']
[10, [30, 4], 50, 12, 'abc']


**Shallow copy in numpy array**

In [66]:
a = np.array([[1,2],            # mutable: object reference is stored
              [3,4],
              5,                # immutable: value is stored directly 
              (7,8),            # immutable
              'abc'], dtype='object')           # immutable

print(a)

d = a[:]
d[0] = 10              # changing mutable object reference
d[1][0] = 30           # changing value inside the mutable object
d[2] = 50              # changing immutable value

# d[3][0] = 70         # TypeError: 'tuple' object does not support item assignment
# d[4][0] = 'p'        # TypeError: 'str' object does not support item assignment

d[3] = 12

print(a) 
print(d)

[list([1, 2]) list([3, 4]) 5 (7, 8) 'abc']
[10 list([30, 4]) 50 12 'abc']
[10 list([30, 4]) 50 12 'abc']


In [67]:
a = np.array([1, 2, 3])
d = a[:]          # shallow copy to have new view on data
e = np.copy(a)    # deep copy to get complete copy for new work
f = a.copy()      # deep copy
d[0] = 10
e[1] = 20
f[2] = 30

In [68]:
print(a)          # affected due to changes in d, but not from e and f
print(d)
print(e)
print(f)

[10  2  3]
[10  2  3]
[ 1 20  3]
[ 1  2 30]


- shallow copy changes affects the original array
- deep copy changes will not affect the original array

### views

Ex: Image processing
1. Capture the image using the camera and convert it into a NumPy array.
2. Create a view of the NumPy array to work with a specific region of the image (e.g., a rectangular portion).
3. Apply the sepia filter to the view of the image, which modifies the pixel values directly in the view without copying the entire image data.
4. Display the modified portion of the image in real-time.

In [69]:
# use of view

arr = np.array([1, 2, 3, 4, 5])
v1 = arr[1:4]                       # a view 234: part of image
v2 = arr.view()                     # a view  

print("original arr:", arr)
print("v2:", v1)
print("v2:", v2)

original arr: [1 2 3 4 5]
v2: [2 3 4]
v2: [1 2 3 4 5]


In [70]:
# Modify the view array
v1[:] = 100
print("Modified v1:", v1)

v2[1:2] = 200 
print("Modified v2:", v2) # changes from above view are also there

Modified v1: [100 100 100]
Modified v2: [  1 200 100 100   5]


In [71]:
print("Modified Original Array:", arr) # changes from all views affects the original array

Modified Original Array: [  1 200 100 100   5]


### Transpose

In [72]:
a = np.array([[1,2,3],[4,5,6]]) # 2-d
b = np.transpose(a)
c = a.T
print(a.shape, b.shape, c.shape)
# a,b,c

(2, 3) (3, 2) (3, 2)


In [73]:
a = np.array([[1,2,3,4,5,6]]) # 2-d
b = np.transpose(a)
c = a.T
print(a.shape, b.shape, c.shape)
# a,b,c

(1, 6) (6, 1) (6, 1)


In [74]:
a = np.array([1,2,3,4,5,6]) # 1-d
b = np.transpose(a)
c = a.T
print(a.shape, b.shape, c.shape)
# a,b,c

(6,) (6,) (6,)


### Indexing, Subsetting, Slicing

In [75]:
a = np.array([[1,2,3],[4,5,6]])
print(a[0],       # positive indexing
      a[-1],      # negative indexing
      a[a > 4],   # boolean indexing
      a[-1][-1],  
      a[-1, -1],  # subsetting
      sep='\n')

[1 2 3]
[4 5 6]
[5 6]
6
6


In [76]:
print(a[0],a[:1])
print(a[:2])
print(a[0][0])
print(a[:1][0])
print(a[:1][0][0])
print(a[:1,0])

[1 2 3] [[1 2 3]]
[[1 2 3]
 [4 5 6]]
1
[1 2 3]
1
[1]


In [77]:
print(a[1:],         # slicing
      a[-1, :], 
      a[:, 2], 
      a[:2, 1], 
      a[:1,1:], 
      a[1, ...],     # a[1,:]
      
      a[[0, 0, -1, 1], [0, 1, -1, 2]], # a[0,0] a[0,1] a[-1][-1] a[1][2]
      
      a[1,-2:-4:-1],
     sep='\n')

[[4 5 6]]
[4 5 6]
[3 6]
[2 5]
[[2 3]]
[4 5 6]
[1 2 6 6]
[5 4]


In [78]:
# fancy indexing
print(a[[1,0,1,0]],              # select rows
      a[[1,0,1,0]][:],           # select rows and all columns 
      a[[1,0,1,0]][:,[0,1,2,1]], # select rows and specified columns
      sep="\n")

[[4 5 6]
 [1 2 3]
 [4 5 6]
 [1 2 3]]
[[4 5 6]
 [1 2 3]
 [4 5 6]
 [1 2 3]]
[[4 5 6 5]
 [1 2 3 2]
 [4 5 6 5]
 [1 2 3 2]]


### array manipulation

In [79]:
a = np.array([[1, 2, 3], [4, 5, 6], [7,8,9],[10,11,12]])
print(a.shape)
print(a)

(4, 3)
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [80]:
# Reshape array 'a' into a 3x3 matrix: elements: 9 
# given elements: 12: Not possible
# b = a.reshape((3,3)) # ValueError: cannot reshape array of size 6 into shape (3,3)

In [81]:
reshaped_a = a.reshape((3, 4))
print('reshaped_a',reshaped_a.shape)
print(reshaped_a)

reshaped_a (3, 4)
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [82]:
# -1 evaluates the last value in reshape according to the total number of elements in the array

reshaped_a = a.reshape((1,-1))        # -1 is evaluated to 12
print('reshaped_a',reshaped_a.shape)
print(reshaped_a)

reshaped_a (1, 12)
[[ 1  2  3  4  5  6  7  8  9 10 11 12]]


In [83]:
reshaped_a = a.reshape((2,-1))        # -1 is evaluated to 6
print('reshaped_a',reshaped_a.shape)
print(reshaped_a)

reshaped_a (2, 6)
[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]


In [84]:
reshaped_a = a.reshape((1,3,-1))      # -1 is evaluated to 4 
print('reshaped_a',reshaped_a.shape)
print(reshaped_a)

reshaped_a (1, 3, 4)
[[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]]


In [85]:
reshaped_a = a.reshape((2,3,-1))      # -1 is evaluated to 2
print('reshaped_a',reshaped_a.shape)
print(reshaped_a)

reshaped_a (2, 3, 2)
[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]]


In [86]:
# Resize array 'a' to a new shape (2x4), potentially repeating elements

resized_a = np.resize(a, (2, 4)) # data loss possible
print('resized_a',resized_a.shape)
print(resized_a)

resized_a (2, 4)
[[1 2 3 4]
 [5 6 7 8]]


In [87]:
# Flatten array 'a' (convert multi-dimensional array to a 1D array)
raveled_a = np.ravel(a)
print('raveled_a',raveled_a)

# Flatten array 'a' using the default row-major order (equivalent to np.ravel(a))
flattened_a = a.flatten()
print('flattened_a',flattened_a)

raveled_a [ 1  2  3  4  5  6  7  8  9 10 11 12]
flattened_a [ 1  2  3  4  5  6  7  8  9 10 11 12]


In [88]:
# append, insert, delete: returns new array without modifying existing array

# Append value 7 to the end of array 'a'
appended_a = np.append(a, 7)
print('appended_a',appended_a)

# Insert value 3 into array 'a' at index 2
inserted_a = np.insert(a, 2, 300)
print('inserted_a',inserted_a)

# Delete element at index 2from array 'a'
deleted_a = np.delete(a, 11)
print('deleted_a',deleted_a)

print(a) # original array is not affected

appended_a [ 1  2  3  4  5  6  7  8  9 10 11 12  7]
inserted_a [  1   2 300   3   4   5   6   7   8   9  10  11  12]
deleted_a [ 1  2  3  4  5  6  7  8  9 10 11]
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


### Demonstrating axis

- **axis = 0 => 1st dim**
- **axis = 1 => 2nd dim**
- **axis = -1 => last dim**
- **axis = None => regardless of dimension**

In [89]:
# demonstrating axis
arr = np.array([[1, 2, 3], 
                [4, 5, 6], 
                [7, 8, 9]])
print(arr.ndim, arr.shape)
print("1st dimension:",arr.shape[0])
print("2nd dimension:",arr.shape[1])
print("last dimension:",arr.shape[-1])

# demonstrating axis
arr = np.array([[[1, 2, 3], [4, 5, 6]], 
                [[7, 8, 9], [10, 11, 12]]])
print(arr.ndim, arr.shape)
print("1st dimension:",arr.shape[0])
print("2nd dimension:",arr.shape[1])
print("2nd dimension:",arr.shape[2])
print("last dimension:",arr.shape[-1])

2 (3, 3)
1st dimension: 3
2nd dimension: 3
last dimension: 3
3 (2, 2, 3)
1st dimension: 2
2nd dimension: 2
2nd dimension: 3
last dimension: 3


In [90]:
arr = np.array([[1, 2], 
                [4, 5], 
                [7, 8]])
print(arr.shape[0], arr.shape[1])
print(arr.sum(axis=0),     # 1st dim
      arr.sum(axis=1),     # 2nd dim
      arr.sum(axis=-1),    # last dim
      arr.sum(axis=None) # regardless of dimension and default axis for sum() in specific
     ) 

3 2
[12 15] [ 3  9 15] [ 3  9 15] 27


In [91]:
# demonstrating axis
arr = np.array([[[1, 2, 3], [4, 5, 6]], 
                [[7, 8, 9], [10, 11, 12]]])
print(arr.shape[0],arr.shape[1],arr.shape[2])
print(arr.sum(axis=0),    # 1st dim
      arr.sum(axis=1),    # 2nd dim
      arr.sum(axis=2),    # 3rd dim
      arr.sum(axis=-1),   # here, in 3-d array, last axis is 3rd dim
      arr.sum(axis=None), # regardless of dimension
      sep='\n.............\n') 

2 2 3
[[ 8 10 12]
 [14 16 18]]
.............
[[ 5  7  9]
 [17 19 21]]
.............
[[ 6 15]
 [24 33]]
.............
[[ 6 15]
 [24 33]]
.............
78


- default axis may differ from function to function

### Combining arrays

In [92]:
arr = np.array([(1, 2), (3, 4)], dtype=float)

c = np.vstack((arr, arr))               # 1st dim
d = np.hstack((arr, arr))               # 2nd dim
e = np.concatenate((arr, arr), axis=0)  # 1st dim
f = np.concatenate((arr, arr), axis=1)  # 2nd dim
g = np.concatenate((arr, arr), axis=-1) # last dim
print(arr.shape, c.shape, d.shape, e.shape, f.shape, g.shape)

(2, 2) (4, 2) (2, 4) (4, 2) (2, 4) (2, 4)


In [93]:
print("\narr:\n", arr)
print("\nc [vstack]:\n", c)
print("\nd [hstack]:\n", d)
print("\ne [concatenate axis=0]:\n", e)
print("\nf [concatenate axis=1]:\n", f)
print("\ng [concatenate axis=-1]:\n", g)


arr:
 [[  1.0   2.0]
 [  3.0   4.0]]

c [vstack]:
 [[  1.0   2.0]
 [  3.0   4.0]
 [  1.0   2.0]
 [  3.0   4.0]]

d [hstack]:
 [[  1.0   2.0   1.0   2.0]
 [  3.0   4.0   3.0   4.0]]

e [concatenate axis=0]:
 [[  1.0   2.0]
 [  3.0   4.0]
 [  1.0   2.0]
 [  3.0   4.0]]

f [concatenate axis=1]:
 [[  1.0   2.0   1.0   2.0]
 [  3.0   4.0   3.0   4.0]]

g [concatenate axis=-1]:
 [[  1.0   2.0   1.0   2.0]
 [  3.0   4.0   3.0   4.0]]


In [94]:
arr = np.array([[[1, 22, 13], [4, 15, 6]], 
                [[7, 8, 9], [10, 11, 12]]])

c = np.vstack((arr, arr))               # 1st dim
d = np.hstack((arr, arr))               # 2nd dim
e = np.concatenate((arr, arr), axis=0)  # 1st dim
f = np.concatenate((arr, arr), axis=1)  # 2nd dim
g = np.concatenate((arr, arr), axis=-1) # last dim
print(arr.shape, c.shape, d.shape, e.shape, f.shape, g.shape)

(2, 2, 3) (4, 2, 3) (2, 4, 3) (4, 2, 3) (2, 4, 3) (2, 2, 6)


In [95]:
print("\narr:\n", arr)
print("\nc [vstack]:\n", c)
print("\nd [hstack]:\n", d)
print("\ne [concatenate axis=0]:\n", e)
print("\nf [concatenate axis=1]:\n", f)
print("\ng [concatenate axis=-1]:\n", g)


arr:
 [[[ 1 22 13]
  [ 4 15  6]]

 [[ 7  8  9]
  [10 11 12]]]

c [vstack]:
 [[[ 1 22 13]
  [ 4 15  6]]

 [[ 7  8  9]
  [10 11 12]]

 [[ 1 22 13]
  [ 4 15  6]]

 [[ 7  8  9]
  [10 11 12]]]

d [hstack]:
 [[[ 1 22 13]
  [ 4 15  6]
  [ 1 22 13]
  [ 4 15  6]]

 [[ 7  8  9]
  [10 11 12]
  [ 7  8  9]
  [10 11 12]]]

e [concatenate axis=0]:
 [[[ 1 22 13]
  [ 4 15  6]]

 [[ 7  8  9]
  [10 11 12]]

 [[ 1 22 13]
  [ 4 15  6]]

 [[ 7  8  9]
  [10 11 12]]]

f [concatenate axis=1]:
 [[[ 1 22 13]
  [ 4 15  6]
  [ 1 22 13]
  [ 4 15  6]]

 [[ 7  8  9]
  [10 11 12]
  [ 7  8  9]
  [10 11 12]]]

g [concatenate axis=-1]:
 [[[ 1 22 13  1 22 13]
  [ 4 15  6  4 15  6]]

 [[ 7  8  9  7  8  9]
  [10 11 12 10 11 12]]]


### Splitting arrays

In [96]:
a = np.array([(1, 2), (3, 4), (5,6)], dtype=float)
print(a.shape)

(3, 2)


In [97]:
a1, a2, a3 = np.vsplit(a, 3) # 1st dim, 3splits
print(a1.shape, a2.shape, a3.shape)
print(a1)
print(a2)
print(a3)

(1, 2) (1, 2) (1, 2)
[[  1.0   2.0]]
[[  3.0   4.0]]
[[  5.0   6.0]]


In [98]:
a1, a2 = np.hsplit(a, 2)    # 2nd dim, 2splits
print(a1.shape, a2.shape)
print(a1)
print(a2)

(3, 1) (3, 1)
[[  1.0]
 [  3.0]
 [  5.0]]
[[  2.0]
 [  4.0]
 [  6.0]]


In [99]:
a = np.array([(1,2,3,4), 
              (5,6,7,8), 
              (9,10,11,12), 
              (13,14,15,16), 
              (17,18,19,20), 
              (21,22,23,24), 
              (25,26,27,28), 
              (29,30,31,32)])
print(a.shape)                  # (8, 4)

a1, a2, a3, a4, a5, a6, a7, a8  = np.vsplit(a, 8) # 1st dim, 8 % splits = 0
print(a1.shape, a2.shape, a3.shape, a4.shape, a5.shape, a6.shape, a7.shape, a8.shape)

a1, a2, a3, a4  = np.vsplit(a, 4) # 8%4 = 0
print(a1.shape, a2.shape, a3.shape, a4.shape)

a1, a2  = np.vsplit(a, 2) # 8%2 = 0
print(a1.shape, a2.shape)

a1, a2, a3, a4 = np.hsplit(a, 4) # 2nd dim, 4 % splits = 0
print(a1.shape, a2.shape, a3.shape, a4.shape)

a1, a2  = np.hsplit(a, 2)
print(a1.shape, a2.shape)

(8, 4)
(1, 4) (1, 4) (1, 4) (1, 4) (1, 4) (1, 4) (1, 4) (1, 4)
(2, 4) (2, 4) (2, 4) (2, 4)
(4, 4) (4, 4)
(8, 1) (8, 1) (8, 1) (8, 1)
(8, 2) (8, 2)


### Sorting Arrays

In [100]:
arr = np.array([[30, 10, 1], 
                [6, 50, 3]])

print("Shape of arr:", arr.shape)
print("\nSorted along axis 0:\n", np.sort(arr, axis=0))                      # 1st dim
print("\nSorted along axis 1:\n", np.sort(arr, axis=1))                      # 2nd dim
print("\nSorted along last axis (-1):\n", np.sort(arr, axis=-1))             # here, in 2-d array, last axis is 2nd dim
print("\nSorted with axis=None (all elements):\n", np.sort(arr, axis=None))  # sort regardless of dimension


Shape of arr: (2, 3)

Sorted along axis 0:
 [[ 6 10  1]
 [30 50  3]]

Sorted along axis 1:
 [[ 1 10 30]
 [ 3  6 50]]

Sorted along last axis (-1):
 [[ 1 10 30]
 [ 3  6 50]]

Sorted with axis=None (all elements):
 [ 1  3  6 10 30 50]


In [101]:
# demonstrating axis
arr = np.array([[[1, 22, 13], [4, 15, 6]], 
                [[7, 8, 9], [10, 11, 12]]])
print("Shape of arr:", arr.shape)
print("\nSorted along axis 0:\n", np.sort(arr, axis=0))                      # 1st dim
print("\nSorted along axis 1:\n", np.sort(arr, axis=1))                      # 2nd dim
print("\nSorted along last axis (-1):\n", np.sort(arr, axis=-1))             # here, in 3-d array, last axis is 3rd dim
print("\nSorted along default axis (-1):\n", np.sort(arr))                      # default axis = -1 for sort
print("\nSorted with axis=None (all elements):\n", np.sort(arr, axis=None))  # sort regardless of dimension

Shape of arr: (2, 2, 3)

Sorted along axis 0:
 [[[ 1  8  9]
  [ 4 11  6]]

 [[ 7 22 13]
  [10 15 12]]]

Sorted along axis 1:
 [[[ 1 15  6]
  [ 4 22 13]]

 [[ 7  8  9]
  [10 11 12]]]

Sorted along last axis (-1):
 [[[ 1 13 22]
  [ 4  6 15]]

 [[ 7  8  9]
  [10 11 12]]]

Sorted along default axis (-1):
 [[[ 1 13 22]
  [ 4  6 15]]

 [[ 7  8  9]
  [10 11 12]]]

Sorted with axis=None (all elements):
 [ 1  4  6  7  8  9 10 11 12 13 15 22]


In [102]:
a = np.array([[30,10,1], 
              [6,50,3]])

a.sort() # original is affected, default axis = -1
print(a)

[[ 1 10 30]
 [ 3  6 50]]


### Data analysis functions

In [103]:
a = np.array([(1, 2), 
              (8, 4), 
              (8, 1)], dtype=int)
f = a.ravel()                                      # converting 2D images into 1D arrays for processing
t = np.array([np.inf, 5, np.nan])                   


print("Array 'a':")
print(a)

print("\nArray 'f' (ravel of 'a' or flattened array):")
print(f)

print("\nArray 't':")
print(t)

Array 'a':
[[1 2]
 [8 4]
 [8 1]]

Array 'f' (ravel of 'a' or flattened array):
[1 2 8 4 8 1]

Array 't':
[  inf   5.0   nan]


In [104]:
# indices
print(np.argsort(f),      # returns indices of elements when array is sorted 
      np.argmax(a),       # returns index of first occurence of maximum element
      np.argmin(a),       # returns index of minimum element
      np.argmin(t),
      np.where(f > 5),    # returns indices of elements satisfying the condition: (1,0) (2,0) (2,1)
      np.where(a > 5),
      sep='\n'
     )

[0 5 1 3 2 4]
2
0
2
(array([2, 4], dtype=int64),)
(array([1, 2], dtype=int64), array([0, 0], dtype=int64))


In [105]:
product_purchase_history = [1, 2, 3, 2, 4, 1, 5, 3, 1, 2, 4, 5, 1, 2,8]
purchase_count = np.bincount(product_purchase_history)

for product_id, count in enumerate(purchase_count):
    print(f"Product ID: {product_id}, Purchases: {count}")

print()        

for product_id, count in enumerate(purchase_count):
    if count > 0:
        print(f"Product ID: {product_id}, Purchases: {count}")        

Product ID: 0, Purchases: 0
Product ID: 1, Purchases: 4
Product ID: 2, Purchases: 4
Product ID: 3, Purchases: 2
Product ID: 4, Purchases: 2
Product ID: 5, Purchases: 2
Product ID: 6, Purchases: 0
Product ID: 7, Purchases: 0
Product ID: 8, Purchases: 1

Product ID: 1, Purchases: 4
Product ID: 2, Purchases: 4
Product ID: 3, Purchases: 2
Product ID: 4, Purchases: 2
Product ID: 5, Purchases: 2
Product ID: 8, Purchases: 1


In [106]:
scores = np.array([72, 83, 78, 91, 65, 72, 88, 95, 79, 82, 74, 70, 88, 92, 80, 85, 92, 88, 76, 84])
count, bins = np.histogram(scores, bins=5)  # Here, we specify the number of bins

print(bins)
print(count)
print()
for i, bin_count in enumerate(count):
    print(f"Bin {i+1}: {bins[i]} - {bins[i + 1]}: Count = {bin_count}")

[ 65.0  71.0  77.0  83.0  89.0  95.0]
[2 4 4 6 4]

Bin 1: 65.0 - 71.0: Count = 2
Bin 2: 71.0 - 77.0: Count = 4
Bin 3: 77.0 - 83.0: Count = 4
Bin 4: 83.0 - 89.0: Count = 6
Bin 5: 89.0 - 95.0: Count = 4


In [107]:
t = np.array([np.inf, 5, np.nan])                  #  
print(np.isinf(t))             # element wise opeation to check for np.inf
print(np.isnan(t))             # element wise opeation to check for np.nan

[ True False False]
[False False  True]


In [108]:
items = ["apple", "banana", "cherry", "grape", "orange"]
requirement = 8                                                # Number of fruits needed
print(np.random.choice(items, size=requirement))                

['orange' 'cherry' 'orange' 'apple' 'grape' 'banana' 'cherry' 'apple']


In [109]:
items = ["apple", "banana", "cherry", "grape", "orange", "grape"]
requirement = 4  
if requirement<=len(items):
    print(np.random.choice(items, size=requirement, replace=False))

['apple' 'grape' 'cherry' 'grape']


In [110]:
# saved custid for each purchase
customer_ids = np.array([1001, 1002, 1003, 1002, 1001, 1004, 1003, 1005, 1002, 1004, 1006])
unique_elements, counts = np.unique(customer_ids, return_counts=True)
for customer_id, count in zip(unique_elements, counts):
    print(f"Customer ID: {customer_id}, Number of Purchases: {count}")

Customer ID: 1001, Number of Purchases: 2
Customer ID: 1002, Number of Purchases: 3
Customer ID: 1003, Number of Purchases: 2
Customer ID: 1004, Number of Purchases: 2
Customer ID: 1005, Number of Purchases: 1
Customer ID: 1006, Number of Purchases: 1


### masked arrays

In [111]:
# NumPy array with some missing values (e.g., represented by -999)
data = np.array([10, 20, -999, 30, -999, 40, 50, -999])

# Create a masked array by specifying the condition for missing values
masked_data = np.ma.masked_where(data == -999, data)

print("Original Data:", data)
print("Masked Array:", masked_data)

Original Data: [  10   20 -999   30 -999   40   50 -999]
Masked Array: [10 20 -- 30 -- 40 50 --]


In [112]:
# Perform calculations on the masked array (missing values will be ignored)
mean = np.mean(masked_data)
result = np.sum(masked_data)

print("Mean:", mean)
print("Result:", result)

Mean: 30.0
Result: 150
