# NumPy


In [113]:
#loading libraries
import numpy as np
print(np.__version__)

1.24.3


In [114]:
a = np.array([1, 3, 6, 9])
b = np.array([[3.0,9.0,5.0], [1.,79.0,4.9]])
print("a array: ", a)
print("b array:\n ", b)

a array:  [1 3 6 9]
b array:
  [[ 3.   9.   5. ]
 [ 1.  79.   4.9]]


### Get functions

In [115]:
#Get Dimension
print("a dimension: ", a.ndim)
print("b dimension: ", b.ndim)
#Get Shape
print("a shape: ", a.shape)
print("b shape: ", b.shape)
#Get Type
print("a type: ", a.dtype)
print("b type: ", b.dtype)
#Get Size
print("a size: ", a.size)
print("b size: ", b.size)
#Get Total Size
print("a nbytes: ", a.nbytes)
print("b nbytes: ", b.nbytes)

a dimension:  1
b dimension:  2
a shape:  (4,)
b shape:  (2, 3)
a type:  int32
b type:  float64
a size:  4
b size:  6
a nbytes:  16
b nbytes:  48


### Changing elements

In [116]:
a = np.array([[1,2,3,4,5,6,7], [8,9,10,11,12,13,14]])
a

array([[ 1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14]])

In [117]:
#Get a specific element [r, c]
print("specific element (a[1,5]): ", a[1,5]) 
#Doing more specific elements [startindex:endindex:stepsize]
print("specific elements: ", a[0,1:6:2])
#Get a specific row
print("specific row (1): ", a[0, :])
#Get a specific column
print("specific column (4): ", a[:, 4])
#Re-assigning values in the array
a[1,5] = 20 # Change in exact location
a[:,2] = 5 # Changing column numbers to 5 
a[:,3] = [3, 10] # Changing specific column numbers
print("a array after the changes:\n ", a)
#Get elements divisible by 2
print("elements divisible by 2: ", a[a%2==0])
#Get element 5 and up
print("element 5 and up:\n ", (a>5) | (a==5))

specific element (a[1,5]):  13
specific elements:  [2 4 6]
specific row (1):  [1 2 3 4 5 6 7]
specific column (4):  [ 5 12]
a array after the changes:
  [[ 1  2  5  3  5  6  7]
 [ 8  9  5 10 12 20 14]]
elements divisible by 2:  [ 2  6  8 10 12 20 14]
element 5 and up:
  [[False False  True False  True  True  True]
 [ True  True  True  True  True  True  True]]


### 3-D Array

In [118]:
b = np.array([[[1,2], [3,4]], [[5,6], [7,8]]])
c = np.array([[1,4,1], [2,7,2], [3,9,3]])
print("b array:\n", b)
print("\nc array:\n", c)

b array:
 [[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]

c array:
 [[1 4 1]
 [2 7 2]
 [3 9 3]]


In [119]:
#Get specific element (work outside in) [box,row,column]
b[0,1,:]

array([3, 4])

In [120]:
b[:,1,:]

array([[3, 4],
       [7, 8]])

In [121]:
b[:,1,1]

array([4, 8])

In [122]:
b[0,:,1]

array([2, 4])

In [123]:
#Reversing the order of row by subsetting 
print(c[:: -1, ])

[[3 9 3]
 [2 7 2]
 [1 4 1]]


In [124]:
#reversing the order of rows & columns by subsetting
print(c[:: - 1, :: -1])

[[3 9 3]
 [2 7 2]
 [1 4 1]]


In [125]:
#Replacing elements in the array
b[:,1,:] = [[78,34], [22,56]]
print(b)

[[[ 1  2]
  [78 34]]

 [[ 5  6]
  [22 56]]]


### Adding new axis to an array

In [126]:
a = np.array([1, 2, 3, 4, 5])
a.shape

(5,)

In [127]:
a2 = a[np.newaxis, :] #row_vector
a2.shape

(1, 5)

In [128]:
a3 = a[:, np.newaxis] #col_vector
a3.shape

(5, 1)

### Types of arrays

In [129]:
#All 0s matrix
np.zeros(7) #1D

array([0., 0., 0., 0., 0., 0., 0.])

In [130]:
np.zeros((2,2)) #2D

array([[0., 0.],
       [0., 0.]])

In [131]:
np.zeros((2,3,4)) #3D

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [132]:
#All 1s matrix
np.ones((2, 3, 5))

array([[[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]],

       [[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]]])

In [133]:
#Any other number
np.full((3,4), 8)

array([[8, 8, 8, 8],
       [8, 8, 8, 8],
       [8, 8, 8, 8]])

In [134]:
#Random float number 
np.random.rand(2, 3, 4)

array([[[0.94294887, 0.39187542, 0.8324594 , 0.7349189 ],
        [0.4056503 , 0.51423751, 0.27226491, 0.12804628],
        [0.25711251, 0.0028701 , 0.01235154, 0.4798363 ]],

       [[0.24596625, 0.50595635, 0.68144059, 0.03145833],
        [0.36695427, 0.90160215, 0.56891047, 0.0905024 ],
        [0.52065332, 0.57177654, 0.35168761, 0.90049029]]])

In [135]:
#Random int values
np.random.randint(-4, 9, (3,3))

array([[ 2, -1, -4],
       [-3,  8,  0],
       [-2,  0,  2]])

In [136]:
#generating an array with 7 BOOM steps b/w 1 and 100
np.arange(1,100,7)

array([ 1,  8, 15, 22, 29, 36, 43, 50, 57, 64, 71, 78, 85, 92, 99])

In [137]:
#generating an array with as many elemnts we want
np.linspace(1,30,10)

array([ 1.        ,  4.22222222,  7.44444444, 10.66666667, 13.88888889,
       17.11111111, 20.33333333, 23.55555556, 26.77777778, 30.        ])

In [138]:
#the identity matrix
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

### Basic Mathematics

In [139]:
a = np.array((1,2,3,4))
b = np.array([1,0,1,0])
a

array([1, 2, 3, 4])

In [140]:
a+2

array([3, 4, 5, 6])

In [141]:
a-2

array([-1,  0,  1,  2])

In [142]:
a*2

array([2, 4, 6, 8])

In [143]:
a/2

array([0.5, 1. , 1.5, 2. ])

In [144]:
a+b

array([2, 2, 4, 4])

In [145]:
a**2 #(a^2)

array([ 1,  4,  9, 16])

In [146]:
np.sin(a)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [147]:
np.cos(a)

array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362])

In [148]:
np.tan(a)

array([ 1.55740772, -2.18503986, -0.14254654,  1.15782128])

In [149]:
s=np.array(np.random.rand(3, 4))
s

array([[0.46290061, 0.56808446, 0.53433228, 0.51984383],
       [0.40197153, 0.92042955, 0.74869284, 0.1863028 ],
       [0.84633908, 0.92523783, 0.68900965, 0.64372992]])

In [150]:
np.min(s)

0.1863028031637728

In [151]:
np.max(s)

0.9252378283165638

In [152]:
np.min(s, axis = 0) #  minimum value within each column

array([0.40197153, 0.56808446, 0.53433228, 0.1863028 ])

In [153]:
np.max(s, axis = 1) #  maximum value within each row

array([0.56808446, 0.92042955, 0.92523783])

In [154]:
np.sum(s)

7.446874375949806

In [155]:
np.mean(s)

0.6205728646624838

In [156]:
np.std(s)

0.21152543359884515

In [157]:
np.squeeze(s)

array([[0.46290061, 0.56808446, 0.53433228, 0.51984383],
       [0.40197153, 0.92042955, 0.74869284, 0.1863028 ],
       [0.84633908, 0.92523783, 0.68900965, 0.64372992]])

In [158]:
s.cumsum()

array([0.46290061, 1.03098507, 1.56531734, 2.08516118, 2.4871327 ,
       3.40756225, 4.1562551 , 4.3425579 , 5.18889698, 6.1141348 ,
       6.80314446, 7.44687438])

In [159]:
#Arithematic Operations
s + np.array([0.1,0.2,0.3,0.4])

array([[0.56290061, 0.76808446, 0.83433228, 0.91984383],
       [0.50197153, 1.12042955, 1.04869284, 0.5863028 ],
       [0.94633908, 1.12523783, 0.98900965, 1.04372992]])

### Reeorginizing Arrays

In [160]:
a = np.array([[1,2,3,4], [5,6,7,8]])
a

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [161]:
a.reshape((8,1))

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8]])

In [162]:
a.reshape((2, 2, 2))

array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

In [163]:
a.reshape((4,2))

array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

In [164]:
a.transpose() # make it from 4X2 to 2X4

array([[1, 5],
       [2, 6],
       [3, 7],
       [4, 8]])

In [165]:
np.flip(a) # flipping arrays

array([[8, 7, 6, 5],
       [4, 3, 2, 1]])

In [166]:
np.flip(a, axis = 0) # flipping rows

array([[5, 6, 7, 8],
       [1, 2, 3, 4]])

In [167]:
np.flip(a, axis = 1) # flipping column

array([[4, 3, 2, 1],
       [8, 7, 6, 5]])

### Splitting Arrays

In [168]:
x = np.arange(1,25).reshape(2,12)
x

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
       [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]])

In [169]:
#Splitting long arrays into small arrays
np.hsplit(x,3)

[array([[ 1,  2,  3,  4],
        [13, 14, 15, 16]]),
 array([[ 5,  6,  7,  8],
        [17, 18, 19, 20]]),
 array([[ 9, 10, 11, 12],
        [21, 22, 23, 24]])]

In [170]:
np.hsplit(x,4)

[array([[ 1,  2,  3],
        [13, 14, 15]]),
 array([[ 4,  5,  6],
        [16, 17, 18]]),
 array([[ 7,  8,  9],
        [19, 20, 21]]),
 array([[10, 11, 12],
        [22, 23, 24]])]

In [171]:
np.hsplit(x,6)

[array([[ 1,  2],
        [13, 14]]),
 array([[ 3,  4],
        [15, 16]]),
 array([[ 5,  6],
        [17, 18]]),
 array([[ 7,  8],
        [19, 20]]),
 array([[ 9, 10],
        [21, 22]]),
 array([[11, 12],
        [23, 24]])]

### Loading data 

In [203]:
data = np.genfromtxt('Data.txt', delimiter = ',') # Loading from textfile
data

array([[  1.,  13.,  21.,  11., 196.,  75.,   4.,   3.,  34.,   6.,   7.,
          8.,   0.,   1.,   2.,   3.,   4.,   5.],
       [  3.,  42.,  12.,  33., 766.,  75.,   4.,  55.,   6.,   4.,   3.,
          4.,   5.,   6.,   7.,   0.,  11.,  12.],
       [  1.,  22.,  33.,  11., 999.,  11.,   2.,   1.,  78.,   0.,   1.,
          2.,   9.,   8.,   7.,   1.,  76.,  88.]])

In [173]:
data = data.astype('int32') # Changing file type
data

array([[  1,  13,  21,  11, 196,  75,   4,   3,  34,   6,   7,   8,   0,
          1,   2,   3,   4,   5],
       [  3,  42,  12,  33, 766,  75,   4,  55,   6,   4,   3,   4,   5,
          6,   7,   0,  11,  12],
       [  1,  22,  33,  11, 999,  11,   2,   1,  78,   0,   1,   2,   9,
          8,   7,   1,  76,  88]])

In [202]:
# Loading from URL
data2= np.genfromtxt(R'https://raw.githubusercontent.com/selva86/datasets/master/Auto.csv', delimiter =',', skip_header = 1)
data2

array([[ 18.,   8., 307., ...,  70.,   1.,  nan],
       [ 15.,   8., 350., ...,  70.,   1.,  nan],
       [ 18.,   8., 318., ...,  70.,   1.,  nan],
       ...,
       [ 32.,   4., 135., ...,  82.,   1.,  nan],
       [ 28.,   4., 120., ...,  82.,   1.,  nan],
       [ 31.,   4., 119., ...,  82.,   1.,  nan]])

In [204]:
# Filling NULL with 999
data2 = np.genfromtxt(R'https://raw.githubusercontent.com/selva86/datasets/master/Auto.csv',
delimiter =',', skip_header = 1, filling_values= 9999, dtype = 'float')
data2

array([[  18.,    8.,  307., ...,   70.,    1., 9999.],
       [  15.,    8.,  350., ...,   70.,    1., 9999.],
       [  18.,    8.,  318., ...,   70.,    1., 9999.],
       ...,
       [  32.,    4.,  135., ...,   82.,    1., 9999.],
       [  28.,    4.,  120., ...,   82.,    1., 9999.],
       [  31.,    4.,  119., ...,   82.,    1., 9999.]])

In [176]:
# Supressing Scientific Notation in the Dataset
np.set_printoptions(suppress=True)
data2

array([[  18.,    8.,  307., ...,   70.,    1., 9999.],
       [  15.,    8.,  350., ...,   70.,    1., 9999.],
       [  18.,    8.,  318., ...,   70.,    1., 9999.],
       ...,
       [  32.,    4.,  135., ...,   82.,    1., 9999.],
       [  28.,    4.,  120., ...,   82.,    1., 9999.],
       [  31.,    4.,  119., ...,   82.,    1., 9999.]])

In [177]:
# Saving Files into Local
np.savetxt('auto.csv', data2, delimiter = ',')
#this will open in console only
np.save('auto.npy', data2)

In [178]:
# Loading Files through NumPy
k = np.load('auto.npy')
k

array([[  18.,    8.,  307., ...,   70.,    1., 9999.],
       [  15.,    8.,  350., ...,   70.,    1., 9999.],
       [  18.,    8.,  318., ...,   70.,    1., 9999.],
       ...,
       [  32.,    4.,  135., ...,   82.,    1., 9999.],
       [  28.,    4.,  120., ...,   82.,    1., 9999.],
       [  31.,    4.,  119., ...,   82.,    1., 9999.]])

### Concat (Row & Col wise)

In [179]:
a = np.zeros([4,4])
b = np.ones([4,4])

In [180]:
np.concatenate([a, b], axis = 1) # cancate along rows

array([[0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.]])

In [181]:
np.concatenate([a, b], axis = 0) # cancate along columns

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

### vstack() & hstack()

In [182]:
# vstack()
v1 = np.array([1,2,3,4,5])
v2 = np.array([6,7,8,9,10])
np.vstack([v1,v2])

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [183]:
np.vstack([v1,v2, v2, v1])

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [ 6,  7,  8,  9, 10],
       [ 1,  2,  3,  4,  5]])

In [184]:
# hstack()
# Horizantally stacking vectors
h1 = np.ones((2,4))
h2 = np.zeros((2,2))
np.hstack([h1,h2])

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

In [185]:
np.hstack([h1,h2,h2,h1])

array([[1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1.],
       [1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1.]])

### Sorting 


In [186]:
st = np.random.randint(1,10,size = [5,5])
st

array([[6, 2, 9, 1, 7],
       [8, 3, 8, 3, 6],
       [4, 9, 8, 1, 1],
       [6, 6, 1, 1, 8],
       [1, 9, 1, 5, 2]])

In [187]:
#sorting array's rows
np.sort(st)

array([[1, 2, 6, 7, 9],
       [3, 3, 6, 8, 8],
       [1, 1, 4, 8, 9],
       [1, 1, 6, 6, 8],
       [1, 1, 2, 5, 9]])

In [188]:
#sorting a single columns
sorted_col = st[:, 1].argsort()
st[sorted_col]

array([[6, 2, 9, 1, 7],
       [8, 3, 8, 3, 6],
       [6, 6, 1, 1, 8],
       [4, 9, 8, 1, 1],
       [1, 9, 1, 5, 2]])

### Working with dates

In [189]:
d = np.datetime64('2021-08-03 23:10:00')
d

numpy.datetime64('2021-08-03T23:10:00')

In [190]:
d + 100000 #adding seconds 
oneday = np.timedelta64(1, "D") #adding day 
d + oneday
onemin = np.timedelta64(1, 'm') #adding minute 
d + onemin
d

numpy.datetime64('2021-08-03T23:10:00')

In [191]:
#creating a sequence of date-time array
d = np.arange(np.datetime64('2023-12-13'), np.datetime64('2024-01-04'), 3) # one month - 3 day steps
d

array(['2023-12-13', '2023-12-16', '2023-12-19', '2023-12-22',
       '2023-12-25', '2023-12-28', '2023-12-31', '2024-01-03'],
      dtype='datetime64[D]')

In [192]:
### Advanced Functions

In [193]:
## getting num, if he odd, return the num^2 , else return num/2
def foo(x):
    if x%2 == 1:
        return x**2
    else:
        return x/2
foo(10)

5.0

In [194]:
foo_v  = np.vectorize(foo)
foo_v(a)

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

### Boolean 

In [195]:
data > 50

array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [196]:
np.any(data > 50, axis = 0)

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [197]:
np.all(data > 50, axis = 0)

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [198]:
data[((data > 50) & (data < 100))]

array([75, 75, 55, 78, 76, 88])

In [199]:
data[(~(data > 50) & (data < 100))]

array([ 1, 13, 21, 11,  4,  3, 34,  6,  7,  8,  0,  1,  2,  3,  4,  5,  3,
       42, 12, 33,  4,  6,  4,  3,  4,  5,  6,  7,  0, 11, 12,  1, 22, 33,
       11, 11,  2,  1,  0,  1,  2,  9,  8,  7,  1])

### NAN or INF values check

In [200]:
x = np.arange(1, 11, dtype=float)
x = np.insert(x, 2, np.inf, axis=0)
x = np.insert(x, 8, np.nan, axis=0)
x

array([ 1.,  2., inf,  3.,  4.,  5.,  6.,  7., nan,  8.,  9., 10.])

In [201]:
np.isnan(x)

array([False, False, False, False, False, False, False, False,  True,
       False, False, False])

In [112]:
np.isinf(x)

array([False, False,  True, False, False, False, False, False, False,
       False, False, False])