In [1]:
import numpy as np

##### The basics of NumPy arrays are similar to ordinary Python lists

In [4]:
# A plain Python list: creation, type inspection, indexing and slicing.
a = list(range(1, 6))
print(
    a,        # the whole list
    type(a),  # the type, i.e. list
    a[1],     # the value at index 1
    a[1:4],   # the values at indices 1, 2 and 3 (the stop index is excluded)
    a[-1],    # the value at the last position
    sep="\n",
)

[1, 2, 3, 4, 5]
<class 'list'>
2
[2, 3, 4]
5


##### NumPy arrays can be defined similarly, but they work differently behind the scenes
##### They are written in C and optimized for linear algebra

In [7]:
# The same five values as a NumPy array; indexing and slicing mirror the list syntax.
a = np.arange(1, 6)
print(
    a,        # unlike a list, the elements are printed separated by spaces
    type(a),  # the type, i.e. numpy.ndarray
    a[1],     # the value at index 1
    a[1:4],   # the values at indices 1, 2 and 3
    a[-1],    # the value at the last position
    sep="\n",
)

[1 2 3 4 5]
<class 'numpy.ndarray'>
2
[2 3 4]
5


In [9]:
a[2] = 10 # item assignment changes the array in place, just as it does for a list
print(a)

[ 1  2 10  4  5]


In [11]:
# multi-dimensional numpy array: a list of equal-length rows becomes a 2-D array
a_mul = np.array([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 8]])
print(a_mul)
print(a_mul[0]) # a single index selects the whole first row
print(a_mul[0, 1]) # row 0, column 1

[[1 2 3]
 [4 5 6]
 [7 8 8]]
[1 2 3]
2


In [13]:
# .shape is a tuple holding the length of the array along each axis
print(a_mul.shape)

(3, 3)


In [15]:
# nesting the lists one level deeper adds another axis
# below we have 2 blocks, each with 3 rows of 4 elements, hence the shape (2, 3, 4)
a_mul = np.array([[[1, 2, 3, 1],
                   [4, 5, 6, 1],
                   [7, 8, 8, 1]],
                  [[1, 2, 4, 2],
                   [2, 5, 6, 1],
                   [6, 3, 8, 9]]])
print(a_mul.shape)

(2, 3, 4)


In [17]:
a_mul.ndim # the number of axes (dimensions) of the array; a_mul here has 3

3

In [19]:
a_mul.size # the total number of elements in the array (2 * 3 * 4 = 24)

24

In [21]:
a_mul.dtype # the data type shared by all elements (the default integer width depends on the platform)

dtype('int32')

In [23]:
a = np.array([[1, 2, 3],
             [4, "Hello", 6],
             [7, 8, 9]])
print(a.dtype) # a single string in the data makes every element a string
print(a[0][0])
print(a[0][0].dtype) # the value prints as 1, but it is stored as the string '1'
# that is due to the one string value in the array
# <U11 = unicode string type holding at most 11 characters (the array's dtype)
# <U1  = unicode string type holding 1 character (the dtype of the single value)

<U11
1
<U1


In [25]:
# Requesting an integer dtype while the data contains a non-numeric string
# cannot work: "Hello" has no integer value, so np.array raises ValueError.
# The other direction (typecasting an integer to a string) is possible.
# The error is caught and printed so the notebook still runs from top to bottom;
# because the assignment never completes, `a` keeps its previous value.
try:
    a = np.array([[1, 2, 3],
                  [4, "Hello", 6],
                  [7, 8, 8]], dtype = np.int32)

    print(a.dtype)  # not reached: the line above raises
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: invalid literal for int() with base 10: 'Hello'

In [27]:
a = np.array([[1, 2, 3],
              [4, "5", 6],
              [7, 8, 8]], dtype = np.int32)

print(a.dtype) # the numeric string "5" can be converted, so the requested int32 dtype is used
# without the explicit dtype the array would be stored as strings instead

int32


In [29]:
a = np.array([[1, 2, 3],
              [4, "5", 6],
              [7, 8, 8]])

print(a.dtype) # no dtype was requested, so the string "5" turns the whole array into strings
# passing an integer dtype explicitly would convert "5" to a number instead

<U11


In [31]:
# the same data converted to 32-bit floats: the string "5" becomes 5.0
a = np.array([[1, 2, 3],
              [4, "5", 6],
              [7, 8, 8]], dtype = np.float32)

print(a.dtype)
print(a[1][1].dtype) # each individual element carries the array's dtype
print(a[1][1])


float32
float32
5.0


In [33]:
# when the elements share no common numeric or string type (here a dict is mixed in),
# the array falls back to the generic `object` dtype and keeps the Python objects as they are
d = {'1' : 'A'}

a = np.array([[1, 2, 3],
              [4, d, 6],
              [7, 8, "Hello"]])

print(a.dtype)
print(a[1][1]) # the dict is returned unchanged

object
{'1': 'A'}


In [35]:
a = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]], dtype = '<U2')

print(a.dtype) # we can force a typecast: the integers are stored as strings of at most 2 characters

<U2


In [37]:
# np.full builds an array of the requested shape in which every entry holds the same value.
# The shape comes first, followed by the value used to fill the array.
a = np.full(shape=(2, 3, 4), fill_value=9)
print(a)

[[[9 9 9 9]
  [9 9 9 9]
  [9 9 9 9]]

 [[9 9 9 9]
  [9 9 9 9]
  [9 9 9 9]]]


In [39]:
a = np.zeros((2, 3, 2)) # an array of the given shape filled with zeros (floats, hence the printed dots)
print(a)

[[[0. 0.]
  [0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]
  [0. 0.]]]


In [41]:
a = np.ones((2, 3, 2)) # an array of the given shape filled with ones (floats, hence the printed dots)
print(a)

[[[1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]]]


In [43]:
b = np.empty((2, 3, 2)) # this reserves the memory without initialising it
print(b) # the printed values are whatever happened to be in that memory, so do not rely on them

[[[1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]]]


In [45]:
# to generate a range of evenly spaced values
# args are the start value, the end value (not included in the result) and the step size
x_values = np.arange(0, 1000, 5)
print(x_values)

[  0   5  10  15  20  25  30  35  40  45  50  55  60  65  70  75  80  85
  90  95 100 105 110 115 120 125 130 135 140 145 150 155 160 165 170 175
 180 185 190 195 200 205 210 215 220 225 230 235 240 245 250 255 260 265
 270 275 280 285 290 295 300 305 310 315 320 325 330 335 340 345 350 355
 360 365 370 375 380 385 390 395 400 405 410 415 420 425 430 435 440 445
 450 455 460 465 470 475 480 485 490 495 500 505 510 515 520 525 530 535
 540 545 550 555 560 565 570 575 580 585 590 595 600 605 610 615 620 625
 630 635 640 645 650 655 660 665 670 675 680 685 690 695 700 705 710 715
 720 725 730 735 740 745 750 755 760 765 770 775 780 785 790 795 800 805
 810 815 820 825 830 835 840 845 850 855 860 865 870 875 880 885 890 895
 900 905 910 915 920 925 930 935 940 945 950 955 960 965 970 975 980 985
 990 995]


In [47]:
x_values.shape # a one-dimensional array, so the shape tuple has a single entry

(200,)

In [49]:
# linspace takes the number of values wanted in the range instead of the step size
# both the start and the end value are part of the result, and the values are floats
x_values = np.linspace(0, 1000, 101)
print(x_values)

[   0.   10.   20.   30.   40.   50.   60.   70.   80.   90.  100.  110.
  120.  130.  140.  150.  160.  170.  180.  190.  200.  210.  220.  230.
  240.  250.  260.  270.  280.  290.  300.  310.  320.  330.  340.  350.
  360.  370.  380.  390.  400.  410.  420.  430.  440.  450.  460.  470.
  480.  490.  500.  510.  520.  530.  540.  550.  560.  570.  580.  590.
  600.  610.  620.  630.  640.  650.  660.  670.  680.  690.  700.  710.
  720.  730.  740.  750.  760.  770.  780.  790.  800.  810.  820.  830.
  840.  850.  860.  870.  880.  890.  900.  910.  920.  930.  940.  950.
  960.  970.  980.  990. 1000.]


In [51]:
# NaN stands for "not a number"
# inf stands for infinity
# Use the lowercase spelling np.nan: the np.NaN alias was removed in NumPy 2.0.
print(np.nan)
print(np.inf)
print(np.isnan(np.nan))
print(np.isinf(np.inf))
print(np.sqrt(-1)) # emits a RuntimeWarning, but returns nan as the answer
print(np.array([10])/0) # emits a RuntimeWarning, but returns inf as the answer

nan
inf
True
True
nan
[inf]




In [53]:
# Two regular Python lists ...
l1 = list(range(1, 6))
l2 = list(range(5, 10))

# ... and the numpy arrays built from them
a1, a2 = np.array(l1), np.array(l2)

# Multiplying by a scalar: the list is repeated 5 times,
# whereas every element of the array is multiplied by 5.
print(5 * l1)
print(5 * a1)

[1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
[ 5 10 15 20 25]


In [55]:
# For lists, + only concatenates two lists, so adding an int raises TypeError.
# The error is caught and printed so that the cells below still run.
try:
    print(l1 + 5)
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: can only concatenate list (not "int") to list

In [57]:
print(a1 + 5) # 5 is added to every element of the array

[ 6  7  8  9 10]


In [59]:
print(l1 + l2) # the two lists are concatenated into one
print(a1 + a2) # each element of one array is added to the corresponding element of the other

[1, 2, 3, 4, 5, 5, 6, 7, 8, 9]
[ 6  8 10 12 14]


In [61]:
# none of these element-wise operations are defined between two python lists;
# each line below raises a TypeError when uncommented
# print(l1 * l2)
# print(l1 / l2)
# print(l1 - l2)

In [63]:
# numpy arrays perform all the arithmetic operations element-wise
print(a1 * a2)
print(a1 / a2) # division gives floats even for integer arrays
print(a1 - a2)

[ 5 12 21 32 45]
[0.2        0.33333333 0.42857143 0.5        0.55555556]
[-4 -4 -4 -4 -4]


In [65]:
# Broadcasting: a (3,) row combined with a (2, 1) column gives a (2, 3) result,
# in which every row element is added to every column element.
b1 = np.arange(1, 4)
b2 = np.array([[1], [2]])

print(b1.shape, b2.shape, np.add(b1, b2), sep="\n")

(3,)
(2, 1)
[[2 3 4]
 [3 4 5]]


In [67]:
b1 = np.array([1, 2, 3])
b2 = np.array([[1, 1],
               [2, 1]])

print(b1.shape)
print(b2.shape)
# The trailing axis lengths differ (3 vs 2) and neither is 1, so the shapes
# cannot be broadcast and the addition raises ValueError. The error is caught
# and printed so that the notebook still runs from top to bottom.
try:
    print(b1 + b2)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

(3,)
(2, 2)


ValueError: operands could not be broadcast together with shapes (3,) (2,2) 

In [69]:
# numpy offers many mathematical functions
# when we apply one of these functions to an array, it is applied to each element of the array
a = np.array([[7, 2, 3],
              [8, 5, 6]])

print(np.sqrt(a)) # element-wise square root

[[2.64575131 1.41421356 1.73205081]
 [2.82842712 2.23606798 2.44948974]]


In [71]:
print(np.log10(a)) # element-wise base-10 logarithm

[[0.84509804 0.30103    0.47712125]
 [0.90308999 0.69897    0.77815125]]


In [73]:
print(np.sin(a)) # element-wise sine; the values are interpreted as radians

[[ 0.6569866   0.90929743  0.14112001]
 [ 0.98935825 -0.95892427 -0.2794155 ]]


In [75]:
print(np.exp(a)) # element-wise exponential, e raised to each value

[[1096.63315843    7.3890561    20.08553692]
 [2980.95798704  148.4131591   403.42879349]]


In [77]:
# numpy provides functions for append, insert and delete operations
a = np.array([1, 2, 3])

print(np.append(a, [7, 8,9]))# np.append returns a new array; it does not alter the original
print(a)
a = np.append(a, [7, 8, 9]) # assigning the result back makes the name `a` refer to the new, longer array
print(a)

[1 2 3 7 8 9]
[1 2 3]
[1 2 3 7 8 9]


In [79]:
a = np.insert(a, 3, [4, 5, 6])
# 3 is the index before which the new values are inserted; the result is assigned back to `a`
print(a)

[1 2 3 4 5 6 7 8 9]


In [81]:
a = np.array([[1, 2, 3],
              [4, 5, 6]])

# to delete, specify the array, the index and (optionally) the axis
# if no axis is given, the index refers to the flattened array and the result is one-dimensional
print(np.delete(a, 1))
print(np.delete(a, 0))
print(np.delete(a, 4))


[1 3 4 5 6]
[2 3 4 5 6]
[1 2 3 4 6]


In [83]:
a = np.array([[1, 2, 3],
              [4, 5, 6]])

# axis 0 represents rows: this deletes the row at index 1
print(np.delete(a, 1, 0))

# axis 1 represents columns: this deletes the column at index 1
print(np.delete(a, 1, 1))

[[1 2 3]]
[[1 3]
 [4 6]]


In [85]:
# numpy has a set of structural methods that change how the elements are laid out
a = np.array([[1, 2, 3, 4, 5],
             [6, 7, 8, 9, 10],
             [11, 12, 13, 14, 15],
             [16, 17, 18, 19, 20]])

print(a.shape)
print(a.reshape((5, 4))) # this changes only the shape; the order of the elements stays the same

(4, 5)
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]
 [17 18 19 20]]


In [87]:
# this results in a one-dimensional array with 20 elements
print(a.reshape((20,)))
# this results in an array with 20 rows and 1 column
print(a.reshape((20, 1)))

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]
 [14]
 [15]
 [16]
 [17]
 [18]
 [19]
 [20]]


In [89]:
print(a.reshape((2, 2, 5))) # 2 collections of 2 rows with 5 elements each
print(a.reshape((2, 5, 2))) # 2 collections of 5 rows with 2 elements each
print(a.reshape((5, 2, 2))) # 5 collections of 2 rows with 2 elements each

[[[ 1  2  3  4  5]
  [ 6  7  8  9 10]]

 [[11 12 13 14 15]
  [16 17 18 19 20]]]
[[[ 1  2]
  [ 3  4]
  [ 5  6]
  [ 7  8]
  [ 9 10]]

 [[11 12]
  [13 14]
  [15 16]
  [17 18]
  [19 20]]]
[[[ 1  2]
  [ 3  4]]

 [[ 5  6]
  [ 7  8]]

 [[ 9 10]
  [11 12]]

 [[13 14]
  [15 16]]

 [[17 18]
  [19 20]]]


In [91]:
# reshape returns a new array object; `a` keeps its shape unless the result is assigned back
print(a.reshape((10, 2)))
print(a)

[[ 1  2]
 [ 3  4]
 [ 5  6]
 [ 7  8]
 [ 9 10]
 [11 12]
 [13 14]
 [15 16]
 [17 18]
 [19 20]]
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]


In [93]:
# resize, in contrast, changes the shape of `a` in place, so no assignment is needed
a.resize(10, 2)
print(a)

[[ 1  2]
 [ 3  4]
 [ 5  6]
 [ 7  8]
 [ 9 10]
 [11 12]
 [13 14]
 [15 16]
 [17 18]
 [19 20]]


In [95]:
# flatten returns a one-dimensional copy of the array
a = np.array([[1, 2, 3, 4, 5],
             [6, 7, 8, 9, 10],
             [11, 12, 13, 14, 15],
             [16, 17, 18, 19, 20]])
print(a.flatten())

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]


In [97]:
var1 = a.flatten()
var1[2] = 100 # flatten made a copy, so this assignment does not change the original array
print(var1)
print(a)

[  1   2 100   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20]
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]


In [99]:
# ravel is similar to flatten
# flatten returns a flattened copy
# whereas ravel returns a flattened view of the same data whenever possible
var1 = a.ravel() 
var1[2] = 100 # var1 is a view here, so this assignment also changes the original array
print(var1)
print(a)

[  1   2 100   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20]
[[  1   2 100   4   5]
 [  6   7   8   9  10]
 [ 11  12  13  14  15]
 [ 16  17  18  19  20]]


In [101]:
# Four rows of five consecutive integers: 1-5, 6-10, 11-15 and 16-20.
a = np.array([list(range(start, start + 5)) for start in (1, 6, 11, 16)])

# The flat attribute is an iterator that visits every element row by row.
var = list(a.flat)
print(var)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]


In [103]:
# transposing, i.e. swapping the axes
print(a.transpose()) # swaps the rows and columns
print(a.T) # the transpose is also available through the T attribute

[[ 1  6 11 16]
 [ 2  7 12 17]
 [ 3  8 13 18]
 [ 4  9 14 19]
 [ 5 10 15 20]]
[[ 1  6 11 16]
 [ 2  7 12 17]
 [ 3  8 13 18]
 [ 4  9 14 19]
 [ 5 10 15 20]]


In [105]:
# swapaxes is similar to transpose
# when there are more than two axes, it lets us name the two axes we need to swap
# whereas transpose, called without arguments, reverses the order of all axes
print(a.swapaxes(0,1))

[[ 1  6 11 16]
 [ 2  7 12 17]
 [ 3  8 13 18]
 [ 4  9 14 19]
 [ 5 10 15 20]]


##### Joining and Splitting arrays

In [111]:
# Two 2x5 blocks holding 1-10 and 11-20.
a1 = np.array([list(range(1, 6)), list(range(6, 11))])
a2 = np.array([list(range(11, 16)), list(range(16, 21))])

# Joining along axis 0 places the rows of a2 below the rows of a1.
a = np.concatenate([a1, a2], axis=0)
print(a)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]


In [113]:
# Two 2x5 blocks; the second holds the values of the first shifted by 10.
a1 = np.array([list(range(1, 6)), list(range(6, 11))])
a2 = a1 + 10

# Joining along axis 1 places the columns of a2 to the right of those of a1.
a = np.concatenate([a1, a2], axis=1)
print(a)

[[ 1  2  3  4  5 11 12 13 14 15]
 [ 6  7  8  9 10 16 17 18 19 20]]


In [117]:
# Two 2x5 blocks holding 1-10 and 11-20.
a1 = np.array([list(range(1, 6)), list(range(6, 11))])
a2 = np.array([list(range(11, 16)), list(range(16, 21))])

# stack joins the arrays along a new leading axis, giving shape (2, 2, 5).
a = np.stack([a1, a2], axis=0)
print(a)

[[[ 1  2  3  4  5]
  [ 6  7  8  9 10]]

 [[11 12 13 14 15]
  [16 17 18 19 20]]]


In [119]:
# Two 2x5 blocks holding 1-10 and 11-20.
a1 = np.array([list(range(1, 6)), list(range(6, 11))])
a2 = np.array([list(range(11, 16)), list(range(16, 21))])

# vstack gives the same result as concatenating over rows (axis 0).
a = np.vstack([a1, a2])
print(a)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]


In [121]:
# Two 2x5 blocks holding 1-10 and 11-20.
a1 = np.array([list(range(1, 6)), list(range(6, 11))])
a2 = np.array([list(range(11, 16)), list(range(16, 21))])

# hstack gives the same result as concatenating over columns (axis 1).
a = np.hstack([a1, a2])
print(a)

[[ 1  2  3  4  5 11 12 13 14 15]
 [ 6  7  8  9 10 16 17 18 19 20]]


In [129]:
# Four rows of six consecutive integers: 1-6, 7-12, 13-18 and 19-24.
a = np.array([list(range(first, first + 6)) for first in (1, 7, 13, 19)])

# Each (sections, axis) pair splits `a` into that many equal parts,
# along the rows (axis 0) or along the columns (axis 1).
print(
    *(np.split(a, sections, axis=axis)
      for sections, axis in ((2, 0), (4, 0), (2, 1), (3, 1), (6, 1))),
    sep="\n",
)

[array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]]), array([[13, 14, 15, 16, 17, 18],
       [19, 20, 21, 22, 23, 24]])]
[array([[1, 2, 3, 4, 5, 6]]), array([[ 7,  8,  9, 10, 11, 12]]), array([[13, 14, 15, 16, 17, 18]]), array([[19, 20, 21, 22, 23, 24]])]
[array([[ 1,  2,  3],
       [ 7,  8,  9],
       [13, 14, 15],
       [19, 20, 21]]), array([[ 4,  5,  6],
       [10, 11, 12],
       [16, 17, 18],
       [22, 23, 24]])]
[array([[ 1,  2],
       [ 7,  8],
       [13, 14],
       [19, 20]]), array([[ 3,  4],
       [ 9, 10],
       [15, 16],
       [21, 22]]), array([[ 5,  6],
       [11, 12],
       [17, 18],
       [23, 24]])]
[array([[ 1],
       [ 7],
       [13],
       [19]]), array([[ 2],
       [ 8],
       [14],
       [20]]), array([[ 3],
       [ 9],
       [15],
       [21]]), array([[ 4],
       [10],
       [16],
       [22]]), array([[ 5],
       [11],
       [17],
       [23]]), array([[ 6],
       [12],
       [18],
       [24]])]


##### Aggregate functions

In [133]:
# Four rows of six consecutive integers: 1-6, 7-12, 13-18 and 19-24.
a = np.array([list(range(first, first + 6)) for first in (1, 7, 13, 19)])

# Aggregates computed over all 24 elements, printed one per line.
print(
    a.min(),       # smallest value
    a.max(),       # largest value
    a.mean(),      # arithmetic mean
    a.std(),       # standard deviation
    a.sum(),       # total
    np.median(a),  # median
    sep="\n",
)

1
24
12.5
6.922186552431729
300
12.5


##### Numpy random

In [142]:
number = np.random.randint(100) # draws one random integer from 0 up to, but not including, 100
print(number) # no seed is set in the cells shown here, so the value differs from run to run

20


In [146]:
numbers = np.random.randint(1, size = (2, 3, 4))
# the single bound 1 is exclusive, so 0 is the only possible value:
# this prints an array of the given shape filled with zeros
print(numbers)

[[[0 0 0 0]
  [0 0 0 0]
  [0 0 0 0]]

 [[0 0 0 0]
  [0 0 0 0]
  [0 0 0 0]]]


In [148]:
numbers = np.random.randint(90, 100, size = (2, 3, 4))
# an array of the given shape with random integers from 90 up to, but not including, 100
print(numbers)

[[[96 91 92 93]
  [92 91 99 92]
  [92 97 98 92]]

 [[94 94 99 94]
  [97 96 99 91]
  [94 91 98 91]]]


In [150]:
numbers = np.random.binomial(10, p = 0.5, size = (5, 10))
# samples from a binomial distribution: each value is the number of successes
# in 10 trials with success probability 0.5
print(numbers)

[[4 6 4 5 5 6 5 4 5 5]
 [7 6 4 8 6 5 5 8 4 2]
 [6 4 5 7 5 5 6 7 2 3]
 [6 5 6 5 4 6 4 4 6 5]
 [3 3 8 6 4 5 6 7 5 3]]


In [152]:
numbers = np.random.normal(loc = 170, scale = 15, size = (5, 10))
print(numbers)
# samples from a normal distribution; loc is its mean and scale is its standard deviation

[[154.76505116 194.60150236 175.22382976 147.84566685 179.86196809
  150.49282938 146.57916177 164.8417922  181.90399489 163.98181627]
 [165.25286061 168.67031529 155.0901545  189.10084265 175.77330194
  149.67299257 171.42274971 155.34438406 190.06092138 160.81286873]
 [148.226924   174.66709265 191.52867912 155.62983559 157.70964842
  187.30449066 159.28764686 213.13424024 174.67751913 177.70672149]
 [149.74537718 197.6683254  181.32609799 192.70864245 148.70794594
  145.44549739 182.37910663 160.47916356 163.18286915 167.56240368]
 [192.30128732 178.54382581 169.82188961 167.94513885 160.77323509
  160.99239826 176.42074655 165.53889855 157.17778954 157.91934758]]


In [154]:
numbers = np.random.choice([10, 20, 30, 40, 50], size = (5, 10))
print(numbers)
# every entry of the array is picked at random from the given list of numbers

[[20 50 10 20 20 50 10 10 10 30]
 [30 20 30 30 30 30 50 50 20 20]
 [20 20 20 30 30 50 10 40 10 30]
 [30 50 30 10 40 30 20 20 40 30]
 [40 30 50 40 10 20 50 30 50 40]]


In [160]:
a = np.array([[1, 2, 3, 4, 5, 6],
             [7, 8, 9, 10, 11, 12],
             [13, 14, 15, 16, 17, 18],
             [19, 20, 21, 22, 23, 24]])
np.save('myarray.npy', a) # exports the array to a binary .npy file in the working directory

In [162]:
a = np.load("myarray.npy") # loads the saved array back from the .npy file
print(a)

[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]
 [13 14 15 16 17 18]
 [19 20 21 22 23 24]]


In [164]:
a = np.array([[1, 2, 3, 4, 5, 6],
             [7, 8, 9, 10, 11, 12],
             [13, 14, 15, 16, 17, 18],
             [19, 20, 21, 22, 23, 24]])
# exports the array as a csv file; no fmt is passed, so the values are written
# in savetxt's default floating-point text format
np.savetxt('myarray.csv', a, delimiter = ',')

In [166]:
a = np.loadtxt("myarray.csv", delimiter = ',') # loads the saved csv file as an array
print(a) # the values come back as floats, not as the integers that were saved
# the same import and export functions can also be used for datasets

[[ 1.  2.  3.  4.  5.  6.]
 [ 7.  8.  9. 10. 11. 12.]
 [13. 14. 15. 16. 17. 18.]
 [19. 20. 21. 22. 23. 24.]]
