### NumPy's major contributions are:
- Efficient numeric computation with C primitives
- A C API for connecting NumPy with libraries written in C or C++

In contrast to Python, where even a simple int is an object (which includes all the methods and variables attributed to the object, such as `__str__` and `__repr__`, and which ultimately increases the size of the variable), NumPy uses primitive numeric types (floats, ints), which makes storage and computation efficient.

NumPy is one of the core packages for numeric computing in Python. pandas, matplotlib and many other scientific libraries rely on NumPy.

In [2]:
import numpy as np

In [12]:
a = np.array([1, 2, 3, 4])
b = np.array([0.5, 1, 1.5, 2, 2.5, 3])

In [19]:
# a[0] and b[1] are single elements; the extra parentheses print them together as a tuple
print((a[0], b[1]))

# elements at indices 0, 1 and 2 (the stop index is excluded)
print(a[0:3])

# everything from index 1 to the end
print(a[1:])

# every third element, starting at index 0
print(a[::3])

(1, 1.0)
[1 2 3]
[2 3 4]
[1 4]


In [24]:
print(b)

# numpy supports indexing with a list of indices; the following creates another
# numpy array holding the elements at positions 0, 3 and -1 (the last one)
print(b[[0, 3, -1]])

[0.5 1.  1.5 2.  2.5 3. ]
[0.5 2.  3. ]


### Array types

By default NumPy uses int64 or float64 as the datatype when creating an array; however, this behavior can be changed.

In [27]:
print(a.dtype)
print(b.dtype)

int64
float64


In [29]:
# dtype= overrides the type numpy would infer: the integers are stored as floats
c = np.array([1, 2, 3, 4], dtype=float)
print(c.dtype)

float64


### Array Dimensions and shape


In [31]:
# Creating a multidimensional array
A = np.array([
    [1, 2, 3],
    [4, 5, 6]
])
print(A)

[[1 2 3]
 [4 5 6]]


In [34]:
print(f'shape of the matrix is: {A.shape}')
print(f'total count of elements of the matrix is: {A.size}')
print(f'dimension of the matrix is: {A.ndim}')

shape of the matrix is: (2, 3)
total count of elements of the matrix is: 6
dimension of the matrix is: 2


In [38]:
# 3-dimensional array; the inner matrices must all have the same shape
B = np.array([
    [
        [1, 2, 3],
        [4, 5, 6]
    ],
    [
        [7, 8, 9],
        [11, 12, 13]
    ]
])

print(B)

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [11 12 13]]]


In [37]:
print(f'shape of the matrix is: {B.shape}')
print(f'total count of elements of the matrix is: {B.size}')
print(f'dimension of the matrix is: {B.ndim}')

shape of the matrix is: (2, 2, 3)
total count of elements of the matrix is: 12
dimension of the matrix is: 3


### Indexing and slicing of matrix

In [39]:
C = np.array([
#    0  1  2    
    [1, 2, 3],  # 0
    [4, 5, 6],  # 1
    [7, 8, 9]   # 2
])

In [43]:
print(f'first row of matrix is: {C[0]}')
print(f'first row, second column of matrix is: {C[0][1]}')

# another way to select an element is C[d1, d2, ...], where d1 is the index
# along the first dimension, d2 along the second, and so on
print(C[1, 0])  # second row, first column

# this form is more useful because each dimension can take its own slice

first row of matrix is: [1 2 3]
first row, second column of matrix is: 2
4


In [48]:
# select first 2 rows and then select first column of the selected rows
C[0:2, 0]

array([1, 4])

In [50]:
# select all rows and then select first 2 columns of the rows
C[:, :2]

array([[1, 2],
       [4, 5],
       [7, 8]])

In [52]:
# select the first 2 rows and, within them, the first 2 columns
C[:2, :2]

array([[1, 2],
       [4, 5]])

In [57]:
# replace the third row with new values
C[2] = np.array([11, 21, 32])
# a scalar assigned to a row is written to every element of that row
C[1] = 99
C

array([[ 1,  2,  3],
       [99, 99, 99],
       [11, 21, 32]])

### Summary statistics

In [59]:
C.sum()

367

In [60]:
C.mean()

40.77777777777778

In [62]:
C.std()

42.21315692155647

In [63]:
# axis=0 sums down the rows: one total per column
C.sum(axis=0)

array([111, 122, 134])

In [64]:
# axis=1 sums across the columns: one total per row
C.sum(axis=1)

array([  6, 297,  64])

In [69]:
# mean of each column
print(C.mean(axis=0))
# mean of each row
print(C.mean(axis=1))

[37.         40.66666667 44.66666667]
[ 2.         99.         21.33333333]


In [70]:
# standard deviation of each column
print(C.std(axis=0))
# standard deviation of each row
print(C.std(axis=1))

[44.0302926  41.97088938 40.20226638]
[0.81649658 0.         8.57645355]


### Broadcasting and vectorised operation


In [77]:
x = np.array([0, 1, 2, 3])

# x + 10 builds a new array and leaves x untouched (the result is discarded here)
x + 10
print(x)
# x += 10 updates x itself, so the change is visible afterwards
x += 10
print(x)

[0 1 2 3]
[10 11 12 13]


In [79]:
# with a plain Python list the same kind of element-wise operation needs an
# explicit loop, written here as a list comprehension
l = [0, 1, 2, 3]
[i*10 for i in l]

[0, 10, 20, 30]

In [89]:
b = np.array([0.5, 1 , 1.5, 2])
b

array([0.5, 1. , 1.5, 2. ])

In [90]:
# element-wise addition: each value of a is added to the value of b at the same position
a + b

array([1.5, 3. , 4.5, 6. ])

In [91]:
a * b 

array([0.5, 2. , 4.5, 8. ])

### Boolean Array


In [94]:
# selecting values of the array
# using a list of indices, e.g. positions 0 and 2
print(a[[0, 2]])

# elements can also be selected with a boolean mask: True keeps the element at
# that position, False drops it (this particular mask keeps positions 0 and 3)
print(a[[True, False, False, True]])

[1 3]
[1 4]


In [95]:
a >= 2

array([False,  True,  True,  True])

In [96]:
# combining both boolean expression and element selection
a[a >= 2]

array([2, 3, 4])

In [100]:
# filtering data using conditions: values above the mean ...
print(a[a > a.mean()])
# ... and, with ~ negating the mask, the remaining values
print(a[~(a > a.mean())])

[3 4]
[1 2]


In [103]:
# conditions are combined with & (each one in its own parentheses):
# keep the values that are <= 2 and even
a[((a <= 2) & (a % 2 == 0))]

array([2])

In [106]:
# 3x3 matrix of random integers from 0 to 9; no seed is set, so the values change on every run
A = np.random.randint(10, size=(3, 3))
A

array([[2, 5, 9],
       [7, 7, 3],
       [5, 4, 4]])

In [108]:
A > 3

array([[False,  True,  True],
       [ True,  True, False],
       [ True,  True,  True]])

In [110]:
# masking a 2-D array returns the matching values as a flat 1-D array
A[A > 3]

array([5, 9, 7, 7, 5, 4, 4])

### Size of objects in memory

#### int, floats

In [116]:
import sys
# an integer in python is about 28 bytes
sys.getsizeof(1)

28

In [117]:
# very large integers take even more space (Python ints grow as needed)
sys.getsizeof(10**100)

72

In [121]:
# bytes per element of numpy's default integer dtype.
# np.int was only an alias for the builtin int (deprecated in NumPy 1.20 and
# removed in 1.24), so the builtin is passed directly.
np.dtype(int).itemsize

8

In [120]:
np.dtype(np.int8).itemsize

1

In [122]:
# bytes per element of numpy's default float dtype (float64).
# np.float was only an alias for the builtin float (deprecated in NumPy 1.20
# and removed in 1.24), so the builtin is passed directly.
np.dtype(float).itemsize

8

#### lists are even bigger

In [124]:
# one element list
sys.getsizeof([1])

64

In [127]:
# bytes used by each element of a one-element numpy array
# NOTE: itemsize is the size of one element only; the array object itself
# carries extra overhead that is not counted here
np.array([1]).itemsize

8

### And performance is also important

In [138]:
l = list(range(100000))
a = np.arange(100000)

In [135]:
%time np.sum(a ** 2)

CPU times: user 6.4 ms, sys: 2.35 ms, total: 8.76 ms
Wall time: 6.91 ms


333328333350000

In [136]:
%time sum([x ** 2 for x in l])

CPU times: user 13.6 ms, sys: 55.7 ms, total: 69.3 ms
Wall time: 65.8 ms


333328333350000