<a href="https://www.kaggle.com/code/fabinahian/numpy-basics?scriptVersionId=130734130" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Importing Libraries

In [1]:
import sys
import numpy as np

# Generating & Storing Arrays

In [2]:
# This is how an array is generated using NumPy

np.array ([1,2,3,4,5])

array([1, 2, 3, 4, 5])

In [3]:
# This is how you can generate AND store an array inside a variable using NumPy

a = np.array([1,2,3,4,5,6])
b = np.array([7,8,9,10,11])

# Slicing & Indexing Arrays

In [4]:
# This is how you can extract values from the arrays
# a = np.array([1,2,3,4,5,6])

a[0]

1

In [5]:
# a = np.array([1,2,3,4,5,6])

a[1]

2

In [6]:
# a = np.array([1,2,3,4,5,6])

a[0],a[1]

(1, 2)

In [7]:
# a = np.array([1,2,3,4,5,6])

a[0:]

array([1, 2, 3, 4, 5, 6])

In [8]:
# a = np.array([1,2,3,4,5,6])

a[1:4]

array([2, 3, 4])

In [9]:
# a = np.array([1,2,3,4,5,6])

a[1:-1]

array([2, 3, 4, 5])

In [10]:
# a = np.array([1,2,3,4,5,6])

a[::2]

array([1, 3, 5])

In [11]:
# b = np.array([7,8,9,10,11])

# multi indexing

b[0], b[2], b[-1]

(7, 9, 11)

In [12]:
# b = np.array([7,8,9,10,11])

# multi indexing with a list of positions: returns a new array (not a tuple like the previous cell)

b[[0,2,-1]]

array([ 7,  9, 11])

In [13]:
# multi indexing

a[0], b[0]

(1, 7)

# Array Types

NumPy automatically picks a data type (e.g. `int64`, `float64`) based on the values you pass in, but you can also specify one explicitly with the `dtype` argument.



In [14]:
a


array([1, 2, 3, 4, 5, 6])

In [15]:
a.dtype


dtype('int64')

In [16]:
c = np.array([0.0, 1, 5.2, 7, 8])

c.dtype

dtype('float64')

In [17]:
# Declaring the type to be float even though the values are int

d = np.array([1,2,3,4], dtype = float)

d.dtype

dtype('float64')

In [18]:
# You can also request a smaller integer type such as int8 instead of the default int64 --> each element then takes 1 byte instead of 8 (but int8 can only hold values from -128 to 127)

np.array([1,2,3,4,5,6,7,8,9], dtype = np.int8)

array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int8)

# Matrix: 2 Dimensional Array


In [19]:
# 2 dimensional arrays shown below: 2 rows & 3 columns

A = np.array ([[1,2,3],[4,5,6]]) # You can define it in this way
 
AA = np.array ([[10,11,12],       # You can define it in this way as well
              [13,14,15]])

In [20]:
A.shape # (row, column)

(2, 3)

In [21]:
A.ndim # Number of dimensions : here, 1 vertical & 1 horizontal

2

In [22]:
A.size # Total number of elements


6

# Matrix: 3 Dimensional Array

In [23]:
B = np.array ([
    [
        [1,2,3],
        [4,5,6]
    ],
    [
        [7,8,9],
        [10,11,12]
    ]
])

In [24]:
B

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [25]:
B.shape

(2, 2, 3)

In [26]:
B.ndim

3

In [27]:
B.size

12

# Slicing & Indexing for Matrices (Multi-Dimensional Arrays)

In [28]:
# Square Matrix

A = np.array([
    [1,2,3],  # row 0
    [4,5,6],  # row 1
    [7,8,9]   # row 2 
])

In [29]:
A[1] # Gets row: 1

array([4, 5, 6])

In [30]:
A[1][0] # Gets the element of row: 1 and column: 0

4

In [31]:
# This method also allows for slicing (shown in the next cell)

A [ 1,0 ] # Gets the element of row: 1 and column: 0

4

In [32]:
A [ : , :2] # Gets all rows, but only the first 2 columns (column indices 0 and 1)

array([[1, 2],
       [4, 5],
       [7, 8]])

In [33]:
# We can overwrite an entire row by assigning an array of matching length (the shape of A stays 3x3)

A[1] = np.array([10,10,10])

A

array([[ 1,  2,  3],
       [10, 10, 10],
       [ 7,  8,  9]])

In [34]:
# We can also assign a single scalar to a row; NumPy broadcasts it to every element of that row

A[2] = 99
A

array([[ 1,  2,  3],
       [10, 10, 10],
       [99, 99, 99]])

# Summary Statistics For Arrays

In [35]:
a = np.array([1,2,3,4,5])

In [36]:
a.sum()

15

In [37]:
a.mean()

3.0

In [38]:
a.std()

1.4142135623730951

In [39]:
a.var()

2.0

# Summary Statistics For Matrices

In [40]:
A = np.array([
    [1,2,3],
    [4,5,6]
])

In [41]:
A.sum()

21

In [42]:
A.mean()

3.5

In [43]:
A.std()

1.707825127659933

In [44]:
A.var()

2.9166666666666665

In [45]:
# You can also apply all of these functions along only the rows or only the columns by using the "axis" argument

A.sum (axis = 0 ) # Gets sum of all individual columns : axis = 0 

array([5, 7, 9])

In [46]:
A.sum (axis = 1 ) # Gets sum of all individual rows : axis = 1

# You can keep increasing the value of "axis" if you work with more dimensions

array([ 6, 15])

# Broadcasting & Vectorized Operations

In [47]:
a = np.arange(5) # Gets a basic array
a

array([0, 1, 2, 3, 4])

In [48]:
# Arithmetic with a scalar is applied to every element of the array, because the scalar is broadcast across it

a + 10 # adds 10 to every element of the array named a

array([10, 11, 12, 13, 14])

In [49]:
a*4

array([ 0,  4,  8, 12, 16])

In [50]:
# Note: the operations above do not change the array 'a' itself; they create new arrays. So, if we look at 'a', it is still the same

a

array([0, 1, 2, 3, 4])

In [51]:
# We can modify array 'a' itself by using an augmented assignment operator

a += 10 # Adds 10 to every element in place: the existing array 'a' is updated, no new array is created

a

array([10, 11, 12, 13, 14])

In [52]:
# List comprehensions

l = [0,1,2,3]

[i+10 for i in l]

[10, 11, 12, 13]

In [53]:
a = np.arange(4)
b = np.array([2,4,6,8])

In [54]:
a

array([0, 1, 2, 3])

In [55]:
b

array([2, 4, 6, 8])

In [56]:
a + b

array([ 2,  5,  8, 11])

In [57]:
a * b 

array([ 0,  4, 12, 24])

# Boolean Arrays (Also called Masks)

In [58]:
a = np.arange(4)

In [59]:
a

array([0, 1, 2, 3])

In [60]:
# Method of getting first and last elements of an array

a[[0,-1]]

array([0, 3])

In [61]:
# Method of getting first and last elements of an array

a[0], a[-1]

(0, 3)

In [62]:
# Boolean method of getting first and last element

a[[True, False, False, True]]

array([0, 3])

In [63]:
# We can check conditions for all individual elements using the Boolean property 

a >= 2

array([False, False,  True,  True])

In [64]:
# We can filter an array (even one with millions of elements) by indexing it with a Boolean condition

a[a>=2] # Only the values that are greater than or equal to 2 will show up in the result : Filtering

array([2, 3])

In [65]:
a.mean() # shows the mean of array 'a'

1.5

In [66]:
a [ a > a.mean()] # Gives values that are greater than the mean 

array([2, 3])

In [67]:
a [ ~ (a>a.mean())] # gives values that are NOT greater than the mean

array([0, 1])

In [68]:
# OR operation
a [ (a == 0) | (a == 1 )]

array([0, 1])

In [69]:
# AND operation

a [ (a >= 0 ) & ( a <= 2 )]

array([0, 1, 2])

# Works with Matrices as well !!!

In [70]:
A = np.random.randint (100, size = (3,3))
A

array([[ 8, 42, 51],
       [19, 55, 82],
       [77, 19, 91]])

In [71]:
A[np.array([
    [True, False, True],
    [False, False, True],
    [True, False, False]
])]


array([ 8, 51, 82, 77])

In [72]:
A>30

array([[False,  True,  True],
       [False,  True,  True],
       [ True, False,  True]])

In [73]:
A[A>30]

array([42, 51, 55, 82, 77, 91])

# Linear Algebra

In [74]:
A = np.array ([
    [1,2,3],
    [4,5,6],
    [7,8,9]
])

In [75]:
B = np.array([
    [1,2],
    [3,4],
    [5,6]
])

In [76]:
A. dot (B) # Dot product

array([[ 22,  28],
       [ 49,  64],
       [ 76, 100]])

In [77]:
A @ B # Matrix multiplication with the @ operator (same result as A.dot(B)); this is not a cross product

array([[ 22,  28],
       [ 49,  64],
       [ 76, 100]])

In [78]:
B. T # Transpose

array([[1, 3, 5],
       [2, 4, 6]])

In [79]:
B. T @ A

array([[48, 57, 66],
       [60, 72, 84]])

# Sizes of Objects in Memory

In [80]:
# An integer in Python is > 24 bytes

sys.getsizeof(1)


28

In [81]:
sys.getsizeof(10**100)

72

In [82]:
sys.getsizeof([1])

64

In [83]:
# A NumPy array element is much smaller than a Python int object: itemsize is the number of bytes used per element

np.dtype(int).itemsize

8

In [84]:
np.dtype(np.int8).itemsize

1

In [85]:
np.dtype(float).itemsize

8

In [86]:
np.array([1]).nbytes

8

# Performance 

In [87]:
l = list(range(10000000))

In [88]:
%time sum([x**2 for x in l])

CPU times: user 3.55 s, sys: 174 ms, total: 3.72 s
Wall time: 3.73 s


333333283333335000000

In [89]:
a = np.arange(10000000)

In [90]:
# WARNING: the value printed below does NOT match the pure-Python result
# (333333283333335000000). The true sum is too large for a 64-bit integer,
# so the fixed-width NumPy integers silently overflow and wrap around.
# The speed comparison is still valid, but the number itself is wrong; use
# Python ints (exact) or a float dtype (approximate) when values can exceed
# the int64 range.
%time np.sum(a**2)

CPU times: user 12.7 ms, sys: 4.98 ms, total: 17.7 ms
Wall time: 17.7 ms


1291890006563070912

# Useful Numpy Functions

# Random

In [91]:
np.random.random(size = 2)

array([0.37248701, 0.44284302])

In [92]:
np.random.normal (size = 2)

array([-0.97070112, -0.7959161 ])

In [93]:
np.random.rand(2,4)

array([[0.86120663, 0.45456394, 0.31650233, 0.19506201],
       [0.62396793, 0.81069401, 0.31715466, 0.01956752]])

# Arange

In [94]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [95]:
np.arange(5,10)

array([5, 6, 7, 8, 9])

In [96]:
np.arange(0,1,.1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

# Reshape

In [97]:
np.arange(10).reshape(2,5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [98]:
np.arange(10).reshape(5,2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

# Linspace

In [99]:
np.linspace(0,1,5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [100]:
np.linspace(0,1,20)

array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ])

In [101]:
np.linspace(0,1,20, False) # 4th positional argument is endpoint=False: the stop value 1 is excluded

array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95])

# Zeros, Ones, Empty

In [102]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [103]:
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [104]:
np.zeros((3,3,), dtype = int)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [105]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [106]:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [107]:
np.ones((3,3), dtype = int)

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [108]:
# np.empty allocates the array WITHOUT initializing it, so the values shown
# are arbitrary leftovers from memory and will differ from run to run
np.empty(5)

array([1., 1., 1., 1., 1.])

In [109]:
np.empty((2,2))

array([[0.25, 0.5 ],
       [0.75, 1.  ]])

In [110]:
np.empty((2,2), dtype = int)

array([[4598175219545276416, 4602678819172646912],
       [4604930618986332160, 4607182418800017408]])

# Identity and Eye

In [111]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [112]:
np.eye(3,3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [113]:
np.eye(8,4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [114]:
np.eye(8,4, k=1) # k=1 places the ones on the diagonal one position above the main diagonal

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [115]:
np.eye(8,4, k = -3) # k=-3 places the ones on the diagonal three positions below the main diagonal

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.]])

In [116]:
"Hello" [2]

'l'