# Ndarray Basics

## How To Create a Rank 1 numpy array:

In [3]:
import numpy as np

In [4]:
arr = np.array([3,33,333])

In [5]:
print(type(arr))

<class 'numpy.ndarray'>


In [6]:
print(arr.shape)

(3,)


In [7]:
print(arr[0], arr[1], arr[2])

3 33 333


In [10]:
arr[0] = 999
print(arr)

[999  33 333]


## How To Create a Rank 2 numpy array:

In [11]:
a = np.array([[11,12,13],[21,22,23]])

In [12]:
print(a)

[[11 12 13]
 [21 22 23]]


In [13]:
print("The 2D array has 2 rows and three columns and it's shape is ", a.shape)

The 2D array has 2 rows and three columns and it's shape is  (2, 3)


In [14]:
print(type(a))

<class 'numpy.ndarray'>


In [15]:
print("Accessing elements [0,0], [0,1], [0,2] of 2D array: ", a[0,0], a[0,1], a[0,2])

Accessing elements [0,0], [0,1], [0,2] of 2D array:  11 12 13


## There are many ways to create numpy arrays:

In [22]:
z = np.zeros((2,2))

In [19]:
print(z)

[[0. 0.]
 [0. 0.]]


In [21]:
f = np.full((2,2), 9.0)

In [23]:
print(f)

[[9. 9.]
 [9. 9.]]


In [24]:
e = np.eye(2,2)
print(e)

[[1. 0.]
 [0. 1.]]


In [25]:
o = np.ones((1,2))
print(o)

[[1. 1.]]


In [26]:
print(o.shape)

(1, 2)


In [27]:
print(o[0,1])

1.0


In [32]:
r = np.random.random((2,2))
print(r)

[[0.59144886 0.80385571]
 [0.92424047 0.76302149]]


# Ndarray Indexing

In [33]:
import numpy as np

In [14]:
a = np.array([[11,12,13,14],[21,22,23,24],[31,32,33,34]])

In [15]:
print(a)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [1]:
import numpy as np

In [2]:
a = np.array([[11,12,13,14],[21,22,23,24],[31,32,33,34]])

In [7]:
a_slice = a[:2,1:3]     #Getting middle columns of row zero and one 

In [8]:
print(a_slice)

[[12 13]
 [22 23]]


When we modify a slice, we actually modify the underlying array

This is because a slice is a reference (a view) onto the same underlying data as the original array, not a copy.

In [9]:
print(a_slice[0,0])

12


In [10]:
print(a[0,1])

12


In [11]:
a_slice[0,0] = 99

In [13]:
print(a)
print(a_slice)

[[11 99 13 14]
 [21 22 23 24]
 [31 32 33 34]]
[[99 13]
 [22 23]]


In [16]:
a = np.array([[11,12,13,14],[21,22,23,24],[31,32,33,34]])

In [17]:
a_slice_copy = a[:2,1:3].copy()    # Independent copy of the slice (not a view into a)

In [18]:
print(a_slice_copy)

[[12 13]
 [22 23]]


In [19]:
print(a[0,1])

12


In [20]:
print(a_slice_copy[0,0])

12


In [21]:
a_slice_copy[0,0] = 99

In [22]:
print(a_slice_copy)

[[99 13]
 [22 23]]


In [23]:
print(a)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


## Use both integer indexing and slice indexing:

In [24]:
# Mixing integer indexing with slicing yields an array of lower rank
# than the original: the integer-indexed axis is dropped.
arr = a[1, :]   # Row 1, all columns (the colon) -> rank 1 view

In [25]:
print(arr.shape)
print(arr)

(4,)
[21 22 23 24]


In [26]:
arr2 = a[1:2, :]      # Rank 2 view

In [28]:
print(arr2.shape)
print(arr2)

(1, 4)
[[21 22 23 24]]


In [29]:
col_rank1 = a[:, 1]
col_rank2 = a[:, 1:2]

In [30]:
print(col_rank1.shape)
print(col_rank1)

(3,)
[12 22 32]


In [31]:
print(col_rank2.shape)
print(col_rank2)

(3, 1)
[[12]
 [22]
 [32]]


## Array Indexing for changing elements:

Sometimes it's useful to use an array of indexes to access or change elements

In [32]:
import numpy as np

In [33]:
arr = np.array([[11,12,13],[21,22,23],[31,32,33],[41,42,43]])

In [35]:
print(arr)

[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]


In [36]:
col_indices = np.array([0,1,2,0])
print("Column indices picked : ", col_indices)

Column indices picked :  [0 1 2 0]


In [38]:
row_indices = np.arange(4)
print("Row indices picked : ", row_indices)

Row indices picked :  [0 1 2 3]


In [39]:
# Examine the pairings of row_indices and col_indices:
# each (row, column) pair addresses one element of arr.
for row,column in zip(row_indices,col_indices):
    print(row,", ",column)

0 ,  0
1 ,  1
2 ,  2
3 ,  0


In [41]:
print("Values in the array at the picked indices: ",arr[row_indices, col_indices])

Values in the array at the picked indices:  [11 22 33 41]


In [42]:
arr[row_indices, col_indices] += 10000

In [43]:
print("Changed array: ")
print(arr)

Changed array: 
[[10011    12    13]
 [   21 10022    23]
 [   31    32 10033]
 [10041    42    43]]


In [44]:
arr[row_indices, col_indices] -= 10000

In [45]:
print("Changed array: ")
print(arr)

Changed array: 
[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]


# Ndarray Boolean Indexing

In [46]:
a = np.array([[11,12],[21,22],[31,32]])
print(a)

[[11 12]
 [21 22]
 [31 32]]


In [53]:
# The element-wise comparison produces a boolean array with the same shape as a.
# NOTE(review): the name `filter` shadows Python's builtin filter(); renaming it
# would also require updating the later cells that read this variable.
filter =  (a > 15)
print(filter)
print()
print(type(filter))

[[False False]
 [ True  True]
 [ True  True]]

<class 'numpy.ndarray'>


In [63]:
print(filter.dtype)

bool


In [54]:
# We can now select or get those elements that meet the criteria of filter
print(a[filter])

[21 22 31 32]


In [57]:
# This can be done directly too
print(a[a > 15])

[21 22 31 32]


In [58]:
print(a[(a > 20) & (a < 30)])

[21 22]


In [60]:
print(a[a % 2 == 0])     # Even elements of the array

[12 22 32]


In [61]:
a[a % 2 == 0] += 100
print(a)

[[ 11 112]
 [ 21 122]
 [ 31 132]]


In [62]:
a[a % 2 == 0] -= 100
print(a)

[[11 12]
 [21 22]
 [31 32]]


# Ndarray Datatypes and Operations

In [67]:
# With no explicit dtype, NumPy (not Python) infers one from the values.
# For Python ints it uses the platform's default integer type, so this may
# print int32 or int64 depending on the OS and NumPy version.
a = np.array([11,12])
print(a.dtype)

int32


In [66]:
ad = np.array([11.0,12.0])   
print(ad.dtype)

float64


In [68]:
a = np.array([11,12], dtype=np.int64)   
print(a.dtype)

int64


In [69]:
# Forcing an integer dtype discards the fractional part of each float,
# i.e. it truncates toward zero. That matches floor only for non-negative
# values: -11.7 would become -11, not -12.
a = np.array([11.1,12.7], dtype=np.int64)
print(a.dtype)

int64


In [70]:
print(a)

[11 12]


In [71]:
a = np.array([11,12], dtype=np.float64)   
print(a.dtype)
print(a)

float64
[11. 12.]


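Conclusion: Datatypes matter for ndarrays. The dtype determines how every element is stored, and forcing floats into an integer dtype silently discards the fractional part.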

## Arithmetic Array operations:

In [72]:
# Operands for the arithmetic examples: one integer array, one float array.
# `np.int` was only an alias for the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24 (AttributeError), so use `int` directly.
x = np.array([[111, 112],[121,122]], dtype=int)
y = np.array([[211.1, 211.1],[221.1,222.1]], dtype=np.float64)

In [73]:
print(x)
print()
print(y)

[[111 112]
 [121 122]]

[[211.1 211.1]
 [221.1 222.1]]


In [74]:
print(x + y)     # Result will be upcasted to avoid losing precision 
print()
print(np.add(x,y))

[[322.1 323.1]
 [342.1 344.1]]

[[322.1 323.1]
 [342.1 344.1]]


In [75]:
print(x - y)
print()
print(np.subtract(x,y))

[[-100.1  -99.1]
 [-100.1 -100.1]]

[[-100.1  -99.1]
 [-100.1 -100.1]]


In [76]:
print(x * y)
print()
print(np.multiply(x,y))

[[23432.1 23643.2]
 [26753.1 27096.2]]

[[23432.1 23643.2]
 [26753.1 27096.2]]


In [77]:
print(x / y)
print()
print(np.divide(x,y))

[[0.52581715 0.53055424]
 [0.54726368 0.54930212]]

[[0.52581715 0.53055424]
 [0.54726368 0.54930212]]


In [78]:
print(np.sqrt(x))
print()
print(np.sqrt(y))

[[10.53565375 10.58300524]
 [11.         11.04536102]]

[[14.52928078 14.52928078]
 [14.86943173 14.90301983]]


In [79]:
print(np.exp(x))
print()
print(np.exp(y))

[[1.60948707e+48 4.37503945e+48]
 [3.54513118e+52 9.63666567e+52]]

[[4.78151068e+91 4.78151068e+91]
 [1.05319781e+96 2.86288848e+96]]


# Statistical, Sorting, and Set Operations

In [1]:
import numpy as np

In [7]:
a = 10 * np.random.randn(2,5)
print(a)

[[ -5.47634462  -3.11771304 -15.87690285  -8.67098768  12.7572898 ]
 [ -9.70490456  -8.60274023  10.27757184  16.78131866   6.58209935]]


In [8]:
# Compute the mean for all elements
print(a.mean())

-0.5051313328226166


In [9]:
# Compute the means by row
print(a.mean(axis=1))

[-4.07693168  3.06666901]


In [10]:
# Compute the means by column
print(a.mean(axis=0))

[-7.59062459 -5.86022664 -2.7996655   4.05516549  9.66969458]


In [11]:
# Sum of all elements
print(a.sum())

-5.051313328226166


In [12]:
# Compute the median by row
print(np.median(a, axis=1))

[-5.47634462  6.58209935]


In [13]:
# Compute the median by column
print(np.median(a, axis=0))

[-7.59062459 -5.86022664 -2.7996655   4.05516549  9.66969458]


## Sorting:

In [14]:
arr = np.random.randn(10)
print(arr)

[ 0.45513373  0.90323114 -1.6992152   1.48364751  0.15184628 -1.7921735
 -0.25472317  1.62359524  1.2456805  -0.03514418]


In [15]:
s = np.array(arr)

In [16]:
s.sort()
print(s)

[-1.7921735  -1.6992152  -0.25472317 -0.03514418  0.15184628  0.45513373
  0.90323114  1.2456805   1.48364751  1.62359524]


In [17]:
print(arr)

[ 0.45513373  0.90323114 -1.6992152   1.48364751  0.15184628 -1.7921735
 -0.25472317  1.62359524  1.2456805  -0.03514418]


In [19]:
# Inplace Sorting - Sorting the original array
arr.sort()
print(arr)

[-1.7921735  -1.6992152  -0.25472317 -0.03514418  0.15184628  0.45513373
  0.90323114  1.2456805   1.48364751  1.62359524]


In [20]:
print(arr)

[-1.7921735  -1.6992152  -0.25472317 -0.03514418  0.15184628  0.45513373
  0.90323114  1.2456805   1.48364751  1.62359524]


## Finding Unique Elements:

In [21]:
array = np.array([1,2,1,4,2,1,4,2])
print(np.unique(array))

[1 2 4]


## Set operations with np.array data type:

In [22]:
s1 = np.array(['desk', 'chair', 'bulb'])
s2 = np.array(['lamp', 'bulb', 'chair'])

print(s1, s2)

['desk' 'chair' 'bulb'] ['lamp' 'bulb' 'chair']


In [24]:
print(np.intersect1d(s1, s2))

['bulb' 'chair']


In [26]:
print(np.union1d(s1, s2))

['bulb' 'chair' 'desk' 'lamp']


In [27]:
print(np.setdiff1d(s1, s2))            # Elements in s1 but not in s2

['desk']


In [28]:
# Boolean mask over s1: True where that element of s1 also occurs in s2.
# np.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
print(np.isin(s1, s2))

[False  True  True]


# Broadcasting

In [1]:
import numpy as np

In [2]:
start = np.zeros((4,3))
print(start)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [10]:
add_row = np.array([1, 0, 2])
print(add_row)

[1 0 2]


In [11]:
y = start + add_row
print(y)

[[1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]]


In [12]:
add_col = np.array([[0, 1, 2, 3]])
add_col = add_col.T
print(add_col)

[[0]
 [1]
 [2]
 [3]]


In [13]:
z = start + add_col
print(z)

[[0. 0. 0.]
 [1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]]


In [14]:
add_scalar = np.array([1])

In [15]:
w = start + add_scalar
print(w)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


# Speed Test ndarray vs. list

In [16]:
from numpy  import arange
from timeit import Timer

size = 1000000
timeits = 1000

In [18]:
# Create the ndarray with values 0,1,2,.........,size-1
nd_array = arange(size)
print(type(nd_array))

<class 'numpy.ndarray'>


In [19]:
print(nd_array)

[     0      1      2 ... 999997 999998 999999]


In [20]:
# Timer takes the statement to time as a string, plus a setup string.
# The setup imports nd_array from __main__ so the timed statement can see it.

timer_numpy = Timer("nd_array.sum()","from __main__ import nd_array")

# timeit(timeits) returns the total time for `timeits` runs;
# dividing by `timeits` gives the average time per call.
print("Time taken by numpy ndarray: %f seconds" %
     (timer_numpy.timeit(timeits)/timeits))

Time taken by numpy ndarray: 0.000569 seconds


In [21]:
# Create a plain Python list with values 0,1,2,.........,size-1
a_list = list(range(size))
print(type(a_list))

<class 'list'>


In [23]:
# Time the builtin sum() over the Python list for comparison.
# A separate name is used so the ndarray timer is not overwritten and the
# variable name says what is actually being measured.
timer_list = Timer("sum(a_list)","from __main__ import a_list")

# Average time per call over `timeits` runs.
print("Time taken by list: %f seconds" %
     (timer_list.timeit(timeits)/timeits))

Time taken by list: 0.032639 seconds
