# How to create a Rank 1 numpy array

In [1]:
import numpy as np
an_array = np.array([3, 33, 333])
print(type(an_array))

<class 'numpy.ndarray'>


In [2]:
print(an_array.shape)

(3,)


In [3]:
print(an_array[0], an_array[1], an_array[2])

3 33 333


In [4]:
# ndarrays are mutable
an_array[0] = 888
print(an_array)

[888  33 333]


# How to create a Rank 2 numpy array

In [5]:
# Build a rank 2 array from nested lists: 2 rows x 3 columns -> shape (2, 3)
another = np.array([
    [11, 12, 13],
    [21, 22, 23],
])
# Show the array, then its shape, each on its own line
print(another, another.shape, sep='\n')
# Rank 2 arrays are indexed with [row, column]; show the first two entries of row 0
print(*another[0, :2])

[[11 12 13]
 [21 22 23]]
(2, 3)
11 12


# Many ways to create numpy arrays

In [6]:
import numpy as np

# create 2x2 array of zeros
ex1 = np.zeros((2,2))
print(ex1)

[[0. 0.]
 [0. 0.]]


In [7]:
# create 2x2 array filled with 9.0
ex2 = np.full((2,2), 9.0)
print(ex2)

[[9. 9.]
 [9. 9.]]


In [8]:
# create a 2x2 matrix with diagonal 1s and the other 0
ex3 = np.eye(2,2)
print(ex3)

[[1. 0.]
 [0. 1.]]


In [9]:
# create an array of ones
ex4 = np.ones((1,2))
print(ex4)

[[1. 1.]]


In [11]:
# ex4 is actually rank 2: a 1x2 array
print(ex4.shape)

# which means we need to use two indices (row, column) to access an element
print()
print(ex4[0,1])

(1, 2)

1.0


In [12]:
# create a 2x2 array of random floats between 0 and 1
ex5 = np.random.random((2,2))
print(ex5)

[[0.05727305 0.07688889]
 [0.67108721 0.44794706]]


# Array Indexing

## Slice indexing:

In [13]:
import numpy as np

# Rank 2 array of shape (3, 4)
an_array = np.array([
    [11, 12, 13, 14],
    [21, 22, 23, 24],
    [31, 32, 33, 34]    
])

print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [15]:
# Slice out rows 0-1 and columns 1-2; a_slice shares its data with an_array
a_slice = an_array[:2, 1:3]
# Wrapping the same slice in np.array() creates an independent copy instead:
a_new_copy_array = np.array(an_array[:2, 1:3]) # changes to the copy won't affect the original array
print(a_slice)

[[12 13]
 [22 23]]


In [16]:
# When you modify a slice, you actually modify the underlying array.
print('Before:', an_array[0,1])
a_slice[0,0] = 1000
print('After:', an_array[0,1])

Before: 12
After: 1000


## Use both integer indexing & slice indexing
To create differently shaped matrices

In [23]:
# Create a Rank 2 array of shape (3, 4)
an_array = np.array([
    [11, 12, 13, 14],
    [21, 22, 23, 24],
    [31, 32, 33, 34]    
])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [24]:
# Using both integer indexing and slicing generates an array of lower rank
row_rank1 = an_array[1, :] # rank 1 view
print(row_rank1, row_rank1.shape)

[21 22 23 24] (4,)


In [25]:
# Slicing alone: generates an array of the same rank as the an_array
row_rank2 = an_array[1:2, :] # rank 2 view
print(row_rank2, row_rank2.shape)

[[21 22 23 24]] (1, 4)


In [26]:
# We can do the same thing for columns of an array

col_rank1 = an_array[:, 1]
col_rank2 = an_array[:, 1:2]
print(col_rank1, col_rank1.shape)
print(col_rank2, col_rank2.shape)

[12 22 32] (3,)
[[12]
 [22]
 [32]] (3, 1)


## Array indexing for changing elements

In [27]:
# Create a new array
an_array = np.array([
    [11, 12, 13],
    [21, 22, 23],
    [31, 32, 33],
    [41, 42, 43]
])
print('Original Array:')
print(an_array)

Original Array:
[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]


In [28]:
# Index arrays: one column index per row, and the row numbers 0..3
col_indices = np.array([0, 1, 2, 0])
row_indices = np.arange(4)

# Each label is preceded by a blank line
print()
print('Col indices picked:', col_indices)
print()
print('Rows indices picked:', row_indices)


Col indices picked: [0 1 2 0]

Rows indices picked: [0 1 2 3]


In [30]:
# Examine the pairings of row_indices and col_indices
for row, col in zip(row_indices, col_indices):
    print(row, ',', col)

0 , 0
1 , 1
2 , 2
3 , 0


In [31]:
# Select one element for each row
print('Values in the array at those indices:', an_array[row_indices, col_indices])

Values in the array at those indices: [11 22 33 41]


In [32]:
# Change one element from each row using the indices selected
# (this modifies an_array in place, so re-running the cell adds 10000 again)
an_array[row_indices, col_indices] += 10000

print('\nChanged Array:')
print(an_array)


Changed Array:
[[10011    12    13]
 [   21 10022    23]
 [   31    32 10033]
 [10041    42    43]]


# Boolean Indexing

## Array Indexing for changing elements:

In [34]:
# Create a 3x2 array
an_array = np.array([
    [11, 12],
    [21, 22],
    [31, 32]
])
print(an_array)

[[11 12]
 [21 22]
 [31 32]]


In [36]:
# Create a filter which will be boolean values for whether each element meets the criteria
# NOTE: the name `filter` shadows Python's built-in filter() for the rest of the session
filter = (an_array > 15)
filter

array([[False, False],
       [ True,  True],
       [ True,  True]])

In [38]:
# We can now select just those elements which meet that criteria
print(an_array[filter])

[21 22 31 32]


In [42]:
# for short, we could just use the approach below without the need for filter
an_array[an_array > 15]

array([21, 22, 31, 32])

In [43]:
an_array[(an_array > 20) & (an_array < 30)]

array([21, 22])

In [44]:
an_array[an_array % 2 == 0]

array([12, 22, 32])

In [45]:
# Change element based on conditions
an_array[an_array % 2 == 0] += 100
print(an_array)

[[ 11 112]
 [ 21 122]
 [ 31 132]]


# Datatypes and Array Operations

## Datatypes:

In [46]:
ex1 = np.array([11, 12])
print(ex1.dtype)

int32


In [47]:
ex2 = np.array([11.0, 12.0])
print(ex2.dtype)

float64


In [48]:
# we can tell NumPy which data type to use via the dtype argument
ex3 = np.array([11, 12], dtype=np.int64)
print(ex3.dtype)

int64


In [51]:
# Force floats into integers
# (the fractional part is dropped, not rounded: 12.7 becomes 12)
ex4 = np.array([11.1, 12.7], dtype=np.int64)
print(ex4.dtype)
print(ex4)

int64
[11 12]


In [52]:
# Force integers to floats
# np.float64 is spelled out explicitly: the `np.float` alias (plain Python
# float) was deprecated in NumPy 1.20 and removed in NumPy 1.24.
ex5 = np.array([11, 12], dtype=np.float64)
print(ex5.dtype)
print(ex5)

float64
[11. 12.]


## Arithmetic Array Operations:

In [53]:
# x holds integers and y holds floats; both dtypes are spelled out explicitly.
# np.int64 replaces the `np.int` alias, which was deprecated in NumPy 1.20
# and removed in NumPy 1.24.
x = np.array([
    [111, 112],
    [121, 122]
], dtype=np.int64)
y = np.array([
    [211.1, 212.1],
    [221.1, 222.2]
], dtype=np.float64)

print(x)
print()
print(y)

[[111 112]
 [121 122]]

[[211.1 212.1]
 [221.1 222.2]]


In [54]:
# Add
print(x+y)
print()
print(np.add(x, y))

[[322.1 324.1]
 [342.1 344.2]]

[[322.1 324.1]
 [342.1 344.2]]


In [55]:
# Subtract
print(x - y)
print()
print(np.subtract(x, y))

[[-100.1 -100.1]
 [-100.1 -100.2]]

[[-100.1 -100.1]
 [-100.1 -100.2]]


In [56]:
# Multiply
print(x*y)
print()
print(np.multiply(x,y))

[[23432.1 23755.2]
 [26753.1 27108.4]]

[[23432.1 23755.2]
 [26753.1 27108.4]]


In [57]:
# Divide 
print(x/y)
print()
print(np.divide(x,y))

[[0.52581715 0.52805281]
 [0.54726368 0.54905491]]

[[0.52581715 0.52805281]
 [0.54726368 0.54905491]]


In [59]:
# Square root
print(np.sqrt(x))

[[10.53565375 10.58300524]
 [11.         11.04536102]]


In [60]:
# Exponent (e ** x)
print(np.exp(x))

[[1.60948707e+48 4.37503945e+48]
 [3.54513118e+52 9.63666567e+52]]


# Statistical Methods, Sorting, and Set Operations:

## Statistical Methods

In [62]:
# Setup a random 2x5 matrix (randn samples scaled by 10)
arr = 10 * np.random.randn(2,5)
print(arr)

[[  8.26254209   5.42186726   2.8787701   11.90992945  16.64214646]
 [  6.13715939   4.41542127   2.65045697 -11.98688235 -13.37884694]]


In [64]:
# Mean from all elements
print(arr.mean())

3.2952563701470696


In [65]:
# Means by row
print(arr.mean(axis = 1))

[ 9.02305107 -2.43253833]


In [66]:
# Means  by column
print(arr.mean(axis = 0))

[ 7.19985074  4.91864426  2.76461353 -0.03847645  1.63164976]


In [67]:
# Sum of all elements
print(arr.sum())

32.952563701470694


In [68]:
# Medians by row
print(np.median(arr, axis=1))

[8.26254209 2.65045697]


## Sorting

In [77]:
# Create a 10 element array of random integers
# (randn samples scaled by 10, then cast to integers).
# np.int64 replaces the `np.int` alias, which was removed in NumPy 1.24.
unsorted = np.array(10 * np.random.randn(10), dtype=np.int64)
print(unsorted)

[ -7 -12  -1  -5   5 -13  -1  29  -2  -6]


In [78]:
# Create a sorted copy: np.sort() returns a new array and leaves `unsorted` untouched.
# The result is named `sorted_arr` so that Python's built-in sorted() is not shadowed.
sorted_arr = np.sort(unsorted)

print(sorted_arr)
print()
print(unsorted)

[-13 -12  -7  -6  -5  -2  -1  -1   5  29]

[ -7 -12  -1  -5   5 -13  -1  29  -2  -6]


In [80]:
# Finding unique elements: np.unique drops the duplicates (the result comes back sorted)
array = np.array([1,2,1,5,2,4,4,5,3,1,2])
print(np.unique(array))

[1 2 3 4 5]


## Set Operations with np.array data type:

In [83]:
s1 = np.array(['desk', 'chair', 'bulb'])
s2 = np.array(['lamp', 'bulb', 'chair'])

print(s1, s2)

['desk' 'chair' 'bulb'] ['lamp' 'bulb' 'chair']


In [84]:
print(np.intersect1d(s1,s2))

['bulb' 'chair']


In [85]:
print(np.union1d(s1,s2))

['bulb' 'chair' 'desk' 'lamp']


In [86]:
# elements from s1 that are not in s2
print(np.setdiff1d(s1,s2))

['desk']


In [88]:
# which elements of s1 are also in s2
# np.isin replaces np.in1d, which is deprecated as of NumPy 2.0;
# for 1-D inputs it yields the same boolean mask.
in_both = np.isin(s1, s2)
print(in_both)
# use the mask to pick the matching elements out of s1
print(s1[in_both])

[False  True  True]
['chair' 'bulb']


# Broadcasting:

In [89]:
import numpy as np

start = np.zeros((4,3))
print(start)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [90]:
# create a rank 1 ndarray with 3 values
add_rows = np.array([1, 0, 2])
print(add_rows)

[1 0 2]


In [91]:
print(start + add_rows)

[[1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]]


In [92]:
# Build a 4x1 column vector (a 1x4 row, transposed) so that it can be
# broadcast across the columns of a 4-row array.
add_cols = np.array([[0, 1, 2, 3]]).T

print(add_cols)

[[0]
 [1]
 [2]
 [3]]


In [93]:
# add to each column of 'start' using broadcasting
print(start + add_cols)

[[0. 0. 0.]
 [1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]]


In [94]:
# broadcast in both dimensions: a one-element array is stretched over every row and column
add_scalar = np.array([1])
print(start + add_scalar)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


# Speedtest: ndarrays vs lists

In [95]:
from numpy import arange
from timeit import Timer

size = 1000000
timeits = 1000

In [96]:
# create the ndarray with values 0,1,2...,size-1
nd_array = arange(size)
print(type(nd_array))

<class 'numpy.ndarray'>


In [97]:
# Timer expects the statement to time as a string, followed by a setup statement;
# here we pass nd_array.sum()
timer_numpy = Timer('nd_array.sum()', 'from __main__ import nd_array')

# timeit() returns the total time for `timeits` runs, so divide to get the time per call
print('Time taken by numpy ndarray: %f seconds' % (timer_numpy.timeit(timeits)/timeits))

Time taken by numpy ndarray: 0.000663 seconds


In [98]:
# create the list with value 0,1,2..,size-1
a_list = list(range(size))
print(type(a_list))

<class 'list'>


In [99]:
# Timer expects the statement to time as a string, followed by a setup statement;
# here we pass the built-in sum(a_list)
timer_list = Timer('sum(a_list)', 'from __main__ import a_list')

# Label fixed: this measurement is for the Python list, not the numpy ndarray.
# timeit() returns the total time for `timeits` runs, so divide to get the time per call.
print('Time taken by Python list: %f seconds' % (timer_list.timeit(timeits)/timeits))

Time taken by numpy ndarray: 0.046062 seconds
