# Introduction to Python Data Analytics
# Part 2: NumPy

Author: Kang P. Lee <br>
References:
- NumPy official website (http://www.numpy.org/) 
- Python Data Science Handbook by Jake VanderPlas (http://shop.oreilly.com/product/0636920034919.do)

## ▪ Import NumPy Library

In [1]:
import numpy as np

## ▪ Create NumPy Arrays from Python Lists

In [2]:
x = np.array([1, 2, 3, 4, 5])
x

array([1, 2, 3, 4, 5])

In [3]:
# A list mixing integers and strings: the resulting array stores every element as a string.
x = np.array([1, 2, 3, "4", "5"])
x

array(['1', '2', '3', '4', '5'], dtype='<U11')

Unlike a Python list, a NumPy array cannot hold elements of different types. If the types do not match, NumPy upcasts the elements to a common type where possible (here, the integers are upcast to strings).

In [4]:
# Requesting dtype=int makes NumPy convert every element, including the numeric strings "4" and "5", to an integer.
x = np.array([1, 2, 3, "4", "5"], dtype=int)
x

array([1, 2, 3, 4, 5])

In [5]:
x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
x

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

## ▪ Create NumPy Arrays from Scratch

These NumPy functions are very useful when you need to quickly generate an array of values that follow some rule. 

In [6]:
# np.zeros(shape, dtype=float, order='C')
# Build a new array of the requested shape and dtype in which every element is 0

x = np.zeros(shape=10, dtype=int)
x

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [7]:
# np.ones(shape, dtype=None, order='C')
# Build a new array of the requested shape and dtype in which every element is 1

x = np.ones(shape=(5, 5), dtype=float)
x

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [8]:
# np.full(shape, fill_value, dtype=None, order='C')
# Build a new array of the requested shape in which every element equals `fill_value`

x = np.full(shape=(5, 5), fill_value=3.14)
x

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [9]:
# np.arange([start,] stop[, step,], dtype=None)
# Return evenly spaced values within a given interval; start is included, stop is excluded

x = np.arange(0, 10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
x = np.arange(10)                   # You can skip start if it is 0
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
x = np.arange(0, 10, 2)             # Step by 2
x

array([0, 2, 4, 6, 8])

In [12]:
# np.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)
# Produce `num` evenly spaced numbers over an interval; both endpoints are included by default

x = np.linspace(0, 1, num=5)
x

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [13]:
# np.random.normal(loc=0.0, scale=1.0, size=None)
# Return random samples from a normal (Gaussian) distribution; loc is the mean and scale is the standard deviation
# No seed has been set at this point, so the values differ from run to run

x = np.random.normal(0, 1, (3, 3))
x

array([[-1.18816465, -0.06784476, -1.44480047],
       [ 0.08615785,  0.88044314,  1.00926238],
       [ 0.06655195,  0.775454  ,  0.70176136]])

In [14]:
# np.random.randint(low, high=None, size=None, dtype='l')
# Return random integers from `low` (inclusive) to `high` (exclusive).

x = np.random.randint(0, 10, (3, 3))       # 3-by-3 array of integers from 0 to 9
x

array([[1, 5, 0],
       [9, 4, 8],
       [3, 5, 4]])

In [15]:
# np.random.seed(seed=None)
# Seed the generator so that the random numbers drawn afterwards are reproducible

np.random.seed(seed=0)               # Fix the seed first ...
np.random.randint(0, 10, (3, 3))     # ... so this draw gives the same array on every run

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2]])

In [16]:
# np.random.choice(a, size=None, replace=True, p=None)
# Generate a random sample from a given 1-D array; an integer `a` is treated as np.arange(a)

np.random.choice(10, 3, replace=False)        # 3 distinct values from 0..9 (without replacement)

array([3, 5, 1])

## Exercises

Create a 3-by-5 array of random three-digit integers.

In [17]:
# Your answer here


Create an array of 10 random integers between 0 (inclusive) and 100 (inclusive) without replacement.

In [18]:
# Your answer here


## ▪ NumPy Array Attributes

In [19]:
x = np.random.randint(0, 100, (3, 3))
x

array([[39, 87, 46],
       [88, 81, 37],
       [25, 77, 72]])

In [20]:
x.ndim          # number of dimensions (axes)

2

In [21]:
x.shape         # size of the array along each dimension

(3, 3)

In [22]:
x.size          # total number of elements

9

In [23]:
x.dtype         # data type of the elements (the default integer width depends on the platform)

dtype('int32')

## ▪ Array Indexing & Slicing

In [24]:
x = np.random.randint(0, 100, 10)
x

array([ 9, 20, 80, 69, 79, 47, 64, 82, 99, 88])

In [25]:
x[0]

9

In [26]:
x[-1]

88

In [27]:
x[3:-3]            # from index 3 up to, but not including, the third element from the end

array([69, 79, 47, 64])

In [28]:
x[3:-3:2]          # stepping by 2

array([69, 47])

In [29]:
x[::]              # start, stop and step all left at their defaults: every element

array([ 9, 20, 80, 69, 79, 47, 64, 82, 99, 88])

In [30]:
x[::-1]            # a step of -1 walks the array backwards: the elements in reverse order

array([88, 99, 82, 64, 47, 79, 69, 80, 20,  9])

In [31]:
x = np.random.randint(0, 100, (5, 5))
x

array([[49, 29, 19, 19, 14],
       [39, 32, 65,  9, 57],
       [32, 31, 74, 23, 35],
       [75, 55, 28, 34,  0],
       [ 0, 36, 53,  5, 38]])

In [32]:
x[0]

array([49, 29, 19, 19, 14])

In [33]:
x[0, 1]

29

In [34]:
x[:2, :3]          # first two rows, first three columns

array([[49, 29, 19],
       [39, 32, 65]])

## Exercises

Suppose you have an array x like below:

In [35]:
x = np.random.randint(0, 1000, (10, 10))
x

array([[488, 756, 273, 335, 388, 617,  42, 442, 543, 888],
       [257, 321, 999, 937,  57, 291, 870, 119, 779, 430],
       [ 82,  91, 896, 398, 611, 565, 908, 633, 938,  84],
       [203, 324, 774, 964,  47, 639, 131, 972, 868, 180],
       [846, 143, 660, 227, 954, 791, 719, 909, 373, 853],
       [560, 305, 581, 169, 675, 448,  95, 197, 606, 256],
       [881, 690, 292, 930, 816, 861, 387, 610, 554, 973],
       [368, 999, 917, 201, 383, 512, 906, 370, 555, 954],
       [383,  23, 699, 130, 377,  98, 574, 931, 734, 123],
       [963, 594, 942, 739, 148, 209, 562, 411, 782,  41]])

Get the element on the third row and the fifth column of x.

In [36]:
# Your answer here


Extract the first three rows and the first five columns of x.

In [37]:
# Your answer here


## ▪ Array Concatenation and Splitting

In [38]:
# np.concatenate((a1, a2, ...), axis=0)
# Join a sequence of arrays along an existing axis

x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
np.concatenate([x, y])                   # Concatenate one-dimensional arrays end to end.

array([1, 2, 3, 4, 5, 6])

In [39]:
# Show an image from a URL inline in the notebook.
# NOTE(review): presumably a diagram of the array axes, given the sentence that follows -- confirm the link still resolves.
from IPython.display import Image
Image(url="https://i.stack.imgur.com/DL0iQ.jpg")

In NumPy and pandas, axis 0 refers to the row axis, while axis 1 refers to the column axis.

In [40]:
x = np.array([[1, 2, 3], [4, 5, 6]])
x

array([[1, 2, 3],
       [4, 5, 6]])

In [41]:
np.concatenate([x, x], axis=0)           # Concatenate two-dimensional arrays along the axis 0 (row axis).

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [42]:
np.concatenate([x, x], axis=1)           # Concatenate along the axis 1 (column axis).

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [43]:
# np.split(ary, indices_or_sections, axis=0)
# Split an array into multiple sub-arrays

x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
x1, x2, x3 = np.split(x, [3, 5])       # Split at indices 3 and 5, giving x[:3], x[3:5] and x[5:].
print(x1, x2, x3)

[1 2 3] [4 5] [ 6  7  8  9 10]


Splitting is the opposite of concatenation.

## ▪ Computation on NumPy Arrays

In [44]:
x = np.array([1, 2, 3, 4, 5])
x + 5

array([ 6,  7,  8,  9, 10])

In [45]:
y = [1, 2, 3, 4, 5]
y + 5          # Raises TypeError on purpose: for lists, + means concatenation, not element-wise addition

TypeError: can only concatenate list (not "int") to list

Note that plain Python lists do not support element-wise arithmetic: for lists, `+` means concatenation, so adding an integer to a list raises a `TypeError`.

In [46]:
x = np.array([1, 2, 3, 4, 5])
x ** 2         # x to the power of 2

array([ 1,  4,  9, 16, 25], dtype=int32)

In [47]:
x = np.array([1, 2, 3, 4, 5])
-x

array([-1, -2, -3, -4, -5])

In [48]:
x = np.array([-1, 2, -3, 4, -5])
np.abs(x)     # absolute value

array([1, 2, 3, 4, 5])

In [49]:
x = [1, 2, 3]
np.exp(x)      # exponential (= e^x)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [50]:
x = [1, 2, 3]
np.power(3, x) # power (= 3^x)

array([ 3,  9, 27], dtype=int32)

In [51]:
x = [1, 2, 4, 10]
np.log(x)      # ln(x)

array([0.        , 0.69314718, 1.38629436, 2.30258509])

In [52]:
x = [1, 2, 4, 10]
np.log2(x)     # log2(x)

array([0.        , 1.        , 2.        , 3.32192809])

In [53]:
x = [1, 2, 4, 10]
np.log10(x)    # log10(x)

array([0.        , 0.30103   , 0.60205999, 1.        ])

In [54]:
x = np.array([1, 2, 3])
y = np.array([1, 3, 5])
x + y

array([2, 5, 8])

In [55]:
x * y

array([ 1,  6, 15])

In [56]:
x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6], [7, 8]])
np.dot(x, y)   # matrix product of x and y (not the element-wise product that x * y gives)

array([[19, 22],
       [43, 50]])

## ▪ Aggregations

In [57]:
x = np.random.rand(10)
x

array([0.92115761, 0.08311249, 0.27771856, 0.0093567 , 0.84234208,
       0.64717414, 0.84138612, 0.26473016, 0.39782075, 0.55282148])

In [58]:
x.sum()

4.837620104987658

In [59]:
x.mean()

0.48376201049876577

In [60]:
x.var()         # variance (population variance by default, i.e. ddof=0)

0.0962657496833393

In [61]:
x.std()         # standard deviation (population form by default, i.e. ddof=0)

0.3102672230245072

In [62]:
x.min()

0.009356704856532616

In [63]:
x.max()

0.9211576102371998

In [64]:
x.argmin()      # index of the smallest element

3

In [65]:
x.argmax()      # index of the largest element

0

## ▪ Comparisons

In [66]:
x = np.array([1, 2, 3, 4, 5])
x

array([1, 2, 3, 4, 5])

In [67]:
x < 3                # Element-wise comparison: a boolean array, True where the element is less than 3.

array([ True,  True, False, False, False])

In [68]:
x == 3               # Element-wise equality: True only where the element equals 3.

array([False, False,  True, False, False])

In [69]:
(x < 3) | (x == 3)   # Element-wise OR of the two boolean arrays (same result as x <= 3).

array([ True,  True,  True, False, False])

In [70]:
(x < 3) & (x == 3)   # Element-wise AND: all False, since no element is both less than 3 and equal to 3.

array([False, False, False, False, False])

## ▪ Sorting NumPy Arrays

In [71]:
x = np.random.choice(10, 5, replace=False)
x

array([4, 0, 1, 7, 6])

In [72]:
np.sort(x)

array([0, 1, 4, 6, 7])

In [73]:
x               # x hasn't changed: np.sort returned a sorted copy.

array([4, 0, 1, 7, 6])

In [74]:
x.sort()        # The sort() method sorts the array in place and returns None.
x               # x has changed.

array([0, 1, 4, 6, 7])

In [75]:
x = np.random.choice(10, 5, replace=False)
x

array([4, 1, 7, 6, 5])

In [76]:
np.sort(x)

array([1, 4, 5, 6, 7])

In [77]:
np.argsort(x)   # Return the indices that would sort x, instead of the sorted elements themselves.

array([1, 0, 4, 3, 2], dtype=int64)