# Introduction to NumPy

In [1]:
# problem calculate speeds given distances and time the non-numpy way

distances = [10,15,17,26,20]
times = [0.3,0.47,0.55,1.20,1.0]

In [2]:
# Calculate speeds with Python

speeds = []
for i in range(len(distances)):
    speeds.append(distances[i]/times[i])
speeds

[33.333333333333336,
 31.914893617021278,
 30.909090909090907,
 21.666666666666668,
 20.0]

In [3]:
#alt way is to use a list comprehension 

[d/t for d,t in zip(distances,times)]

[33.333333333333336,
 31.914893617021278,
 30.909090909090907,
 21.666666666666668,
 20.0]

In [6]:
# another example getting the total sum of the purchases made

product_quantities = [13,5,6,10,11]
prices = [1.2,6.5,1.0,4.8,5.0]

total = sum([q*p for q,p in zip(product_quantities,prices)])
total

157.1

In [5]:
#Using numpy to solve the above problems using numpy

import numpy as np

In [8]:
distances = np.array(distances)
times = np.array(times)
product_quantities = np.array(product_quantities)
prices = np.array(prices)

#checking the type of one of the new arrays

distances

type(distances)

numpy.ndarray

In [9]:
# example of a 2d array using numpy

A = np.array([[1,2],[3,4]])
A

array([[1, 2],
       [3, 4]])

In [10]:
# Create a length-10 integer array filled with zeros

np.zeros(10,dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [11]:
# Create a 3x5 floating-point array filled with ones
np.ones(shape=(3,5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [13]:
# Create an array filled with a linear sequence 
# Starting at 0, ending at 20, stepping by 2

np.arange(start=0,stop=20,step=2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [14]:
# Create an array of 20 values evenly spaced between 0 and 1
np.linspace(0,1,20)

array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ])

In [15]:
# examine the attributes of numpy arrays

A = np.ones(shape=(3,4),dtype=float)
A

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [18]:
# to check the dimensions of the array
print("2d Array A dimensions are {}".format(A.ndim))

# to check the shape of the array
print("The shape of 2d Array is {}".format(A.shape))

# to check the total number of elements in the array
print("The total number of elements in the array is {}".format(A.size))

2d Array A dimensions are 2
The shape of 2d Array is (3, 4)
The total number of elements in the array is 12


In [19]:
# using numpy's vectorized operations to calculate speed given distance and time numpy arrays
speeds = distances/times
speeds

array([33.33333333, 31.91489362, 30.90909091, 21.66666667, 20.        ])

In [20]:
# using numpy's vectorized operations to calculate the sum of product_quantities and prices

values = product_quantities * prices
total = values.sum()
print("The values of product quantities times prices are {} and the sum is {}".format(values,total))

The values of product quantities times prices are [15.6 32.5  6.  48.  55. ] and the sum is 157.1


In [21]:
# basic math operations on an numpy array

x = np.arange(start = 0, stop=20, step= 2)
x

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [22]:
x + 1

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19])

In [23]:
x * 2

array([ 0,  4,  8, 12, 16, 20, 24, 28, 32, 36])

In [24]:
x/2

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [25]:
# getting the sin of each element
np.sin(x)

array([ 0.        ,  0.90929743, -0.7568025 , -0.2794155 ,  0.98935825,
       -0.54402111, -0.53657292,  0.99060736, -0.28790332, -0.75098725])

In [26]:
# getting the exponent of each element
np.exp(x)

array([1.00000000e+00, 7.38905610e+00, 5.45981500e+01, 4.03428793e+02,
       2.98095799e+03, 2.20264658e+04, 1.62754791e+05, 1.20260428e+06,
       8.88611052e+06, 6.56599691e+07])

In [27]:
# getting the natural log of x + 1
np.log(x+1)

array([0.        , 1.09861229, 1.60943791, 1.94591015, 2.19722458,
       2.39789527, 2.56494936, 2.7080502 , 2.83321334, 2.94443898])

In [28]:
# the sqrt of x
np.sqrt(x)

array([0.        , 1.41421356, 2.        , 2.44948974, 2.82842712,
       3.16227766, 3.46410162, 3.74165739, 4.        , 4.24264069])

In [29]:
one_dim = np.linspace(-0.5,0.6,12)
one_dim

array([-0.5, -0.4, -0.3, -0.2, -0.1,  0. ,  0.1,  0.2,  0.3,  0.4,  0.5,
        0.6])

In [30]:
one_dim[0]

-0.5

In [31]:
one_dim[0] = 1
one_dim

array([ 1. , -0.4, -0.3, -0.2, -0.1,  0. ,  0.1,  0.2,  0.3,  0.4,  0.5,
        0.6])

In [32]:
#creating a 2d array 

x2 = np.array([[2,3],[3,6],[7,8]])
x2

array([[2, 3],
       [3, 6],
       [7, 8]])

In [33]:
x2[0,0] = 1
x2

array([[1, 3],
       [3, 6],
       [7, 8]])

In [34]:
# reshaping arrays using numpy

#convert a 1d array to a 4 x 3 array

one_dim.reshape(4,3)

array([[ 1. , -0.4, -0.3],
       [-0.2, -0.1,  0. ],
       [ 0.1,  0.2,  0.3],
       [ 0.4,  0.5,  0.6]])

In [35]:
# to flatten a 2d array
x2.flatten()

array([1, 3, 3, 6, 7, 8])

# using numpy for simulations

In [36]:
# using randint to simulate a coin flip

np.random.randint(low = 0, high = 2, size = 1)

array([1])

In [37]:
# getting the sum of 10 coin flips

experiment = np.random.randint(0,2,size=10)
print("Array {} , sum {}".format(experiment,experiment.sum()))

Array [1 0 0 1 1 0 0 0 1 1] , sum 5


In [38]:
# creating a simulation to determine distribution of the number of heads when throwing 10 coins at a time. 
coin_matrix = np.random.randint(low = 0, high =2, size = (10000,10))
print(coin_matrix[:5,:])

[[0 1 1 0 1 0 1 0 0 0]
 [0 0 1 0 0 1 0 0 1 0]
 [1 0 1 1 1 0 1 1 0 0]
 [1 1 1 0 0 1 1 1 1 1]
 [0 1 0 0 1 0 0 0 1 0]]


In [41]:
# sum all the rows to get the total number of heads as well as other descriptive statistics
counts = coin_matrix.sum(axis = 1)
print(counts[:25])
print(counts.mean())
print(np.median(counts))
print(counts.min(),counts.max())
print(counts.std())

[4 3 6 8 3 4 5 6 5 6 7 3 4 4 7 5 5 7 4 4 4 5 7 4 5]
5.0035
5.0
0 10
1.5867853509533039


In [43]:
# using bincount to get the distribution of the number of heads as well as percentages

np.bincount(counts)

unique_numbers = np.arange(0,11)
observed_times = np.bincount(counts)
print("=====================\n")

for n,count in zip(unique_numbers,observed_times):
    print("{} heads observed {} times ({:0.1f}%)".format(n,count,100*count/2000))


0 heads observed 6 times (0.3%)
1 heads observed 94 times (4.7%)
2 heads observed 443 times (22.1%)
3 heads observed 1192 times (59.6%)
4 heads observed 2048 times (102.4%)
5 heads observed 2431 times (121.5%)
6 heads observed 2068 times (103.4%)
7 heads observed 1150 times (57.5%)
8 heads observed 454 times (22.7%)
9 heads observed 98 times (4.9%)
10 heads observed 16 times (0.8%)
