7.2 Creating arrays from Existing Data

In [1]:
import numpy as np
numbers = np.array([2, 3, 5, 7, 11])
type(numbers)


numpy.ndarray

In [2]:
numbers

array([ 2,  3,  5,  7, 11])

In [3]:
np.array([[1, 2, 3], [4, 5, 6]])

array([[1, 2, 3],
       [4, 5, 6]])

7.2 Self Check

In [4]:
# 2) Create a one-dimensional array from a list comprehension that produces the even integers from 2 through 20.

np.array([x for x in range(2, 21, 2)])

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [5]:
# 3) Create a 2-by-5 array containing the even integers from 2 through 10 in the first row and the odd integers
# from 1 through 9 in the second row.

np.array([[2, 4, 6, 8, 10], [1, 3, 5, 7, 9]])

array([[ 2,  4,  6,  8, 10],
       [ 1,  3,  5,  7,  9]])

7.3 array Attributes

In [6]:
integers = np.array([[1, 2, 3], [4, 5, 6]])

integers

array([[1, 2, 3],
       [4, 5, 6]])

In [7]:
floats = np.array([0.0, 0.1, 0.2, 0.3, 0.4])

floats

array([0. , 0.1, 0.2, 0.3, 0.4])

In [8]:
# The array function determines an array's element type from its argument's elements

integers.dtype

dtype('int32')

In [9]:
floats.dtype

dtype('float64')

In [10]:
# Determining an array's dimensions

integers.ndim

2

In [11]:
floats.ndim

1

In [12]:
integers.shape

(2, 3)

In [13]:
floats.shape

(5,)

In [14]:
# Determining an array's Number of elements (using size) and number of bytes required to store each element (itemsize)

integers.size

6

In [15]:
integers.itemsize

4

In [16]:
floats.size

5

In [17]:
floats.itemsize

8

In [18]:
# Iterating through a multidimensional array's elements

for row in integers:
    for column in row:
        print(column, end=' ')
    print()

1 2 3 
4 5 6 


In [19]:
for i in integers.flat:
    print(i, end=' ')

1 2 3 4 5 6 

7.3 Self Check

In [20]:
# 2) For the two-dimensional array in the previous section's Self Check, display the number of dimensions and shape of 
# the array.

a = np.array([[2, 4, 6, 8, 10], [1, 3, 5, 7, 9]])

a.ndim

2

In [21]:
a.shape

(2, 5)

7.4 Filling arrays with Specific Values

In [22]:
# For an integer, each function returns a one-dimensional array with the specified number of elements:

np.zeros(5)

array([0., 0., 0., 0., 0.])

In [23]:
# You can specify the array's element type with the zeros and ones functions' dtype keyword argument:

np.ones((2, 4), dtype=int)

array([[1, 1, 1, 1],
       [1, 1, 1, 1]])

7.5 Creating arrays from Ranges

In [24]:
# Arange function 

np.arange(5)

array([0, 1, 2, 3, 4])

In [25]:
np.arange(5, 10)

array([5, 6, 7, 8, 9])

In [26]:
np.arange(10, 1, -2)

array([10,  8,  6,  4,  2])

In [27]:
# Floating point ranges with linspace

np.linspace(0.0, 1.0, num=5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [28]:
#Reshaping an array

np.arange(1, 21).reshape(4, 5)

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20]])

In [29]:
# Displaying large arrays, NumPy drops the middle rows, columns, or both from the output:

np.arange(1, 100001).reshape(4, 25000)

array([[     1,      2,      3, ...,  24998,  24999,  25000],
       [ 25001,  25002,  25003, ...,  49998,  49999,  50000],
       [ 50001,  50002,  50003, ...,  74998,  74999,  75000],
       [ 75001,  75002,  75003, ...,  99998,  99999, 100000]])

In [30]:
np.arange(1, 100001).reshape(100, 1000)

array([[     1,      2,      3, ...,    998,    999,   1000],
       [  1001,   1002,   1003, ...,   1998,   1999,   2000],
       [  2001,   2002,   2003, ...,   2998,   2999,   3000],
       ...,
       [ 97001,  97002,  97003, ...,  97998,  97999,  98000],
       [ 98001,  98002,  98003, ...,  98998,  98999,  99000],
       [ 99001,  99002,  99003, ...,  99998,  99999, 100000]])

7.5 Self Check

In [31]:
# 2) Use NumPy function arange to create an array of 20 even integers from 2 through 40, then reshape the result into
# a 4-by-5 array.

np.arange(2, 41, 2).reshape(4, 5)

array([[ 2,  4,  6,  8, 10],
       [12, 14, 16, 18, 20],
       [22, 24, 26, 28, 30],
       [32, 34, 36, 38, 40]])

7.6 List vs. array Performance: Introducing %timeit

In [32]:
# Timing the creation of an array containing results of 6,000,000 Die Rolls

%timeit rolls_array = np.random.randint(1, 7, 6_000_000)

60.8 ms ± 244 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [33]:
#60,000,000 and 600,000,000 Die Rolls

%timeit rolls_array = np.random.randint(1, 7, 60_000_000)

594 ms ± 6.93 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [34]:
%timeit rolls_array = np.random.randint(1, 7, 600_000_000)

6.04 s ± 55.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


7.6 Self Check

In [35]:
# 1) Use %timeit to compare the execution time of the following two statements. The first uses a list comprehension to create
# a list of the integers from 0 to 9,999,999, then totals them with the built-in sum function. The second statement does the
# same thing using an array and its sum method.

# sum([x for x in range(10_000_000)])
# np.arange(10_000_000).sum()

%timeit sum([x for x in range(10_000_000)])

886 ms ± 3.27 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [36]:
%timeit np.arange(10_000_000).sum()

16.2 ms ± 372 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


7.7 array Operators

In [37]:
# Element-wise operations are applied to every element, each returns a new array containing the result.

import numpy as np

numbers = np.arange(1, 6)
numbers

array([1, 2, 3, 4, 5])

In [38]:
numbers * 2

array([ 2,  4,  6,  8, 10])

In [39]:
numbers ** 3

array([  1,   8,  27,  64, 125], dtype=int32)

In [40]:
numbers #unchanged by arithmetic operators

array([1, 2, 3, 4, 5])

In [41]:
# Augmented assignments modify every element in the left operand

numbers += 10

numbers

array([11, 12, 13, 14, 15])

In [42]:
# Arithmetic between arrays

numbers2 = np.linspace(1.1, 5.5, 5)

numbers2

array([1.1, 2.2, 3.3, 4.4, 5.5])

In [43]:
numbers * numbers2

array([12.1, 26.4, 42.9, 61.6, 82.5])

In [44]:
# Comparing arrays

numbers

array([11, 12, 13, 14, 15])

In [45]:
numbers >= 13

array([False, False,  True,  True,  True])

In [46]:
numbers2

array([1.1, 2.2, 3.3, 4.4, 5.5])

In [47]:
numbers2 < numbers

array([ True,  True,  True,  True,  True])

In [48]:
numbers == numbers2

array([False, False, False, False, False])

In [49]:
numbers == numbers

array([ True,  True,  True,  True,  True])

7.7 Self Check

In [50]:
# 2) Create an array of the values from 1 through 5, then use broadcasting to square each value.

np.arange(1, 6) ** 2

array([ 1,  4,  9, 16, 25], dtype=int32)

7.8 NumPy Calculation Methods

In [51]:
# Calculate sum, min, max, mean, standard deviation, and variance

grades = np.array([[87, 96, 70], [100, 87, 90],
                   [94, 77, 90], [100, 81, 82]])

grades

array([[ 87,  96,  70],
       [100,  87,  90],
       [ 94,  77,  90],
       [100,  81,  82]])

In [52]:
grades.sum()

1054

In [53]:
grades.min()

70

In [54]:
grades.mean()

87.83333333333333

In [55]:
grades.std()

8.792357792739987

In [56]:
grades.var()

77.30555555555556

In [57]:
# Calculations by row or column: axis=0 averages each column (down the rows); axis=1 averages each row

grades.mean(axis=0)

array([95.25, 85.25, 83.  ])

In [58]:
grades.mean(axis=1)

array([84.33333333, 92.33333333, 87.        , 87.66666667])

7.8 Self Check

In [59]:
# 2) Use NumPy random-number generation to create an array of twelve random grades in the range 60
# through 100, then reshape the result into a 3-by-4 array. Calculate the average of all the grades, 
# the average of the grades in each column and the averages of the grades in each row.

grades = np.random.randint(60, 101, 12).reshape(3, 4)

grades

array([[98, 74, 77, 97],
       [62, 99, 96, 72],
       [96, 60, 60, 61]])

In [60]:
grades.mean()

79.33333333333333

In [61]:
grades.mean(axis=0)

array([85.33333333, 77.66666667, 77.66666667, 76.66666667])

In [62]:
grades.mean(axis=1)

array([86.5 , 82.25, 69.25])

7.9 Universal Functions

In [63]:
# sqrt

numbers = np.array([1, 4, 9, 16, 25, 36])

np.sqrt(numbers)

array([1., 2., 3., 4., 5., 6.])

In [64]:
numbers2 = np.arange(1, 7) * 10

numbers2

array([10, 20, 30, 40, 50, 60])

In [65]:
np.add(numbers, numbers2)

array([11, 24, 39, 56, 75, 96])

In [66]:
# multiply

np.multiply(numbers2, 5)

array([ 50, 100, 150, 200, 250, 300])

In [67]:
numbers3 = numbers2.reshape(2, 3)

numbers3

array([[10, 20, 30],
       [40, 50, 60]])

In [68]:
numbers4 = np.array([2, 4, 6])

np.multiply(numbers3, numbers4)

array([[ 20,  80, 180],
       [ 80, 200, 360]])

7.9 Self Check

In [69]:
# 2) Create an array of the values from 1 through 5, then use the power universal function and broadcasting to cube 
# each value.

numbers = np.arange(1, 6)

np.power(numbers, 3)

array([  1,   8,  27,  64, 125], dtype=int32)

7.10 Indexing and Slicing

In [70]:
grades = np.array([[87, 96, 70], [100, 87, 90],
                   [94, 77, 90], [100, 81, 82]])

grades

array([[ 87,  96,  70],
       [100,  87,  90],
       [ 94,  77,  90],
       [100,  81,  82]])

In [71]:
grades[0, 1] # row 0, column 1

96

In [72]:
# row 1 
grades[1]

array([100,  87,  90])

In [73]:
# sequential rows 0 and 1 (the slice's end index 2 is excluded)
grades[0:2]

array([[ 87,  96,  70],
       [100,  87,  90]])

In [74]:
# nonsequential rows
grades[[1, 3]]

array([[100,  87,  90],
       [100,  81,  82]])

In [75]:
# grades in first column
grades[:, 0]

array([ 87, 100,  94, 100])

In [76]:
# consecutive columns

grades[:, 1:3]

array([[96, 70],
       [87, 90],
       [77, 90],
       [81, 82]])

In [77]:
# specific columns
grades[:, [0, 2]]

array([[ 87,  70],
       [100,  90],
       [ 94,  90],
       [100,  82]])

7.10 Self Check

In [78]:
# 1) Given the following array:
#    array([[1, 2, 3, 4, 5],
#           [6, 7, 8, 9, 10],
#           [11, 12, 13, 14, 15]])
#
# write statements to perform the following tasks:
#
# a) Select the second row.
# b) Select the first and third rows.
# c) Select the middle three columns.

a = np.arange(1, 16).reshape(3, 5)

a

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [79]:
a[1]

array([ 6,  7,  8,  9, 10])

In [80]:
a[[0, 2]]

array([[ 1,  2,  3,  4,  5],
       [11, 12, 13, 14, 15]])

In [81]:
a[:, 1:4]

array([[ 2,  3,  4],
       [ 7,  8,  9],
       [12, 13, 14]])

7.11 Views: Shallow Copies

In [82]:
# Create an array and view of the array

numbers = np.arange(1, 6)

numbers

array([1, 2, 3, 4, 5])

In [83]:
numbers2 = numbers.view()

numbers2

array([1, 2, 3, 4, 5])

In [84]:
# Use built-in id function to see that numbers and numbers2 are different objects

id(numbers)

2299078218928

In [85]:
id(numbers2)

2299099216944

In [86]:
# To show that numbers2 views the same data as numbers

numbers[1] *= 10

numbers2

array([ 1, 20,  3,  4,  5])

In [87]:
numbers

array([ 1, 20,  3,  4,  5])

In [88]:
numbers2[1] /= 10

numbers

array([1, 2, 3, 4, 5])

In [89]:
numbers2

array([1, 2, 3, 4, 5])

In [90]:
# Slice views

numbers2 = numbers[0:3]

numbers2

array([1, 2, 3])

In [91]:
id(numbers)

2299078218928

In [92]:
id(numbers2)

2299099311920

In [93]:
# numbers2 is a view of only the first three numbers, so attempting to access the other numbers
# produces an IndexError. The exception is the point of this cell, so catch it and display it:
# an uncaught exception would stop Restart & Run All before the remaining cells execute.
try:
    numbers2[3]
except IndexError as error:
    # Shows the exception type followed by NumPy's own out-of-bounds message
    print(f'{type(error).__name__}: {error}')

IndexError: index 3 is out of bounds for axis 0 with size 3

In [None]:
# Modify the original array through numbers; numbers2 was created as the slice numbers[0:3],
# so the change is expected to be visible through numbers2 as well.
# NOTE(review): this cell has no execution count (In [None]) and the recorded outputs of the
# next two cells still show the unmodified values. Re-run this cell and the two that follow
# so the saved outputs actually demonstrate the effect.
numbers[1] *= 20

numbers

In [94]:
numbers

array([1, 2, 3, 4, 5])

In [95]:
numbers2

array([1, 2, 3])

7.12 Deep Copies

In [102]:
# array method copy creates a new array object with a deep copy of the original array object's data

numbers = np.arange(1, 6)

numbers

array([1, 2, 3, 4, 5])

In [103]:
numbers2 = numbers.copy()

numbers2

array([1, 2, 3, 4, 5])

In [104]:
numbers[1] *= 10

numbers

array([ 1, 20,  3,  4,  5])

In [105]:
numbers2

array([1, 2, 3, 4, 5])

7.13 Reshaping and Transposing

In [106]:
# reshape vs. resize
# reshape is a view (shallow copy)

grades = np.array([[87, 96, 70], [100, 87, 90]])

grades

array([[ 87,  96,  70],
       [100,  87,  90]])

In [107]:
grades.reshape(1, 6)

array([[ 87,  96,  70, 100,  87,  90]])

In [108]:
grades

array([[ 87,  96,  70],
       [100,  87,  90]])

In [110]:
# resize modifies the original array's shape

grades.resize(1, 6)

grades

array([[ 87,  96,  70, 100,  87,  90]])

In [111]:
# flatten vs. ravel
# flatten deep copies the original array's data

grades = np.array([[87, 96, 70], [100, 87, 90]])

grades

array([[ 87,  96,  70],
       [100,  87,  90]])

In [112]:
flattened = grades.flatten()

flattened

array([ 87,  96,  70, 100,  87,  90])

In [113]:
grades

array([[ 87,  96,  70],
       [100,  87,  90]])

In [114]:
# flattened and grades DO NOT SHARE data

flattened[0] = 100

flattened

array([100,  96,  70, 100,  87,  90])

In [115]:
grades

array([[ 87,  96,  70],
       [100,  87,  90]])

In [116]:
# ravel produces a view of the original array, which shares the grades array's data

raveled = grades.ravel()

raveled

array([ 87,  96,  70, 100,  87,  90])

In [117]:
grades

array([[ 87,  96,  70],
       [100,  87,  90]])

In [118]:
raveled[0] = 100

raveled

array([100,  96,  70, 100,  87,  90])

In [119]:
grades

array([[100,  96,  70],
       [100,  87,  90]])

In [120]:
# Transposing rows and columns

grades.T

array([[100, 100],
       [ 96,  87],
       [ 70,  90]])

In [121]:
grades

array([[100,  96,  70],
       [100,  87,  90]])

In [122]:
# Horizontal and vertical stacking

grades2 = np.array([[94, 77, 90], [100, 81, 82]])

np.hstack((grades, grades2)) # combines grades and grades2

array([[100,  96,  70,  94,  77,  90],
       [100,  87,  90, 100,  81,  82]])

In [123]:
np.vstack((grades, grades2))

array([[100,  96,  70],
       [100,  87,  90],
       [ 94,  77,  90],
       [100,  81,  82]])

7.13 Self Check

In [124]:
# 1) Given a 2-by-3 array:
#
#     array([[1, 2, 3],
#            [4, 5, 6]])
#
# use hstack and vstack to produce the following array:
#
#     array([[1, 2, 3, 1, 2, 3],
#            [4, 5, 6, 4, 5, 6],
#            [1, 2, 3, 1, 2, 3],
#            [4, 5, 6, 4, 5, 6]])

a = np.arange(1, 7).reshape(2, 3)

a

array([[1, 2, 3],
       [4, 5, 6]])

In [127]:
# Build the result under new names instead of rebinding a. The original cell overwrote its
# own input, so running it a second time stacked the already-stacked array and produced a
# larger result; with separate names the cell gives the same 4-by-6 array on every run.
side_by_side = np.hstack((a, a))                  # two copies of a next to each other: 2-by-6

tiled = np.vstack((side_by_side, side_by_side))   # two copies of that on top of each other: 4-by-6

tiled

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6],
       [1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

7.14 Intro to Data Science: pandas Series and DataFrames

In [129]:
# Creating a series with Default indices

import pandas as pd

grades = pd.Series([87, 100, 94])

grades # to display a series

0     87
1    100
2     94
dtype: int64

In [130]:
# Creating series with all elements having the same value

pd.Series(98.6, range(3))

0    98.6
1    98.6
2    98.6
dtype: float64

In [131]:
#Accessing a Series' elements

grades[0]

87

In [132]:
# Producing Descriptive Statistics for a Series

grades.count()

3

In [133]:
grades.mean()

93.66666666666667

In [134]:
grades.min()

87

In [135]:
grades.max()

100

In [136]:
grades.std()

6.506407098647712

In [137]:
grades.describe()

count      3.000000
mean      93.666667
std        6.506407
min       87.000000
25%       90.500000
50%       94.000000
75%       97.000000
max      100.000000
dtype: float64

In [138]:
# Creating a Series with Custom Indices

grades = pd.Series([87, 100, 94], index=['Wally', 'Eva', 'Sam'])

grades

Wally     87
Eva      100
Sam       94
dtype: int64

In [139]:
# Dictionary Initializers

grades = pd.Series({'Wally': 87, 'Eva':100, 'Sam': 94})

grades

Wally     87
Eva      100
Sam       94
dtype: int64

In [141]:
# Accessing Elements of a Series Via Custom Indices

grades['Eva']

100

In [142]:
grades.Wally

87

In [143]:
grades.dtype

dtype('int64')

In [144]:
grades.values

array([ 87, 100,  94], dtype=int64)

In [145]:
# Creating a Series of Strings

hardware = pd.Series(['Hammer', 'Saw', 'Wrench'])

hardware

0    Hammer
1       Saw
2    Wrench
dtype: object

In [146]:
# string method contains - determine whether the value of each element contains a lowercase 'a'

hardware.str.contains('a')

0     True
1     True
2    False
dtype: bool

In [147]:
# Produce a new Series containing the uppercase versions of each element in hardware:

hardware.str.upper()

0    HAMMER
1       SAW
2    WRENCH
dtype: object

7.14.1 Self Check

In [149]:
# 1) Use NumPy's random-number generation to create an array of five random integers that represent
# summertime temperatures in the range 60-100, then perform the following tasks:
#
# a) Convert the array into a Series named temperatures and display it.
# b) Determine the lowest, highest, and average temperatures.
# c) Produce descriptive statistics for the Series.

import numpy as np
import pandas as pd

# randint excludes its upper bound, so high=101 makes 100 a possible temperature
temps = np.random.randint(low=60, high=101, size=5)
temperatures = pd.Series(data=temps)  # a) Series built from the array, default integer index

temperatures

0    86
1    83
2    82
3    65
4    82
dtype: int32

In [150]:
temperatures.min()

65

In [153]:
temperatures.max()

86

In [155]:
temperatures.mean()

79.6

In [156]:
temperatures.describe()

count     5.000000
mean     79.600000
std       8.324662
min      65.000000
25%      82.000000
50%      82.000000
75%      83.000000
max      86.000000
dtype: float64