In [3]:
import sys
import numpy as np

#### BASIC NUMPY ARRAYS

In [4]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [6]:
a = np.array([1,2,3,4])

In [8]:
b = np.array([0,.5,1, 1.5, 2])

In [9]:
a[0], a[1]

(1, 2)

In [10]:
a[0:]

array([1, 2, 3, 4])

In [11]:
a[1:3]

array([2, 3])

In [12]:
a[1:-1]

array([2, 3])

In [13]:
a[::2]

array([1, 3])

#### Multi-Indexing

In [15]:
b[0], b[2], b[-1]

(0.0, 1.0, 2.0)

In [17]:
b[[0,2,-1]] #Indexing with a list of positions builds a new numpy array from the selected elements

array([0., 1., 2.])

#### Array  Types

In [18]:
a.dtype

dtype('int32')

In [19]:
b.dtype

dtype('float64')

##### Changing data type

In [21]:
np.array([1,2,3,4], dtype=np.float64)

array([1., 2., 3., 4.])

In [22]:
np.array([1,2,3,4], dtype=np.int8)

array([1, 2, 3, 4], dtype=int8)

In [26]:
c = np.array(['a', 'b', 'c'])

In [27]:
c.dtype

dtype('<U1')

In [23]:
d = np.array([{'a':1}, sys]) #Arbitrary Python objects (here a dict and a module) are stored with the generic object dtype

In [24]:
d.dtype

dtype('O')

#### Dimensions and Shapes

In [28]:
#Two rows of three values each
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

In [29]:
A.shape #2 rows by 3 columns

(2, 3)

In [32]:
A.ndim #Number of dimensions (axes): rows and columns, so 2

2

In [33]:
A.size #Total Elements

6

In [34]:
#Two stacked 2x3 matrices make a 3-D array
B = np.array([
    [[12, 11, 10], [9, 8, 7]],
    [[6, 5, 4], [3, 2, 1]],
])

In [35]:
B

array([[[12, 11, 10],
        [ 9,  8,  7]],

       [[ 6,  5,  4],
        [ 3,  2,  1]]])

In [36]:
B.shape #2x2x3

(2, 2, 3)

In [37]:
B.ndim

3

In [38]:
B.size #elements

12

In [48]:
#Six levels of nesting: three outer bracket pairs that each hold a single item,
#followed by a 2x2x3 block of values
C = np.array([[[
    [
        [[12, 11, 10], [9, 8, 7]],
        [[6, 5, 4], [3, 2, 1]],
    ],
]]])

In [50]:
C.shape #1x1x1x2x2x3: each of the three extra outer bracket pairs adds an axis of length 1

(1, 1, 1, 2, 2, 3)

In [51]:
C.ndim

6

In [52]:
C.size

12

#### Indexing and Slicing of Matrices

In [54]:
#Columns: 0  1  2
A = np.array([
    [1, 2, 3],  # ROW 0
    [4, 5, 6],  # ROW 1
    [7, 8, 9],  # ROW 2
])

In [55]:
A[1] # Should return [4,5,6]

array([4, 5, 6])

In [56]:
A[1][0] #Should return 4

4

In [57]:
A[1,0] #Should return 4

4

In [59]:
A[0:2] #Should return ROW 0 and ROW 1 (the stop index 2 is excluded)

array([[1, 2, 3],
       [4, 5, 6]])

In [60]:
A[:, :2] #Should return all columns excluding COL 2

array([[1, 2],
       [4, 5],
       [7, 8]])

<b>Note:</b> To slice by COL (like above), you must also give an index for the ROW axis first. Here the bare ':' selects every row, and ':2' after the comma selects COL 0 and COL 1.

In [61]:
A[:2, :2] #Should only return ROW 0,1 and COL 0,1 --> [1,2][4,5]

array([[1, 2],
       [4, 5]])

In [62]:
A[:2, 2:] #Should return ROW 0,1 and COL 2 --> [3][6]

array([[3],
       [6]])

In [63]:
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [65]:
A[1] = np.array([10,10,10]) #Should replace entire ROW 1 with '10' ---> 
#    [4,5,6] will become [10,10,10]

In [66]:
A

array([[ 1,  2,  3],
       [10, 10, 10],
       [ 7,  8,  9]])

In [69]:
A[2] = 90 #Should replace ROW 2 with all '90'

In [70]:
A

array([[ 1,  2,  3],
       [10, 10, 10],
       [90, 90, 90]])

#### Summary Statistics

In [72]:
a = np.array([1,2,3,4])

In [73]:
a.sum()

10

In [74]:
a.mean()

2.5

In [78]:
a.std()

1.118033988749895

In [76]:
a.var()

1.25

In [80]:
A = np.array([
    [1,2,3], 
    [4,5,6],
    [7,8,9]
])

In [81]:
A.sum()

45

In [82]:
A.mean()

5.0

In [83]:
A.std()

2.581988897471611

In [84]:
A.var()

6.666666666666667

<b>Axis Parameters</b>

In [85]:
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [88]:
A.sum(axis=0)

array([12, 15, 18])

In [89]:
A.sum(axis=1)

array([ 6, 15, 24])

In [90]:
A.mean(axis=0)

array([4., 5., 6.])

In [91]:
A.mean(axis=1)

array([2., 5., 8.])

In [92]:
A.std(axis=0) #COL std

array([2.44948974, 2.44948974, 2.44948974])

In [93]:
A.std(axis=1) #ROW std

array([0.81649658, 0.81649658, 0.81649658])

<b>Broadcasting and Vectorized Operations</b>

In [94]:
a = np.arange(4)

In [95]:
a

array([0, 1, 2, 3])

In [96]:
a+10 

array([10, 11, 12, 13])

In [97]:
a*10

array([ 0, 10, 20, 30])

In [98]:
a

array([0, 1, 2, 3])

In [99]:
a+=100

<b>NOTE:</b> The '+=' operator actually CHANGES the values in the array rather than creating a new array like in other vectorized operations.

In [100]:
a

array([100, 101, 102, 103])

In [102]:
l = [0,1,2,3]

In [103]:
[i*10 for i in l]

[0, 10, 20, 30]

Creates a new list based on 'l' with each element multiplied by 10 (the plain-Python equivalent of 'a*10' on an array)

In [104]:
a = np.arange(4)

The 'np.arange(4)' function returns an array holding the integers 0-3, which is then assigned to 'a'

In [105]:
a

array([0, 1, 2, 3])

In [106]:
b = np.array([10,10,10,10])

In [108]:
b

array([10, 10, 10, 10])

In [107]:
a+b

array([10, 11, 12, 13])

In [109]:
a*b

array([ 0, 10, 20, 30])

<b> Boolean Arrays </b>

In [110]:
a = np.arange(4)

In [111]:
a

array([0, 1, 2, 3])

In [112]:
a[0], a[-1]

(0, 3)

Regular 'Pythonic' way of selecting data in an array

In [113]:
a[[0,-1]]

array([0, 3])

Multi-Index Selection

In [114]:
a[[True, False, False, True]]

array([0, 3])

This basically says: 
"For every element, here is whether I want it or not" (one flag per element, so four flags in this example). 

Each <b>'True'/'False'</b> value corresponds to one element in the array: 'True' keeps that element, 'False' leaves it out.

However, it's not scalable if you have...like...a million records, right? 

In [115]:
a

array([0, 1, 2, 3])

In [117]:
a >=2

array([False, False,  True,  True])

This says: 
"Return 'True' for elements <u>equal or greater than</u> '2'"

Which returns: 
<br>array([0,1,2,3])
<br>array([F,F,T,T])
<br><br>
This is a 'query': you state a condition, and you get back one True/False flag per element telling you whether it matches that condition.

In [118]:
a[a >=2]

array([2, 3])

Returns a new array containing the elements of 'a' that are <u>greater than or equal to</u> '2'.

In [120]:
a.mean()

1.5

In [121]:
a[a > a.mean()]

array([2, 3])

Returns elements in the array that are greater than the mean.

In [122]:
a[~(a > a.mean())]

array([0, 1])

Returns elements that...are NOT greater than the mean.

In [124]:
a[(a==0) | (a==1)]

array([0, 1])

In [125]:
a[(a <= 2) & (a % 2 == 0)]

array([0, 2])

In [126]:
#Draw a 3x3 array of random integers in the range 0-99; no seed is set, so the values change on every run
A = np.random.randint(100, size=(3,3))

<b>NOTE:</b> <br>
<u>'random'</u> is NumPy's random-number module. <br>
<u>'randint()'</u> draws random integers from it (it does not seed the generator). <br>
It is called here with two arguments:
<ol>
    <li>The exclusive upper bound - IE. values run from 0 up to 99 (100 itself is never returned)</li>
    <li><b>'size'</b> is the shape of the output array, with one entry per dimension
        <br>Ex. This array will have '3' ROWS/ '3' COLS</li>
</ol>

In [131]:
A

array([[92, 83, 92],
       [61, 21, 96],
       [ 5,  2, 23]])

In [134]:
#Boolean mask with the same shape as A: True marks the positions to select
A[np.array([
    [True, False, True],
    [False, True, False],
    [True, False, True],
])]

array([92, 92, 21,  5, 23])

The boolean array allowed us to select specific values from the 'A' array and created a new array with the selected values. 

In [135]:
A > 30

array([[ True,  True,  True],
       [ True, False,  True],
       [False, False, False]])

In [136]:
A[A > 30]

array([92, 83, 92, 61, 96])

We can also populate new arrays with filtered data!

In [144]:
#Keep only the values of A that are 60 or lower (treated here as failing scores)
failingScores = A[A<=60]

In [145]:
failingScores

array([21,  5,  2, 23])

<b> Linear Algebra </b>

In [149]:
A = np.array([
    [1,2,3], 
    [4,5,6],
    [7,8,9]
])

In [150]:
B = np.array([
    [6,5],
    [4,3],
    [2,1]
])

In [151]:
A.dot(B)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [152]:
A @ B

array([[20, 14],
       [56, 41],
       [92, 68]])

In [153]:
B.T

array([[6, 4, 2],
       [5, 3, 1]])

In [154]:
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [155]:
B.T @ A

array([[36, 48, 60],
       [24, 33, 42]])

<b> Size of objects in Memory </b>

In [158]:
sys.getsizeof(10**100)

72

In [156]:
#Each element of NumPy's default integer dtype takes a fixed number of bytes
#(4 on this machine), far less than the 72 bytes Python needed for 10**100 above
np.dtype(int).itemsize

4

In [157]:
np.dtype(float).itemsize

8

<b> Lists are even larger </b>

In [159]:
#A one-element list in python
sys.getsizeof([1])

64

In [160]:
#An array of one element in numpy
np.array([1]).nbytes

4

<b> And performance is also important </b> 

In [161]:
l = list(range(1000))

In [162]:
a = np.arange(1000)

In [163]:
#NOTE: %time runs the statement once; the recorded wall time of 0 ns shows a single run is too short to measure.
#Consider %timeit, which repeats the statement, for a meaningful comparison.
%time np.sum(a**2)

Wall time: 0 ns


332833500

In [164]:
#Pure-Python sum of squares over the list 'l'. NOTE: %time runs the statement once and the recorded
#wall time is 0 ns, so this single run is too short to measure; %timeit repeats the statement instead.
%time sum([x**2 for x in l])

Wall time: 0 ns


332833500

<b> USEFUL NUMPY FUNCTIONS </b>

<button>random</button>

In [165]:
np.random.random(size=2)

array([0.51736874, 0.58407445])

In [166]:
np.random.normal(size=2)

array([-0.02318612, -0.61222951])

In [167]:
np.random.rand(2,4)

array([[2.62076148e-01, 9.15668564e-01, 3.91730780e-02, 9.31259519e-01],
       [8.86394948e-01, 1.30100262e-01, 5.27261760e-04, 5.18142705e-01]])

<button>arange</button>

In [168]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [169]:
np.arange(5,10)

array([5, 6, 7, 8, 9])

In [170]:
np.arange(0,1,.1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

<button>reshape</button>

In [171]:
np.arange(10).reshape(2,5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [172]:
np.arange(10).reshape(5,2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

<button>linspace</button>

In [173]:
np.linspace(0,1,5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [174]:
np.linspace(0,1,20)

array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ])

In [176]:
#endpoint is passed by keyword so its meaning is visible: with endpoint=False the stop value 1
#is left out, so the 20 samples are spaced 1/20 = 0.05 apart
np.linspace(0,1,20, endpoint=False)

array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95])

<button>zeros, ones, empty</button>

In [177]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [178]:
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [180]:
np.zeros((3,3), dtype = np.int32)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [181]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [182]:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [183]:
#np.empty only allocates memory and does not initialize it, so the values shown are whatever was left in that memory
np.empty(5)

array([1., 1., 1., 1., 1.])

In [184]:
#Uninitialized as well: the contents are arbitrary leftovers from memory, not meaningful data
np.empty((2,2))

array([[0.25, 0.5 ],
       [0.75, 1.  ]])

<button>identity and eye</button>

In [185]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [186]:
np.eye(3,3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [187]:
np.eye(8,4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [188]:
np.eye(8,4, k=1) #k=1 moves the ones to the diagonal just above the main one

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [189]:
np.eye(8,4,k=-3) #k=-3 moves the ones three diagonals below the main one

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.]])

In [190]:
'Hello World'[6] #Plain Python string indexing (unrelated to numpy): index 6 is 'W'

'W'