# Numpy

### Import Libraries
Do this for every new Jupyter notebook

In [2]:
import numpy as np

### Creating Arrays
Creates a 1-dimensional array from a list

In [3]:
my_list1 = [1, 2, 3, 4]
my_array1 = np.array(my_list1)
my_array1

array([1, 2, 3, 4])

Creates a multi-dimensional array from a list of lists

In [4]:
my_list2 = [11, 22, 33, 44]
my_lists = [my_list1, my_list2]
my_array2 = np.array(my_lists)
my_array2

array([[ 1,  2,  3,  4],
       [11, 22, 33, 44]])

Creating from scratch (requires two sets of parentheses)

In [5]:
array_2d = (([1,2,3], [4,5,6]))
array_2d

([1, 2, 3], [4, 5, 6])

Describes the size & shape of the array (rows, columns)

In [6]:
my_array2.shape

(2, 4)

Describes the data type of the array

In [7]:
my_array2.dtype

dtype('int64')

### Special Case Array

Zeros array

In [8]:
np.zeros(5)

array([ 0.,  0.,  0.,  0.,  0.])

Resembles zeros array

In [9]:
np.empty(5)

array([ 0.,  0.,  0.,  0.,  0.])

Array with '1' values only

In [10]:
np.ones(5)

array([ 1.,  1.,  1.,  1.,  1.])

The "identity array"

In [11]:
np.eye(5)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

Using a range

In [12]:
# np.arange([start,] stop[, step])
np.arange(5,10,2)

array([5, 7, 9])

### Using Arrays and Scalars
Note the double parentheses/brackets

In [13]:
arr1 = np.array([[1,2,3], [8,9,10]])
arr1

array([[ 1,  2,  3],
       [ 8,  9, 10]])

Adding arrays

In [14]:
arr1+arr1

array([[ 2,  4,  6],
       [16, 18, 20]])

Subtracting arrays

In [15]:
arr1-arr1

array([[0, 0, 0],
       [0, 0, 0]])

Multiplying arrays

In [16]:
arr1*arr1

array([[  1,   4,   9],
       [ 64,  81, 100]])

Dividing arrays

In [17]:
arr1/arr1

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

Arithmetic operations with scalars on arrays

In [18]:
1/arr1

array([[ 1.        ,  0.5       ,  0.33333333],
       [ 0.125     ,  0.11111111,  0.1       ]])

In [19]:
arr1**3

array([[   1,    8,   27],
       [ 512,  729, 1000]])

### Indexing Arrays
Arrays are sequenced - they are modified by slice in operations.

In [20]:
arr = np.arange(11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [21]:
slice_of_arr = arr[0:6]
slice_of_arr

array([0, 1, 2, 3, 4, 5])

Change the slice

In [22]:
slice_of_arr[:] = 99
slice_of_arr

array([99, 99, 99, 99, 99, 99])

Note that the changes also occur in our original array. Data is not copied, it's a view of the original array. This avoids memory problems.

In [23]:
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

To get a copy, you need to be explicit

In [24]:
arr_copy = arr.copy()
arr_copy

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

Indexing 2D array - grab a row

In [25]:
arr_2d = np.array(([5,10,15],[20,25,30],[35,40,45]))
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [26]:
# format follows arr_2d[row][col] or arr_2d[row,col]
arr_2d[1]

array([20, 25, 30])

Indexing 2D array - grab an individual element

In [27]:
arr_2d[1][0]

20

In [28]:
arr_2d[1,0]

20

Slicing 2D Array - grab a 2x2 slice from top right corner

In [29]:
arr_2d[:2,1:]

array([[10, 15],
       [25, 30]])

Fancy Indexing - allows a selection of rows in any order using embedded brackets *(notethatarr[2,1] returns12.0)

In [30]:
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [31]:
arr[[2,1]]

array([99, 99])

### Array Transposition
create an array

In [33]:
arr = np.arange(24).reshape((4,6))
arr

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

transpose the array (this does NOT change the array in place)

In [34]:
arr.T

array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])

take the dot product of these two arrays

In [35]:
np.dot(arr.T,arr)

array([[504, 540, 576, 612, 648, 684],
       [540, 580, 620, 660, 700, 740],
       [576, 620, 664, 708, 752, 796],
       [612, 660, 708, 756, 804, 852],
       [648, 700, 752, 804, 856, 908],
       [684, 740, 796, 852, 908, 964]])

You can also transpose a 3D matrix:

In [37]:
arr3d = np.arange(18).reshape((3,3,2))
arr3d

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5]],

       [[ 6,  7],
        [ 8,  9],
        [10, 11]],

       [[12, 13],
        [14, 15],
        [16, 17]]])

In [39]:
arr3d.transpose((1,0,2))

array([[[ 0,  1],
        [ 6,  7],
        [12, 13]],

       [[ 2,  3],
        [ 8,  9],
        [14, 15]],

       [[ 4,  5],
        [10, 11],
        [16, 17]]])

If you need to get more specific use swapaxes:

In [40]:
arr = np.array([[1,2,3]])
arr

array([[1, 2, 3]])

In [41]:
arr.swapaxes(0,1)

array([[1],
       [2],
       [3]])

### Universal Array Functions

In [42]:
arr = np.arange(6)
arr

array([0, 1, 2, 3, 4, 5])

square-root function

In [43]:
np.sqrt(arr)

array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,
        2.23606798])

exponential(e^)

In [44]:
np.exp(arr)

array([   1.        ,    2.71828183,    7.3890561 ,   20.08553692,
         54.59815003,  148.4131591 ])

binary functions (require two arrays) - returns sum of matching values of two arrays

In [46]:
np.add(arr,arr)

array([ 0,  2,  4,  6,  8, 10])

In [49]:
np.maximum(arr, arr)

array([0, 1, 2, 3, 4, 5])

### Random Number Generator
random array (normal distribution)

In [50]:
np.random.randn(10)

array([ 0.82547382, -1.29211824, -0.249479  ,  0.45815376,  0.37382295,
        2.78148186, -2.05271726, -1.4268713 , -2.23000302,  0.99766174])

### Using numpy.where

In [51]:
A = np.array([1,2,3,4])
B = np.array([100,200,300,400])
condition = np.array([True,True,False,False]) # a Boolean array


The slow way: Using a list comprehension

Problems include speed issues and multi-dimensional array issues

In [52]:
answer1 = [(A_val if cond else B_val) for A_val,B_val,cond in zip(A,B,condition)] 
answer1

[1, 2, 300, 400]

The numpy.where way:

follows (test, if true, if false)

In [53]:
answer2 = np.where(condition,A,B)
answer2

array([  1,   2, 300, 400])

Using numpy.where for 2D manipulation:

Where array is less than zero, make that value zero, otherwise leave as is

In [54]:
arr = np.random.randn(5,5)
np.where(arr < 0,0,arr)

array([[ 0.        ,  0.        ,  1.35492234,  0.        ,  0.        ],
       [ 1.4114757 ,  1.56541121,  0.        ,  0.        ,  1.85461939],
       [ 1.74942247,  0.        ,  1.4914493 ,  0.14290347,  0.        ],
       [ 0.        ,  0.03262788,  0.30449429,  1.19093464,  0.        ],
       [ 0.16813837,  0.44571226,  0.        ,  0.06992259,  1.00139178]])

### Other Statistical Tools

In [55]:
arr = np.array([[1,2,3],[4,5,6],[7,8,9]])

regular sums

In [56]:
arr.sum()

45

sums along vertical axes

In [57]:
arr.sum(0)

array([12, 15, 18])

note there are no "median" or "mode" functions

In [58]:
arr.mean()

5.0

variance

In [59]:
arr.var()

6.666666666666667

standard deviation

In [60]:
arr.std()

2.5819888974716112

Any and all for processing Boolean arrays:

In [61]:
bool_arr = np.array([True,False,True])
bool_arr.any()

True

In [62]:
bool_arr.all()

False

Sort, Unique and In1d:

In [66]:
arr = np.random.randn(5,5)

sorts each row individually, in place

In [67]:
arr.sort()
arr

array([[-2.52074993, -1.52781619, -1.30982866, -0.13714473,  1.46188373],
       [-0.93385642, -0.75053214, -0.56621764,  0.19988637,  1.39841583],
       [-1.54996938, -1.13178123, -0.34647121,  0.23871409,  1.71366264],
       [-0.8911459 , -0.79536335,  0.10246147,  0.70557698,  1.64894846],
       [-1.90708592, -0.52675171, -0.02941399,  0.41909856,  0.84178561]])

sorts each item horizontally

In [68]:
np.apply_along_axis(sorted, 0, arr)

array([[-2.52074993, -1.52781619, -1.30982866, -0.13714473,  0.84178561],
       [-1.90708592, -1.13178123, -0.56621764,  0.19988637,  1.39841583],
       [-1.54996938, -0.79536335, -0.34647121,  0.23871409,  1.46188373],
       [-0.93385642, -0.75053214, -0.02941399,  0.41909856,  1.64894846],
       [-0.8911459 , -0.52675171,  0.10246147,  0.70557698,  1.71366264]])

unique

In [69]:
countries = np.array(['France', 'Germany', 'USA', 'Russia', 'USA', 'Mexico']) 
np.unique(countries)

array(['France', 'Germany', 'Mexico', 'Russia', 'USA'], 
      dtype='<U7')

In [70]:
np.in1d(['France','USA','Sweden'],countries)

array([ True,  True, False], dtype=bool)

### Array Input and Output
Insert an element into an array

In [71]:
a = np.array([[1, 1], [2, 2], [3, 3]])
a

array([[1, 1],
       [2, 2],
       [3, 3]])

inserts a 5 before index 1 and flattens the array (but not in-place!)

In [72]:
np.insert(a, 1, 5)

array([1, 5, 1, 2, 2, 3, 3])

Saving the array on disk in binary format (file extension .npy)

In [4]:
arr = np.arange(5)
np.save('my_array',arr)

Loading the array

In [6]:
np.load('my_array.npy')

array([0, 1, 2, 3, 4])

Saving multiple arrays into a zip (.npz) file

In [7]:
arr2 = np.arange(10) # second array
np.savez('two_arrays.npz',x=arr,y=arr2)

Loading multiple arrays

In [11]:
archive_array = np.load('two_arrays.npz')
archive_array['x'] # call first array

array([0, 1, 2, 3, 4])

In [12]:
archive_array['y'] # call second array

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Saving text files

In [13]:
arr = np.array([[1,2,3],[4,5,6]]) 
np.savetxt('my_test_text.txt',arr,delimiter=',')

Loading text files

In [14]:
arr = np.loadtxt('my_test_text.txt',delimiter = ',')
arr

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])