In [2]:
import numpy as np

# Basic Array functions

In [19]:
# an ordinary Python list to start from
pythonList = [1,2,3,4]
# creating a 1-D numpy array from a Python list
npArray = np.array(pythonList)
npArray

array([1, 2, 3, 4])

In [8]:
# nesting lists of equal length produces a 2-D array, one row per inner list
pythonLists = [pythonList, [4,5,6,7]]
npMatrixArray = np.array(pythonLists)
npMatrixArray

array([[1, 2, 3, 4],
       [4, 5, 6, 7]])

In [13]:
# shape of the 1-D array: a one-element tuple holding its length
npArray.shape

(4,)

In [10]:
# shape of the 2-D array: (number of rows, number of columns)
npMatrixArray.shape

(2, 4)

In [14]:
# element type shared by every entry of the array
npMatrixArray.dtype

dtype('int32')

In [15]:
# creates a 1-D float array of length 5 filled with zeros
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [16]:
# the list argument is the shape: a 5x5 float array filled with ones
np.ones([5,5])

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [17]:
# 5x5 identity matrix: ones on the main diagonal, zeros everywhere else
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [18]:
# start at 5, stop before 50 (the stop value itself is excluded), increment by 2
np.arange(5,50,2)

array([ 5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37,
       39, 41, 43, 45, 47, 49])

# Mathematical Operations

In [21]:
# redisplay the 2x4 matrix used by the operations in this section
npMatrixArray

array([[1, 2, 3, 4],
       [4, 5, 6, 7]])

In [22]:
# * multiplies element by element (each entry is squared here); it is not a matrix product
npMatrixArray*npMatrixArray

array([[ 1,  4,  9, 16],
       [16, 25, 36, 49]])

In [23]:
# element-wise reciprocal: the scalar 1 is divided by every entry, giving a float array
1 / npMatrixArray

array([[1.        , 0.5       , 0.33333333, 0.25      ],
       [0.25      , 0.2       , 0.16666667, 0.14285714]])

# Indexing

In [48]:
# Ten consecutive integers, 0 through 9, used by the slicing examples that follow.
npArranged = np.arange(10)
npArranged

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [28]:
# a single integer index returns the element at that (zero-based) position
npArranged[8]

8

In [35]:
# slice from index 1 up to, but not including, index 3
npArranged[1:3]

array([1, 2])

In [31]:
# a bare colon selects every element
npArranged[:]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [34]:
# an omitted start means "from the beginning": indices 0, 1 and 2
npArranged[:3]

array([0, 1, 2])

In [37]:
# start at index 1, stop before index 7, step by 2 (take every second element)
npArranged[1:7:2]

array([1, 3, 5])

In [44]:
# a negative start counts from the end: with length 10, -5 is index 5. Stop before index 8.
npArranged[-5:8]

array([5, 6, 7])

In [47]:
# start 3 from the end (index 7), stop before index 2, step -1 (walk backwards)
npArranged[-3:2:-1]

array([7, 6, 5, 4, 3])

In [71]:
# The integers 1..24 arranged as 2 blocks, each of 3 rows by 4 columns.
arr3d = np.arange(1, 25).reshape(2, 3, 4)
arr3d

array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[13, 14, 15, 16],
        [17, 18, 19, 20],
        [21, 22, 23, 24]]])

In [72]:
# one entry per axis: 2 blocks, 3 rows, 4 columns
arr3d.shape

(2, 3, 4)

In [76]:
# Ellipsis (...) stands for all the leading axes, so this takes index 0 along the
# last axis: the first column of each block, collected into a 2-D array
arr2d = arr3d[...,0]
arr2d

array([[ 1,  5,  9],
       [13, 17, 21]])

In [77]:
# the indexed (last) axis is gone: (2, 3, 4) became (2, 3)
arr2d.shape

(2, 3)

In [81]:
# np.newaxis inserts an extra axis of length 1 at its position in the index
arr3d[:,np.newaxis,:,:]

array([[[[ 1,  2,  3,  4],
         [ 5,  6,  7,  8],
         [ 9, 10, 11, 12]]],


       [[[13, 14, 15, 16],
         [17, 18, 19, 20],
         [21, 22, 23, 24]]]])

In [82]:
# the inserted axis shows up as the 1 in the second position of the shape
arr3d[:,np.newaxis,:,:].shape

(2, 1, 3, 4)

In [103]:
# boolean indexing: a small float array in which NaN marks the missing entries
arr2d = np.array([
    [1.0, 2.0],
    [np.nan, 2.0],
    [np.nan, np.nan],
])
arr2d

array([[ 1.,  2.],
       [nan,  2.],
       [nan, nan]])

In [104]:
# ~ inverts the boolean mask, keeping only the non-NaN entries; the result is flattened to 1-D
arr2d[~np.isnan(arr2d)]

array([1., 2., 2.])

In [106]:
# set NaN (missing) values to 0; assigning through the mask modifies arr2d in place
arr2d[np.isnan(arr2d)] = 0
arr2d

array([[1., 2.],
       [0., 2.],
       [0., 0.]])

In [109]:
# A 3x3 integer array, written one row per line.
arr2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [129]:
# first two rows (the slice stops before index 2)
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [131]:
# Fill every row with its own row index.
# arr2d[i] selects a row (axis 0), so the loop bound has to be the row count,
# shape[0]. Using the column count, shape[1], is only correct for a square array.
arr_length = arr2d.shape[0]
for i in range(arr_length):
    # a scalar assigned to a row is broadcast to every element of that row
    arr2d[i] = i
arr2d

array([[0, 0, 0],
       [1, 1, 1],
       [2, 2, 2]])

In [132]:
# indexing with a list of indices picks out those rows (here rows 0 and 2)
arr2d[[0,2]]

array([[0, 0, 0],
       [2, 2, 2]])

# Array Transposing

In [133]:
# Lay the 50 values 0..49 out as a 10x5 array (10 rows, 5 columns);
# the target shape is passed to reshape as one tuple.
arr = np.arange(50).reshape((10, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44],
       [45, 46, 47, 48, 49]])

In [134]:
# .T is the transpose: rows and columns swap, so the 10x5 array becomes 5x10
arr.T

array([[ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45],
       [ 1,  6, 11, 16, 21, 26, 31, 36, 41, 46],
       [ 2,  7, 12, 17, 22, 27, 32, 37, 42, 47],
       [ 3,  8, 13, 18, 23, 28, 33, 38, 43, 48],
       [ 4,  9, 14, 19, 24, 29, 34, 39, 44, 49]])

In [135]:
# matrix product of the 5x10 transpose with the 10x5 original gives a 5x5 result
np.dot(arr.T,arr)

array([[7125, 7350, 7575, 7800, 8025],
       [7350, 7585, 7820, 8055, 8290],
       [7575, 7820, 8065, 8310, 8555],
       [7800, 8055, 8310, 8565, 8820],
       [8025, 8290, 8555, 8820, 9085]])

In [141]:
# For a 3-D array: the values 0..23 as 2 blocks of 3 rows by 4 columns
arr3d = np.arange(24).reshape(2, 3, 4)
arr3d

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [142]:
# the tuple gives the new axis order: axes 0 and 1 trade places, so (2, 3, 4) becomes (3, 2, 4)
arr3d.transpose((1,0,2))


array([[[ 0,  1,  2,  3],
        [12, 13, 14, 15]],

       [[ 4,  5,  6,  7],
        [16, 17, 18, 19]],

       [[ 8,  9, 10, 11],
        [20, 21, 22, 23]]])

In [143]:
# If you only need to exchange one specific pair of axes, use swapaxes.
# Start from a single-row (1x3) array.
arr = np.array([[1,2,3]])
arr

array([[1, 2, 3]])

In [145]:
# swapping axes 0 and 1 turns the 1x3 row into a 3x1 column
arr.swapaxes(0,1)

array([[1],
       [2],
       [3]])

# universal array functions

For all the available universal array functions, see: https://numpy.org/doc/stable/reference/ufuncs.html#available-ufuncs

In [147]:
# element-wise square root of the integers 0..10
arr = np.arange(11)
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ,
       3.16227766])

In [149]:
# element-wise exponential (e raised to each value) of the integers 0..10
arr = np.arange(11)
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03, 2.20264658e+04])

In [150]:
# 10 random samples from the standard normal distribution.
# NOTE(review): no seed is set in this cell, so the values differ from run to run.
A = np.random.randn(10)
A

array([ 0.92333244,  1.27129392, -0.6644622 ,  0.48263651,  0.0894356 ,
       -1.42430705, -2.07866522,  0.01187386,  0.43111008,  0.79640391])

In [152]:
# a second, independent draw of 10 standard-normal samples
B = np.random.randn(10)
B

array([ 0.00443014, -0.57047027,  0.46457077,  0.1311211 ,  1.48879185,
       -0.17237503,  0.07321806,  0.00912691,  0.02589464,  0.47114248])

In [153]:
# binary ufunc: element-wise sum of A and B
np.add(A,B)

array([ 0.92776258,  0.70082365, -0.19989143,  0.61375761,  1.57822745,
       -1.59668208, -2.00544716,  0.02100077,  0.45700472,  1.26754639])

In [154]:
# element-wise maximum: the larger value of each A/B pair
np.maximum(A,B)

array([ 0.92333244,  1.27129392,  0.46457077,  0.48263651,  1.48879185,
       -0.17237503,  0.07321806,  0.01187386,  0.43111008,  0.79640391])

# Array Processing

In [158]:
# Inputs for the conditional-selection examples: two value arrays ...
A = np.array([1, 2, 3, 4])
B = np.array([100, 200, 300, 400])
# ... and a boolean array that decides, position by position, which one to take from
condition = np.array([True, True, False, False])
# show all three, one per line
print(A, B, condition, sep='\n')

[1 2 3 4]
[100 200 300 400]
[ True  True False False]


In [159]:
# Using a list comprehension: take the value from A where the condition is True,
# otherwise the value from B. The result is a plain Python list.
answer = [(A_val if cond else B_val) for A_val,B_val,cond in zip(A,B,condition)]
answer

[1, 2, 300, 400]

In [161]:
# vectorized alternative: np.where(condition, A, B) picks from A where the
# condition is True and from B elsewhere - array lengths should match
answer2 = np.where(condition,A,B)
answer2

array([  1,   2, 300, 400])

In [163]:
# using random values for this example: a 5x5 array of standard-normal samples
# NOTE(review): no seed is set in this cell, so the values differ from run to run.
from numpy.random import randn
arr = randn(5,5)
arr

array([[-0.18897263, -0.8299726 ,  0.93118502,  0.64495355,  0.39224054],
       [ 1.58776486, -0.02785221,  0.27654452, -1.4206013 , -0.53845575],
       [ 1.15677341,  0.75871433,  0.97320504,  0.77031319, -1.89177345],
       [-0.17512611,  0.35320794, -0.64659239,  0.21771649, -0.55881547],
       [-1.40340707, -0.1051511 , -0.21327674,  0.95040232,  0.43326854]])

In [164]:
# useful for cleaning data: replaces every negative value with 0 and keeps the
# other values; the result is a new array, arr itself is not modified
np.where(arr<0,0,arr)

array([[0.        , 0.        , 0.93118502, 0.64495355, 0.39224054],
       [1.58776486, 0.        , 0.27654452, 0.        , 0.        ],
       [1.15677341, 0.75871433, 0.97320504, 0.77031319, 0.        ],
       [0.        , 0.35320794, 0.        , 0.21771649, 0.        ],
       [0.        , 0.        , 0.        , 0.95040232, 0.43326854]])

In [165]:
# with no axis given, the mean is taken over all elements and returned as one number
arr.mean()

0.057851719089811876

In [166]:
# standard deviation over all elements
arr.std()

0.8501167701658003

In [167]:
# variance over all elements (the square of the standard deviation)
arr.var()

0.7226985229171322

In [168]:
bool_arr = np.array([True,False,True])
# .any() returns True if at least one element is True
bool_arr.any()

True

In [169]:
# .all() returns True only if every element is True
bool_arr.all()

False

In [172]:
# sort in place; for a 2-D array this orders the values within each row
arr.sort()
# display the sorted rows with negative values replaced by 0
# (a bare `arr` before this line would not be shown: only the last expression
# of a cell is displayed)
np.where(arr < 0, 0, arr)

array([[0.        , 0.        , 0.39224054, 0.64495355, 0.93118502],
       [0.        , 0.        , 0.        , 0.27654452, 1.58776486],
       [0.        , 0.75871433, 0.77031319, 0.97320504, 1.15677341],
       [0.        , 0.        , 0.        , 0.21771649, 0.35320794],
       [0.        , 0.        , 0.        , 0.43326854, 0.95040232]])

In [175]:
# an array of strings that contains repeated values
countries = np.array(['us', 'rus', 'china', 'usa', 'rus', 'rus'])
countries

array(['us', 'rus', 'china', 'usa', 'rus', 'rus'], dtype='<U5')

In [176]:
# the distinct values, each listed once and in sorted order
np.unique(countries)

array(['china', 'rus', 'us', 'usa'], dtype='<U5')

In [178]:
# membership test: for each value in the list, is it present in countries?
# The comparison is case-sensitive, so 'US' does not match 'us'.
# np.isin replaces the older np.in1d and gives the same result for 1-D input.
np.isin(['us', 'France', 'US'], countries)

array([ True, False, False])

# Array Output and Input

In [186]:
# two small arrays to write to disk (arr2 is used by the savez example further down)
arr1 = np.arange(1,10)
arr2 = np.arange(10,20)
# saves arr1 on disk in binary format; the .npy extension is added to the name
np.save('my_array',arr1)

In [188]:
# load the array back from the .npy file written by np.save
np.load('my_array.npy')

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [189]:
# save several arrays into one .npz archive; the keyword names (x, y) become
# the keys used to retrieve each array later
np.savez('zipArr.npz', x=arr1, y=arr2)

In [192]:
# loading a .npz file gives an archive object indexed by the names passed to savez
archive_array = np.load('zipArr.npz')
archive_array['x']

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [193]:
# the second array, stored under the key 'y'
archive_array['y']

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [194]:
# Now saving and loading text files: start from a small 2x3 integer array
arr = np.array([[1,2,3],[4,5,6]])

In [195]:
# writes the array as text, separating the elements of each row with a comma
np.savetxt('my_test_text.txt',arr,delimiter=',')

In [196]:
# read the text file back, splitting each line on the same delimiter (this rebinds arr)
arr = np.loadtxt('my_test_text.txt',delimiter = ',')

In [197]:
# the values come back as floats, although the array that was saved held integers
arr

array([[1., 2., 3.],
       [4., 5., 6.]])