# NumPy

NumPy is Python's package for doing math that is more advanced than the basic arithmetic operators `+`, `-`, `*`, `/`.

This includes special functions like cosine, exponential, sqrt, ...

On top of this, we can use NumPy to generate samples from many types of random variables.

NumPy also has a powerful data type, the array, for defining vectors, matrices, and tensors.

With this data type, NumPy also lets us do linear algebra, such as matrix multiplication and solving matrix-vector systems.

In [15]:
# the first step of using numpy is to tell python to use it
import numpy as np

In [16]:
# evaluate a few of numpy's elementary functions on plain scalars
# and print each result on its own line
for result in (np.cos(np.pi), np.sqrt(1.21), np.log(np.exp(5.2))):
    print(result)

-1.0
1.1
5.2


In [17]:
# a flat Python list converts to a 1-D array (a vector)
vec = np.array([1, 2, 3])
# a list of equal-length lists converts to a 2-D array (a matrix), one inner list per row
mat = np.array([
    [1, 2, 1],
    [4, 5, 9],
    [1, 8, 9],
])
# show the vector, the matrix and the matrix transpose, separated by blank lines
print(vec, mat, mat.T, sep='\n\n')

[1 2 3]

[[1 2 1]
 [4 5 9]
 [1 8 9]]

[[1 4 1]
 [2 5 8]
 [1 9 9]]


In [18]:
# np.arange builds arrays directly, without going through a list first
# arange(start, stop): consecutive integers from start up to, but excluding, stop
vec2 = np.arange(0, 15)
# arange(start, stop, step): every step-th value, again excluding stop
vec3 = np.arange(3, 21, 6)
print(vec2, vec3, sep='\n\n')


[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]

[ 3  9 15]


In [19]:

# ten evenly spaced points from 0 to 5, both endpoints included
vec4 = np.linspace(0, 5, num=10)
print(vec4, end='\n\n')
# reshape hands back the same ten values arranged as 5 rows x 2 columns ...
print(vec4.reshape((5, 2)))
vec4_reshaped = vec4.reshape((5, 2))
print(vec4_reshaped)
# ... while vec4 itself keeps its original 1-D shape
print(vec4)

[0.         0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]

[[0.         0.55555556]
 [1.11111111 1.66666667]
 [2.22222222 2.77777778]
 [3.33333333 3.88888889]
 [4.44444444 5.        ]]
[[0.         0.55555556]
 [1.11111111 1.66666667]
 [2.22222222 2.77777778]
 [3.33333333 3.88888889]
 [4.44444444 5.        ]]
[0.         0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]


In [20]:
# constant-filled matrices: the shape is given as (rows, columns)
mat2 = np.zeros((5, 3))   # 5x3, all zeros
mat3 = np.ones((3, 5))    # 3x5, all ones
mat4 = np.eye(5)          # 5x5 identity: ones on the diagonal, zeros elsewhere
print(mat2, mat3, mat4, sep='\n\n')

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [21]:
# arithmetic between arrays of matching size is applied entry by entry
vec5 = np.arange(1, 6)
vec6 = np.arange(3, 8)
elementwise_results = (
    vec5,
    vec6,
    vec5 + vec6,      # entry-wise sum
    vec5 * vec6,      # entry-wise product (not a dot product)
    1 / vec5,         # a scalar combines with every entry
    np.sqrt(vec6),    # numpy functions also act entry by entry
)
for line in elementwise_results:
    print(line)

[1 2 3 4 5]
[3 4 5 6 7]
[ 4  6  8 10 12]
[ 3  8 15 24 35]
[1.         0.5        0.33333333 0.25       0.2       ]
[1.73205081 2.         2.23606798 2.44948974 2.64575131]


In [22]:
# matrix-vector multiplication: the @ operator is the infix form of np.matmul
product = mat @ vec
# show both operands and then the result, separated by blank lines
print(mat, vec, product, sep='\n\n')

[[1 2 1]
 [4 5 9]
 [1 8 9]]

[1 2 3]

[ 8 41 44]


In [23]:
# solve mat @ x = product for x; the printed solution matches vec, the vector product was built from
solution = np.linalg.solve(mat, product)
# the explicit inverse of mat
mat_inverse = np.linalg.inv(mat)
print(solution, mat_inverse, sep='\n\n')

[1. 2. 3.]

[[ 0.5         0.18518519 -0.24074074]
 [ 0.5        -0.14814815  0.09259259]
 [-0.5         0.11111111  0.05555556]]


In [24]:
# np.unique returns the distinct values of an array in sorted order
# note: the integer 6 is stored as the string '6' because every other entry is a string,
# and 'Red' / 'red' count as different values (the comparison is case-sensitive)
colours = ['blue', 'red', 'orange', 'purple', 'purple', 'orange', 'Red', 6]
vec7 = np.array(colours)
print(vec7, np.unique(vec7), sep='\n')

['blue' 'red' 'orange' 'purple' 'purple' 'orange' 'Red' '6']
['6' 'Red' 'blue' 'orange' 'purple' 'red']


In [25]:
# numpy can draw samples of random variables straight into arrays
# NOTE(review): no seed is set in the cells shown, so the values change on every run
rand_mat = np.random.rand(5, 5)      # 5x5 draws, uniform on [0, 1)
rand_mat2 = np.random.randn(10, 5)   # 10x5 draws, standard normal
print(rand_mat, rand_mat2, sep='\n\n')

[[0.64861108 0.23385747 0.57303187 0.70819923 0.78492511]
 [0.44359969 0.36170667 0.83773448 0.98564671 0.75986176]
 [0.15847906 0.477543   0.98934954 0.06461887 0.17425507]
 [0.81302801 0.38060956 0.0747944  0.91470987 0.41776917]
 [0.60691725 0.65545265 0.01085822 0.9209951  0.0899208 ]]

[[-3.55702317  2.12140359  0.70379071  0.73836319 -0.37078681]
 [-0.47206693 -0.58162134  0.9569869  -0.49577682  0.1022353 ]
 [-0.16856326  0.81201137  0.77629135 -2.07010476 -0.30531091]
 [ 0.07129996  0.6248255  -0.85902862 -1.8831032   0.00388922]
 [-0.09893375 -2.00467035 -0.0378239   0.92710741  0.10823116]
 [ 0.1318712   0.64830528 -0.31670936  0.16661694 -1.45385978]
 [ 0.42632643  0.44154802  0.61334278 -0.9025564   0.23036323]
 [ 0.1217577  -0.7056504   0.62577094  0.74957902 -0.11499762]
 [ 0.78418689 -1.62882116  0.90893389 -1.66416216  1.25697547]
 [-1.24687363  0.48292205  0.37585276  0.18455466  0.21320537]]


In [26]:
# summary statistics taken over every entry of each array (no axis is given)
overall_mean = rand_mat.mean()    # average of the uniform samples
overall_std = rand_mat2.std()     # standard deviation of the normal samples
print(overall_mean)
print(overall_std)

0.5234589860148116
1.0111047680171688


In [27]:
# smallest uniform sample and largest normal sample, each taken over the whole array
print(rand_mat.min(), rand_mat2.max(), sep='\n')

0.010858223961757174
2.1214035877333357


In [28]:
# break here for next video!

In [29]:
# a single entry of a vector is read with square brackets
# indices start at 0, so [6] is the seventh entry
rand_vec = np.random.randn(19)
print(rand_vec, rand_vec[6], sep='\n')

[ 0.77732029  1.44905503  0.69637575 -0.25063898 -1.32642624  0.44327151
  1.37999873  0.88152016 -0.50354578  0.58297045 -1.85427334 -1.75974606
 -2.22477832 -1.26717065 -0.80012003  0.62659914  0.29401351 -1.29365003
  0.94291771]
1.3799987272226917


In [30]:
# we can access multiple entries at once using a start:stop slice
# the stop index is excluded, so 4:9 selects the five entries at indices 4 through 8
print(rand_vec[4:9])

[-1.32642624  0.44327151  1.37999873  0.88152016 -0.50354578]


In [31]:
# an array of integer positions picks out non-consecutive entries in one go
# np.arange(0,15,3) gives every third index from 0 up to (not including) 15
every_third = np.arange(0, 15, 3)
print(every_third)
print(rand_vec[every_third])

[ 0  3  6  9 12]
[ 0.77732029 -0.25063898  1.37999873  0.58297045 -2.22477832]


In [32]:
# matrices take one index per dimension: row first, then column (both zero-based)
row, col = 1, 2
print(rand_mat)
print(rand_mat[row][col])    # chained form: take row 1, then entry 2 of that row
print(rand_mat[row, col])    # single bracket with both indices; prints the same entry


[[0.64861108 0.23385747 0.57303187 0.70819923 0.78492511]
 [0.44359969 0.36170667 0.83773448 0.98564671 0.75986176]
 [0.15847906 0.477543   0.98934954 0.06461887 0.17425507]
 [0.81302801 0.38060956 0.0747944  0.91470987 0.41776917]
 [0.60691725 0.65545265 0.01085822 0.9209951  0.0899208 ]]
0.8377344782690709
0.8377344782690709


In [33]:
# slices work per dimension too: rows 0-1 and columns 1-2 (stop indices are excluded)
print(rand_mat[0:2,1:3])

[[0.23385747 0.57303187]
 [0.36170667 0.83773448]]


In [34]:
# entries can be overwritten through the same indexing syntax
print(rand_vec, end='\n\n')
# a single scalar is written into every selected position
rand_vec[3:5] = 4
print(rand_vec, end='\n\n')
# a sequence of matching length is written entry by entry
rand_vec[3:5] = [1, 2]
print(rand_vec)

[ 0.77732029  1.44905503  0.69637575 -0.25063898 -1.32642624  0.44327151
  1.37999873  0.88152016 -0.50354578  0.58297045 -1.85427334 -1.75974606
 -2.22477832 -1.26717065 -0.80012003  0.62659914  0.29401351 -1.29365003
  0.94291771]

[ 0.77732029  1.44905503  0.69637575  4.          4.          0.44327151
  1.37999873  0.88152016 -0.50354578  0.58297045 -1.85427334 -1.75974606
 -2.22477832 -1.26717065 -0.80012003  0.62659914  0.29401351 -1.29365003
  0.94291771]

[ 0.77732029  1.44905503  0.69637575  1.          2.          0.44327151
  1.37999873  0.88152016 -0.50354578  0.58297045 -1.85427334 -1.75974606
 -2.22477832 -1.26717065 -0.80012003  0.62659914  0.29401351 -1.29365003
  0.94291771]


In [35]:
# show the matrix, zero out the 2x2 block at rows 1-2 / columns 3-4, then show it again
print(rand_mat, end='\n\n')
rand_mat[1:3, 3:5] = 0
print(rand_mat)

[[0.64861108 0.23385747 0.57303187 0.70819923 0.78492511]
 [0.44359969 0.36170667 0.83773448 0.98564671 0.75986176]
 [0.15847906 0.477543   0.98934954 0.06461887 0.17425507]
 [0.81302801 0.38060956 0.0747944  0.91470987 0.41776917]
 [0.60691725 0.65545265 0.01085822 0.9209951  0.0899208 ]]

[[0.64861108 0.23385747 0.57303187 0.70819923 0.78492511]
 [0.44359969 0.36170667 0.83773448 0.         0.        ]
 [0.15847906 0.477543   0.98934954 0.         0.        ]
 [0.81302801 0.38060956 0.0747944  0.91470987 0.41776917]
 [0.60691725 0.65545265 0.01085822 0.9209951  0.0899208 ]]


In [36]:
# take the top-left 2x3 corner of rand_mat
sub_mat = rand_mat[:2, :3]
print(sub_mat)
# overwrite every entry of the slice; the slice shares its data with rand_mat,
# so rand_mat changes as well (visible when rand_mat is printed in the next cell)
sub_mat[...] = 3
print(sub_mat)


[[0.64861108 0.23385747 0.57303187]
 [0.44359969 0.36170667 0.83773448]]
[[3. 3. 3.]
 [3. 3. 3.]]


In [37]:
# rand_mat now holds 3s in rows 0-1, columns 0-2: writing to sub_mat changed the original matrix
print(rand_mat)

[[3.         3.         3.         0.70819923 0.78492511]
 [3.         3.         3.         0.         0.        ]
 [0.15847906 0.477543   0.98934954 0.         0.        ]
 [0.81302801 0.38060956 0.0747944  0.91470987 0.41776917]
 [0.60691725 0.65545265 0.01085822 0.9209951  0.0899208 ]]


In [38]:
# .copy() gives the slice its own data, so edits no longer reach rand_mat
sub_mat2 = rand_mat[:2, :3].copy()
sub_mat2.fill(99)
# the copy shows 99s while rand_mat is unchanged
print(sub_mat2, rand_mat, sep='\n')


[[99. 99. 99.]
 [99. 99. 99.]]
[[3.         3.         3.         0.70819923 0.78492511]
 [3.         3.         3.         0.         0.        ]
 [0.15847906 0.477543   0.98934954 0.         0.        ]
 [0.81302801 0.38060956 0.0747944  0.91470987 0.41776917]
 [0.60691725 0.65545265 0.01085822 0.9209951  0.0899208 ]]


In [39]:
# break here for next video

In [40]:
# entries can also be selected with a boolean (True/False) array
rand_vec = np.random.randn(15)
# comparing an array with a scalar gives one True/False per entry
is_positive = rand_vec > 0
print(rand_vec)
print(is_positive)
# indexing with that mask keeps only the entries marked True
print(rand_vec[is_positive])

[-0.88562467  0.5148713  -0.47112207 -0.20372057  1.33379937  0.15290067
  0.6791536  -1.32910634 -0.06472117  1.78249083  0.2110884   0.41571271
 -0.06754128  0.37456064 -0.43511237]
[False  True False False  True  True  True False False  True  True  True
 False  True False]
[0.5148713  1.33379937 0.15290067 0.6791536  1.78249083 0.2110884
 0.41571271 0.37456064]


In [41]:
# the same masking works on a matrix; the selected entries come back as a flat 1-D array
positive_mask = rand_mat2 > 0
print(rand_mat2)
print(rand_mat2[positive_mask])

[[-3.55702317  2.12140359  0.70379071  0.73836319 -0.37078681]
 [-0.47206693 -0.58162134  0.9569869  -0.49577682  0.1022353 ]
 [-0.16856326  0.81201137  0.77629135 -2.07010476 -0.30531091]
 [ 0.07129996  0.6248255  -0.85902862 -1.8831032   0.00388922]
 [-0.09893375 -2.00467035 -0.0378239   0.92710741  0.10823116]
 [ 0.1318712   0.64830528 -0.31670936  0.16661694 -1.45385978]
 [ 0.42632643  0.44154802  0.61334278 -0.9025564   0.23036323]
 [ 0.1217577  -0.7056504   0.62577094  0.74957902 -0.11499762]
 [ 0.78418689 -1.62882116  0.90893389 -1.66416216  1.25697547]
 [-1.24687363  0.48292205  0.37585276  0.18455466  0.21320537]]
[2.12140359 0.70379071 0.73836319 0.9569869  0.1022353  0.81201137
 0.77629135 0.07129996 0.6248255  0.00388922 0.92710741 0.10823116
 0.1318712  0.64830528 0.16661694 0.42632643 0.44154802 0.61334278
 0.23036323 0.1217577  0.62577094 0.74957902 0.78418689 0.90893389
 1.25697547 0.48292205 0.37585276 0.18455466 0.21320537]


In [42]:

# a boolean mask can also be used to overwrite entries:
# every value greater than 0.5 is replaced by -5
print(rand_vec, end='\n\n')
above_half = rand_vec > 0.5
rand_vec[above_half] = -5
print(rand_vec)

[-0.88562467  0.5148713  -0.47112207 -0.20372057  1.33379937  0.15290067
  0.6791536  -1.32910634 -0.06472117  1.78249083  0.2110884   0.41571271
 -0.06754128  0.37456064 -0.43511237]

[-0.88562467 -5.         -0.47112207 -0.20372057 -5.          0.15290067
 -5.         -1.32910634 -0.06472117 -5.          0.2110884   0.41571271
 -0.06754128  0.37456064 -0.43511237]


In [43]:
# let's save some arrays on the disk for use later!
# np.save writes one array to a binary file; the name given here has no extension,
# and the file is later loaded as 'saved_file_name.npy'
np.save('saved_file_name',rand_mat2)


In [44]:
# np.savez stores several arrays in a single file; each keyword name (rand_mat, rand_mat2)
# becomes the key used to look that array up after loading
np.savez('zipped_file_name',rand_mat=rand_mat,rand_mat2=rand_mat2)

In [45]:
# read both files back from disk
# the .npy file comes back as the array itself
loaded_vec = np.load('saved_file_name.npy')
# the .npz file comes back as a container object (see its printed form below);
# the arrays inside are looked up by the names used when saving
# NOTE(review): numpy's docs say an NpzFile should be closed when no longer needed - confirm
loaded_zip = np.load('zipped_file_name.npz')
print(loaded_vec, loaded_zip, sep='\n\n')

[[-3.55702317  2.12140359  0.70379071  0.73836319 -0.37078681]
 [-0.47206693 -0.58162134  0.9569869  -0.49577682  0.1022353 ]
 [-0.16856326  0.81201137  0.77629135 -2.07010476 -0.30531091]
 [ 0.07129996  0.6248255  -0.85902862 -1.8831032   0.00388922]
 [-0.09893375 -2.00467035 -0.0378239   0.92710741  0.10823116]
 [ 0.1318712   0.64830528 -0.31670936  0.16661694 -1.45385978]
 [ 0.42632643  0.44154802  0.61334278 -0.9025564   0.23036323]
 [ 0.1217577  -0.7056504   0.62577094  0.74957902 -0.11499762]
 [ 0.78418689 -1.62882116  0.90893389 -1.66416216  1.25697547]
 [-1.24687363  0.48292205  0.37585276  0.18455466  0.21320537]]

<numpy.lib.npyio.NpzFile object at 0x7ff67241e340>


In [46]:
# arrays inside the loaded .npz container are retrieved by key, like a dictionary
print(loaded_zip['rand_mat'], loaded_zip['rand_mat2'], sep='\n\n')
# a retrieved array can be bound to a new name like any other value
new_array = loaded_zip['rand_mat']
print(new_array)

[[3.         3.         3.         0.70819923 0.78492511]
 [3.         3.         3.         0.         0.        ]
 [0.15847906 0.477543   0.98934954 0.         0.        ]
 [0.81302801 0.38060956 0.0747944  0.91470987 0.41776917]
 [0.60691725 0.65545265 0.01085822 0.9209951  0.0899208 ]]

[[-3.55702317  2.12140359  0.70379071  0.73836319 -0.37078681]
 [-0.47206693 -0.58162134  0.9569869  -0.49577682  0.1022353 ]
 [-0.16856326  0.81201137  0.77629135 -2.07010476 -0.30531091]
 [ 0.07129996  0.6248255  -0.85902862 -1.8831032   0.00388922]
 [-0.09893375 -2.00467035 -0.0378239   0.92710741  0.10823116]
 [ 0.1318712   0.64830528 -0.31670936  0.16661694 -1.45385978]
 [ 0.42632643  0.44154802  0.61334278 -0.9025564   0.23036323]
 [ 0.1217577  -0.7056504   0.62577094  0.74957902 -0.11499762]
 [ 0.78418689 -1.62882116  0.90893389 -1.66416216  1.25697547]
 [-1.24687363  0.48292205  0.37585276  0.18455466  0.21320537]]
[[3.         3.         3.         0.70819923 0.78492511]
 [3.         3.    

In [47]:
# arrays can also be written to / read from plain text, one array per file
# the same delimiter has to be given when writing and when reading
np.savetxt('text_file_name.txt', rand_mat, delimiter=',')
rand_mat_txt = np.loadtxt('text_file_name.txt', delimiter=',')
# original first, then the version read back from the text file
print(rand_mat, rand_mat_txt, sep='\n\n')

[[3.         3.         3.         0.70819923 0.78492511]
 [3.         3.         3.         0.         0.        ]
 [0.15847906 0.477543   0.98934954 0.         0.        ]
 [0.81302801 0.38060956 0.0747944  0.91470987 0.41776917]
 [0.60691725 0.65545265 0.01085822 0.9209951  0.0899208 ]]

[[3.         3.         3.         0.70819923 0.78492511]
 [3.         3.         3.         0.         0.        ]
 [0.15847906 0.477543   0.98934954 0.         0.        ]
 [0.81302801 0.38060956 0.0747944  0.91470987 0.41776917]
 [0.60691725 0.65545265 0.01085822 0.9209951  0.0899208 ]]


In [48]:
# multiples of 6 starting at 0; the stop value 22 is excluded, so the last entry is 18
print(np.arange(0,22,6))

[ 0  6 12 18]


In [49]:
# despite its name, test_array is a plain Python list (mixing ints and floats), not a numpy array
test_array = [10, 11.5, 12, 13.5, 14,15]

In [50]:
# a slice always returns a list, even when it selects a single element ([12] rather than 12)
test_array[2:3]

[12]

In [51]:
# NOTE: this rebinds the name mat, which earlier in the notebook held a 3x3 matrix, to a 1-D array
mat = np.array([1,2,3,4,5])

In [52]:
# a bare expression on the last line of a cell displays its value
mat

array([1, 2, 3, 4, 5])

In [53]:
# plain assignment does not copy: new_mat is a second name for the same array object as mat
new_mat = mat

In [55]:
# write to index 2 through the new_mat name
new_mat[2] = 9

In [56]:
# mat shows the 9 at index 2 as well, because the write through new_mat hit the same array
mat

array([1, 2, 9, 4, 5])

In [61]:
import pandas as pd
# two short Series of letters; each gets the default integer index 0, 1
first_letters, second_letters = ['a', 'b'], ['c', 'd']
s1 = pd.Series(first_letters)
s2 = pd.Series(second_letters)

In [62]:
# stack the two Series end to end; the original index labels are kept, so 0 and 1 each appear twice
pd.concat([s1, s2])

0    a
1    b
0    c
1    d
dtype: object

In [63]:
# axis=0 stacks along the rows; the result is the same as the call without an axis argument
pd.concat([s1, s2], axis=0)

0    a
1    b
0    c
1    d
dtype: object

In [64]:
# axis=1 places the Series side by side instead, giving a table with columns 0 and 1
pd.concat([s1, s2], axis=1)

Unnamed: 0,0,1
0,a,c
1,b,d


In [65]:
# pd.concat is a function, so it has to be called with parentheses: pd.concat([s1, s2]).
# Subscripting it with square brackets raises a TypeError; the error is caught and its
# message printed so the mistake stays on display without halting a top-to-bottom run.
try:
    pd.concat[s1, s2]
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: 'function' object is not subscriptable

In [86]:
# vectorised clean-up: mark every negative entry, then overwrite those entries with 0 in place
negative_entries = loaded_vec < 0
loaded_vec[negative_entries] = 0
loaded_vec

array([[0.        , 2.12140359, 0.70379071, 0.73836319, 0.        ],
       [0.        , 0.        , 0.9569869 , 0.        , 0.1022353 ],
       [0.        , 0.81201137, 0.77629135, 0.        , 0.        ],
       [0.07129996, 0.6248255 , 0.        , 0.        , 0.00388922],
       [0.        , 0.        , 0.        , 0.92710741, 0.10823116],
       [0.1318712 , 0.64830528, 0.        , 0.16661694, 0.        ],
       [0.42632643, 0.44154802, 0.61334278, 0.        , 0.23036323],
       [0.1217577 , 0.        , 0.62577094, 0.74957902, 0.        ],
       [0.78418689, 0.        , 0.90893389, 0.        , 1.25697547],
       [0.        , 0.48292205, 0.37585276, 0.18455466, 0.21320537]])

In [82]:
# Loop version of `loaded_vec[loaded_vec < 0] = 0`, one row at a time.
# len() returns an int, which cannot be iterated over directly, so it is wrapped in range().
# loaded_vec is 2-D, so loaded_vec[i] is a whole row: a row cannot be tested with a plain
# `if row < 0`, so its negative entries are selected with a mask instead.
for i in range(len(loaded_vec)):
    row = loaded_vec[i]    # indexing a row gives a view, so writes land in loaded_vec
    row[row < 0] = 0

In [83]:
# Loop version of `loaded_vec[loaded_vec < 0] = 0`, one entry at a time.
# np.ndindex yields every valid index tuple for the array's shape, which avoids both
# problems of a hand-written range: running one past the last row (the `+1`), and
# comparing a whole row with 0 inside `if`, which has no single truth value.
for idx in np.ndindex(loaded_vec.shape):
    if loaded_vec[idx] < 0:    # loaded_vec[idx] is a single number here
        loaded_vec[idx] = 0

In [69]:
# display the loaded array
# NOTE(review): the output recorded below still contains negative values, i.e. it was produced
# before the clipping cell ran (execution count 69 vs 86) - re-run the notebook top to bottom
loaded_vec

array([[-3.55702317,  2.12140359,  0.70379071,  0.73836319, -0.37078681],
       [-0.47206693, -0.58162134,  0.9569869 , -0.49577682,  0.1022353 ],
       [-0.16856326,  0.81201137,  0.77629135, -2.07010476, -0.30531091],
       [ 0.07129996,  0.6248255 , -0.85902862, -1.8831032 ,  0.00388922],
       [-0.09893375, -2.00467035, -0.0378239 ,  0.92710741,  0.10823116],
       [ 0.1318712 ,  0.64830528, -0.31670936,  0.16661694, -1.45385978],
       [ 0.42632643,  0.44154802,  0.61334278, -0.9025564 ,  0.23036323],
       [ 0.1217577 , -0.7056504 ,  0.62577094,  0.74957902, -0.11499762],
       [ 0.78418689, -1.62882116,  0.90893389, -1.66416216,  1.25697547],
       [-1.24687363,  0.48292205,  0.37585276,  0.18455466,  0.21320537]])

In [71]:
# column name -> list of values; every value, including the IDs and profits, is a string
data = {
    'prodID': ['101', '102', '103', '104', '104'],
    'prodname': ['X', 'Y', 'Z', 'X', 'W'],
    'profit': ['2738', '2727', '3497', '7347', '3743'],
}
# label the five rows with words instead of the default 0..4
row_labels = ['one', 'two', 'three', 'four', 'five']
df = pd.DataFrame(data, index=row_labels)

In [72]:
# label-based lookup: row labelled 'four', column 'prodname'
print(df.loc['four','prodname'])

X


In [73]:
# select the 'prodname' column, then take its entry at position 3 (zero-based) with .iloc
# a bare [3] on this Series is ambiguous because its index holds string labels, not integers;
# .iloc states explicitly that 3 is a position
print(df['prodname'].iloc[3])

X


In [75]:
# position-based lookup: row 3 and column 1 (both zero-based), i.e. row 'four', column 'prodname'
print(df.iloc[3,1])

X


In [76]:
import numpy as RANGE

In [77]:
# RANGE is just another alias for the numpy module (see the import in the previous cell),
# so this is numpy's arange; the stop value 0 is excluded from the result
# NOTE(review): for non-integer steps numpy's docs recommend np.linspace, since rounding can
# change the number of elements arange returns - confirm whether that matters here
RANGE.arange(-0.8,0,0.2)

array([-0.8, -0.6, -0.4, -0.2])

In [80]:
import pandas as pd

# Plain assignment does not copy a DataFrame: df2 becomes a second name for the
# object that df1 already refers to.
df1 = pd.DataFrame([1, 2, 3, 4, 5])
df2 = df1
# id() returns an integer identifying the object; the two printed values match.
A = id(df1)
B = id(df2)
print(A)
print(B)
# `is` is the direct identity test and gives the same answer as comparing the ids above
if df1 is df2:
    print("Same Location")
else:
    print("Different location")

140696476025472
140696476025472
Same Location
