## NumPy Basics

In [1]:
!conda list numpy

# packages in environment at /Users/deepdoshi/anaconda3:
#
numpy                     1.13.3           py36h2cdce51_0  
numpydoc                  0.7.0            py36he54d08e_0  


#### NumPy vs. Python execution time comparison

In [3]:
import time
import numpy as np

In [4]:
# Test data for the timing comparison: the pure-Python and NumPy mean cells below both average this array.
# np.random.random returns float64 values, so 100 million of them occupy roughly 800 MB of memory.
x = np.random.random(100000000)  # create an array of 100 million random numbers

In [7]:
# Find the mean using plain Python: the built-in sum() and len() operate on the array element by element
# perf_counter() is a monotonic, high-resolution clock intended for measuring elapsed time;
# time.time() is a wall clock that can jump if the system time is adjusted.
start = time.perf_counter()
sum(x)/len(x)   # the result is discarded on purpose; only the elapsed time is of interest
print(time.perf_counter() - start)

8.12062406539917


In [8]:
# Find the mean using NumPy's np.mean on the same array
# perf_counter() is a monotonic, high-resolution clock intended for measuring elapsed time;
# time.time() is a wall clock that can jump if the system time is adjusted.
start = time.perf_counter()
np.mean(x)   # the result is discarded on purpose; only the elapsed time is of interest
print(time.perf_counter() - start)

0.06761312484741211


#### NumPy Arrays

In [9]:
# Create a 1D (rank 1) ndarray from a Python list that contains only integers
x = np.array([1, 2, 3, 4, 5])

In [30]:
# Show the array, its Python class, and its dtype (the type of the elements it holds), one per line
print(x, type(x), x.dtype, sep='\n')

[1 2 3 4 5]
<class 'numpy.ndarray'>
int64


##### Shape of an array is the size along each of its dimensions. 

In [12]:
x.shape   # tuple with one positive integer per dimension, giving the size along that dimension

(5,)

In [14]:
# Build a two-dimensional (3 x 3) ndarray from a nested list: each inner list becomes one row
y = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print(y)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [15]:
# Shape of the two-dimensional array y: (number of rows, number of columns)
y.shape

(3, 3)

In [16]:
y.size     # total number of elements in the array (for y: 3 rows x 3 columns)

9

In [17]:
# An ndarray can hold strings as well as numbers
z = np.array(('Hello', 'World'))
print(z)

['Hello' 'World']


In [19]:
# Summarise z: its shape, its Python class, and the dtype of its elements
for label, value in (
    ('shape: ', z.shape),
    ('type: ', type(z)),
    ('dtype: ', z.dtype),
):
    print(label, value)

shape:  (2,)
type:  <class 'numpy.ndarray'>
dtype:  <U5


In [21]:
# Mixing integers with a string: NumPy stores every element as a string
w = np.array((1, 2, 'World'))
print(w)

# Summarise w: its shape, its Python class, and the dtype of its elements
for label, value in (
    ('shape: ', w.shape),
    ('type: ', type(w)),
    ('dtype: ', w.dtype),
):
    print(label, value)

['1' '2' 'World']
shape:  (3,)
type:  <class 'numpy.ndarray'>
dtype:  <U21


In [23]:
# Mixing integers with a float: every element is stored as a float
v = np.array((1, 2, 3.5))
print(v, v.dtype)

[ 1.   2.   3.5] float64


In [32]:
# Rank 1 ndarray built from integers only
a = np.array((1, 2, 3))

# Rank 1 ndarray built from floats only
b = np.array((1.0, 2.0, 3.0))

# Rank 1 ndarray built from a mix of integers and floats.
# NumPy "upcasts" here: the whole array becomes float64 even though some inputs are integers.
c = np.array((1, 2.5, 4))

# NumPy picks the dtype automatically from the types of the elements; report what it chose
for label, arr in (
    ('The elements in a are of type:', a),
    ('The elements in b are of type:', b),
    ('The elements in c are of type:', c),
):
    print(label, arr.dtype)

The elements in a are of type: int64
The elements in b are of type: float64
The elements in c are of type: float64


In [24]:
# Request a dtype explicitly instead of letting NumPy infer it:
# the float inputs are converted to 64-bit integers, which drops their fractional part
s = np.array([3.5, 2.4, 5.4], dtype='int64')
print(s)
print('dtype: ', s.dtype)

[3 2 5]
dtype:  int64


In [34]:
# Build a rank 1 array holding the digits 1-9 followed by 0
t = np.array((1, 2, 3, 4, 5, 6, 7, 8, 9, 0))
print(t, t.dtype)

# Write the array to the current directory; it is read back later as 'my_array.npy'
np.save(file='my_array', arr=t)

[1 2 3 4 5 6 7 8 9 0] int64


In [33]:
# Load a saved array back from the current directory.
# 'my_array.npy' is the file produced by the np.save('my_array', t) call in this notebook,
# so that cell must have been run first.
r = np.load('my_array.npy')
print(r, r.dtype)

[1 2 3 4 5 6 7 8 9 0] int64


#### NumPy Functions

In [37]:
# Allocate a 3 x 4 ndarray in which every element is zero
x = np.zeros(shape=(3, 4))

# Show the array, set off by a blank line above and below
print()
print('x = \n', x)
print()

# Describe x: its shape, its Python class, and the dtype of its elements
for label, value in (
    ('x has dimension: ', x.shape),
    ('x is an object of type: ', type(x)),
    ('The element in x are of type: ', x.dtype),
):
    print(label, value)


x = 
 [[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]

x has dimension:  (3, 4)
x is an object of type:  <class 'numpy.ndarray'>
The element in x are of type:  float64


In [39]:
# Allocate a 3 x 2 ndarray in which every element is one
y = np.ones(shape=(3, 2))

# Show the array, set off by a blank line above and below
print()
print('y = \n', y)
print()

# Describe y: its shape, its Python class, and the dtype of its elements
for label, value in (
    ("y has dimension: ", y.shape),
    ("y is an object of type: ", type(y)),
    ("All the elements in y are of the type: ", y.dtype),
):
    print(label, value)


y = 
 [[ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]]

y has dimension:  (3, 2)
y is an object of type:  <class 'numpy.ndarray'>
All the elements in y are of the type:  float64


In [40]:
# Allocate a 2 x 3 ndarray in which every element is the constant 5
# Signature used here: np.full(shape, fill_value)
z = np.full(shape=(2, 3), fill_value=5)

# Show the array, set off by a blank line above and below
print()
print('z = \n', z)
print()

# Describe z: its shape, its Python class, and the dtype of its elements
for label, value in (
    ("z has the dimension: ", z.shape),
    ("z is an object of type: ", type(z)),
    ("All the elements in z are of the type: ", z.dtype),
):
    print(label, value)


z = 
 [[5 5 5]
 [5 5 5]]

z has the dimension:  (2, 3)
z is an object of type:  <class 'numpy.ndarray'>
All the elements in z are of the type:  int64


##### An identity matrix is a square matrix that has 1s on its main diagonal and 0s everywhere else

In [41]:
# Build the 5 x 5 identity matrix
i = np.eye(N=5)

print('i = \n', i)
print()

# Describe i: its shape, its Python class, and the dtype of its elements
for label, value in (
    ("i has dimension: ", i.shape),
    ("i is an object of the type: ", type(i)),
    ("All the elements in i are of the type: ", i.dtype),
):
    print(label, value)

i = 
 [[ 1.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  1.]]

i has dimension:  (5, 5)
i is an object of the type:  <class 'numpy.ndarray'>
All the elements in i are of the type:  float64


##### A diagonal matrix is a square matrix whose only nonzero elements lie on its main diagonal

In [42]:
# Build a 4 x 4 diagonal matrix whose main diagonal holds 1, 2, 3, 4
d = np.diag((1, 2, 3, 4))

print('d = \n', d)
print()

# Describe d: its shape, its Python class, and the dtype of its elements
for label, value in (
    ("d has dimension: ", d.shape),
    ("d is an object of the type: ", type(d)),
    ("All the elements in d are of the type: ", d.dtype),
):
    print(label, value)

d = 
 [[1 0 0 0]
 [0 2 0 0]
 [0 0 3 0]
 [0 0 0 4]]

d has dimension:  (4, 4)
d is an object of the type:  <class 'numpy.ndarray'>
All the elements in d are of the type:  int64


In [44]:
# Build a rank 1 ndarray of consecutive integers starting at 0 and stopping before 10
s = np.arange(0, 10)

print("s = ", s)
print()

# Describe s: its shape, its Python class, and the dtype of its elements
for label, value in (
    ("s has dimensions:", s.shape),
    ("s is an object of the type: ", type(s)),
    ("All the elements in s are of the type: ", s.dtype),
):
    print(label, value)

s =  [0 1 2 3 4 5 6 7 8 9]

s has dimensions: (10,)
s is an object of the type:  <class 'numpy.ndarray'>
All the elements in s are of the type:  int64


In [46]:
# With two arguments the range is half-open: start (3) is included, stop (10) is not
s1 = np.arange(start=3, stop=10)

print("s1 = ", s1)
print()

# Describe s1: its shape, its Python class, and the dtype of its elements
for label, value in (
    ("s1 has dimensions: ", s1.shape),
    ("s1 is an object of the type: ", type(s1)),
    ("All the elements in s1 are of the type: ", s1.dtype),
):
    print(label, value)

s1 =  [3 4 5 6 7 8 9]

s1 has dimensions:  (7,)
s1 is an object of the type:  <class 'numpy.ndarray'>
All the elements in s1 are of the type:  int64


In [47]:
# A third argument sets the step size: start at 2, advance by 3, stop before 20
s2 = np.arange(start=2, stop=20, step=3)

print("s2 = ", s2)
print()

# Describe s2: its shape, its Python class, and the dtype of its elements
for label, value in (
    ("s2 has dimesions: ", s2.shape),
    ("s2 is an object of the type: ", type(s2)),
    ("All the elements in s2 are of the type: ", s2.dtype),
):
    print(label, value)

s2 =  [ 2  5  8 11 14 17]

s2 has dimesions:  (6,)
s2 is an object of the type:  <class 'numpy.ndarray'>
All the elements in s2 are of the type:  int64


In [49]:
# np.arange() is driven by a step size, which makes it a poor fit for floating-point steps.
# np.linspace() is driven by the interval instead: you say how many points to place across it.

# By default both start and stop are included (a closed interval);
# here 10 points are spread evenly from 2 to 10.
l = np.linspace(start=2, stop=10, num=10)

print("l =", l)
print()

# Describe l: its shape, its Python class, and the dtype of its elements
for label, value in (
    ("l has dimensions: ", l.shape),
    ("l is an object of the type: ", type(l)),
    ("All the elements in l are of the type: ", l.dtype),
):
    print(label, value)

# To exclude the endpoint, pass endpoint=False.
# The spacing is derived from the interval, so the values themselves change, not just the last one.
l1 = np.linspace(start=2, stop=10, num=10, endpoint=False)
print("l1 =", l1)
print()
print("l1 has dimensions: ", l1.shape)

l = [  2.           2.88888889   3.77777778   4.66666667   5.55555556
   6.44444444   7.33333333   8.22222222   9.11111111  10.        ]

l has dimensions:  (10,)
l is an object of the type:  <class 'numpy.ndarray'>
All the elements in l are of the type:  float64
l1 = [ 2.   2.8  3.6  4.4  5.2  6.   6.8  7.6  8.4  9.2]

l1 has dimensions:  (10,)


In [52]:
# Build a rank 2 ndarray from built-in functions: start with a flat range, then reshape it

# Step 1: a rank 1 array holding the integers 0..19
r = np.arange(0, 20)
print('Original r =', r)

print()
print("Original r has dimensions: ", r.shape)
print()

# Step 2: rearrange the same 20 elements into 4 rows of 5 columns
r = r.reshape((4, 5))
print("Reshaped r =\n", r)
print()
print("Reshaped r has dimensions: ", r.shape)

Original r = [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]

Original r has dimensions:  (20,)

Reshaped r =
 [[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]

Reshaped r has dimensions:  (4, 5)


In [54]:
# Method chaining: np.arange() returns an ndarray and reshape() is a method of that ndarray,
# so the two calls can be joined with a dot instead of going through a temporary variable
r1 = np.arange(0, 20).reshape((4, 5))

# Show the array, set off by a blank line above and below, then report its shape
print()
print("r1 =\n", r1)
print()
print("r1 has dimension: ", r1.shape)


r1 =
 [[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]

r1 has dimension:  (4, 5)


##### np.random.random(shape) creates an array of the given shape filled with random floats from the half-open interval [0.0, 1.0)

In [57]:
# Draw a 3 x 3 array of random floats
rf = np.random.random(size=(3, 3))

print('rf =\n', rf)
print()

# Describe rf: its shape, its Python class, and the dtype of its elements
for label, value in (
    ("rf has dimensions: ", rf.shape),
    ("rf is an object of the type: ", type(rf)),
    ("All the elements in rf is of the type: ", rf.dtype),
):
    print(label, value)

rf =
 [[ 0.76834511  0.83227786  0.10182857]
 [ 0.15486577  0.0521477   0.73776605]
 [ 0.20842137  0.88403863  0.7315615 ]]

rf has dimensions:  (3, 3)
rf is an object of the type:  <class 'numpy.ndarray'>
All the elements in rf is of the type:  float64


###### np.random.randint(start, stop, size=shape) creates an array of random integers drawn from the half-open interval [start, stop)

In [66]:
# Draw a 4 x 5 array of random integers from the interval [4, 10): 4 is possible, 10 is not
rf1 = np.random.randint(low=4, high=10, size=(4, 5))

print('rf1 =\n', rf1)
print()

# Describe rf1: its shape, its Python class, and the dtype of its elements
for label, value in (
    ("rf1 has dimensions: ", rf1.shape),
    ("rf1 is an object of the type: ", type(rf1)),
    ("All the elements in rf1 is of the type: ", rf1.dtype),
):
    print(label, value)

rf1 =
 [[7 8 7 6 9]
 [9 4 6 7 4]
 [9 7 9 5 9]
 [8 5 5 5 6]]

rf1 has dimensions:  (4, 5)
rf1 is an object of the type:  <class 'numpy.ndarray'>
All the elements in rf1 is of the type:  int64


###### np.random.normal(mean, standard deviation, size=shape) draws samples from a normal (Gaussian) distribution with the given mean and standard deviation

In [67]:
# Draw a 1000 x 1000 array of samples from a normal distribution with mean 0 and standard deviation 0.1
norm_arr = np.random.normal(loc=0, scale=0.1, size=(1000, 1000))

# Show the array, set off by a blank line above and below
print()
print("norm_arr = \n", norm_arr)
print()

# Describe norm_arr: shape, class, dtype, and a few summary statistics of the samples
for label, value in (
    ('norm_arr has dimensions:', norm_arr.shape),
    ('norm_arr is an object of type:', type(norm_arr)),
    ('The elements in norm_arr are of type:', norm_arr.dtype),
    ('The elements in norm_arr have a mean of:', norm_arr.mean()),
    ('The maximum value in norm_arr is:', norm_arr.max()),
    ('The minimum value in norm_arr is:', norm_arr.min()),
):
    print(label, value)

# Count samples on each side of zero: a boolean mask sums to the number of True entries
for comparison_mask, sign_label in (
    (norm_arr < 0, 'negative numbers'),
    (norm_arr > 0, 'positive numbers'),
):
    print('norm_arr has', comparison_mask.sum(), sign_label)


norm_arr = 
 [[-0.09843301 -0.08784733  0.00238909 ..., -0.07763005  0.10344931
  -0.04464902]
 [ 0.0826384  -0.04401434  0.06171523 ..., -0.05631471  0.09637871
   0.06859614]
 [ 0.1560583   0.00579493  0.00194732 ..., -0.00873765  0.00132139
   0.16177084]
 ..., 
 [ 0.02834862 -0.0396386  -0.03197804 ...,  0.02152386  0.02290728
  -0.02036413]
 [ 0.08904994  0.01549135  0.00532091 ..., -0.03859146 -0.12392615
   0.1140706 ]
 [ 0.05025387  0.0854106  -0.05813109 ..., -0.03240422  0.02237646
  -0.02997709]]

norm_arr has dimensions: (1000, 1000)
norm_arr is an object of type: <class 'numpy.ndarray'>
The elements in norm_arr are of type: float64
The elements in norm_arr have a mean of: -6.35629293629e-05
The maximum value in norm_arr is: 0.511688968755
The minimum value in norm_arr is: -0.510866735332
norm_arr has 500695 negative numbers
norm_arr has 499305 positive numbers
