<a href="https://colab.research.google.com/github/Vikas-KM/machine-learning/blob/master/Practice/Numpy_Practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Numpy Tutorial

- http://scipy-lectures.org/intro/numpy/array_object.html

In [1]:
#importing numpy
import numpy as np

In [2]:
# create a numpy array
a = np.array([1,2,3,4])
a

array([1, 2, 3, 4])

In [3]:
print(a)

[1 2 3 4]


In [4]:
# numpy allows adding scalar to the array
a + 1

array([2, 3, 4, 5])

In [5]:
a

array([1, 2, 3, 4])

In [6]:
print(a.data)

<memory at 0x7f64337acd08>


## Numpy - Speed Efficiency

In [7]:
L = range(1000)
%timeit [i**2 for i in L]

1000 loops, best of 3: 258 µs per loop


In [8]:
a = np.arange(1000)
%timeit a**2

The slowest run took 36.19 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.46 µs per loop


#### Look at the time taken: NumPy is roughly 150-200 times faster (258 µs vs 1.46 µs per loop here)

In [11]:
# create a numpy array from a given range
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
# dimensions of the numpy array
print(a.ndim)

1


In [13]:
# print the shape of the numpy array
print(a.shape)

(10,)


In [14]:
# print the size of the numpy array
print(a.size)

10


In [15]:
# what is the max value in numpy array
print(a.max())

9


In [16]:
# what is the min value in numpy array
print(a.min())

0


In [17]:
# print the length of the numpy array
print(len(a))

10


## Some Nomenclature
* 1D => Vector
* 2D => Matrix
* 3D => Tensor

In [21]:
# create a numpy array with evenly spaced values

# Signature: numpy.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0)
# Returns num evenly spaced samples, calculated over the interval [start, stop].
# The endpoint of the interval can optionally be excluded (endpoint=False).
# Here: 5 samples from 0 to 10 with both ends included, i.e. a step of 2.5.

np.linspace(0,10,5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [23]:
# Same as above but with endpoint=False, so 10 is not considered
np.linspace(0,10,5, endpoint=False)

array([0., 2., 4., 6., 8.])

In [24]:
# create a numpy array of 1's
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [25]:
# create a numpy array of 0's
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [27]:
# creates a 2-D array with ones on the main diagonal and zeros elsewhere;
# with a single argument N this is the N x N identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [28]:
# creates an identity matrix
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [29]:
# creates a square matrix with the given values on its main diagonal
# and zeros everywhere else
np.diag([1,2,3,4])

array([[1, 0, 0, 0],
       [0, 2, 0, 0],
       [0, 0, 3, 0],
       [0, 0, 0, 4]])

In [30]:
np.diag([1,2,3,4,5])

array([[1, 0, 0, 0, 0],
       [0, 2, 0, 0, 0],
       [0, 0, 3, 0, 0],
       [0, 0, 0, 4, 0],
       [0, 0, 0, 0, 5]])

In [32]:
# uniform distribution
np.random.rand(3)

array([0.56080974, 0.67919727, 0.79388296])

In [31]:
# gaussian/normal  distribution
np.random.randn(3)

array([ 0.0968228 , -0.54237605,  1.35339878])

In [34]:
# Return random integers from low (inclusive) to high (exclusive).
np.random.randint(10)

7

In [35]:
np.random.randint(3,6)

3

In [36]:
# Seed NumPy's global random generator so that random calls executed
# after this cell produce the same sequence on every run.
# It can be called again to re-seed the generator.
# NOTE(review): the random cells above run before this seed is set, so
# their outputs are not reproducible; consider seeding at the top of the notebook.

np.random.seed(42)

In [37]:
# returns an uninitialized numpy array of the given shape
# "uninitialized" means the entries are whatever happened to be in memory;
# they are arbitrary leftovers, not random numbers, so always fill before use

np.empty([2,2])

array([[4.9e-324, 9.9e-324],
       [1.5e-323, 2.0e-323]])

In [38]:
# creation of numpy array with complex values
a = np.array([1+2j, 3+4j])
a

array([1.+2.j, 3.+4.j])

In [39]:
# print the type of the numpy array
print(a.dtype)

complex128


In [44]:
a = np.array([1,2,3])
print(a.dtype)

int64


In [45]:
a = np.array([1.,2.,3.])
print(a.dtype)

float64


## Slicing and Indexing

In [48]:
# print first 10 values
a = np.arange(25)
print(a[:10])

[0 1 2 3 4 5 6 7 8 9]


In [49]:
# print last 10 values
a[-10:]

array([15, 16, 17, 18, 19, 20, 21, 22, 23, 24])

## Copying Numpy Arrays

In [51]:
# b is a *view* of the first 10 values of a, not a copy:
# slicing does not duplicate the data, so b and a share the same memory
b = a[:10]
b

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [52]:
# b was created by slicing a, so writing to b also changes
# the corresponding element of the original array a
b[0]= 100
a[:10]

array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9])

In [53]:
# .copy() allocates new memory, so c is independent of a
c = a[:10].copy()
c

array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9])

In [54]:
# c is a real copy, so changing it leaves a untouched (a[0] is still 100)
c[0] = 1
a[:10]

array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9])

In [55]:
b

array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9])

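### b = a
- b becomes another name for the same array as a (no data is copied); a slice such as `a[:10]` is likewise a view that shares memory with a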

### c = a.copy()
- a is copied into c
- here a and c are independent

In [56]:
# reverse the array with a step of -1; b itself is left unchanged
b[::-1]

array([  9,   8,   7,   6,   5,   4,   3,   2,   1, 100])

In [57]:
a = np.array([True, False, True])
a.dtype

dtype('bool')

In [58]:
a = np.array(['python','numpy','pandas'])
a.dtype

dtype('<U6')

In [59]:
a = np.array(['python','numpy','pandas','scikit-learn'])
a.dtype

dtype('<U12')

- In the above, the dtype reflects the length of the longest string: `<U6` and `<U12` mean Unicode strings of at most 6 and 12 characters

In [60]:
np.diag(np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 2, 0, 0],
       [0, 0, 0, 3, 0],
       [0, 0, 0, 0, 4]])

## Fancy Indexing
<img src='http://scipy-lectures.org/_images/numpy_indexing.png' />

In [63]:
# How to check if a and b, both are pointing to the same location?
# Determine if two arrays share memory.
a = np.array([1,2,3])
b = a
np.shares_memory(a,b)

True

In [64]:
c = a.copy()
np.shares_memory(a,c)

False

In [65]:
b

array([1, 2, 3])

In [67]:
# return even elements of the b array
b[b%2 == 0]

array([2])

In [68]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [70]:
# square the given array
a**2

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

In [71]:
# cube the given array
a**3

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [72]:
b = np.ones(10)
b

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [74]:
# subtraction
a-b

array([-1.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.])

In [75]:
# multiplication
a*b

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [77]:
# element-wise comparison: returns a boolean array with one entry per element
a == b

array([False,  True, False, False, False, False, False, False, False,
       False])

In [78]:
a > b

array([False, False,  True,  True,  True,  True,  True,  True,  True,
        True])

In [80]:
# True if two arrays have the same shape and elements, False otherwise.
np.array_equal(a,b)

False

In [81]:
# element-wise logical OR; non-zero values count as True, zero as False
np.logical_or(a,b)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [82]:
# element-wise logical AND; the first entry is False because a[0] is 0
np.logical_and(a,b)

array([False,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [84]:
# returns sum of the given array
np.sum(a)

45

In [85]:
a.sum()

45

In [86]:
a.min()

0

In [87]:
a.max()

9

In [88]:
np.max(a)

9

In [89]:
np.min(a)

0

In [91]:
# gives out the index of the element which has max value
print(np.argmax(a))
print(np.max(a))
print(a)

9
9
[0 1 2 3 4 5 6 7 8 9]


In [92]:
# gives out the index of the element with minimum value
print(np.argmin(a))
print(np.min(a))
print(a)

0
0
[0 1 2 3 4 5 6 7 8 9]


In [94]:
# returns True only if all values are True
np.all([True, True])

True

In [96]:
np.all([False, True])

False

In [97]:
np.any([True, False])

True

In [98]:
np.any([False, False])

False

In [99]:
# mean of the given array
np.mean(a)

4.5

In [100]:
# median of the given array
np.median(a)

4.5

In [101]:
# standard deviation of the given array
# by default this is the population standard deviation (ddof=0);
# pass ddof=1 for the sample standard deviation
np.std(a)

2.8722813232690143

In [102]:
a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [103]:
# transpose of the given array
a.T

array([[1, 4],
       [2, 5],
       [3, 6]])

In [104]:
b

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [105]:
a = np.array([1,2,3])
a

array([1, 2, 3])

In [107]:
# numpy.tile(A, reps)
# Construct an array by repeating A the number of times given by reps.

np.tile(a,(3,1))

array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])

In [109]:
# reps=(3,2): repeat a 3 times along the rows and 2 times along the columns,
# giving an array of shape (3, 6)
np.tile(a,(3,2))

array([[1, 2, 3, 1, 2, 3],
       [1, 2, 3, 1, 2, 3],
       [1, 2, 3, 1, 2, 3]])

In [110]:
print(a)

[1 2 3]


In [111]:
print(a.shape)

(3,)


In [113]:
# turn the 1-D array of shape (3,) into a column vector of shape (3, 1)
a = a.reshape(3,1)
# equivalent alternative: a = a[:, np.newaxis]
print(a.shape)

(3, 1)


In [114]:
print(a)

[[1]
 [2]
 [3]]


In [115]:
a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [118]:
# Return a contiguous flattened array.
print(a.ravel())

[1 2 3 4 5 6]


In [117]:
print(a.T)

[[1 4]
 [2 5]
 [3 6]]


In [119]:
a.T.ravel()

array([1, 4, 2, 5, 3, 6])

In [120]:
a

array([[1, 2, 3],
       [4, 5, 6]])

In [121]:
# reshape returns the same 6 values laid out as 3 rows x 2 columns;
# a itself keeps its original (2, 3) shape
a.reshape(3,2)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [122]:
a

array([[1, 2, 3],
       [4, 5, 6]])

In [123]:
# np.sort returns a sorted copy of the array; for a 2-D array it sorts
# along the last axis by default, i.e. each row is sorted independently
np.sort(a)

array([[1, 2, 3],
       [4, 5, 6]])

In [124]:
a = np.array([4,2,3,1])
a

array([4, 2, 3, 1])

In [125]:
np.sort(a)

array([1, 2, 3, 4])

### random

In [139]:
# uniform distribution
np.random.rand()

0.9717120953891037

In [140]:
np.random.rand(5)

array([0.84891382, 0.72172952, 0.23598492, 0.25606832, 0.04043359])

In [141]:
np.random.rand(5,5)

array([[0.71066289, 0.11089082, 0.4393365 , 0.2017192 , 0.8957636 ],
       [0.47537022, 0.56327557, 0.69551609, 0.13933145, 0.60441738],
       [0.53984109, 0.20306122, 0.94285357, 0.59886547, 0.69478493],
       [0.88046784, 0.62435405, 0.29563369, 0.10549426, 0.45653457],
       [0.21844044, 0.41650995, 0.88328026, 0.32434502, 0.12208795]])

In [142]:
# gaussian (standard normal) distribution
np.random.randn()

1.0996469757848204

In [143]:
np.random.randn(5)

array([ 0.72408325, -0.25576464,  0.8499212 , -1.31132423, -0.87030495])

In [144]:
np.random.randn(5,5)

array([[-0.50664322, -1.30995069,  2.94366342, -1.0962658 ,  0.91488432],
       [-0.66606062, -0.51378749,  0.3013228 , -1.45851692, -0.66273831],
       [-0.14647281, -0.84601702, -0.82248937,  1.08672702,  1.00498688],
       [-0.44176602, -0.08416509,  1.23701607, -1.42647888,  0.33802266],
       [ 1.9745712 ,  1.96503476, -1.93991122, -0.76262696,  0.16865939]])

In [145]:
np.random.randint(1,10)

8

In [146]:
np.random.randint(1,10,10)

array([1, 6, 8, 5, 4, 2, 6, 6, 1, 9])

In [147]:
np.arange(25).reshape(5,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [148]:
arr = np.arange(25).reshape(5,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [149]:
# element at row 1 and column 0
arr[1,0]

5

In [150]:
# same element (row 1, column 0) via chained indexing: arr[1] first
# produces the whole row and [0] then indexes into it.
# arr[1,0] does this in a single indexing step and is the preferred form.
arr[1][0]

5

In [151]:
arr[-1,-1]

24