<a href="https://colab.research.google.com/github/Vikas-KM/machine-learning/blob/master/Practice/Numpy_Practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Numpy Tutorial

- http://scipy-lectures.org/intro/numpy/array_object.html

In [1]:
#importing numpy
import numpy as np

In [2]:
# create a numpy array
a = np.array([1,2,3,4])
a

array([1, 2, 3, 4])

In [3]:
print(a)

[1 2 3 4]


In [4]:
# Adding a scalar is applied to every element and returns a new array;
# `a` itself is left unchanged.
a + 1

array([2, 3, 4, 5])

In [5]:
a

array([1, 2, 3, 4])

In [6]:
# .data exposes the array's underlying buffer (printed as a memory object)
print(a.data)

<memory at 0x7f64337acd08>


## NumPy - Speed Efficiency (vectorised operations)

In [7]:
# Baseline: square 1000 integers one at a time with a Python list comprehension
L = range(1000)
%timeit [i**2 for i in L]

1000 loops, best of 3: 258 µs per loop


In [8]:
# Same computation expressed as a single vectorised operation on a NumPy array
a = np.arange(1000)
%timeit a**2

The slowest run took 36.19 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.46 µs per loop


#### Compare the timings: NumPy is roughly 150-200 times faster here (258 µs vs 1.46 µs per loop)

In [11]:
# create a numpy array from a given range
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
# dimensions of the numpy array
print(a.ndim)

1


In [13]:
# print the shape of the numpy array
print(a.shape)

(10,)


In [14]:
# print the size of the numpy array
print(a.size)

10


In [15]:
# what is the max value in numpy array
print(a.max())

9


In [16]:
# what is the min value in numpy array
print(a.min())

0


In [17]:
# print the length of the numpy array
print(len(a))

10


## Some Nomenclature
* 1D array => Vector
* 2D array => Matrix
* 3D (or higher-dimensional) array => Tensor

In [21]:
# create a numpy array with evenly spaced values

# np.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0)
# returns `num` evenly spaced samples over the interval [start, stop].
# The stop value is included by default; pass endpoint=False to exclude it.

np.linspace(0,10,5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [23]:
# Same as above but with endpoint=False: the stop value 10 is excluded,
# so the 5 samples are spaced 2 apart instead of 2.5
np.linspace(0,10,5, endpoint=False)

array([0., 2., 4., 6., 8.])

In [24]:
# create a numpy array of 1's
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [25]:
# create a numpy array of 0's
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [27]:
# creates a 2-D array with ones on the diagonal and zeros elsewhere
# (np.eye can also build non-square or offset-diagonal arrays via its M and k arguments)
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [28]:
# creates a square n x n identity matrix
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [29]:
# creates a square matrix with the given values on its main diagonal
np.diag([1,2,3,4])

array([[1, 0, 0, 0],
       [0, 2, 0, 0],
       [0, 0, 3, 0],
       [0, 0, 0, 4]])

In [30]:
np.diag([1,2,3,4,5])

array([[1, 0, 0, 0, 0],
       [0, 2, 0, 0, 0],
       [0, 0, 3, 0, 0],
       [0, 0, 0, 4, 0],
       [0, 0, 0, 0, 5]])

In [32]:
# 3 samples from the uniform distribution over [0, 1)
np.random.rand(3)

array([0.56080974, 0.67919727, 0.79388296])

In [31]:
# 3 samples from the standard normal (gaussian) distribution: mean 0, variance 1
np.random.randn(3)

array([ 0.0968228 , -0.54237605,  1.35339878])

In [34]:
# With a single argument the value is the exclusive upper bound:
# returns a random integer from 0 (inclusive) to 10 (exclusive).
np.random.randint(10)

7

In [35]:
# random integer from 3 (inclusive) to 6 (exclusive)
np.random.randint(3,6)

3

In [36]:
# Seed NumPy's global random generator so the np.random calls that follow
# give the same values on every run. The random cells above were executed
# before seeding, so their outputs will differ between runs.

np.random.seed(42)

In [37]:
# returns an array without initializing its entries
# the values are whatever happened to be in memory, so they are arbitrary
# (not random numbers) and should be overwritten before use

np.empty([2,2])

array([[4.9e-324, 9.9e-324],
       [1.5e-323, 2.0e-323]])

In [38]:
# creation of numpy array with complex values
a = np.array([1+2j, 3+4j])
a

array([1.+2.j, 3.+4.j])

In [39]:
# print the type of the numpy array
print(a.dtype)

complex128


In [44]:
a = np.array([1,2,3])
print(a.dtype)

int64


In [45]:
a = np.array([1.,2.,3.])
print(a.dtype)

float64


## Slicing and Indexing

In [48]:
# print first 10 values
a = np.arange(25)
print(a[:10])

[0 1 2 3 4 5 6 7 8 9]


In [49]:
# print last 10 values
a[-10:]

array([15, 16, 17, 18, 19, 20, 21, 22, 23, 24])

## Copying Numpy Arrays

In [51]:
# b is a slice of the first 10 values of a; slicing returns a view
# on the same data, not a copy
b = a[:10]
b

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [52]:
# b is a view on a, so writing through b also changes the original a
b[0]= 100
a[:10]

array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9])

In [53]:
# .copy() makes an independent array with its own data
c = a[:10].copy()
c

array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9])

In [54]:
# changing the copy leaves a untouched (a[0] is still 100)
c[0] = 1
a[:10]

array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9])

In [55]:
b

array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9])

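### b = a
- b refers to the same array as a (no data is copied); a slice such as `a[:10]` is likewise a view on a
- changes made through b are visible in a

### c = a.copy()
- a is copied into c
- here a and c are independent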

In [None]:
# reversing: a slice with step -1 walks the array backwards
b[::-1]

array([  9,   8,   7,   6,   5,   4,   3,   2,   1, 100])

In [None]:
a = np.array([True, False, True])
a.dtype

dtype('bool')

In [None]:
a = np.array(['python','numpy','pandas'])
a.dtype

dtype('<U6')

In [None]:
a = np.array(['python','numpy','pandas','scikit-learn'])
a.dtype

dtype('<U12')

#### In the above, the dtype's width is the length of the longest string in the array (`<U6` for 'python'/'pandas', `<U12` for 'scikit-learn')

In [None]:
np.diag(np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 2, 0, 0],
       [0, 0, 0, 3, 0],
       [0, 0, 0, 0, 4]])

## Fancy Indexing
<img src='http://scipy-lectures.org/_images/numpy_indexing.png' />

In [None]:
# plain assignment does not copy: b is just another name for the same array
a = np.array([1,2,3])
b = a
np.shares_memory(a,b)

True

In [None]:
# an explicit copy gets its own memory
c = a.copy()
np.shares_memory(a,c)

False

In [None]:
b

array([1, 2, 3])

In [None]:
# boolean mask indexing: keep only the even elements
b[b%2 == 0]

array([2])

In [None]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
a**2

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81], dtype=int32)

In [None]:
a**3

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729], dtype=int32)

In [None]:
b = np.ones(10)
b

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [None]:
a-b

array([-1.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.])

In [None]:
a*b

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [None]:
a == b

array([False,  True, False, False, False, False, False, False, False,
       False])

In [None]:
a > b

array([False, False,  True,  True,  True,  True,  True,  True,  True,
        True])

In [None]:
np.array_equal(a,b)

False

In [None]:
# element-wise OR; non-zero values are treated as True
np.logical_or(a,b)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [None]:
# element-wise AND; only index 0 is False because a[0] is 0
np.logical_and(a,b)

array([False,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [None]:
np.sum(a)

45

In [None]:
a.sum()

45

In [None]:
a.min()

0

In [None]:
a.max()

9

In [None]:
np.max(a)

9

In [None]:
np.min(a)

0

In [None]:
# index of the maximum value (not the value itself)
np.argmax(a)

9

In [None]:
# index of the minimum value (not the value itself)
np.argmin(a)

0

In [None]:
np.all([True, True])

True

In [None]:
np.all([False, True])

False

In [None]:
np.any([True, False])

True

In [None]:
np.any([False, False])

False

In [None]:
np.mean(a)

4.5

In [None]:
np.median(a)

4.5

In [None]:
# population standard deviation (np.std uses ddof=0 by default)
np.std(a)

2.8722813232690143

In [None]:
a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
a.T

array([[1, 4],
       [2, 5],
       [3, 6]])

In [None]:
b

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [None]:
a = np.array([1,2,3])
a

array([1, 2, 3])

In [None]:
# reps=(3, 1): stack 3 copies of a as rows, no repetition along the columns
np.tile(a,(3,1))

array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])

In [None]:
# reps=(4, 2): repeat a 4 times along the rows and 2 times along the columns
np.tile(a,(4,2))

array([[1, 2, 3, 1, 2, 3],
       [1, 2, 3, 1, 2, 3],
       [1, 2, 3, 1, 2, 3],
       [1, 2, 3, 1, 2, 3]])

In [None]:
a

array([1, 2, 3])

In [None]:
a.shape

(3,)

In [None]:
# np.newaxis adds a length-1 axis, turning the 1-D array into a column
a = a[:, np.newaxis]
a.shape

(3, 1)

In [None]:
a

array([[1],
       [2],
       [3]])

In [None]:
a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
# flatten to 1-D, reading the elements row by row
a.ravel()

array([1, 2, 3, 4, 5, 6])

In [None]:
a.T

array([[1, 4],
       [2, 5],
       [3, 6]])

In [None]:
a.T.ravel()

array([1, 4, 2, 5, 3, 6])

In [None]:
a

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
# same 6 elements in the same order, laid out as 3 rows x 2 columns
# (note this differs from the transpose a.T)
a.reshape(3,2)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [None]:
a

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
# sorts along the last axis, i.e. each row independently;
# the rows here are already sorted, so the result looks unchanged
np.sort(a)

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
a = np.array([4,2,3,1])
a

array([4, 2, 3, 1])

In [None]:
np.sort(a)

array([1, 2, 3, 4])

### random

In [None]:
# one sample from the uniform distribution over [0, 1)
np.random.rand()

0.17052412368729153

In [None]:
np.random.rand(5)

array([0.95071431, 0.73199394, 0.59865848, 0.15601864, 0.15599452])

In [None]:
np.random.rand(5,5)

array([[0.05808361, 0.86617615, 0.60111501, 0.70807258, 0.02058449],
       [0.96990985, 0.83244264, 0.21233911, 0.18182497, 0.18340451],
       [0.30424224, 0.52475643, 0.43194502, 0.29122914, 0.61185289],
       [0.13949386, 0.29214465, 0.36636184, 0.45606998, 0.78517596],
       [0.19967378, 0.51423444, 0.59241457, 0.04645041, 0.60754485]])

In [None]:
# one sample from the standard gaussian (normal) distribution
np.random.randn()

-0.600638689918805

In [None]:
np.random.randn(5)

array([-0.29169375, -0.60170661,  1.85227818, -0.01349722, -1.05771093])

In [None]:
np.random.randn(5,5)

array([[ 0.82254491, -1.22084365,  0.2088636 , -1.95967012, -1.32818605],
       [ 0.19686124,  0.73846658,  0.17136828, -0.11564828, -0.3011037 ],
       [-1.47852199, -0.71984421, -0.46063877,  1.05712223,  0.34361829],
       [-1.76304016,  0.32408397, -0.38508228, -0.676922  ,  0.61167629],
       [ 1.03099952,  0.93128012, -0.83921752, -0.30921238,  0.33126343]])

In [None]:
np.random.randint(1,10)

2

In [None]:
np.random.randint(1,10,10)

array([1, 7, 7, 8, 5, 3, 8, 6, 3, 1])

In [None]:
np.arange(25).reshape(5,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [None]:
arr = np.arange(25).reshape(5,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [None]:
# element at row 1 and column 0, using a single multi-dimensional index
arr[1,0]

5

In [None]:
# same element via chained indexing: arr[1] selects row 1 first,
# then [0] picks its first entry
arr[1][0]

5

In [None]:
# negative indices count from the end: last row, last column
arr[-1,-1]

24