In [3]:
import numpy as np
import sys

# NumPy is pure gold. It is fast, easy to learn, feature-rich, and 
# therefore at the core of almost all popular scientific packages in the 
# Python universe (including SciPy and Pandas, two most widely used packages
# for data science and statistical modeling). 

In [4]:
# LISTs are good, you can change values but you cannot apply mathematical 
# functions

In [5]:
distance = [10, 15, 17, 26]
time     = [1,   2,  3,  4]

In [6]:
speed = distance / time

TypeError: unsupported operand type(s) for /: 'list' and 'list'

In [7]:
np_distance = np.array(distance)
np_time     = np.array(time)

In [8]:
speed = np_distance/np_time
speed

array([10.        ,  7.5       ,  5.66666667,  6.5       ])

In [None]:
# why use numpy
# compare numpy array with standard python LIST type array

In [9]:
for i in range(5):
    print(i)

0
1
2
3
4


In [10]:
# PYTHON LIST
data = range(1000)

print(sys.getsizeof(5)*len(data))

# NUMPY ARRAY
data_np = np.arange(1000)
print(data_np.size * data_np.itemsize)

28000
4000


In [None]:
# The above output shows that the memory allocated by list is 28000 whereas the 
# memory allocated by the numpy array is just 4000. 
# From this, you can conclude that there is a major difference between the two and 
# this makes python numpy array as the preferred choice over list.

In [11]:
# Example - getsizeof
print('Integer memory size : ', sys.getsizeof(5) )
print('Integer memory size : ', sys.getsizeof(56666666) )
print('Integer memory size : ', sys.getsizeof(56666669559956) )
print('Float   memory size : ', sys.getsizeof(5.05) )
print('Float   memory size : ', sys.getsizeof(5.058585885) )

print('\nString  memory size : ', sys.getsizeof('1') )
print('String  memory size : ', sys.getsizeof('A') )
print('String  memory size : ', sys.getsizeof('AA') )
print('String  memory size : ', sys.getsizeof('AAA') )
print('String  memory size : ', sys.getsizeof('AAAA') )
print('String  memory size : ', sys.getsizeof('AAAAA') )

Integer memory size :  28
Integer memory size :  28
Integer memory size :  32
Float   memory size :  24
Float   memory size :  24

String  memory size :  50
String  memory size :  50
String  memory size :  51
String  memory size :  52
String  memory size :  53
String  memory size :  54


In [12]:
import time
import sys
 
SIZE = 1000000
 
L1= range(SIZE)
L2= range(SIZE)
A1= np.arange(SIZE)
A2=np.arange(SIZE)
 
start= time.time()
result=[(x,y) for x,y in zip(L1,L2)]
print((time.time()-start)*1000)
 
start=time.time()
result= A1+A2
print((time.time()-start)*1000)

256.84237480163574
83.47630500793457


In [13]:
SIZE = 10
L1= range(SIZE)
L2= range(SIZE)
[(x,y) for x,y in zip(L1,L2)]

[(0, 0),
 (1, 1),
 (2, 2),
 (3, 3),
 (4, 4),
 (5, 5),
 (6, 6),
 (7, 7),
 (8, 8),
 (9, 9)]

In [14]:
np_arr = np.arange(1000000)
normal_arr = list(range(1000000))

In [15]:
%%time 
# loop using numpy array
for i in range(5):
    np_arr2 = np_arr * 2

Wall time: 18 ms


In [16]:
%%time 
# loop using normal python array, list
for i in range(5):
    normal_arr2 = [x * 2 for x in normal_arr]

Wall time: 732 ms


In [18]:
print(np_arr2)

[      0       2       4 ... 1999994 1999996 1999998]


In [31]:
# NumPy-based algorithms are generally 10 to 100 times faster (or more) 
# than their pure Python counterparts and use significantly less memory.

In [32]:
# NDIM

In [19]:
a = np.array([(1,2,3), (4,5,6)])

print(a.ndim)     # number of dimensions in the array
print(a.itemsize) # So every element occupies 4 byte in the above numpy array.
print(a.size)
print(a.shape)

2
4
6
(2, 3)


In [20]:
a

array([[1, 2, 3],
       [4, 5, 6]])

In [34]:
# linespace
# Equally-spaced numbers

In [19]:
np.linspace(1, 10, 10)

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [20]:
np.linspace(0, 5, 10)

array([0.        , 0.55555556, 1.11111111, 1.66666667, 2.22222222,
       2.77777778, 3.33333333, 3.88888889, 4.44444444, 5.        ])

In [37]:
# Random
# The random module provides functions to generate random numbers 
# (and also statistical distributions) of any given shape.

# numpy.random.randn generates samples from the normal distribution, 
# numpy.random.rand  generates samples from the unifrom (in range [0,1)).

In [22]:
# Random numbers between [0,1) of shape 2,3
print(np.random.rand(2, 3))

[[0.27846376 0.16291135 0.5847559 ]
 [0.43460699 0.13558126 0.01416757]]


In [26]:
# Random integers between [0, 10) of shape 2,2
print(np.random.randint(0, 10, size=[2, 2]))

[[3 5]
 [8 7]]


In [27]:
# One random number between [0, 1)
print(np.random.random())

0.2584905379058928


In [28]:
# Random numbers between [0,1) of shape 2,2
print(np.random.random(size=[2, 2]))

[[0.24129768 0.28911739]
 [0.11941939 0.69446231]]


In [26]:
# Pick 10 items from a given list, with equal probability
print(np.random.choice(['deepak', 'raj', 'rakesh', 'owl', 'monkey'], size=4, replace=False))  

['owl' 'monkey' 'rakesh' 'deepak']


In [28]:
# If you want to repeat the same set of random numbers every time, you need to set the seed 
# or the random state. The see can be any value. The only requirement is you must set the seed 
# to the same value every time you want to generate the same set of random numbers.

# Once np.random.RandomState is created, all the functions of the np.random module becomes 
# available to the created randomstate object.

In [29]:
# Create the random state
rn = np.random.RandomState(4)

# Create random numbers between [0,1) of shape 2,2
print(rn.rand(2,2))

[[ 0.96702984  0.54723225]
 [ 0.97268436  0.71481599]]


In [31]:
# Set the random seed
np.random.seed(100)

# Create random numbers between [0,1) of shape 2,2
print(np.random.rand(2,2))

[[ 0.54340494  0.27836939]
 [ 0.42451759  0.84477613]]


In [24]:
# How to get the unique items and the counts?
# The np.unique method can be used to get the unique items. 
# If you want the repetition counts of each item, set the return_counts parameter to True.

# Create random integers of size 10 between [0,10)
np.random.seed(1090)
arr_rand = np.random.randint(0, 10, size=4)
print(arr_rand)

[6 3 2 2]


In [22]:
# Get the unique items and their counts
uniqs, counts = np.unique(arr_rand, return_counts=True)
print("Unique items : ", uniqs)
print("Counts       : ", counts)

Unique items :  [2 3 6]
Counts       :  [2 1 1]


In [37]:
# Normal distribution with mean=0 and variance=1
data = np.random.randn(3, 3)

In [38]:
data * 10

array([[-6.78895104,  8.79499189,  0.91104009],
       [ 8.83324595, -4.13365488, -3.69983809],
       [ 4.18106874, -3.57328321, -4.89801752]])

In [22]:
data + data

array([[ 0.00574665, -0.95513324,  1.56886792],
       [-1.64228776,  2.47950663, -4.9951036 ],
       [ 1.70217168,  1.9754035 , -1.84033404]])

In [39]:
data.shape

(3, 3)

In [198]:
data.dtype

dtype('float64')

In [40]:
# creating ndarray
data1 = [6, 7.1, 85, 0, 1]

In [41]:
arr1 = np.array(data1)

In [42]:
arr1

array([ 6. ,  7.1, 85. ,  0. ,  1. ])

In [44]:
# nested array
data2 = [[1, 2.3, 3, 4], [5, 6, 7, 8]]

In [45]:
arr2 = np.array(data2)

In [46]:
arr2

array([[1. , 2.3, 3. , 4. ],
       [5. , 6. , 7. , 8. ]])

In [47]:
arr2.shape

(2, 4)

In [48]:
arr2.ndim

2

In [49]:
arr2.dtype

dtype('float64')

In [33]:
# other methods of creating arrays

In [50]:
# zeros
a = np.zeros(10)
a

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [51]:
b = np.zeros((3, 6))
b

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [52]:
c = np.zeros_like(b)
c

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [53]:
# ones
a = np.ones(10)
a

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [54]:
b = np.ones((3, 6))
b

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [47]:
c = np.ones_like(b)
c

array([[ 1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.]])

In [55]:
# arange
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [49]:
a.dtype

dtype('int32')

In [56]:
# convert the data type
b = a.astype(np.float64)
b

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14.])

In [74]:
# Arithmetic with NumPy Arrays
# Any arithmetic operations between equal-size arrays applies the 
# operation element-wise:

In [51]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])

In [52]:
arr

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])

In [53]:
arr + 10

array([[ 11.,  12.,  13.],
       [ 14.,  15.,  16.]])

In [54]:
arr + arr

array([[  2.,   4.,   6.],
       [  8.,  10.,  12.]])

In [55]:
arr - arr

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [56]:
# Comparisons between arrays of the same size yield boolean arrays:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])

In [57]:
arr2

array([[  0.,   4.,   1.],
       [  7.,   2.,  12.]])

In [57]:
arr2 > 5

array([[False, False, False, False],
       [False,  True,  True,  True]])

In [58]:
arr2 > arr

NameError: name 'arr' is not defined

In [25]:
# Basic Indexing and Slicing
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [26]:
arr[5]

5

In [27]:
arr[5:8]

array([5, 6, 7])

In [28]:
# broadcasting a value to multiple array positions
arr[5:8] = 12

In [76]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [29]:
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [68]:
# change values in arr_slice, the mutations are reflected in the original 
# array arr
arr_slice[0] = 12345

In [69]:
arr_slice

array([12345,    12,    12])

In [70]:
arr

array([    0,     1,     2,     3,     4, 12345,    12,    12,     8,     9])

In [231]:
# using : only means all values
arr_slice[1:2] = 9999

In [232]:
arr_slice

array([12345,  9999,    12])

In [233]:
arr

array([    0,     1,     2,     3,     4, 12345,  9999,    12,     8,     9])

In [None]:
# if you just assign a portion of an array to another array, the new array you just 
# created actually refers to the parent array in memory.
# That means, if you make any changes to the new array, it will reflect in the parent 
# array as well.

# So to avoid disturbing the parent array, you need to make a copy of it using copy(). 
# All numpy arrays come with the copy() method.

In [234]:
# If you want a copy of a slice of an ndarray instead of a view, you
# will need to explicitly copy the array — for example,
# arr[5:8].copy().
arr_slice = arr[5:8].copy()

In [235]:
arr_slice

array([12345,  9999,    12])

In [236]:
arr_slice[:] = 888

In [237]:
arr_slice

array([888, 888, 888])

In [238]:
# the original array is not changed
arr

array([    0,     1,     2,     3,     4, 12345,  9999,    12,     8,     9])

In [103]:
# Create a 2d array with 3 rows and 4 columns
list2 = [[1, 2, 3, 4],[3, 4, 5, 6], [5, 6, 7, 8]]

arr2 = np.array(list2, dtype='float')
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3.,  4.,  5.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [104]:
arr2a = arr2[:2,:2]  
arr2a[:1, :1] = 100  # 100 will reflect in arr2
arr2

array([[ 100.,    2.,    3.,    4.],
       [   3.,    4.,    5.,    6.],
       [   5.,    6.,    7.,    8.]])

In [106]:
# Copy portion of arr2 to arr2b
arr2b = arr2[:2, :2].copy()
arr2b[:1, :1] = 101  # 101 will not reflect in arr2
arr2

array([[ 100.,    2.,    3.,    4.],
       [   3.,    4.,    5.,    6.],
       [   5.,    6.,    7.,    8.]])

In [239]:
# Indexing with slices
arr = np.arange(10)

In [240]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [241]:
arr[1:6]

array([1, 2, 3, 4, 5])

In [244]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [245]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [246]:
arr2d[2]

array([7, 8, 9])

In [6]:
arr2d[0][2]

3

In [115]:
# expression arr2d[:2] as “select the first two rows of arr2d.”
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [116]:
# first 2 rows and all columns from 1st pos
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [117]:
# 1st row, all columns 0th and 1st
arr2d[1, :2]

array([4, 5])

In [118]:
arr2d[:2, 2]

array([3, 6])

In [120]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [119]:
arr2d[:, 1]

array([2, 5, 8])

In [121]:
arr2d[:2, 1:] = 0

In [122]:
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

In [60]:
# Boolean Indexing
names = np.array(['Rajat', 'Maruthi', 'Dinesh', 'Rajat', 'Bhanu', 'Viswa', 'Kamal'])
data = np.random.randn(7, 4)

In [63]:
names.dtype

dtype('<U7')

In [65]:
data

array([[-5.25131420e-02,  9.66919254e-01,  1.25733159e+00,
         1.84060156e+00],
       [ 9.95160734e-02,  1.83623803e+00, -6.62693164e-01,
         1.71028470e+00],
       [-1.41028159e+00,  2.25898433e+00, -7.39260730e-01,
        -4.47324580e-01],
       [-4.38415185e-01,  1.24547084e-01, -1.37590435e+00,
        -1.08643372e+00],
       [ 8.68781186e-01,  1.57640560e-03,  1.23778716e-01,
        -1.44422843e+00],
       [ 2.78820906e-01, -2.43301125e+00,  1.12646986e+00,
         2.73083706e-02],
       [-1.87194615e-01,  7.73843137e-01, -9.94807993e-02,
        -1.97962708e-01]])

In [66]:
# Suppose each name corresponds to a row in the data array and we wanted to 
# select all the rows with corresponding name 'Rajat'.
names == 'Rajat'

array([ True, False, False,  True, False, False, False])

In [67]:
data[names == 'Rajat']

array([[-0.05251314,  0.96691925,  1.25733159,  1.84060156],
       [-0.43841519,  0.12454708, -1.37590435, -1.08643372]])

In [40]:
# The boolean array must be of the same length as the array axis it’s 
# indexing.

In [68]:
# what happens if they are of different lengths
names = np.array(['Rajat', 'Maruthi', 'Rajat', 'Bhanu', 'Viswa' ])
data = np.random.randn(7, 4)

In [69]:
data[names == 'Rajat']

IndexError: boolean index did not match indexed array along dimension 0; dimension is 7 but corresponding boolean dimension is 5

In [130]:
names = np.array(['Rajat', 'Maruthi', 'Dinesh', 'Rajat', 'Bhanu', 'Viswa', 'Kamal'])
data = np.random.randn(7, 4)
data

array([[ 1.56464366, -2.6197451 ,  0.8219025 ,  0.08704707],
       [-0.29900735,  0.09176078, -1.98756891, -0.21967189],
       [ 0.35711257,  1.47789404, -0.51827022, -0.8084936 ],
       [-0.50175704,  0.91540212,  0.32875111, -0.5297602 ],
       [ 0.51326743,  0.09707755,  0.96864499, -0.70205309],
       [-0.32766215, -0.39210815, -1.46351495,  0.29612028],
       [ 0.26105527,  0.00511346, -0.23458713, -1.41537074]])

In [131]:
data[names == 'Rajat', :2]

array([[ 1.56464366, -2.6197451 ],
       [-0.50175704,  0.91540212]])

In [132]:
data[names == 'Rajat', 2]

array([ 0.8219025 ,  0.32875111])

In [133]:
# To select everything but 'Rajat', you can either use != or negate 
# the condition using ~

In [134]:
names != 'Rajat'

array([False,  True,  True, False,  True,  True,  True], dtype=bool)

In [135]:
data[names != 'Rajat']

array([[-0.29900735,  0.09176078, -1.98756891, -0.21967189],
       [ 0.35711257,  1.47789404, -0.51827022, -0.8084936 ],
       [ 0.51326743,  0.09707755,  0.96864499, -0.70205309],
       [-0.32766215, -0.39210815, -1.46351495,  0.29612028],
       [ 0.26105527,  0.00511346, -0.23458713, -1.41537074]])

In [136]:
data[~(names == 'Rajat')]

array([[-0.29900735,  0.09176078, -1.98756891, -0.21967189],
       [ 0.35711257,  1.47789404, -0.51827022, -0.8084936 ],
       [ 0.51326743,  0.09707755,  0.96864499, -0.70205309],
       [-0.32766215, -0.39210815, -1.46351495,  0.29612028],
       [ 0.26105527,  0.00511346, -0.23458713, -1.41537074]])

In [137]:
cond = names == 'Rajat'

In [138]:
data[cond]

array([[ 1.56464366, -2.6197451 ,  0.8219025 ,  0.08704707],
       [-0.50175704,  0.91540212,  0.32875111, -0.5297602 ]])

In [139]:
data[~cond]

array([[-0.29900735,  0.09176078, -1.98756891, -0.21967189],
       [ 0.35711257,  1.47789404, -0.51827022, -0.8084936 ],
       [ 0.51326743,  0.09707755,  0.96864499, -0.70205309],
       [-0.32766215, -0.39210815, -1.46351495,  0.29612028],
       [ 0.26105527,  0.00511346, -0.23458713, -1.41537074]])

In [140]:
# Selecting two of the three names to combine multiple boolean conditions, 
# use boolean arithmetic operators like & (and) and | (or):

In [141]:
# The Python keywords and and or do not work with boolean arrays.
# Use & (and) and | (or) instead.
cond = (names == 'Rajat') | (names == 'Bhanu')

In [142]:
data[cond]

array([[ 1.56464366, -2.6197451 ,  0.8219025 ,  0.08704707],
       [-0.50175704,  0.91540212,  0.32875111, -0.5297602 ],
       [ 0.51326743,  0.09707755,  0.96864499, -0.70205309]])

In [143]:
cond = data > 0

In [144]:
data[cond]

array([ 1.56464366,  0.8219025 ,  0.08704707,  0.09176078,  0.35711257,
        1.47789404,  0.91540212,  0.32875111,  0.51326743,  0.09707755,
        0.96864499,  0.29612028,  0.26105527,  0.00511346])

In [145]:
data[cond] = 0

In [146]:
data

array([[ 0.        , -2.6197451 ,  0.        ,  0.        ],
       [-0.29900735,  0.        , -1.98756891, -0.21967189],
       [ 0.        ,  0.        , -0.51827022, -0.8084936 ],
       [-0.50175704,  0.        ,  0.        , -0.5297602 ],
       [ 0.        ,  0.        ,  0.        , -0.70205309],
       [-0.32766215, -0.39210815, -1.46351495,  0.        ],
       [ 0.        ,  0.        , -0.23458713, -1.41537074]])

In [147]:
# change data for Rajat = 7
data[names != 'Rajat'] = 7

In [148]:
data

array([[ 0.        , -2.6197451 ,  0.        ,  0.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [-0.50175704,  0.        ,  0.        , -0.5297602 ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ]])

In [149]:
# Transposing Arrays and Swapping Axes
# Transposing is a special form of reshaping that similarly returns a view on the underlying
# data without copying anything

In [88]:
a = np.array([(8,9,10),(11,12,13)])
print(a)

a=a.reshape(6,1)
print(a)

[[ 8  9 10]
 [11 12 13]]
[[ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]]


In [150]:
arr = np.arange(15)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [151]:
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [152]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [153]:
# Universal Functions: Fast Element-Wise Array Functions


In [154]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [155]:
np.sqrt(arr)

array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,
        2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ])

In [156]:
np.exp(arr)

array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,
         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,
         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,
         8.10308393e+03])

In [157]:
np.floor(arr)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])

In [158]:
# useful in data anlytics
# is not a number
np.isnan(arr)

array([False, False, False, False, False, False, False, False, False, False], dtype=bool)

In [159]:
# binary functions
x = np.random.randn(8)
y = np.random.randn(8)

In [160]:
np.maximum(x, y)

array([-0.07444592, -0.34271452, -0.02651388,  0.06023021,  2.46324211,
        1.8861859 ,  0.30154734,  0.25755039])

In [161]:
np.minimum(x, y)

array([-0.42064532, -1.91877122, -0.80227727, -0.16128571,  0.40405086,
       -0.19236096,  0.17457781, -0.03471177])

In [None]:
# Array-Oriented Programming with Arrays
# NumPy arrays enables you to express many kinds of data processing tasks 
# as concise array expressions that might otherwise require writing loops

In [None]:
# This practice of replacing explicit loops with array expressions 
# is commonly referred to as vectorization.

In [None]:
# vectorized array operations will often be one or two (or more) orders
# of magnitude faster than their pure Python equivalents

In [26]:
# to evaluate the function sqrt(x^2 + y^2) 
# across a regular grid of values.

In [162]:
points = np.arange(-5, 5, 0.01) # 1000 equally spaced points

In [163]:
x = np.arange(10)
y = np.arange(10)

In [164]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [165]:
y

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [166]:
np.sqrt(np.add(x ** 2 , y ** 2))

array([  0.        ,   1.41421356,   2.82842712,   4.24264069,
         5.65685425,   7.07106781,   8.48528137,   9.89949494,
        11.3137085 ,  12.72792206])

In [None]:
# random number **********************************************

In [253]:
# mean at 0 and standard deviation of 1
# This will return random floats in the half-open interval [0.0, 1.0) 
# following the “continuous uniform” distribution.
np.random.randn(5)

array([ 0.09176078, -1.98756891, -0.21967189,  0.35711257,  1.47789404])

In [254]:
np.random.random((2,2))

array([[ 0.22879817,  0.07697991],
       [ 0.28975145,  0.16122129]])

In [255]:
# Generate A Random Number From The Normal Distribution
np.random.normal()

-0.5182702182736474

In [256]:
# Generate Four Random Numbers From The Normal Distribution
np.random.normal(size=4)

array([ 0.91540212,  0.32875111, -0.5297602 ,  0.51326743])

In [172]:
# Random number from distribution with mean 15, variance 2
np.random.normal(15, 2)

15.137125949612054

In [257]:
# an array of 10 points randomly sampled from a normal distribution
# loc=mean, scale=std deviation
np.random.normal(loc=0.0, scale=1.0, size=10)

array([ 0.09707755,  0.96864499, -0.70205309, -0.32766215, -0.39210815,
       -1.46351495,  0.29612028,  0.26105527,  0.00511346, -0.23458713])

In [174]:
# To return a 8 by 5 array of random numbers from the same distribution: 
np.random.normal(15, 2, size=(3, 4))

array([[ 11.78503353,  15.36926772,  15.51976559,  16.56364574],
       [ 12.52609858,  12.35908677,  16.04388313,  15.59396935],
       [ 15.5009857 ,  15.69289642,  13.63995056,  15.46450739]])

In [176]:
# A 5 by 3 array of random numbers from the standard normal distribution 
# with mean 1 and variance 1:

np.random.normal(size=(2, 4))

array([[ 1.15859558, -0.82068232,  0.96337613,  0.41278093],
       [ 0.82206016,  1.89679298, -0.24538812, -0.75373616]])

In [177]:
# Generate Four Random Integers Between 1 and 100
np.random.randint(low=1, high=100, size=4)

array([22, 30, 38, 51])

In [178]:
# Randomly constructing 1D array
array = np.random.rand(5)
print("1D Array filled with random values : \n", array)

1D Array filled with random values : 
 [ 0.89204656  0.63113863  0.7948113   0.50263709  0.57690388]


In [182]:
# Randomly constructing 1D array following Gaussian Distribution

# 1D Array
array = np.random.normal(0.0, 1.0, 5)
print("1D Array filled with random values "
      "as per gaussian distribution : \n", array)

# 3D array
array = np.random.normal(0.0, 1.0, (2, 3))
print("\n\n2D Array filled with random values "
      "as per gaussian distribution : \n", array)

1D Array filled with random values as per gaussian distribution : 
 [-0.60390437  0.30444912  0.25720749  0.02393181  0.87191399]


3D Array filled with random values as per gaussian distribution : 
 [[ 1.43735633  0.00730637  1.33088133]
 [ 0.98820261  0.23229616  0.17618092]]


In [None]:
# Making random numbers predictable *******************

In [183]:
# Set the state of the random number generator
np.random.seed(42)

In [184]:
# One set of random numbers
first_random_arr = np.random.normal(size=(2, 2))
print(first_random_arr)

# Another set
second_random_arr = np.random.normal(size=(2, 2))
print(second_random_arr)

# Reset the state of the random number generator
np.random.seed(42)

# The same as "first_random_arr" above.
print(np.random.normal(size=(2, 2)))

# The same as "second_random_arr" above.
print(np.random.normal(size=(2, 2)))

[[ 0.49671415 -0.1382643 ]
 [ 0.64768854  1.52302986]]
[[-0.23415337 -0.23413696]
 [ 1.57921282  0.76743473]]
[[ 0.49671415 -0.1382643 ]
 [ 0.64768854  1.52302986]]
[[-0.23415337 -0.23413696]
 [ 1.57921282  0.76743473]]


In [89]:
# Create an object array to hold numbers as well as strings
arr1d_obj = np.array([1, 'a'], dtype='object')
arr1d_obj

array([1, 'a'], dtype=object)

In [90]:
# Convert an array back to a list
arr1d_obj.tolist()

[1, 'a']

In [91]:
# Create a 2d array with 3 rows and 4 columns
list2 = [[1, 2, 3, 4],[3, 4, 5, 6], [5, 6, 7, 8]]

arr2 = np.array(list2, dtype='float')
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3.,  4.,  5.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [92]:
# Reverse only the row positions
arr2[::-1, ]

array([[ 5.,  6.,  7.,  8.],
       [ 3.,  4.,  5.,  6.],
       [ 1.,  2.,  3.,  4.]])

In [93]:
# Reverse the row and column positions
arr2[::-1, ::-1]

array([[ 8.,  7.,  6.,  5.],
       [ 6.,  5.,  4.,  3.],
       [ 4.,  3.,  2.,  1.]])

In [98]:
# represent missing values and infinite?
# Missing values can be represented using np.nan object, while np.inf represents infinite. 
# Let’s place some in arr2d.

# Insert a nan and an inf
arr2[1,1] = np.nan  # not a number
arr2[1,2] = np.inf  # infinite
arr2

array([[  1.,   2.,   3.,   4.],
       [  3.,  nan,  inf,   6.],
       [  5.,   6.,   7.,   8.]])

In [99]:
# Replace nan and inf with -1. 
missing_bool = np.isnan(arr2) | np.isinf(arr2)
print(missing_bool)

arr2[missing_bool] = -1  
arr2

[[False False False False]
 [False  True  True False]
 [False False False False]]


array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [100]:
# compute mean, min, max on the ndarray
# mean, max and min
print("Mean value is: ", arr2.mean())
print("Max value is: ", arr2.max())
print("Min value is: ", arr2.min())

Mean value is:  3.58333333333
Max value is:  8.0
Min value is:  -1.0


In [102]:
# compute the minimum values row wise or column wise, use the np.amin 

# Row wise and column wise min
print("Column wise minimum : ", np.amin(arr2, axis=0))
print("Row wise minimum    : ", np.amin(arr2, axis=1))

Column wise minimum :  [ 1. -1. -1.  4.]
Row wise minimum    :  [ 1. -1.  5.]


In [None]:
# Reshaping and Flattening Multidimensional arrays
# Reshaping is changing the arrangement of items so that shape of the array changes 
# while maintaining the same number of dimensions.

# Flattening, however, will convert a multi-dimensional array to a flat 1d array. 
# And not any other shape.

# First, let’s reshape the arr2 array from 3×4 to 4×3 shape.

In [107]:
# Reshape a 3x4 array to 4x3 array
arr2.reshape(4, 3)

array([[ 100.,    2.,    3.],
       [   4.,    3.,    4.],
       [   5.,    6.,    5.],
       [   6.,    7.,    8.]])

In [None]:
# What is the difference between flatten() and ravel()?
# There are 2 popular ways to implement flattening. That is using the flatten() method 
# and the other using the ravel() method.

# The difference between ravel and flatten is, the new array created using ravel is 
# actually a reference to the parent array. 
# So, any changes to the new array will affect the parent as well. 
# But is memory efficient since it does not create a copy.

In [108]:
# Flatten it to a 1d array
arr2.flatten()

array([ 100.,    2.,    3.,    4.,    3.,    4.,    5.,    6.,    5.,
          6.,    7.,    8.])

In [111]:
# Changing the flattened array does not change parent
b1 = arr2.flatten()  
b1[0] = 105  # changing b1 does not affect arr2
arr2

array([[ 100.,    2.,    3.,    4.],
       [   3.,    4.,    5.,    6.],
       [   5.,    6.,    7.,    8.]])

In [112]:
b1

array([ 105.,    2.,    3.,    4.,    3.,    4.,    5.,    6.,    5.,
          6.,    7.,    8.])

In [113]:
# Changing the raveled array changes the parent also.
b2 = arr2.ravel()  
b2[0] = 101  # changing b2 changes arr2 also
arr2


array([[ 101.,    2.,    3.,    4.],
       [   3.,    4.,    5.,    6.],
       [   5.,    6.,    7.,    8.]])

In [114]:
# create sequences, repetitions 
# Lower limit is 0 be default
print(np.arange(5))  

[0 1 2 3 4]


In [115]:
# 0 to 9
print(np.arange(0, 10))  

[0 1 2 3 4 5 6 7 8 9]


In [116]:
# 0 to 9 with step of 2
print(np.arange(0, 10, 2))  

[0 2 4 6 8]


In [117]:
# 10 to 1, decreasing order
print(np.arange(10, 0, -1))

[10  9  8  7  6  5  4  3  2  1]


In [119]:
# create repeating sequences?
# np.tile will repeat a whole list or array n times. 
# Whereas, np.repeat repeats each item n times.

a = [1,2,3] 

# Repeat whole of 'a' two times
print('Tile:   ', np.tile(a, 2))

# Repeat each element of 'a' two times
print('Repeat: ', np.repeat(a, 2))

Tile:    [1 2 3 1 2 3]
Repeat:  [1 1 2 2 3 3]


In [None]:
# Arrays support vectorised operations, while lists don’t.
# Once an array is created, you cannot change its size. You will have to create a new array 
# or overwrite the existing one.
# Every array has one and only one dtype. All items in it should be of that dtype.
# An equivalent numpy array occupies much less space than a python list of lists.