# Numpy
Author: Amanpreet Singh
(aman64039@gmail.com)

In [None]:

Installation of numpy

conda install numpy

In [6]:
import numpy  as np
np.__version__

'1.12.1'

### How to create ndarrays?

**Let us start with a 1-Dimensional (1-D) array:**
    
It is pretty straightforward: call the function `np.array`, which accepts any sequence-like
object (such as a list) as its argument.

In [11]:
# np.array accepts any sequence-like object; here a plain Python list of three integers.
temp_list = [1,2,3]
temp_arr_1d = np.array(temp_list)

In [9]:
# A bare expression on the last line of a cell is echoed as the cell's output.
temp_arr_1d

array([1, 2, 3])

<b> Built-in attributes </b>


In [12]:
temp_arr_1d.ndim  # number of dimensions (axes) of the array

1

In [13]:
temp_arr_1d.shape   # length along each axis, as a tuple; (3,) means one axis holding 3 items

(3,)

In [10]:
# Data type of the elements. The default integer width is platform-dependent:
# int32 in the run recorded here, int64 on many other systems.
temp_arr_1d.dtype

dtype('int32')

**2-Dimensional (2-D) array:**

In [25]:
# Build a 2-D array from a nested list: each inner list becomes one row.
temp_arr_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
temp_arr_2d

array([[1, 2, 3],
       [4, 5, 6]])

In [26]:
temp_arr_2d.shape  # (rows, columns): 2 rows of 3 elements each

(2, 3)

**3-Dimensional (3-D) array:**

In [27]:
# Build a 3-D array: the outer list holds two 2x3 blocks, one per line below.
temp_arr_3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[11, 21, 31], [41, 51, 61]],
])
temp_arr_3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[11, 21, 31],
        [41, 51, 61]]])


A simple way to tell the number of dimensions is to count the opening square brackets at the start of the output: 3 brackets indicate a 3-D array.


In [28]:
temp_arr_3d.shape  # (2, 2, 3): 2 blocks, each with 2 rows of 3 elements

(2, 2, 3)

### Creating array by explicitly mentioning the data type

In [30]:
# dtype overrides type inference: the float inputs are stored as 32-bit integers.
np.array([1.0,2.0,3.0], dtype= 'int32')

array([1, 2, 3])

In [31]:
# Same input with an explicit 64-bit float dtype; the values are kept as floats.
np.array([1.0,2.0,3.0], dtype= 'float64')

array([ 1.,  2.,  3.])

In [32]:
# If we don't explicitly mention the data type, NumPy infers it from the values.
np.array([1.0,2.0,3.0]).dtype    # float inputs -> float64

dtype('float64')

In [33]:
# '<U3' is a fixed-width Unicode string type; the width (3) is the length of the longest element.
np.array(['xyz','a','b']).dtype

dtype('<U3')

For more information on data types, refer to the NumPy reference:

https://numpy.org/doc/stable/reference/arrays.dtypes.html

### Other ways of creating arrays

In [35]:
np.zeros(10)
# Create a length-10 array filled with zeros (float64 by default; pass dtype=int for integers)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [38]:
np.ones(5)   # 1-D array of five ones (float by default, like np.zeros)

array([ 1.,  1.,  1.,  1.,  1.])

In [37]:
np.ones((5,5))   # 2-D array of ones; the shape (5, 5) is passed as a single tuple argument

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [36]:
# np.arange is NumPy's counterpart of Python's built-in range: it returns an ndarray
# and, like range, excludes the stop value (0 through 14 here).
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [39]:
# Array of five values evenly spaced between 0 and 10, both endpoints included
np.linspace(0, 10, 5)

array([  0. ,   2.5,   5. ,   7.5,  10. ])

In [40]:
# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere
np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

Besides the need to just initialize arrays with zeros or ones, we might need to initialize the arrays with some random values.

In [42]:
# Create a 4x4 array of uniformly distributed random values in the half-open interval [0, 1)
np.random.random((4, 4))

array([[ 0.84637407,  0.66872347,  0.98020195,  0.52586767],
       [ 0.67475621,  0.30095376,  0.44845315,  0.58829923],
       [ 0.91984719,  0.24101943,  0.63351599,  0.3339903 ],
       [ 0.82532002,  0.11518916,  0.61363956,  0.66716507]])

In [43]:
# Array of random integers between 0 (inclusive) and 10 (exclusive).
# The upper bound itself is never drawn; pass 11 as the upper bound to include 10.

np.random.randint(0, 10, (4, 4))

array([[7, 9, 9, 7],
       [6, 2, 6, 7],
       [8, 1, 7, 6],
       [8, 8, 0, 5]])

In [41]:
# reshape arranges the same elements in a new shape; for instance, a 1-D array of
# 15 elements becomes a 2-D array of 3 rows and 5 columns (3 * 5 must equal 15).

np.arange(15).reshape((3,5))

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

## Vectorization

Let us say we want to multiply each element of an array by the scalar value 2.

In plain Python, we would iterate through the elements in a for loop and multiply each one by 2. NumPy arrays, however, perform this operation efficiently and concisely, without an explicit loop.

In [44]:
# Arithmetic with a scalar is applied to every element; no explicit loop is needed.
temp_array_2d = np.array([
    [1.0, 2.0, 3.0],
    [4.0, 5.0, 6.0],
])

temp_array_2d * 2
 

array([[  2.,   4.,   6.],
       [  8.,  10.,  12.]])

In [45]:
# Squaring the elements of the array: * between two arrays multiplies element by
# element (it is not matrix multiplication).
temp_array_2d * temp_array_2d

array([[  1.,   4.,   9.],
       [ 16.,  25.,  36.]])

In [46]:
# Raising to the power 0.5 takes the square root of every element.
temp_array_2d ** 0.5

array([[ 1.        ,  1.41421356,  1.73205081],
       [ 2.        ,  2.23606798,  2.44948974]])

## Access

Accessing the elements of an array is very similar to indexing and slicing a Python list.

**1-D array**

In [48]:
# Sample 1-D array holding the integers 0 through 9, used by the slicing examples.
temp_array_1d = np.arange(10)
temp_array_1d

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [49]:
temp_array_1d[:5]  # First 5 items (indices 0 through 4; the stop index is exclusive)

array([0, 1, 2, 3, 4])

In [50]:
temp_array_1d[:-1]  # Everything except the last item (-1 refers to the last index, which is excluded)

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [51]:
temp_array_1d[-5:]  # Last 5 items: start 5 positions from the end and run to the end

array([5, 6, 7, 8, 9])

In [52]:
temp_array_1d[1::2]  # Every second element (step 2), starting with the second element (index 1)

array([1, 3, 5, 7, 9])

**2-D array**

In [68]:
# Rebinds temp_array_2d (previously the float array from the Vectorization section)
# to the integers 0 through 9 arranged as 2 rows by 5 columns.
temp_array_2d = np.arange(10).reshape(2,5)
temp_array_2d

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [69]:
temp_array_2d[0, 0]   # Element at row index 0, column index 0, i.e. first row, first column

0

In [70]:
temp_array_2d[:, 2]     # All rows, 3rd column (column index 2); the result is a 1-D array

array([2, 7])

In [71]:
temp_array_2d[1, :]     # Second row (row index 1), all columns

array([5, 6, 7, 8, 9])

In [72]:
temp_array_2d[1::2]    # Every other row, starting with the second; with 2 rows that is just row 1 (result stays 2-D)

array([[5, 6, 7, 8, 9]])

In [61]:
temp_array_2d[::-1, ::-1]   # A step of -1 on each axis reverses both the row order and the column order

array([[9, 8, 7, 6, 5],
       [4, 3, 2, 1, 0]])

In [66]:
# Build the 5x2 layout of the integers 0 through 9 used by the fancy-indexing example.
# It is constructed from scratch rather than by reshaping the previous
# temp_array_2d, so this cell gives the same result regardless of which
# earlier cells were run (or re-run) before it.
temp_array_2d = np.arange(10).reshape(5, 2)
temp_array_2d

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

To select out a subset of the rows in a particular order, you can simply pass a list or ndarray of integers specifying the desired order:

In [64]:
temp_array_2d[[2, 3, 0]]   # Fancy indexing: picks rows 2, 3 and 0, in exactly that order

array([[4, 5],
       [6, 7],
       [0, 1]])

**Boolean Indexing**

In [73]:
# Logical (boolean) indexing starts from a comparison, which is evaluated element-wise.
temp_array_1d > 5   # Output: boolean array, True wherever the element is greater than 5

array([False, False, False, False, False, False,  True,  True,  True,  True], dtype=bool)

In [None]:
temp_array_1d[temp_array_1d > 5]  # Keeps only the elements at positions where the mask is True.

# Boolean masks are also commonly used to filter one array
# based on the values of another array of matching length.

In [None]:
# Filter the rows of one array with a condition computed on a different array
# of matching length: row k of temp_data is kept when cond_array[k] > 2.
temp_data = np.arange(10).reshape(5, 2)
cond_array = np.array([1, 2, 3, 4, 5])

row_mask = cond_array > 2
temp_data[row_mask]

In [None]:
# Boolean values are coerced to 1 (True) and 0 (False) in arithmetic, so summing
# a boolean mask counts how many elements satisfy the condition.

# Defined here so the cell runs on a fresh kernel.
temp_array_1d = np.arange(10)
(temp_array_1d > 5).sum()   # 4 elements (6, 7, 8 and 9) are greater than 5

In [None]:
# A common step in data analysis is replacing some values of an array with a
# specific value (for instance, 0).

temp_array_1d = np.arange(-5, 10)
temp_array_1d[temp_array_1d < 0] = 0   # Replace all negative values with 0.
print(temp_array_1d)

# Note: assigning through a boolean mask modifies the array itself; no copy is
# made. Take a copy first (temp_array_1d.copy()) if the original values are
# still needed.

# Universal functions (ufuncs) are NumPy's built-in vectorized functions: they
# operate element-wise on whole arrays without an explicit Python loop.
# Intermediate results are printed because a cell only echoes its last expression.

temp_array_1d = np.arange(5)
print(np.sqrt(temp_array_1d))

# log(0) evaluates to -inf and emits a RuntimeWarning, so skip the leading 0.
print(np.log(temp_array_1d[1:]))

# Common statistical functions reduce the whole array to a single value.
print(np.min(temp_array_1d))
print(np.max(temp_array_1d))
print(np.mean(temp_array_1d))
print(np.sum(temp_array_1d))

temp_array_1d_1 = np.random.randint(0, 10, 10)
print(temp_array_1d_1)

temp_array_1d_2 = np.random.randint(0, 10, 10)
print(temp_array_1d_2)

# Element-wise maximum of two arrays. np.maximum is the right function here:
# np.max(a, b) would interpret b as the `axis` argument and raise an error.
elementwise_max = np.maximum(temp_array_1d_1, temp_array_1d_2)
elementwise_max

In [None]:
# Intermediate results are printed because a cell only echoes its last expression.
temp_array_2d = np.arange(15).reshape(5, 3)
print(temp_array_2d)

# Without an axis argument the reduction runs over every element of the array.
print(np.min(temp_array_2d))

# axis=0 reduces down the rows, giving one value per column (3 values);
# axis=1 reduces across the columns, giving one value per row (5 values).
print(np.min(temp_array_2d, axis=0))
print(np.min(temp_array_2d, axis=1))

# The axis parameter is also accepted by functions such as max, sum and mean.

temp_array_1d = np.arange(10)

# argmax returns the index of the maximum value, not the value itself.
print(temp_array_1d.argmax())

# argsort returns the indices that would put the array in sorted order.
temp_array_1d = np.array([2, 1, 4, 3, 5])
i = np.argsort(temp_array_1d)
print(i)

# Storing arrays
# Arrays can be stored on disk as well.

temp_array_1d = np.arange(10)
np.save('temp_array_uncompressed', temp_array_1d)   # Uncompressed raw binary format; ".npy" is appended

load_array_uncomp = np.load('temp_array_uncompressed.npy')
print(load_array_uncomp)

# Compressed
# np.savez writes an *uncompressed* .npz archive; np.savez_compressed is the
# variant that actually compresses. Each keyword (here `arr`) becomes the key
# under which the array is stored.

np.savez_compressed('temp_array_compressed.npz', arr=temp_array_1d)
# np.load on a .npz file returns a dict-like object; arrays are read on access by key.
load_array_compressed = np.load('temp_array_compressed.npz')
load_array_compressed['arr']

In [None]:
##  Broadcasting -- in Linear Algebra
# https://jakevdp.github.io/PythonDataScienceHandbook/02.05-computation-on-arrays-broadcasting.html

# Operations between arrays of different shapes are called broadcasting; they are discussed
# in more detail in Chapter 12 of "Python for Data Analysis".

In [None]:
# Basic linear-algebra operations.
# Note: despite the `_1d` in their names, the arrays in this cell are 2-D
# matrices; the names are kept unchanged so that other cells referring to them
# keep working. Intermediate results are printed because a cell only echoes its
# last expression.

# Transpose: rows become columns, so the (3, 5) array becomes (5, 3).
temp_array_1d = np.arange(15).reshape(3, 5)
print(temp_array_1d.T)

# Trace: the sum of the elements on the main diagonal.
temp_array_1d = np.arange(9).reshape(3, 3)
print(temp_array_1d)
print(np.trace(temp_array_1d))

temp_array_1d_1 = np.arange(9).reshape(3, 3)
print(temp_array_1d_1)
print(temp_array_1d_1.shape)

temp_array_1d_2 = np.arange(10, 19).reshape(3, 3)
print(temp_array_1d_2)
print(temp_array_1d_2.shape)   # shape of the second operand (not the first one again)

# For matrix multiplication A.dot(B), the number of columns of A must equal
# the number of rows of B; the result has the rows of A and the columns of B.
# Here both operands are (3, 3), so the product is (3, 3) as well.

temp_array_1d_1.dot(temp_array_1d_2)