# WHY NUMPY
* Much Faster
* Less Memory than Python Lists
* More Efficient
* More Flexible
* More Robust
* More Versatile
* More Expressive
* More Interoperable
* More Extensible


### This is because NumPy is a library that operates on whole blocks of data at once rather than one element per iteration; its vectorized operations are more efficient than Python loops.


In [1]:
# NumPy arrays: wrap a plain Python list in an ndarray and inspect it.
import numpy as np
a = [1, 2, 3, 4, 5]
b = np.array(a)
# One value per line: the array itself, its Python type, its shape tuple
# and the dtype of its elements.
print(b, type(b), b.shape, b.dtype, sep="\n")


[1 2 3 4 5]
<class 'numpy.ndarray'>
(5,)
int32


In [2]:
# NumPy arrays built from a list with mixed element types
import numpy as np
a = [1, 2, 3, 4, 5, "hello"]
b = np.array(a)
print(b)
# NumPy arrays hold a single (homogeneous) data type, so mixing ints with a
# string turns every element into a string, as the printed output shows.


['1' '2' '3' '4' '5' 'hello']


In [5]:
# Force an integer array from mixed input by passing dtype=int
a = [1, 2, 3, 4, 5, "6", 7.6]
b = np.array(a, dtype=int)  # per the output: "6" becomes 6 and 7.6 is truncated to 7
print(b)


[1 2 3 4 5 6 7]


In [6]:
# a*3 is Python list repetition (the list concatenated three times), not
# element-wise multiplication; the mixed types again all become strings.
a=[1,2,3,4,5,"6",7.6]
b=np.array(a*3)
print(b)

['1' '2' '3' '4' '5' '6' '7.6' '1' '2' '3' '4' '5' '6' '7.6' '1' '2' '3'
 '4' '5' '6' '7.6']


In [11]:
# NumPy array creation functions: np.ones
b=np.ones(3, dtype=int) # integer ones; when dtype is omitted the default is float64
print(b)
b=np.ones(3, dtype=np.float64) # explicit float64, printed as "1."
print(b)


[1 1 1]
[1. 1. 1.]
[1 1 1]


In [13]:
# 3x4 array of zeros (shown as floats, since no dtype was given)
b=np.zeros((3,4))
b

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [16]:
# 5x5 array with every element set to the fill value 1
b=np.full((5,5),1)
b

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [26]:
b = np.empty(4, dtype=int)  # allocates 4 uninitialized slots: contents are arbitrary leftovers, not random numbers
b = np.arange(4)  # replaces b with array([0,1,2,3])
b


array([0, 1, 2, 3])

### .arange

In [28]:
# arange(start, stop): integers from 2 up to, but not including, 10
b = np.arange(2, 10)
b


array([2, 3, 4, 5, 6, 7, 8, 9])

In [29]:
# arange(start, stop, step): every second integer from 2, stopping before 10
b = np.arange(2, 10, 2)
b


array([2, 4, 6, 8])

### .linspace

In [67]:
# 10 evenly spaced samples from 2 to 10 (both ends included), then cast to int
b = np.linspace(2, 10,10,dtype=int)
print(b)
# linspace spaces the samples evenly between start and stop; when the number
# of samples is omitted it defaults to 50.

# One way to look at the step size is the gap between the first two samples.
# NOTE: dtype=int truncated the samples, so this prints 0 here rather than
# the real spacing.
print(b[1]-b[0])
# np.diff gives the gap between every pair of neighbouring samples
print(np.diff(b))
# n=1 is np.diff's default order, so this prints the same thing again
print(np.diff(b, n=1))
# NOTE(review): np.linspace(..., retstep=True) on a float array reports the
# true step directly - consider it instead of differencing truncated values.

[ 2  2  3  4  5  6  7  8  9 10]
0
[0 1 1 1 1 1 1 1 1]
[0 1 1 1 1 1 1 1 1]


In [68]:
# endpoint=False leaves the stop value (10) out of the samples; dtype=int then
# truncates them, which is why 2 and 6 each appear twice in the output.
b = np.linspace(2, 10,10,dtype=int,endpoint=False) # last value is not included
print(b)

[2 2 3 4 5 6 6 7 8 9]


In [75]:
# np.random.rand(d0, d1, ...) draws random floats between 0 and 1 in that shape.
# Each assignment replaces the previous one, so only the 3x2x5 array is shown.
b=np.random.rand(3) # 1-D: 3 elements
b=np.random.rand(3,2) # 2-D: 3 rows x 2 columns
b=np.random.rand(3,2,5) # 3-D: 3 blocks of 2 rows x 5 columns
b

array([[[0.79942908, 0.65343437, 0.02547025, 0.94359532, 0.03170676],
        [0.07651936, 0.33138461, 0.52904408, 0.69418192, 0.49003474]],

       [[0.68501631, 0.09525589, 0.66248232, 0.27003014, 0.25820756],
        [0.97678486, 0.2829559 , 0.47709775, 0.86616629, 0.51982083]],

       [[0.2638006 , 0.78903015, 0.7508358 , 0.13649651, 0.66568267],
        [0.35311895, 0.21981422, 0.58469434, 0.94567622, 0.38521284]]])

In [79]:
# Scale the 0..1 samples by 10 to get 10 random floats between 0 and 10
b=np.random.rand(10)*10
b

array([4.20354325, 5.26389132, 1.52751048, 9.885656  , 2.22349471,
       9.23962556, 5.08329183, 9.88806615, 7.14315293, 5.35527055])

In [84]:
# 3x4 array of random integers from 0 up to, but not including, 10
b=np.random.randint(0,10,size=(3,4))
b

array([[9, 6, 7, 8],
       [4, 0, 6, 9],
       [6, 5, 0, 8]])

In [91]:
# 9 evenly spaced samples from 0 to 5, rounded to 2 decimal places
b=np.round(np.linspace(0,5,9),2)
b

array([0.  , 0.62, 1.25, 1.88, 2.5 , 3.12, 3.75, 4.38, 5.  ])

In [116]:
# Build 5 rows; each row is a random 3-element list repeated 3 times (9 columns).
# The inner `i` (0..2) belongs to the inner comprehension; the outer `i` only counts rows.
li=[[ i*np.random.randint(1,5) for i in range(3)]*3 for i in range(5)]
print(li)
arr=np.array(li)
print(arr.data)      # memory object backing the array
print(arr.shape)     # (rows, columns)
print(arr.dtype)     # element type
print(arr.ndim)      # number of dimensions
print(arr.itemsize)  # bytes per element
print(arr.size)      # total number of elements
print(arr.strides)
# strides gives the number of bytes to skip to reach the next row, followed by
# the number of bytes to skip to reach the next element within a row


[[0, 1, 6, 0, 1, 6, 0, 1, 6], [0, 4, 2, 0, 4, 2, 0, 4, 2], [0, 4, 2, 0, 4, 2, 0, 4, 2], [0, 2, 8, 0, 2, 8, 0, 2, 8], [0, 3, 4, 0, 3, 4, 0, 3, 4]]
<memory at 0x0000019920C2FE10>
(5, 9)
int32
2
4
45
(36, 4)


In [117]:
print(arr)
print(arr[1][0])      # chained indexing: take row 1, then element 0 of that row
print(arr[1, 0])      # the same element with a single (row, column) index
print(arr[1, 0:2])    # row 1, columns 0 and 1
print(arr[1:3, 0:2])  # rows 1-2, columns 0-1

[[0 1 6 0 1 6 0 1 6]
 [0 4 2 0 4 2 0 4 2]
 [0 4 2 0 4 2 0 4 2]
 [0 2 8 0 2 8 0 2 8]
 [0 3 4 0 3 4 0 3 4]]
0
0
[0 4]
[[0 4]
 [0 4]]


## Nested Python lists access the elements of a 2D structure in an awkward, two-step way.
## arr[x][y] first fetches the row arr[x] and then takes index y out of that row.
## For a nested list, arr[x][y] is the only way to reach a single 2D element.
## NumPy arrays also accept arr[x, y], which addresses the element at row x, column y directly.
# The difference matters most when slicing 2D data: on a list, [x:y][a:b] slices the rows twice instead of selecting rows x:y and columns a:b.

In [143]:
# Build the 5x5 grid 1..25 as a nested list, then as a NumPy array.
a = [[5 * row + col + 1 for col in range(5)] for row in range(5)]
print(a)
arr = np.array(a)
print(arr)

# get 11,12,13: the first three entries of row 2 (same syntax works on the array)
first_three = a[2][:3]

# get 9,14,19: column 3 of rows 1..3
# a[1:][2] is NOT a column: a[1:] is a list of rows, so [2] picks one whole row.
# With a nested list the column has to be collected row by row.
column_from_list = [row[3] for row in a[1:4]]
# NumPy slices rows and columns in one step. The row range must stop at 4;
# arr[1:, 3] would also pick up 24 from the last row.
column_from_array = arr[1:4, 3]

# last two rows: identical slice syntax for list and array
a[3:]
arr[3:]

# 7,8,12,13 making a square
# a[1:3][1:3] slices the list of rows twice, so it cannot cut out columns;
# the array takes a row range and a column range together.
arr[1:3, 1:3]

[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15], [16, 17, 18, 19, 20], [21, 22, 23, 24, 25]]
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]]


array([[ 7,  8],
       [12, 13]])

In [149]:
# A plain list has no `list + 1`: adding 1 to every element needs an explicit
# loop, plus a conversion if an ndarray is wanted afterwards.
# With NumPy, np.array(list) + 1 (or simply arr = arr + 1) does it in one
# vectorised step, which is fast and needs no Python-level loop.
a = np.array([1, 2, 3, 4, 5])
print(a)
print(a + 1)
# summary statistics, written with the module-level functions
print(np.mean(a))
print(np.std(a))
print(np.var(a))
print(np.max(a))
print(np.min(a))
print(np.sum(a))
print(np.argmax(a))  # position of the largest element
print(np.argmin(a))  # position of the smallest element
# running totals and running products
print(np.cumsum(a))
print(np.cumprod(a))
print(np.logical_not(a))  # element-wise logical NOT
a > (a + 1)


[1 2 3 4 5]
[2 3 4 5 6]
3.0
1.4142135623730951
2.0
5
1
15
4
0
[ 1  3  6 10 15]
[  1   2   6  24 120]
[False False False False False]


array([False, False, False, False, False])

## Boolean Indexing
## 1D

In [161]:
a=[i+1 for i in range (50)]  # the integers 1..50
a=np.array(a)
boolA=a>40  # boolean mask: True where the element exceeds 40
b=a[boolA]  # keep only the elements where the mask is True
print(b)
b=a[(a>40)&(a<45)]  # conditions are combined with &, each one in parentheses
print(b)
b[:2]=69  # overwrites the first two entries of b (boolean indexing produced a separate array)
print(b)
index=np.where(b==69)  # positions inside b, i.e. 0 and 1
print(index)
a[index]  # NOTE: b's positions applied to a, so this returns a[0], a[1] = 1, 2

[41 42 43 44 45 46 47 48 49 50]
[41 42 43 44]
[69 69 43 44]
(array([0, 1], dtype=int64),)


array([1, 2])

In [15]:
import numpy as np
a=[1,3,5,7,9,11,13,15,17,19,20,20]
a=np.array(a)
# Find multiples of 3
b=a[a%3==0]  # the matching values
print(b)
index=np.where(a%3==0)  # the positions of those values
print(index)
# Change odd numbers to -1 (in place, through a boolean mask)
a[a%2==1]=-1
print(a)
# Change the first occurrence of the max to 0: argmax gives the index of the first maximum
b=a.argmax()
a[b]=0
# To change all occurrences of the max to 0 instead, use:
# a[a==a.max()]=0
print(a)

[ 3  9 15]
(array([1, 4, 7], dtype=int64),)
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 20 20]
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1  0 20]


## Boolean Indexing 
# 2D

In [25]:
# 5x4 array of random integers from 0 up to, but not including, 50
a=np.random.randint(0,50,size=(5,4))
print(a)

[[ 8 17 47 32]
 [26 22 27  5]
 [24 22 13 26]
 [ 3 19 49 44]
 [28 10 17 36]]


In [18]:
# Element-wise comparison: yields a boolean array with the same shape as a
a>20

array([[ True,  True, False,  True],
       [ True,  True, False,  True],
       [ True,  True, False,  True],
       [False,  True, False, False],
       [False,  True,  True, False]])

In [33]:
# To change values in a 2D array only within a particular row or column,
# build the mask from that row/column and use it together with the index.
# Example: set the third column (index 2) to 100 wherever its value is below 18.

boolB=a[:,2]<18  # one boolean per row, computed from column 2 only
a[boolB,2]=100  # rows picked by the mask, column fixed at 2
a

array([[  8,  17,  32,  47],
       [ 26,  22,  27,   5],
       [ 24,  22, 100,  26],
       [  3,  19,  49,  44],
       [ 28,  10, 100,  36]])

In [40]:
a[:,3].sort()  # sorts the values of column 3 in place
a  # no effect here: only the value of the cell's last expression appears in the output
a[0].size  # number of elements in the first row

4

## NumPy Broadcasting


In [41]:
import numpy as np
# two random 3x3 integer arrays with values from 1 to 9
x=np.random.randint(1,10,(3,3))
y=np.random.randint(1,10,(3,3))
print(x)
print(y)

[[2 5 7]
 [9 7 9]
 [8 2 8]]
[[1 6 4]
 [9 5 3]
 [2 1 8]]


In [43]:
ans=x-y  # element-wise subtraction
print(ans)
## this is the simple case: both shapes are identical, so the arrays are already compatible

[[ 1 -1  3]
 [ 0  2  6]
 [ 6  1  0]]


In [44]:
import numpy as np
x=np.random.randint(1,10,(3,3))  # 3x3 matrix
y=np.random.randint(1,10,(3))  # 1-D array of 3 elements
print(x)
print(y)

[[9 4 1]
 [4 2 2]
 [1 8 5]]
[1 1 4]


In [45]:
ans=x-y
print(ans)

# here the 1-D array is broadcast: it is treated as if repeated for each of the
# three rows, so the subtraction can still be done element by element


[[ 8  3 -3]
 [ 3  1 -2]
 [ 0  7  1]]


In [47]:
# shapes (3,2) and (2,3): the same number of elements, but a different layout
x=np.random.randint(1,10,(3,2))
y=np.random.randint(1,10,(2,3))
print(x)
print(y)

[[3 2]
 [8 5]
 [9 8]]
[[6 7 2]
 [7 3 6]]


In [50]:
# another example: shapes (3,2) and (2,3) cannot be broadcast together,
# so the subtraction below raises ValueError
ans=x-y
print(ans)

ValueError: operands could not be broadcast together with shapes (3,2) (2,3) 

In [56]:
# One way to make two arrays compatible is to change a shape with np.transpose,
# np.reshape or np.resize.
# Another example: transposing y turns its (2,3) shape into (3,2), matching x.
x=np.random.randint(1,10,(3,2))
y=np.random.randint(1,10,(2,3))
print(x)
print(y)
y=np.transpose(y)
print(y)
ans=x-y
print(ans)

[[7 7]
 [8 4]
 [3 2]]
[[2 8 2]
 [8 9 6]]
[[2 8]
 [8 9]
 [2 6]]
[[ 5 -1]
 [ 0 -5]
 [ 1 -4]]


In [None]:
# So this was broadcasting in NumPy: an array is automatically stretched to be compatible with another array's shape.
# It only works when each pair of dimensions is equal or one of them is 1; for example, shape (3,) or (1,3) is broadcast against (3,3), but (1,3) and (1,2) are not compatible.

# Applying everything we learnt till now to a CSV file.

In [10]:
import numpy as np
import csv

killed = []
wounded = []
country = []
# Pull the three columns of interest out of the CSV as separate lists.
# The `with` block closes the file as soon as reading is done, even if a row
# raises; newline='' is what the csv module asks for when given a file object.
with open('year2017.csv', newline='') as fileObj:
    data = csv.DictReader(fileObj, skipinitialspace=True)
    for row in data:
        killed.append(row['Killed'])
        wounded.append(row['Wounded'])
        country.append(row['Country'])

# The CSV values are still strings at this point
np_killed = np.array(killed)
np_wounded = np.array(wounded)

# Empty cells cannot be parsed as float, so they are counted as zero
np_killed[np_killed == ''] = '0.0'
np_wounded[np_wounded == ''] = '0.0'

# Convert every value to float
np_killed = np.array(np_killed, dtype=float)
np_wounded = np.array(np_wounded, dtype=float)

# Killed plus wounded for each row, added element by element
killWounded = np_killed+np_wounded

# Boolean mask selecting the rows whose country is India
np_country = np.array(country)
bool_arr = np_country == 'India'
ans=killWounded[bool_arr]

# total number of killed and wounded people from India
# (the array's own vectorised sum, rather than a Python-level loop over elements)
print(ans.sum())
# total number of killed and wounded people irrespective of their countries
print(killWounded.sum())

1167.0
51372.0
