# NumPy Basics: Arrays and Vectorized Computation

In [2]:
import numpy as np

# 1. The NumPy ndarray
An ndarray is a generic multidimensional container for homogeneous data; that is, all its elements must be of the same type.

In [4]:
# Generate an ndarray containing some random data
data = np.random.rand(2, 3)

In [5]:
data

array([[0.58474246, 0.14572465, 0.15603257],
       [0.0635169 , 0.40668985, 0.17769668]])

In [6]:
data.shape 
# to see the row and colums.

(2, 3)

In [7]:
data.dtype
# type of data what is this type of data

dtype('float64')

## Creating ndarrays
The easiest way to create a numpy array is to use the np.array function. 

In [8]:
data1 = [3, 9, 12, 7, 4, 80, 3]

In [9]:
arr1 = np.array(data1)

In [10]:
arr1

array([ 3,  9, 12,  7,  4, 80,  3])

In [None]:
type(arr1)

In [11]:
arr1.shape
# amount of numbers in the list

(7,)

np.array tries to infer a good data type for the array that it creates. 

In [12]:
arr1.dtype

dtype('int32')

In [13]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]

In [14]:
arr2 = np.array(data2)

In [15]:
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [16]:
arr2.shape
# check the row and colums

(2, 4)

In [17]:
arr2.dtype

dtype('int32')

In [18]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [19]:
np.zeros((3, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [20]:
range(10)

range(0, 10)

In [21]:
list(range(10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [22]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [23]:
list(range(4, 10))

[4, 5, 6, 7, 8, 9]

## Exercise
Create a vector (i.e., an np array) with values ranging from 10 to 49

In [27]:
np.arange(10,50)

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
       44, 45, 46, 47, 48, 49])

## 2. Arithmetics with NumPy arrays

### Any arithmetic operations apply element-wise

In [33]:
arr = np.array([[1, 2, 3], [4, 5, 6]])

In [34]:
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [31]:
arr + arr

array([[ 2,  4,  6],
       [ 8, 10, 12]])

In [32]:
arr - arr

array([[0, 0, 0],
       [0, 0, 0]])

#### Arrays are important because they enable you to express batch operations on data without writing any for loops.

In [36]:
b = np.array([[7, 1, 2], [8, 3, 0]])

In [37]:
arr + b

array([[ 8,  3,  5],
       [12,  8,  6]])

In [38]:
arr * arr

array([[ 1,  4,  9],
       [16, 25, 36]])

In [39]:
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [40]:
1/arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

## 3. Basic Indexing and Slicing

### Selecting a subset of your data

In [41]:
arr = np.arange(10)

In [42]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [43]:
arr[5]

5

In [44]:
arr[5:8]

array([5, 6, 7])

In [46]:
arr[5] = 0

In [47]:
arr

array([0, 1, 2, 3, 4, 0, 6, 7, 8, 9])

In [50]:
arr[5:8] = 10

In [51]:
arr

array([ 0,  1,  2,  3,  4, 10, 10, 10,  8,  9])

### Array slices are views, i.e., modifications of views are reflected in the source array

In [52]:
arr

array([ 0,  1,  2,  3,  4, 10, 10, 10,  8,  9])

In [53]:
arr_slice = arr[2:4]

In [54]:
arr_slice

array([2, 3])

In [55]:
arr_slice[0] = 12

In [56]:
arr_slice

array([12,  3])

In [57]:
arr

array([ 0,  1, 12,  3,  4, 10, 10, 10,  8,  9])

In [58]:
arr_copy = arr[3:5].copy()

In [59]:
arr_copy

array([3, 4])

In [60]:
arr_copy[0] = 100
arr_copy

array([100,   4])

In [61]:
arr

array([ 0,  1, 12,  3,  4, 10, 10, 10,  8,  9])

In [62]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [63]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [64]:
arr2d[0][2]

3

### You can pass a comma-separated list of indices to select individual elements

In [65]:
arr2d[0, 2]

3

![alt text](indexing.png "Indexing")


## 4. Boolean Indexing

In [80]:
names = np.array(['Aaron', 'Bob', 'Cate', 'Aaron', 'Bob', 'Aaron', 'Bob'])

In [76]:
names

array(['Aaron', 'Bob', 'Cate', 'Aaron', 'Bob', 'Aaron', 'Bob'],
      dtype='<U5')

In [77]:
names == 'Bob'

array([False,  True, False, False,  True, False,  True])

In [82]:
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18], [19, 20, 21]])

In [79]:
data

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12],
       [13, 14, 15],
       [16, 17, 18],
       [19, 20, 21]])

In [83]:
data[[True, False, False, False, False, False, True]]

array([[ 1,  2,  3],
       [19, 20, 21]])

In [84]:
names == 'Bob'

array([False,  True, False, False,  True, False,  True])

In [None]:
data[names == 'Bob']

numpy.nonzero (_a_): 
    
Return the indices of the elements in _a_ that are non-zero.

In [85]:
np.nonzero(names == 'Bob')

(array([1, 4, 6], dtype=int64),)

In [86]:
data[names == 'Bob']

array([[ 4,  5,  6],
       [13, 14, 15],
       [19, 20, 21]])

In [87]:
data[names == 'Bob', 1]

array([ 5, 14, 20])

In [None]:
data[names == 'Bob', 1:]

In [None]:
data

In [None]:
data[:,0]

In [None]:
cond = names == 'Bob'
data[cond]

In [None]:
cond

In [None]:
~cond

In [None]:
data[~cond]

In [None]:
data

In [None]:
data > 10

In [None]:
data[data > 10] = 0

In [73]:
data

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12],
       [13, 14, 15],
       [16, 17, 18],
       [19, 20, 21]])

In [None]:

deaths = np.array([4352, 233, 3245, 256, 2394])

indices = np.where(deaths < 500)[0]
print(f"Indices of battles with less than 500 deaths: {indices}")
deaths_less_than_500 = deaths[deaths < 500]
total_deaths = np.sum(deaths_less_than_500)
print(f"Total civilian deaths in battles with less than 500 deaths: {total_deaths}")


## Exercise
1. Create a one-dimensional array of civilian deaths in 5 battles:  
    4352, 233, 3245, 256, 2394
2. Find the index of battles with less than 500 deaths
3. Find the number of civilian deaths in battles with less than 500 deaths using boolean indexing


In [90]:
deaths=np.array([4352,233,3245,256,2394])

In [101]:
deaths_less_than_500=deaths[deaths<500]
deaths

array([4352,  233, 3245,  256, 2394])

In [103]:
deaths<500

array([False,  True, False,  True, False])

In [106]:
np.nonzero(deaths<500)

(array([1, 3], dtype=int64),)

In [116]:
indices = np.where(deaths < 500)[0]
indices

array([1, 3], dtype=int64)

In [66]:
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18], [19, 20, 21]])

In [89]:
data

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12],
       [13, 14, 15],
       [16, 17, 18],
       [19, 20, 21]])

In [68]:
data[[2, 4]]

array([[ 7,  8,  9],
       [13, 14, 15]])

In [117]:
data[[4, 1]]

array([[13, 14, 15],
       [ 4,  5,  6]])

In [118]:
data[4, 1:]

array([14, 15])

In [119]:
data[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [120]:
data[:2, 1:]

array([[2, 3],
       [5, 6]])

In [121]:
data.T

array([[ 1,  4,  7, 10, 13, 16, 19],
       [ 2,  5,  8, 11, 14, 17, 20],
       [ 3,  6,  9, 12, 15, 18, 21]])

## 5. Universal Functions: Fast Element-Wise Array Functions
A function that performs element-wise operations on data in ndarrays.

In [None]:
data

In [None]:
np.sqrt(data)

In [None]:
np.exp(data)

In [None]:
np.square(data)

In [None]:
np.mean(data)

In [None]:
data.mean()

In [None]:
data.sum()

In [None]:
np.add(data, data)

In [None]:
data + data

In [None]:
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr

### arr.sum(1) means “compute sum across the columns” whereas arr.sum(0) means “compute sum down the rows.”

In [None]:
arr.sum(axis = 0)

In [None]:
arr.sum(0)

In [None]:
arr.sum(axis = 1)

In [None]:
arr.sum()

In [None]:
arr.mean(axis = 0)

In [None]:
arr.mean(axis = 1)

## 6. Expressing Conditional Logic as Array Operations

In [122]:
xarr = np.array([1, 2, 3, 4, 5])
yarr = np.array([11, 12, 13, 14, 15])

In [123]:
xarr

array([1, 2, 3, 4, 5])

In [124]:
yarr

array([11, 12, 13, 14, 15])

In [125]:
cond = np.array([True, False, True, False, False])

In [None]:
result = np.where(cond, xarr, yarr)

In [None]:
result

In [None]:
arr = np.array([1, 3, -2, 5, -7])

In [None]:
arr

In [None]:
arr > 0 

In [None]:
# Calculate number of positive values
(arr>0).sum() 

In [None]:
arr.sort()

In [None]:
arr

In [None]:
sorted(arr)

In [None]:
arr = np.array([1, 3, -2, 5, -7])
sorted(arr)
arr

In [126]:
arr = np.array([1, 3, -2, 5, -7])
arr.sort()
arr

array([-7, -2,  1,  3,  5])

## Exercise
Write a NumPy program to sort 4 elements from beginning of a given array of 10 elements.

In [132]:
arr = np.array([4, 9, 1, -3, 31, 23, 4, 0, 7, 2])

In [133]:
# Expected result
# result = np.array([-3, 1, 4, 9, 31, 23, 4, 0, 7, 2])

In [136]:
arr[:4]=np.sort(arr[:4])
print(arr)

[-3  1  4  9 31 23  4  0  7  2]


In [137]:
arr_slice=arr[0:4]
arr_slice

array([-3,  1,  4,  9])

In [139]:
arr_slice.sort()
arr_slice

array([-3,  1,  4,  9])

In [140]:
arr

array([-3,  1,  4,  9, 31, 23,  4,  0,  7,  2])

In [135]:
ints = np.array([3, 1, 8, 1, 3, 2, 8, 4])

In [None]:
ints.sort()

In [None]:
ints

In [None]:
ints = np.array([3, 1, 8, 1, 3, 2, 8, 4])

In [None]:
np.unique(ints)