In [1]:
import numpy as np

In [2]:
# show which NumPy version this notebook is running against
np.__version__

'1.18.1'

## Array Indexing

The important thing to remember is that indexing in Python starts at zero.

In [3]:
# build a one-dimensional array to demonstrate indexing
x1 = np.array([4, 3, 4, 4, 8, 4])

In [4]:
# display the array to confirm its contents
x1

array([4, 3, 4, 4, 8, 4])

In [5]:
# access the value at index zero (the first element)
x1[0]

4

In [6]:
# access the fifth value (index 4, because indexing starts at zero)
x1[4]

8

In [7]:
# get the last value (negative indices count from the end)
x1[-1]

4

In [8]:
# get the second-to-last value
x1[-2]

8

In [11]:
# in a multidimensional array, we need to specify both a row and a column index
x2 = np.array([
    [3, 7, 5, 5],
    [0, 1, 5, 9],
    [3, 0, 5, 0],
])

In [12]:
# display the two-dimensional array (3 rows, 4 columns)
x2

array([[3, 7, 5, 5],
       [0, 1, 5, 9],
       [3, 0, 5, 0]])

In [13]:
# 3rd row and 4th column value (row index 2, column index 3)
x2[2,3]

0

In [16]:
# 2nd row and 4th column value (row index 1, column index 3)
x2[1,3]

9

In [20]:
# 3rd row, last column value (the negative column index counts from the end)
x2[2,-1]

0

In [21]:
# replace the value at index [0, 0]; the assignment modifies x2 in place
x2[0,0] = 13

In [22]:
# display x2 again to confirm that the element at [0, 0] changed
x2

array([[13,  7,  5,  5],
       [ 0,  1,  5,  9],
       [ 3,  0,  5,  0]])

## Array Slicing


Now, we'll learn to access multiple elements, or a range of elements, from an array.

In [24]:
# the integers 0 through 9, used for the slicing examples
y = np.arange(0, 10)
y

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [25]:
# from the start up to index 4 (the first five elements; the stop index 5 is excluded)
y[:5]

array([0, 1, 2, 3, 4])

In [27]:
# from index 4 (the fifth element) to the end
y[4:]

array([4, 5, 6, 7, 8, 9])

In [28]:
# from index 4 up to index 6 (the stop index 7 is excluded)
y[4:7]

array([4, 5, 6])

In [32]:
# return every second element starting from index 0 (the even indices)
y[::2]

array([0, 2, 4, 6, 8])

In [33]:
# return every second element starting from index 1 (the odd indices)
y[1::2]

array([1, 3, 5, 7, 9])

In [34]:
# reverse the array (a step of -1 walks it from the end to the start)
y[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

## Array Concatenation

In [36]:
# stack a 1-D array on top of a 2-D array; both have three columns
x = np.array([3, 4, 5])
grid = np.array([
    [1, 2, 3],
    [17, 18, 19],
])
np.vstack((x, grid))

array([[ 3,  4,  5],
       [ 1,  2,  3],
       [17, 18, 19]])

In [40]:
# similarly, np.hstack joins arrays side by side; both arrays here have two rows
grid = np.array([
    [1, 2, 3],
    [17, 18, 19],
])
z = np.array([
    [9],
    [9],
])
np.hstack((grid, z))

array([[ 1,  2,  3,  9],
       [17, 18, 19,  9]])

## Getting Started with Pandas

In [41]:
import pandas as pd

In [42]:
# a small frame with one text column and one numeric column
# NOTE(review): 'Equador' is probably meant to be 'Ecuador' - left as-is so the output matches
data = pd.DataFrame(
    {
        'Country': ['Russia', 'Colombia', 'Chile', 'Equador', 'Nigeria'],
        'Rank': [121, 40, 100, 130, 11],
    }
)
data

Unnamed: 0,Country,Rank
0,Russia,121
1,Colombia,40
2,Chile,100
3,Equador,130
4,Nigeria,11


In [43]:
# summary statistics; only the numeric Rank column is summarised
data.describe()

Unnamed: 0,Rank
count,5.0
mean,80.4
std,52.300096
min,11.0
25%,40.0
50%,100.0
75%,121.0
max,130.0


In [45]:
# Let's create another data frame.
data = pd.DataFrame(
    {
        'group': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c'],
        'ounces': [4, 3, 12, 6, 7.5, 8, 3, 5, 6],
    }
)
data

Unnamed: 0,group,ounces
0,a,4.0
1,a,3.0
2,a,12.0
3,b,6.0
4,b,7.5
5,b,8.0
6,c,3.0
7,c,5.0
8,c,6.0


In [46]:
# let's sort the data frame by ounces - inplace=False returns a sorted copy and
# leaves `data` unchanged; inplace=True would sort `data` itself instead
data.sort_values(by=['ounces'],ascending=True,inplace=False)

Unnamed: 0,group,ounces
1,a,3.0
6,c,3.0
0,a,4.0
7,c,5.0
3,b,6.0
8,c,6.0
4,b,7.5
5,b,8.0
2,a,12.0


You can also sort the data by several columns at once, not just
one.


In [47]:
# sort by group in ascending order, then by ounces in descending order within each group
data.sort_values(by=['group','ounces'],ascending=[True,False],inplace=False)

Unnamed: 0,group,ounces
2,a,12.0
0,a,4.0
1,a,3.0
5,b,8.0
4,b,7.5
3,b,6.0
8,c,6.0
7,c,5.0
6,c,3.0


Data sets often contain duplicate rows, which are just noise. Therefore,
before we train a model, we need to make sure such inconsistencies are
removed from the data set. Let's see how we can remove duplicate
rows.

In [49]:
# sort values by group; a sorted copy is returned and `data` itself is not modified
data.sort_values(by='group')

Unnamed: 0,group,ounces
0,a,4.0
1,a,3.0
2,a,12.0
3,b,6.0
4,b,7.5
5,b,8.0
6,c,3.0
7,c,5.0
8,c,6.0


In this case, we will follow these steps:

In [50]:
# food names with mixed capitalisation and repeated entries, plus their weights
# NOTE(review): 'Becon' looks like a typo for 'Bacon' - confirm whether it is intentional
data = pd.DataFrame(
    {
        'food': [
            'bacon', 'pulled pork', 'bacon', 'Pastrami', 'corned beef',
            'Becon', 'pastrami', 'honey ham', 'nova lox',
        ],
        'ounces': [4, 3, 12, 6, 7.5, 8, 3, 5, 5],
    }
)
data

Unnamed: 0,food,ounces
0,bacon,4.0
1,pulled pork,3.0
2,bacon,12.0
3,Pastrami,6.0
4,corned beef,7.5
5,Becon,8.0
6,pastrami,3.0
7,honey ham,5.0
8,nova lox,5.0
