# What is Numpy?
Numpy is a Python module for doing calculations on tables of data. Pandas was actually built using Numpy as its foundation.

## Converting from a Pandas Series to a Numpy Array
The values attribute of a Pandas Series gives the data as a numpy array.

In [1]:
import pandas as pd
df = pd.read_csv("titanic.csv")
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Siblings/Spouses,Parents/Children,Fare
0,0,3,male,22.0,1,0,7.25
1,1,1,female,38.0,1,0,71.2833
2,1,3,female,26.0,0,0,7.925
3,1,1,female,35.0,1,0,53.1
4,0,3,male,35.0,0,0,8.05


In [5]:
# get a numpy array from the values attribute of a Series
np_array = df['Fare'].head().values
np_array

array([ 7.25  , 71.2833,  7.925 , 53.1   ,  8.05  ])

In [6]:
type(np_array)

numpy.ndarray

## Converting from a Pandas DataFrame to a Numpy Array
The values attribute of a Pandas DataFrame gives the data as a 2d numpy array.

In [7]:
df[["Pclass","Fare","Age"]].head()

Unnamed: 0,Pclass,Fare,Age
0,3,7.25,22.0
1,1,71.2833,38.0
2,3,7.925,26.0
3,1,53.1,35.0
4,3,8.05,35.0


In [9]:
nd_array = df[["Pclass","Fare","Age"]].values
nd_array

array([[ 3.    ,  7.25  , 22.    ],
       [ 1.    , 71.2833, 38.    ],
       [ 3.    ,  7.925 , 26.    ],
       ...,
       [ 3.    , 23.45  ,  7.    ],
       [ 1.    , 30.    , 26.    ],
       [ 3.    ,  7.75  , 32.    ]])

In [11]:
type(nd_array)

numpy.ndarray

In [12]:
df.shape

(887, 7)

In [13]:
nd_array.shape

(887, 3)

## Numpy Shape Attribute
Use the shape attribute to find the number of rows and number of columns for a Numpy array. You can also use the shape attribute on a pandas DataFrame (df.shape).

In [15]:
arr = df[['Pclass','Fare','Age']].values
arr

array([[ 3.    ,  7.25  , 22.    ],
       [ 1.    , 71.2833, 38.    ],
       [ 3.    ,  7.925 , 26.    ],
       ...,
       [ 3.    , 23.45  ,  7.    ],
       [ 1.    , 30.    , 26.    ],
       [ 3.    ,  7.75  , 32.    ]])

In [16]:
arr.shape

(887, 3)

## Select from a Numpy Array


In [18]:
arr = df[['Pclass','Fare','Age']].values
arr

array([[ 3.    ,  7.25  , 22.    ],
       [ 1.    , 71.2833, 38.    ],
       [ 3.    ,  7.925 , 26.    ],
       ...,
       [ 3.    , 23.45  ,  7.    ],
       [ 1.    , 30.    , 26.    ],
       [ 3.    ,  7.75  , 32.    ]])

In [19]:
# select single element
arr[0,1]

7.25

In [21]:
# select single row
arr[0]

array([ 3.  ,  7.25, 22.  ])

In [22]:
# select single column
arr[:,2]

array([22.  , 38.  , 26.  , 35.  , 35.  , 27.  , 54.  ,  2.  , 27.  ,
       14.  ,  4.  , 58.  , 20.  , 39.  , 14.  , 55.  ,  2.  , 23.  ,
       31.  , 22.  , 35.  , 34.  , 15.  , 28.  ,  8.  , 38.  , 26.  ,
       19.  , 24.  , 23.  , 40.  , 48.  , 18.  , 66.  , 28.  , 42.  ,
       18.  , 21.  , 18.  , 14.  , 40.  , 27.  ,  3.  , 19.  , 30.  ,
       20.  , 27.  , 16.  , 18.  ,  7.  , 21.  , 49.  , 29.  , 65.  ,
       46.  , 21.  , 28.5 ,  5.  , 11.  , 22.  , 38.  , 45.  ,  4.  ,
       64.  ,  7.  , 29.  , 19.  , 17.  , 26.  , 32.  , 16.  , 21.  ,
       26.  , 32.  , 25.  , 23.  , 28.  ,  0.83, 30.  , 22.  , 29.  ,
       31.  , 28.  , 17.  , 33.  , 16.  , 20.  , 23.  , 24.  , 29.  ,
       20.  , 46.  , 26.  , 59.  , 22.  , 71.  , 23.  , 34.  , 34.  ,
       28.  , 29.  , 21.  , 33.  , 37.  , 28.  , 21.  , 29.  , 38.  ,
       28.  , 47.  , 14.5 , 22.  , 20.  , 17.  , 21.  , 70.5 , 29.  ,
       24.  ,  2.  , 21.  , 19.  , 32.5 , 32.5 , 54.  , 12.  , 19.  ,
       24.  ,  2.  ,

## Masking
A mask is a boolean array (True/False values) that tells us which values from the array we’re interested in.

In [25]:
arr = df[['Pclass','Fare','Age']].values

In [26]:
arr

array([[ 3.    ,  7.25  , 22.    ],
       [ 1.    , 71.2833, 38.    ],
       [ 3.    ,  7.925 , 26.    ],
       ...,
       [ 3.    , 23.45  ,  7.    ],
       [ 1.    , 30.    , 26.    ],
       [ 3.    ,  7.75  , 32.    ]])

In [27]:
# get column Age
arr[:,2]

array([22.  , 38.  , 26.  , 35.  , 35.  , 27.  , 54.  ,  2.  , 27.  ,
       14.  ,  4.  , 58.  , 20.  , 39.  , 14.  , 55.  ,  2.  , 23.  ,
       31.  , 22.  , 35.  , 34.  , 15.  , 28.  ,  8.  , 38.  , 26.  ,
       19.  , 24.  , 23.  , 40.  , 48.  , 18.  , 66.  , 28.  , 42.  ,
       18.  , 21.  , 18.  , 14.  , 40.  , 27.  ,  3.  , 19.  , 30.  ,
       20.  , 27.  , 16.  , 18.  ,  7.  , 21.  , 49.  , 29.  , 65.  ,
       46.  , 21.  , 28.5 ,  5.  , 11.  , 22.  , 38.  , 45.  ,  4.  ,
       64.  ,  7.  , 29.  , 19.  , 17.  , 26.  , 32.  , 16.  , 21.  ,
       26.  , 32.  , 25.  , 23.  , 28.  ,  0.83, 30.  , 22.  , 29.  ,
       31.  , 28.  , 17.  , 33.  , 16.  , 20.  , 23.  , 24.  , 29.  ,
       20.  , 46.  , 26.  , 59.  , 22.  , 71.  , 23.  , 34.  , 34.  ,
       28.  , 29.  , 21.  , 33.  , 37.  , 28.  , 21.  , 29.  , 38.  ,
       28.  , 47.  , 14.5 , 22.  , 20.  , 17.  , 21.  , 70.5 , 29.  ,
       24.  ,  2.  , 21.  , 19.  , 32.5 , 32.5 , 54.  , 12.  , 19.  ,
       24.  ,  2.  ,

In [28]:
mask = arr[:,2] < 18

In [29]:
mask

array([False, False, False, False, False, False, False,  True, False,
        True,  True, False, False, False,  True, False,  True, False,
       False, False, False, False,  True, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False,  True, False, False,
       False, False,  True, False,  True, False, False, False, False,
       False, False, False,  True,  True, False, False, False,  True,
       False,  True, False, False,  True, False, False,  True, False,
       False, False, False, False, False,  True, False, False, False,
       False, False,  True, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False,  True, False, False, False,
       False,  True, False, False, False, False, False,  True, False,
       False,  True,

In [30]:
arr[mask]

array([[  3.    ,  21.075 ,   2.    ],
       [  2.    ,  30.0708,  14.    ],
       [  3.    ,  16.7   ,   4.    ],
       [  3.    ,   7.8542,  14.    ],
       [  3.    ,  29.125 ,   2.    ],
       [  3.    ,   8.0292,  15.    ],
       [  3.    ,  21.075 ,   8.    ],
       [  3.    ,  11.2417,  14.    ],
       [  2.    ,  41.5792,   3.    ],
       [  3.    ,  21.6792,  16.    ],
       [  3.    ,  39.6875,   7.    ],
       [  2.    ,  27.75  ,   5.    ],
       [  3.    ,  46.9   ,  11.    ],
       [  3.    ,  27.9   ,   4.    ],
       [  3.    ,  15.2458,   7.    ],
       [  3.    ,   7.925 ,  17.    ],
       [  3.    ,  46.9   ,  16.    ],
       [  2.    ,  29.    ,   0.83  ],
       [  2.    ,  10.5   ,  17.    ],
       [  3.    ,  34.375 ,  16.    ],
       [  3.    ,  14.4542,  14.5   ],
       [  3.    ,  14.4583,  17.    ],
       [  3.    ,  31.275 ,   2.    ],
       [  3.    ,  11.2417,  12.    ],
       [  3.    ,  22.3583,   2.    ],
       [  3.    ,   9.216

In [31]:
arr_mask = arr[mask]

In [32]:
arr_mask

array([[  3.    ,  21.075 ,   2.    ],
       [  2.    ,  30.0708,  14.    ],
       [  3.    ,  16.7   ,   4.    ],
       [  3.    ,   7.8542,  14.    ],
       [  3.    ,  29.125 ,   2.    ],
       [  3.    ,   8.0292,  15.    ],
       [  3.    ,  21.075 ,   8.    ],
       [  3.    ,  11.2417,  14.    ],
       [  2.    ,  41.5792,   3.    ],
       [  3.    ,  21.6792,  16.    ],
       [  3.    ,  39.6875,   7.    ],
       [  2.    ,  27.75  ,   5.    ],
       [  3.    ,  46.9   ,  11.    ],
       [  3.    ,  27.9   ,   4.    ],
       [  3.    ,  15.2458,   7.    ],
       [  3.    ,   7.925 ,  17.    ],
       [  3.    ,  46.9   ,  16.    ],
       [  2.    ,  29.    ,   0.83  ],
       [  2.    ,  10.5   ,  17.    ],
       [  3.    ,  34.375 ,  16.    ],
       [  3.    ,  14.4542,  14.5   ],
       [  3.    ,  14.4583,  17.    ],
       [  3.    ,  31.275 ,   2.    ],
       [  3.    ,  11.2417,  12.    ],
       [  3.    ,  22.3583,   2.    ],
       [  3.    ,   9.216

In [33]:
new_mask = arr[arr[:,2] < 18]

In [34]:
new_mask

array([[  3.    ,  21.075 ,   2.    ],
       [  2.    ,  30.0708,  14.    ],
       [  3.    ,  16.7   ,   4.    ],
       [  3.    ,   7.8542,  14.    ],
       [  3.    ,  29.125 ,   2.    ],
       [  3.    ,   8.0292,  15.    ],
       [  3.    ,  21.075 ,   8.    ],
       [  3.    ,  11.2417,  14.    ],
       [  2.    ,  41.5792,   3.    ],
       [  3.    ,  21.6792,  16.    ],
       [  3.    ,  39.6875,   7.    ],
       [  2.    ,  27.75  ,   5.    ],
       [  3.    ,  46.9   ,  11.    ],
       [  3.    ,  27.9   ,   4.    ],
       [  3.    ,  15.2458,   7.    ],
       [  3.    ,   7.925 ,  17.    ],
       [  3.    ,  46.9   ,  16.    ],
       [  2.    ,  29.    ,   0.83  ],
       [  2.    ,  10.5   ,  17.    ],
       [  3.    ,  34.375 ,  16.    ],
       [  3.    ,  14.4542,  14.5   ],
       [  3.    ,  14.4583,  17.    ],
       [  3.    ,  31.275 ,   2.    ],
       [  3.    ,  11.2417,  12.    ],
       [  3.    ,  22.3583,   2.    ],
       [  3.    ,   9.216

In [35]:
mask.shape

(887,)

In [36]:
arr_mask.shape

(130, 3)

In [37]:
new_mask.shape

(130, 3)

## Summing and Counting
Summing an array of boolean values gives the number of True values.

In [39]:
# use the True/False values from the mask
arr = df[['Pclass','Fare','Age']].values
mask = arr[:,2] < 18
mask.sum()

130

In [40]:
(arr[:,2] < 18).sum()

130