# Introduction to pandas

# Foundations: NumPy
## NumPy Array

In [1]:
# A plain nested list standing in for a 3x3 matrix (pure Python, no NumPy yet)
matrix = [[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]

In [2]:
# Adding 1 to every element of a nested list needs a nested list comprehension
[[i + 1 for i in row] for row in matrix]

[[2, 3, 4], [5, 6, 7], [8, 9, 10]]

In [3]:
# First, let's import NumPy
import numpy as np

In [4]:
# Constructing an array with a simple list results in a 1d array; the single
# float literal (1000.) makes NumPy store every element as a float
array1 = np.array([10, 100, 1000.])

In [5]:
# Constructing an array with a nested list results in a 2d array
# (here: 2 rows, 3 columns)
array2 = np.array([[1., 2., 3.],
                   [4., 5., 6.]])

In [6]:
# dtype reports the single data type shared by all elements of the array
array1.dtype

dtype('float64')

In [7]:
# Indexing yields a NumPy scalar; float() converts it to a built-in Python float
float(array1[0])

10.0

## Vectorization and Broadcasting

In [8]:
# The scalar is broadcast: 1 is added to every element, no loop required
array2 + 1

array([[2., 3., 4.],
       [5., 6., 7.]])

In [9]:
# * multiplies element by element; it is not a matrix multiplication
array2 * array2

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [10]:
# Broadcasting: the 3-element array1 is applied to each row of the 2x3 array2
array2 * array1

array([[  10.,  200., 3000.],
       [  40.,  500., 6000.]])

In [11]:
# @ is the matrix multiplication operator: (2x3) @ (3x2) gives a 2x2 result
array2 @ array2.T  # array2.T is a shortcut for array2.transpose()

array([[14., 32.],
       [32., 77.]])

## Universal Functions (ufunc)

In [12]:
import math

In [13]:
math.sqrt(array2)  # This will raise a TypeError: math.sqrt only accepts scalars

TypeError: only size-1 arrays can be converted to Python scalars

In [None]:
# Pure-Python workaround: call math.sqrt on each element in a nested
# comprehension and rebuild the array from the resulting nested list
np.array([[math.sqrt(i) for i in row] for row in array2])

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [None]:
# The ufunc np.sqrt takes the whole array and works element by element
np.sqrt(array2)

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [None]:
# axis=0 sums down the rows, giving one total per column
array2.sum(axis=0)  # Returns a 1d array

array([5., 7., 9.])

In [None]:
# Without an axis argument, all elements are summed into a single scalar
array2.sum()

21.0

## Creating and Manipulating Arrays

## Getting and Setting Array Elements

In [None]:
# Indices are zero-based: position 2 is the third element
array1[2]  # Returns a scalar

1000.0

In [None]:
# One index per dimension: row 0, column 0
array2[0, 0]  # Returns a scalar

1.0

In [None]:
# All rows, columns from index 1 onward
array2[:, 1:]  # Returns a 2d array

array([[2., 3.],
       [5., 6.]])

In [None]:
# All rows, column 1 only: the integer index drops that dimension
array2[:, 1]  # Returns a 1d array

array([2., 5.])

In [None]:
# Row 1, first two columns (the end of a slice is exclusive)
array2[1, :2]  # Returns a 1d array

array([4., 5.])

## Useful Array Constructors

In [None]:
# arange(10) yields the integers 0 through 9; reshape lays them out row by row
np.arange(2 * 5).reshape(2, 5)  # 2 rows, 5 columns

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [None]:
# Draw standard-normal samples through the Generator API, which NumPy
# recommends over the legacy np.random.randn; the values differ on every run
rng = np.random.default_rng()
rng.standard_normal((2, 3))  # 2 rows, 3 columns

array([[ 0.61812777,  2.26317414,  0.9239904 ],
       [-1.24787376,  0.08600425, -0.66763111]])

## View vs. Copy

In [None]:
# Display array2 as it is before the slice below is modified
array2

array([[1., 2., 3.],
       [4., 5., 6.]])

In [None]:
# Slicing returns a view: subset shares its data with array2 instead of
# holding a copy of the first two columns
subset = array2[:, :2]
subset

array([[1., 2.],
       [4., 5.]])

In [None]:
# Because subset is a view, this assignment also changes array2[0, 0]
subset[0, 0] = 1000

In [None]:
# The view shows the newly assigned value
subset

array([[1000.,    2.],
       [   4.,    5.]])

In [None]:
# The original array changed as well, since subset is a view and not a copy
array2

array([[1000.,    2.,    3.],
       [   4.,    5.,    6.]])