# A Quick Introduction to Numerical Data Manipulation with Python and NumPy

In [1]:
import numpy as np
import pandas as pd

## 1. DataTypes and attributes

**NOTE:** The core type in NumPy is `ndarray`. Arrays that look quite different from one another (1-D, 2-D, n-D) are all still `ndarray`s, so an operation that works on one array will generally work on another.

In [2]:
# 1-D array built directly from a flat Python list
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [3]:
# type: every NumPy array, whatever its shape, is an instance of numpy.ndarray
type(a1)

numpy.ndarray

In [16]:
# 2-D array: mixing ints and floats in the input upcasts every element to float
a2 = np.array(
    [
        [1, 2.2, 3.0],
        [4, 5, 6],
    ]
)
a2

array([[1. , 2.2, 3. ],
       [4. , 5. , 6. ]])

In [17]:
# 3-D array: two stacked 3x3 blocks of integers
a3 = np.array(
    [
        [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
        ],
        [
            [10, 11, 12],
            [13, 14, 15],
            [16, 17, 18],
        ],
    ]
)
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [18]:
# shape: the length of each array along every axis, printed one array per line
print(a1.shape, a2.shape, a3.shape, sep="\n")

(3,)
(2, 3)
(2, 3, 3)


In [19]:
# ndim: the number of axes (dimensions) of each array
tuple(arr.ndim for arr in (a1, a2, a3))

(1, 2, 3)

In [22]:
# dtype: the element type stored by each array
tuple(arr.dtype for arr in (a1, a2, a3))

(dtype('int32'), dtype('float64'), dtype('int32'))

In [23]:
# size: the total number of elements held by each array
tuple(arr.size for arr in (a1, a2, a3))

(3, 6, 18)

In [28]:
# pandas DataFrame from a NumPy array
# (`pd` comes from the import cell at the top of the notebook, so the
# notebook's dependencies are all declared in one place.)

# With no labels supplied, rows and columns get default integer labels.
df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.0,2.2,3.0
1,4.0,5.0,6.0


In [30]:
# Same data as before, but with explicit column labels instead of 0, 1, 2
df2 = pd.DataFrame(
    data=a2,
    columns=["a", "b", "c"],
)
df2

Unnamed: 0,a,b,c
0,1.0,2.2,3.0
1,4.0,5.0,6.0


## 2. Creating arrays

In [31]:
# The most direct way to create an array: wrap an existing Python list
sample_array = np.array([1, 2, 3])
sample_array

array([1, 2, 3])

In [36]:
# np.ones(): an array of the requested shape filled with 1.0 (float by default)
ones = np.ones(shape=(5, 2))
ones

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]])

In [39]:
# np.zeros(): an array of the requested shape filled with 0.0 (float by default)
zeros = np.zeros(shape=(2, 6))
zeros

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [40]:
# np.zeros() / np.ones() accept a dtype to override the default float elements
zerosInt = np.zeros(shape=(2, 3), dtype=int)
zerosInt

array([[0, 0, 0],
       [0, 0, 0]])

In [46]:
# np.arange(): evenly spaced integers; start defaults to 0 and stop is exclusive
range_array = np.arange(10)
range_array

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [55]:
# Random integers: `low` is inclusive, `high` is exclusive
random_array = np.random.randint(low=2, high=10, size=(2, 2))
random_array

array([[8, 7],
       [8, 6]])

In [59]:
# Random floats drawn from the half-open interval [0.0, 1.0); shape given as a tuple
random_array2 = np.random.random(size=(5, 3))
random_array2

array([[0.99029286, 0.11903301, 0.15641022],
       [0.89232523, 0.12370648, 0.67631235],
       [0.7889151 , 0.95963116, 0.38115774],
       [0.00533503, 0.29141823, 0.25678661],
       [0.16615362, 0.00658664, 0.47260477]])

In [64]:
# np.random.rand() also draws floats from [0.0, 1.0), but takes each
# dimension as a separate positional argument rather than as one tuple
random_array3 = np.random.rand(2, 10)
random_array3

array([[0.68935284, 0.34026086, 0.7729689 , 0.45441185, 0.23160959,
        0.67329425, 0.44730489, 0.16790836, 0.66270911, 0.79307365],
       [0.3442448 , 0.77788684, 0.65950012, 0.19868662, 0.3509101 ,
        0.03073763, 0.06660428, 0.93638856, 0.62408913, 0.68014432]])

NumPy generates pseudo-random numbers: they look random, but they are produced by a deterministic algorithm, so the whole sequence is determined by its starting point (the seed).

For reproducibility, you may want the random numbers you generate to be the same every time you run an experiment.

To do this, you can use `np.random.seed()`.

In [90]:
# seed: fixing the global seed first makes the draw below repeatable,
# so re-running this cell always produces the same integers
np.random.seed(seed=1)

# Integers in [0, 100), arranged as 2 rows x 4 columns
random_array4 = np.random.randint(0, 100, size=(2, 4))
random_array4


array([[37, 12, 72,  9],
       [75,  5, 79, 64]])

In [96]:
# A different seed selects a different, but equally repeatable, sequence
np.random.seed(seed=3)

# Floats in [0.0, 1.0), arranged as 2 rows x 3 columns
random_array4 = np.random.random(size=(2, 3))
random_array4

array([[0.5507979 , 0.70814782, 0.29090474],
       [0.51082761, 0.89294695, 0.89629309]])

## 3. Viewing arrays and matrices (indexing)