## Data processing with NumPy

In [3]:
import numpy as np

list_of_ints  = [1,2,3]
array_1 = np.array(list_of_ints)
array_1

array([1, 2, 3])

In [5]:
array_1[1]

2

In [10]:
print(type(array_1))
type(array_1[0])

<class 'numpy.ndarray'>


numpy.int32

In [12]:
array_1.nbytes

12

In [16]:
array_1_b = np.array(list_of_ints, dtype='int8')
print(type(array_1_b[0]))

<class 'numpy.int8'>


In [18]:
array_1c = array_1.astype('float32')
array_1c

array([ 1.,  2.,  3.], dtype=float32)

In [26]:
complex_list = [1,2,3] + [1.,2.,3.] + ['a', 'b','c']
array_2 = np.array(complex_list[:3])
print(complex_list[:3], array_2.dtype)

array_2 = np.array(complex_list[:6])
print(complex_list[:6], array_2.dtype)

array_2 = np.array(complex_list)
print(complex_list[:], array_2.dtype)

print(isinstance(array_2[0], np.number))

[1, 2, 3] int32
[1, 2, 3, 1.0, 2.0, 3.0] float64
[1, 2, 3, 1.0, 2.0, 3.0, 'a', 'b', 'c'] <U32
False


In [28]:
a_list_of_lists = [[1,2,3], [4,5,6], [7,8,9]]
array_2d = np.array(a_list_of_lists)
print(array_2d[1,2])
array_2d

6


array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [31]:
a_list_of_lists_of_lists = [[[1,2], [3,4], [5,6]],
                           [[7,8], [9,10], [11,12]]]
array_3d = np.array(a_list_of_lists_of_lists)
array_3d

array([[[ 1,  2],
        [ 3,  4],
        [ 5,  6]],

       [[ 7,  8],
        [ 9, 10],
        [11, 12]]])

In [33]:
np.array({1:2, 3:4, 5:6}.items())

array(dict_items([(1, 2), (3, 4), (5, 6)]), dtype=object)

In [35]:
original_array = np.array([1,2,3,4,5,6,7,8])
array_a = original_array.reshape(4,2)
array_b = original_array.reshape(4,2).copy()
array_c = original_array.reshape(2,2,2)

original_array[0] = -1

print(array_a)
print(array_b)
print(array_c)

[[-1  2]
 [ 3  4]
 [ 5  6]
 [ 7  8]]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[[-1  2]
  [ 3  4]]

 [[ 5  6]
  [ 7  8]]]


In [38]:
original_array.resize(4,2)
original_array

array([[-1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8]])

In [39]:
original_array.shape = (4,2)
original_array

array([[-1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8]])

In [42]:
ordinal_values = np.arange(9).reshape(3,3)
ordinal_values

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [44]:
np.arange(9)[::-1]

array([8, 7, 6, 5, 4, 3, 2, 1, 0])

In [47]:
np.random.randint(low = 1, high=10, size=(3,3)).reshape(3,3)

array([[9, 7, 3],
       [6, 9, 8],
       [9, 5, 3]])

In [48]:
np.zeros((3,3))

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [49]:
np.ones((3,3))

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [50]:
np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [54]:
fractions = np.linspace(start= 0, stop=1, num=10)
growth = np.logspace(start=0, stop=1,num=10, base =10.0)

std_gaussian = np.random.normal(size=(3,3))
gaussian = np.random.normal(loc = 1.0, scale =3.0, size = (3,3))

rand = np.random.uniform(low=0.0, high = 1.0, size = (3,3))
print(fractions)
print(growth)

[ 0.          0.11111111  0.22222222  0.33333333  0.44444444  0.55555556
  0.66666667  0.77777778  0.88888889  1.        ]
[  1.           1.29154967   1.66810054   2.15443469   2.7825594
   3.59381366   4.64158883   5.9948425    7.74263683  10.        ]


In [57]:
housing = np.loadtxt('../data/regression-datasets-housing.csv', delimiter = ',', dtype =float)
housing

array([[  6.32000000e-03,   1.80000000e+01,   2.31000000e+00, ...,
          3.96900000e+02,   4.98000000e+00,   2.40000000e+01],
       [  2.73100000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          3.96900000e+02,   9.14000000e+00,   2.16000000e+01],
       [  2.72900000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          3.92830000e+02,   4.03000000e+00,   3.47000000e+01],
       ..., 
       [  6.07600000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          3.96900000e+02,   5.64000000e+00,   2.39000000e+01],
       [  1.09590000e-01,   0.00000000e+00,   1.19300000e+01, ...,
          3.93450000e+02,   6.48000000e+00,   2.20000000e+01],
       [  4.74100000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          3.96900000e+02,   7.88000000e+00,   1.19000000e+01]])

In [59]:
np.loadtxt('../data/datasets-uci-iris.csv', delimiter=',', dtype=float)

ValueError: could not convert string to float: b'<!DOCTYPE html>'

In [61]:
import pandas as pd
housing_filename = '../data/regression-datasets-housing.csv'
housing =pd.read_csv(housing_filename, header = None)
housing_array = housing.values
housing_array.dtype

dtype('float64')