# NumPy DataTypes

In [1]:
import numpy as np

### Ndarrays

In [2]:
array_a = np.array([1,2,3])

In [8]:
array_a 

array([1, 2, 3])

In [3]:
print(array_a)

[1 2 3]


In [4]:
type(array_a)

numpy.ndarray

In [5]:
array_a.shape

## "shape" is an attribute of the ndarray, rather than a method, so it's not callable (doesn't need "()" at the end)

(3,)

In [9]:
array_b = np.array([[7,8,9],[10,11,12]])

In [10]:
type(array_b)

numpy.ndarray

In [11]:
print(array_b)

[[ 7  8  9]
 [10 11 12]]


In [12]:
array_b.shape

(2, 3)

In [13]:
array_b.shape[0]

2

In [15]:
array_b.shape[1]

3

In [16]:
array_b.shape
#array_b.shape[0]
#array_b.shape[1]

## We can use indexing to get a specific part of the output tuple. 

(2, 3)

In [18]:
array_c = np.array(13)
array_c

array(13)

In [19]:
type(array_c)

## array_c is a 0-D array

numpy.ndarray

In [20]:
print(array_c)

13


In [21]:
array_d = np.array([15])

## We can add square brackets to assign the right number of dimensions to the array we're creating.

In [22]:
type(array_d)

numpy.ndarray

In [23]:
print(array_d)

[15]


In [24]:
array_d.shape

(1,)

In [26]:
array_e = np.array([[15]])
array_e

array([[15]])

### List vs Array

In [46]:
list_a = [[1,2,3,4,5,6]]

In [55]:
array_a = np.array(list_a)

In [28]:
len(list_a)

1

In [51]:
list_a = [[1,2,3],[4,5,6]]

## Syntax for creating lists is similar to syntax for creating arrays.

In [30]:
len(list_a)

2

In [31]:
array_a.shape

(3,)

In [32]:
len(list_a[1])

3

In [52]:
list_a

[[1, 2, 3], [4, 5, 6]]

In [34]:
array_a

array([1, 2, 3])

In [53]:
list_b = list_a[0] + list_a[1]
list_b

[1, 2, 3, 4, 5, 6]

In [56]:
array_b = array_a[0] + array_a[1]

In [40]:
print(list_b)

[1, 2, 3, 4, 5, 6]


In [57]:
print(array_b)

[5 7 9]


In [42]:
array_a

array([1, 2, 3])

In [60]:
import math
# array_e = math.sqrt(list_a)       # We can't provide an entire list as the input.
# array_e = math.sqrt(list_a[1,0])  # Even individual elements of a list result in an error. 
array_e = math.sqrt(array_a[1,0])
print(array_e)

## The "e" in "array_e" stands for "example".

2.0


In [61]:
np.sqrt(array_a)

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

### String vs Object vs Numbers

In [67]:
lending_co_it = np.genfromtxt("lending-co-LT.csv", delimiter = ',')
print(lending_co_it)

[[      nan       nan       nan ...       nan       nan       nan]
 [1.000e+00       nan       nan ...       nan       nan 1.660e+04]
 [2.000e+00       nan       nan ...       nan       nan 1.660e+04]
 ...
 [1.041e+03       nan       nan ...       nan       nan 1.660e+04]
 [1.042e+03       nan       nan ...       nan       nan 1.560e+04]
 [1.043e+03       nan       nan ...       nan       nan 1.660e+04]]


In [68]:
lending_co_it = np.genfromtxt("lending-co-LT.csv", delimiter = ',',
                             dtype = np.int32 )
print(lending_co_it)

[[   -1    -1    -1 ...    -1    -1    -1]
 [    1    -1    -1 ...    -1    -1 16600]
 [    2    -1    -1 ...    -1    -1 16600]
 ...
 [ 1041    -1    -1 ...    -1    -1 16600]
 [ 1042    -1    -1 ...    -1    -1 15600]
 [ 1043    -1    -1 ...    -1    -1 16600]]


In [70]:
lending_co_it[0,0] + lending_co_it[0,1]

-2

In [71]:
lending_co_it = np.genfromtxt("lending-co-LT.csv", delimiter = ',',
                             dtype = np.str )
print(lending_co_it)

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.str )


In [72]:
lending_co_it = np.genfromtxt("lending-co-LT.csv", delimiter = ',',
                             dtype = np.object )
print(lending_co_it)

[[b'LoanID' b'StringID' b'Product' ... b'Location' b'Region'
  b'TotalPrice']
 [b'1' b'id_1' b'Product B' ... b'Location 2' b'Region 2' b'16600.0']
 [b'2' b'id_2' b'Product B' ... b'Location 3' b'' b'16600.0']
 ...
 [b'1041' b'id_1041' b'Product B' ... b'Location 23' b'Region 4'
  b'16600.0']
 [b'1042' b'id_1042' b'Product C' ... b'Location 52' b'Region 6'
  b'15600.0']
 [b'1043' b'id_1043' b'Product B' ... b'Location 142' b'Region 6'
  b'16600.0']]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.object )


In [77]:
lending_co_lt = np.genfromtxt("lending-co-LT.csv", 
                              delimiter = ',', dtype = (np.int32, np.str, np.str, np.str, np.str, np.str, np.int32)
                             )
print(lending_co_lt)

# The same dataset is imported differently based on the datatype we define. 

[(  -1, '', '', '', '', '',    -1) (   1, '', '', '', '', '', 16600)
 (   2, '', '', '', '', '', 16600) ... (1041, '', '', '', '', '', 16600)
 (1042, '', '', '', '', '', 15600) (1043, '', '', '', '', '', 16600)]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  delimiter = ',', dtype = (np.int32, np.str, np.str, np.str, np.str, np.str, np.int32)
