In [1]:
import numpy as np

### `loadtxt` is faster but raises an error if the data is incomplete; `genfromtxt` is slower but can handle missing values

In [2]:
# Read the complete, comma-separated dataset with np.loadtxt.
lending_co_data_numeric_1 = np.loadtxt(
    'Lending-Company-Numeric-Data.csv',
    delimiter=',',
)
lending_co_data_numeric_1

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [3]:
# Read the same comma-separated file again, this time with np.genfromtxt.
lending_co_data_numeric_2 = np.genfromtxt(
    'Lending-Company-Numeric-Data.csv',
    delimiter=',',
)
lending_co_data_numeric_2

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [4]:
# Confirm that the loadtxt result and the genfromtxt result hold the same content.
np.array_equal(
    lending_co_data_numeric_1,
    lending_co_data_numeric_2,
)

True

### Testing on data with missing values — the delimiter must be changed to `;`

In [5]:
# The equivalent loadtxt call does not work on this file:
#   lending_co_data_numeric_NAN = np.loadtxt('Lending-Company-Numeric-Data-NAN.csv',delimiter=';')
#   ValueError: could not convert string '' to float64 at row 11, column 4
# The error means the parser met a symbol or an empty field where it expected a number.
# genfromtxt reads the same file and reports the missing entries as nan.
lending_co_data_numeric_NAN = np.genfromtxt(
    'Lending-Company-Numeric-Data-NAN.csv',
    delimiter=';',
)
lending_co_data_numeric_NAN

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

### Alternatively, pass `dtype=str` to `loadtxt`; the missing values are then read as empty strings

In [6]:
# Load every field as text so that empty fields do not trigger a float conversion error.
lending_co_data_numeric_NAN = np.loadtxt(
    'Lending-Company-Numeric-Data-NAN.csv',
    dtype=str,
    delimiter=';',
)
lending_co_data_numeric_NAN

array([['2000', '40', '365', '3121', '4241', '13621'],
       ['2000', '40', '365', '3061', '4171', '15041'],
       ['1000', '40', '365', '2160', '3280', '15340'],
       ...,
       ['', '40', '365', '4201', '5001', '16600'],
       ['1000', '40', '365', '2080', '3320', '15600'],
       ['2000', '40', '365', '4601', '4601', '16600']], dtype='<U5')

### Skipping headers in genfromtxt

In [7]:
# skip_header=2 ignores the first two lines of the file before parsing.
lending_co_data_numeric_NAN = np.genfromtxt(
    'Lending-Company-Numeric-Data-NAN.csv',
    skip_header=2,
    delimiter=';',
)
lending_co_data_numeric_NAN

array([[ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       [ 2000.,    40.,   365.,  3041.,  4241., 15321.],
       [ 2000.,    50.,   365.,  3470.,  4820., 13720.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

### Skipping footers in genfromtxt

In [10]:
# skip_footer=2 ignores the last two lines of the file; fields are kept as text.
lending_co_data_numeric_NAN = np.genfromtxt(
    'Lending-Company-Numeric-Data-NAN.csv',
    skip_footer=2,
    dtype=str,
    delimiter=';',
)
lending_co_data_numeric_NAN

array([['2000', '40', '365', '3121', '4241', '13621'],
       ['2000', '40', '365', '3061', '4171', '15041'],
       ['1000', '40', '365', '2160', '3280', '15340'],
       ...,
       ['2000', '40', '365', '3401', '', '16600'],
       ['2000', '40', '365', '', '5440', '16600'],
       ['', '40', '365', '4201', '5001', '16600']], dtype='<U5')

### Choosing specific columns

In [17]:
# usecols picks columns by zero-based index; the result follows the order given (5, then 0, then 1).
lending_co_data_numeric_NAN = np.genfromtxt(
    'Lending-Company-Numeric-Data-NAN.csv',
    usecols=(5, 0, 1),
    dtype=str,
    delimiter=';',
)
lending_co_data_numeric_NAN

array([['13621', '2000', '40'],
       ['15041', '2000', '40'],
       ['15340', '1000', '40'],
       ...,
       ['16600', '', '40'],
       ['15600', '1000', '40'],
       ['16600', '2000', '40']], dtype='<U5')

### Combinations

In [18]:
# Combine several options: read fields as text, keep columns 5, 0 and 1 (in that order),
# and ignore the last nine lines of the file.
lending_co_data_numeric_NAN = np.genfromtxt(
    'Lending-Company-Numeric-Data-NAN.csv',
    skip_footer=9,
    usecols=(5, 0, 1),
    dtype=str,
    delimiter=';',
)
lending_co_data_numeric_NAN

array([['13621', '2000', '40'],
       ['15041', '2000', '40'],
       ['15340', '1000', '40'],
       ...,
       ['16600', '2000', '40'],
       ['', '', '50'],
       ['54625', '9000', '125']], dtype='<U5')