In [1]:
import numpy as np

In [2]:
# Path of the comma-delimited, all-numeric lending dataset loaded below.
file_name = "Lending-Company-Numeric-Data.csv"

In [3]:
# np.loadtxt is the faster loader, but it cannot cope with missing values:
# a file containing gaps makes it raise an error instead of returning nan.
lending_co_data_numeric_1 = np.loadtxt(
    file_name,
    delimiter=',',
)
lending_co_data_numeric_1

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [4]:
# np.genfromtxt tolerates missing values (they come back as nan),
# at the cost of being noticeably slower than np.loadtxt.
lending_co_data_numeric_2 = np.genfromtxt(
    file_name,
    delimiter=',',
)
lending_co_data_numeric_2

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [5]:
# Confirm that both loaders produced the same array
# (True only when the shapes and every element match).
np.array_equal(
    lending_co_data_numeric_1,
    lending_co_data_numeric_2,
)

True

In [6]:
# Path of the dataset variant that contains missing values.
nan_file_name = "Lending-Company-Numeric-Data-NAN.csv"

In [7]:
# This file is read with ';' as the field separator; entries that
# genfromtxt cannot read as numbers show up as nan in the result.
lending_co_data_numeric_nan1 = np.genfromtxt(
    nan_file_name,
    delimiter=';',
)
lending_co_data_numeric_nan1

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [8]:
# Same file read with the wrong separator (',' instead of ';'):
# each line is then treated as a single field that cannot be parsed
# as a number, so the result collapses to a 1-D array of nan.
lending_co_data_numeric_nan2 = np.genfromtxt(
    nan_file_name,
    delimiter=',',
)
lending_co_data_numeric_nan2

array([nan, nan, nan, ..., nan, nan, nan])

### Partial Cleaning

In [9]:
# Full load of the semicolon-delimited NaN file: every row and column is kept.
lending_co_data_numeric_nan = np.genfromtxt(
    nan_file_name,
    delimiter=';',
)
lending_co_data_numeric_nan

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [10]:
# skip_header = number of lines to drop from the start of the file.
# skip_footer = number of lines to drop from the end of the file.
# Both count rows (lines), not columns.
lending_co_data_numeric_nan1 = np.genfromtxt(
    nan_file_name,
    delimiter=';',
    skip_header=2,
    skip_footer=3,
)
lending_co_data_numeric_nan1

array([[ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       [ 2000.,    40.,   365.,  3041.,  4241., 15321.],
       [ 2000.,    50.,   365.,  3470.,  4820., 13720.],
       ...,
       [ 2000.,    nan,   365.,  3081.,  4341., 16600.],
       [ 2000.,    40.,   365.,  3401.,    nan, 16600.],
       [ 2000.,    40.,   365.,    nan,  5440., 16600.]])

In [11]:
# usecols = zero-based indices of the columns to load; every other
# column is left out of the resulting array.
lending_co_data_numeric_nan1 = np.genfromtxt(
    nan_file_name,
    delimiter=';',
    skip_header=2,
    skip_footer=2,
    usecols=(0, 1, 5),
)
lending_co_data_numeric_nan1

array([[ 1000.,    40., 15340.],
       [ 2000.,    40., 15321.],
       [ 2000.,    50., 13720.],
       ...,
       [ 2000.,    40., 16600.],
       [ 2000.,    40., 16600.],
       [   nan,    40., 16600.]])

In [12]:
# unpack=True transposes the result, so each selected column can be
# assigned to its own variable in a single statement.
(lending_co_data_0,
 lending_co_data_1,
 lending_co_data_5) = np.genfromtxt(
    nan_file_name,
    delimiter=';',
    skip_header=2,
    skip_footer=2,
    usecols=(0, 1, 5),
    unpack=True,
)
# One column per output line, in the order they were unpacked.
print(lending_co_data_0, lending_co_data_1, lending_co_data_5, sep='\n')

[1000. 2000. 2000. ... 2000. 2000.   nan]
[40. 40. 50. ... 40. 40. 40.]
[15340. 15321. 13720. ... 16600. 16600. 16600.]
