# Problem statement: Analyze the given dataset , perform preprocessing , data cleaning and exploration.

## Importing Library

In [791]:
import numpy as np
# np.set_printoptions is used to set how to display the array
np.set_printoptions(suppress=True, linewidth=100, precision=2)

In [792]:
# giving a file path
filepath=("C:/Users/0863a/OneDrive/Desktop/projects/project_3_loan_databy_numpy/loan-data.csv")

In [793]:
# genfromtxt using because if we have missing vlues it will still be loaded
raw_data = np.genfromtxt( filepath, delimiter = ';', skip_header = 1)

In [794]:
# viewing data
raw_data

array([[48010226.  ,         nan,    35000.  , ...,         nan,         nan,     9452.96],
       [57693261.  ,         nan,    30000.  , ...,         nan,         nan,     4679.7 ],
       [59432726.  ,         nan,    15000.  , ...,         nan,         nan,     1969.83],
       ...,
       [50415990.  ,         nan,    10000.  , ...,         nan,         nan,     2185.64],
       [46154151.  ,         nan,         nan, ...,         nan,         nan,     3199.4 ],
       [66055249.  ,         nan,    10000.  , ...,         nan,         nan,      301.9 ]])

In [795]:
# checking the shape of data we have skipped header so the total rows are 100001
raw_data.shape

(10000, 14)

In [796]:
# checking for null values but in numpy we have string as nan too so we will columnwise count it out
np.isnan(raw_data).sum()

88005

In [797]:
# calculating max of each column
temp_max = np.nanmax(raw_data,axis=0)
temp_max

  temp_max = np.nanmax(raw_data,axis=0)


array([68616519.  ,         nan,    35000.  ,         nan,    35000.  ,         nan,       28.99,
           1372.97,         nan,         nan,         nan,         nan,         nan,    41913.62])

In [798]:
# calculating mean of each column
temp_mean = np.nanmean(raw_data , axis=0)
temp_mean

  temp_mean = np.nanmean(raw_data , axis=0)


array([54015809.19,         nan,    15273.46,         nan,    15311.04,         nan,       16.62,
            440.92,         nan,         nan,         nan,         nan,         nan,     3143.85])

In [799]:
# calculating min of each column
temp_min = np.nanmin(raw_data, axis=0)
temp_min

  temp_min = np.nanmin(raw_data, axis=0)


array([373332.  ,       nan,   1000.  ,       nan,   1000.  ,       nan,      6.  ,     31.42,
             nan,       nan,       nan,       nan,       nan,      0.  ])

In [800]:
# observing the statistics
temp_statistics=np.array([temp_max,
                         temp_mean,
                         temp_min])
temp_statistics

array([[68616519.  ,         nan,    35000.  ,         nan,    35000.  ,         nan,       28.99,
            1372.97,         nan,         nan,         nan,         nan,         nan,    41913.62],
       [54015809.19,         nan,    15273.46,         nan,    15311.04,         nan,       16.62,
             440.92,         nan,         nan,         nan,         nan,         nan,     3143.85],
       [  373332.  ,         nan,     1000.  ,         nan,     1000.  ,         nan,        6.  ,
              31.42,         nan,         nan,         nan,         nan,         nan,        0.  ]])

# Splitting the columns

In [801]:
# getting the indices of column where nan values exist in mean
column_strings= np.argwhere(np.isnan(temp_mean)).squeeze()
column_strings

array([ 1,  3,  5,  8,  9, 10, 11, 12], dtype=int64)

In [802]:
# getting the indices of column where nan values does not exist in mean
colum_numerics=np.argwhere(~np.isnan(temp_mean)).squeeze()
colum_numerics

array([ 0,  2,  4,  6,  7, 13], dtype=int64)

# Reimporting the dataset

In [803]:
# loading numeric data
numeric_data=np.genfromtxt(filepath,delimiter=';',  usecols=colum_numerics, skip_header=1)
numeric_data

array([[48010226.  ,    35000.  ,    35000.  ,       13.33,     1184.86,     9452.96],
       [57693261.  ,    30000.  ,    30000.  ,         nan,      938.57,     4679.7 ],
       [59432726.  ,    15000.  ,    15000.  ,         nan,      494.86,     1969.83],
       ...,
       [50415990.  ,    10000.  ,    10000.  ,         nan,         nan,     2185.64],
       [46154151.  ,         nan,    10000.  ,       16.55,      354.3 ,     3199.4 ],
       [66055249.  ,    10000.  ,    10000.  ,         nan,      309.97,      301.9 ]])

In [804]:
# loading string data
string_data=np.genfromtxt(filepath,delimiter=';', usecols= column_strings, skip_header=1, dtype=np.str_)
string_data

array([['May-15', 'Current', ' 36 months', ..., 'Verified',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=48010226', 'CA'],
       ['', 'Current', ' 36 months', ..., 'Source Verified',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=57693261', 'NY'],
       ['Sep-15', 'Current', ' 36 months', ..., 'Verified',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=59432726', 'PA'],
       ...,
       ['Jun-15', 'Current', ' 36 months', ..., 'Source Verified',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=50415990', 'CA'],
       ['Apr-15', 'Current', ' 36 months', ..., 'Source Verified',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=46154151', 'OH'],
       ['Dec-15', 'Current', ' 36 months', ..., '',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=66055249', 'IL']],
      dtype='<U69')

In [805]:
# getting the full header
full_header = np.genfromtxt(filepath , 
                            delimiter = ";" , 
                            autostrip = True, 
                            skip_footer = 10000,
                            dtype = np.str_)
full_header                                 

array(['id', 'issue_d', 'loan_amnt', 'loan_status', 'funded_amnt', 'term', 'int_rate',
       'installment', 'grade', 'sub_grade', 'verification_status', 'url', 'addr_state',
       'total_pymnt'], dtype='<U19')

In [806]:
header_strings , header_numeric = full_header[column_strings] , full_header[colum_numerics]

In [807]:
header_strings

array(['issue_d', 'loan_status', 'term', 'grade', 'sub_grade', 'verification_status', 'url',
       'addr_state'], dtype='<U19')

In [808]:
header_numeric

array(['id', 'loan_amnt', 'funded_amnt', 'int_rate', 'installment', 'total_pymnt'], dtype='<U19')

# Manipulating string columns

In [809]:
header_strings

array(['issue_d', 'loan_status', 'term', 'grade', 'sub_grade', 'verification_status', 'url',
       'addr_state'], dtype='<U19')

In [810]:
header_strings[0]= 'issue_date'

In [811]:
string_data[:,0]

array(['May-15', '', 'Sep-15', ..., 'Jun-15', 'Apr-15', 'Dec-15'], dtype='<U69')

In [812]:
np.unique(string_data[:,0])

array(['', 'Apr-15', 'Aug-15', 'Dec-15', 'Feb-15', 'Jan-15', 'Jul-15', 'Jun-15', 'Mar-15',
       'May-15', 'Nov-15', 'Oct-15', 'Sep-15'], dtype='<U69')

In [813]:
string_data[:,0]=np.chararray.strip(string_data[:,0],'-15')
string_data[:,0]

array(['May', '', 'Sep', ..., 'Jun', 'Apr', 'Dec'], dtype='<U69')

In [814]:
months = np.array(['' , 'Jan', 'Feb' ,'Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])

In [815]:
for i in range(13):
    string_data[:,0]=np.where(string_data[:,0] == months[i],
                                   i,
                              string_data[:,0])
                                

In [816]:
np.unique(string_data[:,0])

array(['0', '1', '10', '11', '12', '2', '3', '4', '5', '6', '7', '8', '9'], dtype='<U69')

# Loan status

In [817]:
header_strings

array(['issue_date', 'loan_status', 'term', 'grade', 'sub_grade', 'verification_status', 'url',
       'addr_state'], dtype='<U19')

In [818]:
np.unique(string_data[:,1])

array(['', 'Charged Off', 'Current', 'Default', 'Fully Paid', 'In Grace Period', 'Issued',
       'Late (16-30 days)', 'Late (31-120 days)'], dtype='<U69')

In [819]:
status_bad = np.array(['Charged Off' , 'Default' , 'Late (31-120 days)'])

In [820]:
string_data[:,1] = np.where(np.isin(string_data[:,1],status_bad),0,1)

In [821]:
string_data[:,1]

array(['1', '1', '1', ..., '1', '1', '1'], dtype='<U69')

In [822]:
np.unique(string_data[:,1])

array(['0', '1'], dtype='<U69')

# Term

In [823]:
header_strings

array(['issue_date', 'loan_status', 'term', 'grade', 'sub_grade', 'verification_status', 'url',
       'addr_state'], dtype='<U19')

In [824]:
string_data[:,2]

array([' 36 months', ' 36 months', ' 36 months', ..., ' 36 months', ' 36 months', ' 36 months'],
      dtype='<U69')

In [825]:
np.unique(string_data[:,2])

array(['', ' 36 months', ' 60 months'], dtype='<U69')

In [826]:
string_data[:,2] = np.chararray.strip(string_data[:,2],"months")

In [827]:
string_data[:,2]

array([' 36 ', ' 36 ', ' 36 ', ..., ' 36 ', ' 36 ', ' 36 '], dtype='<U69')

In [828]:
header_strings[2]='Term_month'

In [829]:
np.unique(string_data[:,2])

array(['', ' 36 ', ' 60 '], dtype='<U69')

In [830]:
string_data[:,2]=np.where(string_data[:,2]== '',  ' 60 '  , string_data[:,2])

In [831]:
np.unique(string_data[:,2])

array([' 36 ', ' 60 '], dtype='<U69')

# Grade and subgrade

In [832]:
header_strings

array(['issue_date', 'loan_status', 'Term_month', 'grade', 'sub_grade', 'verification_status',
       'url', 'addr_state'], dtype='<U19')

In [833]:
string_data[:,3]

array(['C', 'A', 'B', ..., 'A', 'D', 'A'], dtype='<U69')

In [834]:
np.unique(string_data[:,3])

array(['', 'A', 'B', 'C', 'D', 'E', 'F', 'G'], dtype='<U69')

In [835]:
string_data[:,4]

array(['C3', 'A5', 'B5', ..., 'A5', 'D2', 'A4'], dtype='<U69')

In [836]:
np.unique(string_data[:,4])

array(['', 'A1', 'A2', 'A3', 'A4', 'A5', 'B1', 'B2', 'B3', 'B4', 'B5', 'C1', 'C2', 'C3', 'C4',
       'C5', 'D1', 'D2', 'D3', 'D4', 'D5', 'E1', 'E2', 'E3', 'E4', 'E5', 'F1', 'F2', 'F3', 'F4',
       'F5', 'G1', 'G2', 'G3', 'G4', 'G5'], dtype='<U69')

In [837]:
np.unique(string_data[: , 3])[1:]

array(['A', 'B', 'C', 'D', 'E', 'F', 'G'], dtype='<U69')

In [838]:
for i in np.unique(string_data[: , 3])[1:]:
    string_data[:,4]=np.where((string_data[:,4] == '') & (string_data[:,3]== i),  i + '5',
                                  string_data[: , 4])
    

In [839]:
np.unique(string_data[: , 4], return_counts=True)

(array(['', 'A1', 'A2', 'A3', 'A4', 'A5', 'B1', 'B2', 'B3', 'B4', 'B5', 'C1', 'C2', 'C3', 'C4',
        'C5', 'D1', 'D2', 'D3', 'D4', 'D5', 'E1', 'E2', 'E3', 'E4', 'E5', 'F1', 'F2', 'F3', 'F4',
        'F5', 'G1', 'G2', 'G3', 'G4', 'G5'], dtype='<U69'),
 array([  9, 285, 278, 239, 323, 592, 509, 517, 530, 553, 633, 629, 567, 586, 564, 577, 391, 267,
        250, 255, 288, 235, 162, 171, 139, 160,  94,  52,  34,  43,  24,  19,  10,   3,   7,   5],
       dtype=int64))

In [840]:
string_data[: , 4] = np.where(string_data[: , 4] == '',
                                              'H1',
                                              string_data[: , 4])


In [841]:
np.unique(string_data[: , 4])

array(['A1', 'A2', 'A3', 'A4', 'A5', 'B1', 'B2', 'B3', 'B4', 'B5', 'C1', 'C2', 'C3', 'C4', 'C5',
       'D1', 'D2', 'D3', 'D4', 'D5', 'E1', 'E2', 'E3', 'E4', 'E5', 'F1', 'F2', 'F3', 'F4', 'F5',
       'G1', 'G2', 'G3', 'G4', 'G5', 'H1'], dtype='<U69')

# Removing grade

In [842]:
string_data=np.delete(string_data, 3 , axis= 1)
string_data

array([['5', '1', ' 36 ', ..., 'Verified',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=48010226', 'CA'],
       ['0', '1', ' 36 ', ..., 'Source Verified',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=57693261', 'NY'],
       ['9', '1', ' 36 ', ..., 'Verified',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=59432726', 'PA'],
       ...,
       ['6', '1', ' 36 ', ..., 'Source Verified',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=50415990', 'CA'],
       ['4', '1', ' 36 ', ..., 'Source Verified',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=46154151', 'OH'],
       ['12', '1', ' 36 ', ..., '',
        'https://www.lendingclub.com/browse/loanDetail.action?loan_id=66055249', 'IL']],
      dtype='<U69')

In [843]:
header_strings=np.delete(header_strings, 3)

In [844]:
header_strings

array(['issue_date', 'loan_status', 'Term_month', 'sub_grade', 'verification_status', 'url',
       'addr_state'], dtype='<U19')

# Verification status

In [845]:
string_data[:,4]

array(['Verified', 'Source Verified', 'Verified', ..., 'Source Verified', 'Source Verified', ''],
      dtype='<U69')

In [846]:
np.unique(string_data[:,4])

array(['', 'Not Verified', 'Source Verified', 'Verified'], dtype='<U69')

# url

In [847]:
string_data[:,5]

array(['https://www.lendingclub.com/browse/loanDetail.action?loan_id=48010226',
       'https://www.lendingclub.com/browse/loanDetail.action?loan_id=57693261',
       'https://www.lendingclub.com/browse/loanDetail.action?loan_id=59432726', ...,
       'https://www.lendingclub.com/browse/loanDetail.action?loan_id=50415990',
       'https://www.lendingclub.com/browse/loanDetail.action?loan_id=46154151',
       'https://www.lendingclub.com/browse/loanDetail.action?loan_id=66055249'], dtype='<U69')

In [848]:
string_data[:,5]=np.chararray.strip(string_data[:,5], 'https://www.lendingclub.com/browse/loanDetail.action?loan_id=' )

In [849]:
string_data[:,5]

array(['48010226', '57693261', '59432726', ..., '50415990', '46154151', '66055249'], dtype='<U69')

In [850]:
numeric_data[:,0]

array([48010226., 57693261., 59432726., ..., 50415990., 46154151., 66055249.])

In [851]:
numeric_data[:,0]=numeric_data[:,0].astype(dtype = np.int32)

In [852]:
#np.array_equal(numeric_data[:,0].astype(dtype = np.int32),string_data[:,5].astype(dtype =np.int32))

In [853]:
string_data = np.delete(string_data , 5 , axis = 1)
string_data

array([['5', '1', ' 36 ', 'C3', 'Verified', 'CA'],
       ['0', '1', ' 36 ', 'A5', 'Source Verified', 'NY'],
       ['9', '1', ' 36 ', 'B5', 'Verified', 'PA'],
       ...,
       ['6', '1', ' 36 ', 'A5', 'Source Verified', 'CA'],
       ['4', '1', ' 36 ', 'D2', 'Source Verified', 'OH'],
       ['12', '1', ' 36 ', 'A4', '', 'IL']], dtype='<U69')

In [854]:
header_strings = np.delete(header_strings , 5)
header_strings

array(['issue_date', 'loan_status', 'Term_month', 'sub_grade', 'verification_status', 'addr_state'],
      dtype='<U19')

# State address

In [855]:
string_data[:,5]

array(['CA', 'NY', 'PA', ..., 'CA', 'OH', 'IL'], dtype='<U69')

In [856]:
np.unique(string_data[:,5])

array(['', 'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IL', 'IN',
       'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH',
       'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA',
       'VT', 'WA', 'WI', 'WV', 'WY'], dtype='<U69')

In [857]:
state_names , state_counts = np.unique(string_data[:,5] , return_counts=True)


In [858]:
state_names 

array(['', 'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IL', 'IN',
       'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH',
       'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA',
       'VT', 'WA', 'WI', 'WV', 'WY'], dtype='<U69')

In [859]:
state_counts

array([ 500,   26,  119,   74,  220, 1336,  201,  143,   27,   27,  690,  321,   44,  389,  152,
         84,   84,  116,  210,  222,   10,  267,  156,  160,   61,   28,  261,   16,   25,   58,
        341,   57,  130,  777,  312,   83,  108,  320,   40,  107,   24,  143,  758,   74,  242,
         17,  216,  148,   49,   27], dtype=int64)

In [860]:
#state_counts_sorted = np.argsort(-state_counts)
#state_counts_sorted

array([ 5, 33, 42, 10,  0, 13, 30, 11, 37, 34, 21, 26, 44, 19,  4, 46, 18,  6, 23, 22, 14, 47,  7,
       41, 32,  2, 17, 36, 39, 16, 15, 35, 43,  3, 24, 29, 31, 48, 12, 38, 25,  9,  8, 49,  1, 28,
       40, 45, 27, 20], dtype=int64)

In [861]:
string_data[:,5] = np.where(string_data[:,5] == '',
                                 0,
                                 string_data[:,5])

In [862]:
header_strings

array(['issue_date', 'loan_status', 'Term_month', 'sub_grade', 'verification_status', 'addr_state'],
      dtype='<U19')

# manipulating numeric columns

In [863]:
numeric_data[:,0]

array([48010226., 57693261., 59432726., ..., 50415990., 46154151., 66055249.])

In [864]:
np.isnan(numeric_data[:,0]).sum()

0

In [865]:
header_numeric[0]

'id'

# loan_amount

In [866]:
numeric_data[:,1]

array([35000., 30000., 15000., ..., 10000.,    nan, 10000.])

In [867]:
header_numeric[1]

'loan_amnt'

In [868]:
mean=np.nanmean(numeric_data[:,1]).round()
mean

15273.0

In [869]:
row=np.sum(np.isnan(numeric_data), axis=0)
print(row)

[   0  500  500 6004  501  500]


In [870]:
np.isnan(numeric_data[:,1]).sum()

500

In [871]:
numeric_data[:,1]=np.nan_to_num(numeric_data[:,1], nan= 15273.0)

# funded amount

In [872]:
numeric_data[:,2]

array([35000., 30000., 15000., ..., 10000., 10000., 10000.])

In [873]:
header_numeric[2]

'funded_amnt'

In [874]:
np.isnan(numeric_data[:,2]).sum()

500

In [875]:
mean=np.nanmean(numeric_data[:,2]).round()
mean

15311.0

In [876]:
numeric_data[:,2]=np.nan_to_num(numeric_data[:,2], nan=15311.0)

In [877]:
numeric_data[:,2].astype(int)

array([35000, 30000, 15000, ..., 10000, 10000, 10000])

# interest rate

In [878]:
header_numeric[3]

'int_rate'

In [879]:
numeric_data[:,3]

array([13.33,   nan,   nan, ...,   nan, 16.55,   nan])

In [880]:
min=np.nanmin(numeric_data[:,3])
min

6.0

In [881]:
np.isnan(numeric_data[:,3]).sum()

6004

In [882]:
row=np.sum(np.isnan(numeric_data), axis=0)
print(row)

[   0    0    0 6004  501  500]


In [883]:
numeric_data[:,3]=np.nan_to_num(numeric_data[:,3], nan=6.0)

# installment

In [884]:
header_numeric[4]

'installment'

In [885]:
numeric_data[:,4]

array([1184.86,  938.57,  494.86, ...,     nan,  354.3 ,  309.97])

In [886]:
np.isnan(numeric_data[:,4]).sum()

501

In [887]:
row=np.sum(np.isnan(numeric_data), axis=0)
print(row)

[  0   0   0   0 501 500]


In [888]:
np.unique(numeric_data[:,4])

array([  31.42,   31.88,   32.27, ..., 1330.7 , 1372.97,     nan])

In [889]:
min=np.nanmin(numeric_data[:,4])
min

31.42

In [890]:
numeric_data[:,4]=np.nan_to_num(numeric_data[:,4], nan=31.42)

# total payment

In [891]:
header_numeric[5]

'total_pymnt'

In [892]:
numeric_data[:,5]

array([9452.96, 4679.7 , 1969.83, ..., 2185.64, 3199.4 ,  301.9 ])

In [893]:
min=np.nanmean(numeric_data[:,5]).round()
min

3144.0

In [894]:
numeric_data[:,5]=np.nan_to_num(numeric_data[:,5],nan=3144.0)

# final dataset

In [895]:
numeric_data=numeric_data.astype(int)
numeric_data

array([[48010226,    35000,    35000,       13,     1184,     9452],
       [57693261,    30000,    30000,        6,      938,     4679],
       [59432726,    15000,    15000,        6,      494,     1969],
       ...,
       [50415990,    10000,    10000,        6,       31,     2185],
       [46154151,    15273,    10000,       16,      354,     3199],
       [66055249,    10000,    10000,        6,      309,      301]])

In [912]:
data=np.hstack((numeric_data , string_data))
data

array([['48010226', '35000', '35000', ..., 'C3', 'Verified', 'CA'],
       ['57693261', '30000', '30000', ..., 'A5', 'Source Verified', 'NY'],
       ['59432726', '15000', '15000', ..., 'B5', 'Verified', 'PA'],
       ...,
       ['50415990', '10000', '10000', ..., 'A5', 'Source Verified', 'CA'],
       ['46154151', '15273', '10000', ..., 'D2', 'Source Verified', 'OH'],
       ['66055249', '10000', '10000', ..., 'A4', '', 'IL']], dtype='<U69')

In [913]:
header=np.append(header_numeric,header_strings)
header

array(['id', 'loan_amnt', 'funded_amnt', 'int_rate', 'installment', 'total_pymnt', 'issue_date',
       'loan_status', 'Term_month', 'sub_grade', 'verification_status', 'addr_state'], dtype='<U19')

In [914]:
header=np.array(['id', 'loan_amnt', 'funded_amnt', 'int_rate', 'installment', 'total_pymnt', 'issue_date',
       'loan_status', 'Term_month', 'sub_grade', 'verification_status', 'addr_state'])

In [915]:
data = np.vstack((header, data))

In [916]:
data

array([['id', 'loan_amnt', 'funded_amnt', ..., 'sub_grade', 'verification_status', 'addr_state'],
       ['48010226', '35000', '35000', ..., 'C3', 'Verified', 'CA'],
       ['57693261', '30000', '30000', ..., 'A5', 'Source Verified', 'NY'],
       ...,
       ['50415990', '10000', '10000', ..., 'A5', 'Source Verified', 'CA'],
       ['46154151', '15273', '10000', ..., 'D2', 'Source Verified', 'OH'],
       ['66055249', '10000', '10000', ..., 'A4', '', 'IL']], dtype='<U69')

In [903]:
np.savez('loan_data.csv', data)