## Import Libraries and Data

In [1]:
import spacy
import pandas as pd
import numpy as np
import re
from sklearn.decomposition import PCA
#For counting of tokens
from sklearn.feature_extraction.text import CountVectorizer
vect = CountVectorizer(stop_words = 'english', max_df = 2)
#Load english corpus
en_nlp = spacy.load('en')
#Add additional stopwords
more_stopwords = ['ziisha', 'ziidisha', 'zidsisha',
       'zidishq', 'zidishia', 'zidishi', 'zidisher', 'zidishans',
       'zidisaha', 'zidisa', 'zidihsa', 'zididsha', 'zididisha', 'zididha',
       'zidiasha', 'ziddisha', 'zdisha', 'loans', 'loan', 'thank', 'you', 
        'business', 'funding', 'fund']
for word in more_stopwords:
    en_nlp.vocab[word].is_stop = True
#Load data from Stefan
dat = pd.read_csv('Regression_Dataset_20161221(na).csv')
#so you can see more columns
pd.options.display.max_columns = 500
pd.options.display.max_rows = 200

%matplotlib inline

## Define Helper Functions

In [2]:
def get_top_tokens(pandas_series, n=200):
    """
    gives you a count of most common tokens in text field
    """
    
    vect.fit(pandas_series.dropna())
    counts = list(vect.vocabulary_.items())
    df = pd.DataFrame(counts, columns=['token', 'count']).set_index('token').sort_values('count', ascending=False)
    return df.head(n)


def clean_text(text_string):
    """ 
    remove punctuation, convert to lowercase and remove stop words
    """
    doc =  en_nlp(text_string)
    return(' '.join([token.text.lower() for token in doc if token.is_stop == False and token.is_punct ==False]))


def vectorize_text(text_string):
    """
    return GloVe vectors for words, document vector is the average of the word vectors
    reference: http://nlp.stanford.edu/projects/glove/
    
    spacy library tokenizes, cleans, etc. so you don't have to do this yourself. 
    """
    if type(text_string) != str:
        return np.zeros(300)  # return array of 300 zeros as built-in GloVe embedding has 300 dimensions
    
    doc =  en_nlp(text_string)
    return np.round(doc.vector, 3) #reduce size of datatset by reducing digits

In [3]:
def attach_vectorized_column(df, column_name_list):
    """
    append vectorized GloVe columns to dataframe
    
    warning: will add 300 columns for each text field
    """
    dataframes = []
    dataframes.append(df)
    
    for column_name in column_name_list:
        print('parsing {}'.format(column_name))
        
        col_names = [column_name+'_txtvec_'+str(x) for x in range(1, 301)]
        
        vecdf = pd.DataFrame.from_records((df[column_name].
        apply(vectorize_text)),
        columns = col_names)
        
        dataframes.append(vecdf)
    
    return pd.concat(dataframes, axis = 1)

In [4]:
## Save for later
#pca = PCA(n_components=25)
#pca.fit(df)
#for i,v in enumerate(np.cumsum(pca.explained_variance_ratio_)):
#    print(i,v)

# Preview Dataset

In [5]:
dat.head()

Unnamed: 0,id,borrower_id,usd_amount,len_proposal,len_about_me,len_about_business,len_address,missing_natl_id,missing_referred_by,application_time,default_flag,fraud_flag,nonfraud_default,friends_count,country_id,name,category_id,invited_flag,made_pmts,missed_pmts,sift_science_score,english_flag,business_years,usd_installment_amount,reserve_fee_pct,sift_labeled_bad,prior_loans,country_internet_users,country_life_expectancy,country_literacy,country_gdppc,about_me_field,about_business_field,proposal_field
0,4863,15031,50.0,422,573.0,835.0,16,0,1,,0,0,0,41.0,32,Kenya,,0,13,0,,1,,,0.0,1,0,46,62,78,1429,My\r\n name is Julius Moturi aged 44 and i run...,My\r\n business revolves around sourcing and s...,The loan will be used in purchasing of drugs a...
1,5687,15031,150.0,748,573.0,835.0,16,0,1,,1,0,1,41.0,32,Kenya,,0,10,8,,1,,,0.0,1,1,46,62,78,1429,My\r\n name is Julius Moturi aged 44 and i run...,My\r\n business revolves around sourcing and s...,The loan will be used to purchase more drugs f...
2,4895,15057,250.0,504,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,,1,41,0,,1,,,0.0,0,0,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,"First and foremost, i would want to take this ..."
3,10181,15057,244.0,627,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,9.0,1,35,0,,1,,,0.0,0,1,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,I will spend the loan money on purchasing drug...
4,20720,15057,220.0,627,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,9.0,1,32,0,0.7,1,,31.08,0.0,0,2,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,I will spend the loan money on purchasing drug...


#### See top tokens in proposal field

In [6]:
get_top_tokens(dat.proposal_field, 20)

Unnamed: 0_level_0,count
token,Unnamed: 1_level_1
ön,18308
être,18307
été,18306
énorme,18305
élevage,18304
électricité,18303
écouler,18302
zte,18301
zorgho,18300
zoomtech,18299


# Vectorize Text Fields

Using this technique called Glove to vectorize text:  http://nlp.stanford.edu/projects/glove/

Using this library:  https://spacy.io/  , where GloVe has been pretrained on the Common Crawl corpus

#### Warning:  computing the vectors can take 15-30 minutes, and is fairly memory hungry; if you are playing with this maybe just skip this part and load from csv

In [7]:
df = attach_vectorized_column(dat, ['about_me_field', 'about_business_field', 'proposal_field'])

parsing about_me_field
parsing about_business_field
parsing proposal_field


**Preview dataset with vectorized columns**

In [9]:
df.head()

Unnamed: 0,id,borrower_id,usd_amount,len_proposal,len_about_me,len_about_business,len_address,missing_natl_id,missing_referred_by,application_time,default_flag,fraud_flag,nonfraud_default,friends_count,country_id,name,category_id,invited_flag,made_pmts,missed_pmts,sift_science_score,english_flag,business_years,usd_installment_amount,reserve_fee_pct,sift_labeled_bad,prior_loans,country_internet_users,country_life_expectancy,country_literacy,country_gdppc,about_me_field,about_business_field,proposal_field,about_me_field_txtvec_1,about_me_field_txtvec_2,about_me_field_txtvec_3,about_me_field_txtvec_4,about_me_field_txtvec_5,about_me_field_txtvec_6,about_me_field_txtvec_7,about_me_field_txtvec_8,about_me_field_txtvec_9,about_me_field_txtvec_10,about_me_field_txtvec_11,about_me_field_txtvec_12,about_me_field_txtvec_13,about_me_field_txtvec_14,about_me_field_txtvec_15,about_me_field_txtvec_16,about_me_field_txtvec_17,about_me_field_txtvec_18,about_me_field_txtvec_19,about_me_field_txtvec_20,about_me_field_txtvec_21,about_me_field_txtvec_22,about_me_field_txtvec_23,about_me_field_txtvec_24,about_me_field_txtvec_25,about_me_field_txtvec_26,about_me_field_txtvec_27,about_me_field_txtvec_28,about_me_field_txtvec_29,about_me_field_txtvec_30,about_me_field_txtvec_31,about_me_field_txtvec_32,about_me_field_txtvec_33,about_me_field_txtvec_34,about_me_field_txtvec_35,about_me_field_txtvec_36,about_me_field_txtvec_37,about_me_field_txtvec_38,about_me_field_txtvec_39,about_me_field_txtvec_40,about_me_field_txtvec_41,about_me_field_txtvec_42,about_me_field_txtvec_43,about_me_field_txtvec_44,about_me_field_txtvec_45,about_me_field_txtvec_46,about_me_field_txtvec_47,about_me_field_txtvec_48,about_me_field_txtvec_49,about_me_field_txtvec_50,about_me_field_txtvec_51,about_me_field_txtvec_52,about_me_field_txtvec_53,about_me_field_txtvec_54,about_me_field_txtvec_55,about_me_field_txtvec_56,about_me_field_txtvec_57,about_me_field_txtvec_58,about_me_field_txtvec_59,about_me_field_txtvec_60,about_me_field_txtvec_61,about_me_field_txtvec_62,about_me_field_txtvec_63,about_me_field_txtvec_64,about_me_field_txtvec_65,about_me_field_txtvec_66,about_me_field_txtvec_67,about_me_field_txtvec_68,about_me_field_txtvec_69,about_me_field_txtvec_70,about_me_field_txtvec_71,about_me_field_txtvec_72,about_me_field_txtvec_73,about_me_field_txtvec_74,about_me_field_txtvec_75,about_me_field_txtvec_76,about_me_field_txtvec_77,about_me_field_txtvec_78,about_me_field_txtvec_79,about_me_field_txtvec_80,about_me_field_txtvec_81,about_me_field_txtvec_82,about_me_field_txtvec_83,about_me_field_txtvec_84,about_me_field_txtvec_85,about_me_field_txtvec_86,about_me_field_txtvec_87,about_me_field_txtvec_88,about_me_field_txtvec_89,about_me_field_txtvec_90,about_me_field_txtvec_91,about_me_field_txtvec_92,about_me_field_txtvec_93,about_me_field_txtvec_94,about_me_field_txtvec_95,about_me_field_txtvec_96,about_me_field_txtvec_97,about_me_field_txtvec_98,about_me_field_txtvec_99,about_me_field_txtvec_100,about_me_field_txtvec_101,about_me_field_txtvec_102,about_me_field_txtvec_103,about_me_field_txtvec_104,about_me_field_txtvec_105,about_me_field_txtvec_106,about_me_field_txtvec_107,about_me_field_txtvec_108,about_me_field_txtvec_109,about_me_field_txtvec_110,about_me_field_txtvec_111,about_me_field_txtvec_112,about_me_field_txtvec_113,about_me_field_txtvec_114,about_me_field_txtvec_115,about_me_field_txtvec_116,about_me_field_txtvec_117,about_me_field_txtvec_118,about_me_field_txtvec_119,about_me_field_txtvec_120,about_me_field_txtvec_121,about_me_field_txtvec_122,about_me_field_txtvec_123,about_me_field_txtvec_124,about_me_field_txtvec_125,about_me_field_txtvec_126,about_me_field_txtvec_127,about_me_field_txtvec_128,about_me_field_txtvec_129,about_me_field_txtvec_130,about_me_field_txtvec_131,about_me_field_txtvec_132,about_me_field_txtvec_133,about_me_field_txtvec_134,about_me_field_txtvec_135,about_me_field_txtvec_136,about_me_field_txtvec_137,about_me_field_txtvec_138,about_me_field_txtvec_139,about_me_field_txtvec_140,about_me_field_txtvec_141,about_me_field_txtvec_142,about_me_field_txtvec_143,about_me_field_txtvec_144,about_me_field_txtvec_145,about_me_field_txtvec_146,about_me_field_txtvec_147,about_me_field_txtvec_148,about_me_field_txtvec_149,about_me_field_txtvec_150,about_me_field_txtvec_151,about_me_field_txtvec_152,about_me_field_txtvec_153,about_me_field_txtvec_154,about_me_field_txtvec_155,about_me_field_txtvec_156,about_me_field_txtvec_157,about_me_field_txtvec_158,about_me_field_txtvec_159,about_me_field_txtvec_160,about_me_field_txtvec_161,about_me_field_txtvec_162,about_me_field_txtvec_163,about_me_field_txtvec_164,about_me_field_txtvec_165,about_me_field_txtvec_166,about_me_field_txtvec_167,about_me_field_txtvec_168,about_me_field_txtvec_169,about_me_field_txtvec_170,about_me_field_txtvec_171,about_me_field_txtvec_172,about_me_field_txtvec_173,about_me_field_txtvec_174,about_me_field_txtvec_175,about_me_field_txtvec_176,about_me_field_txtvec_177,about_me_field_txtvec_178,about_me_field_txtvec_179,about_me_field_txtvec_180,about_me_field_txtvec_181,about_me_field_txtvec_182,about_me_field_txtvec_183,about_me_field_txtvec_184,about_me_field_txtvec_185,about_me_field_txtvec_186,about_me_field_txtvec_187,about_me_field_txtvec_188,about_me_field_txtvec_189,about_me_field_txtvec_190,about_me_field_txtvec_191,about_me_field_txtvec_192,about_me_field_txtvec_193,about_me_field_txtvec_194,about_me_field_txtvec_195,about_me_field_txtvec_196,about_me_field_txtvec_197,about_me_field_txtvec_198,about_me_field_txtvec_199,about_me_field_txtvec_200,about_me_field_txtvec_201,about_me_field_txtvec_202,about_me_field_txtvec_203,about_me_field_txtvec_204,about_me_field_txtvec_205,about_me_field_txtvec_206,about_me_field_txtvec_207,about_me_field_txtvec_208,about_me_field_txtvec_209,about_me_field_txtvec_210,about_me_field_txtvec_211,about_me_field_txtvec_212,about_me_field_txtvec_213,about_me_field_txtvec_214,about_me_field_txtvec_215,about_me_field_txtvec_216,...,proposal_field_txtvec_51,proposal_field_txtvec_52,proposal_field_txtvec_53,proposal_field_txtvec_54,proposal_field_txtvec_55,proposal_field_txtvec_56,proposal_field_txtvec_57,proposal_field_txtvec_58,proposal_field_txtvec_59,proposal_field_txtvec_60,proposal_field_txtvec_61,proposal_field_txtvec_62,proposal_field_txtvec_63,proposal_field_txtvec_64,proposal_field_txtvec_65,proposal_field_txtvec_66,proposal_field_txtvec_67,proposal_field_txtvec_68,proposal_field_txtvec_69,proposal_field_txtvec_70,proposal_field_txtvec_71,proposal_field_txtvec_72,proposal_field_txtvec_73,proposal_field_txtvec_74,proposal_field_txtvec_75,proposal_field_txtvec_76,proposal_field_txtvec_77,proposal_field_txtvec_78,proposal_field_txtvec_79,proposal_field_txtvec_80,proposal_field_txtvec_81,proposal_field_txtvec_82,proposal_field_txtvec_83,proposal_field_txtvec_84,proposal_field_txtvec_85,proposal_field_txtvec_86,proposal_field_txtvec_87,proposal_field_txtvec_88,proposal_field_txtvec_89,proposal_field_txtvec_90,proposal_field_txtvec_91,proposal_field_txtvec_92,proposal_field_txtvec_93,proposal_field_txtvec_94,proposal_field_txtvec_95,proposal_field_txtvec_96,proposal_field_txtvec_97,proposal_field_txtvec_98,proposal_field_txtvec_99,proposal_field_txtvec_100,proposal_field_txtvec_101,proposal_field_txtvec_102,proposal_field_txtvec_103,proposal_field_txtvec_104,proposal_field_txtvec_105,proposal_field_txtvec_106,proposal_field_txtvec_107,proposal_field_txtvec_108,proposal_field_txtvec_109,proposal_field_txtvec_110,proposal_field_txtvec_111,proposal_field_txtvec_112,proposal_field_txtvec_113,proposal_field_txtvec_114,proposal_field_txtvec_115,proposal_field_txtvec_116,proposal_field_txtvec_117,proposal_field_txtvec_118,proposal_field_txtvec_119,proposal_field_txtvec_120,proposal_field_txtvec_121,proposal_field_txtvec_122,proposal_field_txtvec_123,proposal_field_txtvec_124,proposal_field_txtvec_125,proposal_field_txtvec_126,proposal_field_txtvec_127,proposal_field_txtvec_128,proposal_field_txtvec_129,proposal_field_txtvec_130,proposal_field_txtvec_131,proposal_field_txtvec_132,proposal_field_txtvec_133,proposal_field_txtvec_134,proposal_field_txtvec_135,proposal_field_txtvec_136,proposal_field_txtvec_137,proposal_field_txtvec_138,proposal_field_txtvec_139,proposal_field_txtvec_140,proposal_field_txtvec_141,proposal_field_txtvec_142,proposal_field_txtvec_143,proposal_field_txtvec_144,proposal_field_txtvec_145,proposal_field_txtvec_146,proposal_field_txtvec_147,proposal_field_txtvec_148,proposal_field_txtvec_149,proposal_field_txtvec_150,proposal_field_txtvec_151,proposal_field_txtvec_152,proposal_field_txtvec_153,proposal_field_txtvec_154,proposal_field_txtvec_155,proposal_field_txtvec_156,proposal_field_txtvec_157,proposal_field_txtvec_158,proposal_field_txtvec_159,proposal_field_txtvec_160,proposal_field_txtvec_161,proposal_field_txtvec_162,proposal_field_txtvec_163,proposal_field_txtvec_164,proposal_field_txtvec_165,proposal_field_txtvec_166,proposal_field_txtvec_167,proposal_field_txtvec_168,proposal_field_txtvec_169,proposal_field_txtvec_170,proposal_field_txtvec_171,proposal_field_txtvec_172,proposal_field_txtvec_173,proposal_field_txtvec_174,proposal_field_txtvec_175,proposal_field_txtvec_176,proposal_field_txtvec_177,proposal_field_txtvec_178,proposal_field_txtvec_179,proposal_field_txtvec_180,proposal_field_txtvec_181,proposal_field_txtvec_182,proposal_field_txtvec_183,proposal_field_txtvec_184,proposal_field_txtvec_185,proposal_field_txtvec_186,proposal_field_txtvec_187,proposal_field_txtvec_188,proposal_field_txtvec_189,proposal_field_txtvec_190,proposal_field_txtvec_191,proposal_field_txtvec_192,proposal_field_txtvec_193,proposal_field_txtvec_194,proposal_field_txtvec_195,proposal_field_txtvec_196,proposal_field_txtvec_197,proposal_field_txtvec_198,proposal_field_txtvec_199,proposal_field_txtvec_200,proposal_field_txtvec_201,proposal_field_txtvec_202,proposal_field_txtvec_203,proposal_field_txtvec_204,proposal_field_txtvec_205,proposal_field_txtvec_206,proposal_field_txtvec_207,proposal_field_txtvec_208,proposal_field_txtvec_209,proposal_field_txtvec_210,proposal_field_txtvec_211,proposal_field_txtvec_212,proposal_field_txtvec_213,proposal_field_txtvec_214,proposal_field_txtvec_215,proposal_field_txtvec_216,proposal_field_txtvec_217,proposal_field_txtvec_218,proposal_field_txtvec_219,proposal_field_txtvec_220,proposal_field_txtvec_221,proposal_field_txtvec_222,proposal_field_txtvec_223,proposal_field_txtvec_224,proposal_field_txtvec_225,proposal_field_txtvec_226,proposal_field_txtvec_227,proposal_field_txtvec_228,proposal_field_txtvec_229,proposal_field_txtvec_230,proposal_field_txtvec_231,proposal_field_txtvec_232,proposal_field_txtvec_233,proposal_field_txtvec_234,proposal_field_txtvec_235,proposal_field_txtvec_236,proposal_field_txtvec_237,proposal_field_txtvec_238,proposal_field_txtvec_239,proposal_field_txtvec_240,proposal_field_txtvec_241,proposal_field_txtvec_242,proposal_field_txtvec_243,proposal_field_txtvec_244,proposal_field_txtvec_245,proposal_field_txtvec_246,proposal_field_txtvec_247,proposal_field_txtvec_248,proposal_field_txtvec_249,proposal_field_txtvec_250,proposal_field_txtvec_251,proposal_field_txtvec_252,proposal_field_txtvec_253,proposal_field_txtvec_254,proposal_field_txtvec_255,proposal_field_txtvec_256,proposal_field_txtvec_257,proposal_field_txtvec_258,proposal_field_txtvec_259,proposal_field_txtvec_260,proposal_field_txtvec_261,proposal_field_txtvec_262,proposal_field_txtvec_263,proposal_field_txtvec_264,proposal_field_txtvec_265,proposal_field_txtvec_266,proposal_field_txtvec_267,proposal_field_txtvec_268,proposal_field_txtvec_269,proposal_field_txtvec_270,proposal_field_txtvec_271,proposal_field_txtvec_272,proposal_field_txtvec_273,proposal_field_txtvec_274,proposal_field_txtvec_275,proposal_field_txtvec_276,proposal_field_txtvec_277,proposal_field_txtvec_278,proposal_field_txtvec_279,proposal_field_txtvec_280,proposal_field_txtvec_281,proposal_field_txtvec_282,proposal_field_txtvec_283,proposal_field_txtvec_284,proposal_field_txtvec_285,proposal_field_txtvec_286,proposal_field_txtvec_287,proposal_field_txtvec_288,proposal_field_txtvec_289,proposal_field_txtvec_290,proposal_field_txtvec_291,proposal_field_txtvec_292,proposal_field_txtvec_293,proposal_field_txtvec_294,proposal_field_txtvec_295,proposal_field_txtvec_296,proposal_field_txtvec_297,proposal_field_txtvec_298,proposal_field_txtvec_299,proposal_field_txtvec_300
0,4863,15031,50.0,422,573.0,835.0,16,0,1,,0,0,0,41.0,32,Kenya,,0,13,0,,1,,,0.0,1,0,46,62,78,1429,My\r\n name is Julius Moturi aged 44 and i run...,My\r\n business revolves around sourcing and s...,The loan will be used in purchasing of drugs a...,0.005334,0.19704,-0.147241,-0.129046,0.099042,-0.010735,0.022164,-0.147691,0.045266,2.097052,-0.267564,0.08349,0.034284,-0.064016,-0.093086,-0.054804,-0.041256,1.13311,-0.176089,-0.007717,-0.037049,-0.052533,-0.098345,0.000215,0.059247,-0.005509,-0.065463,-0.03557,0.040065,-0.013519,-0.010093,0.06411,-0.026226,0.00332,0.053918,-0.06886,0.018232,-0.029697,-0.079219,-0.028608,0.016786,0.020905,0.044793,-0.108533,-0.020523,0.084645,-0.135869,-0.02641,0.057315,-0.01647,-0.079196,0.0014,-0.007715,-0.034016,0.078058,0.041547,-0.033194,-0.06248,-0.003992,-0.082701,-0.01609,-0.059694,-0.095857,0.165523,0.042999,-0.137183,0.011763,0.103157,0.012608,0.111246,0.013943,0.056732,0.173298,0.025337,0.116038,0.042948,0.10673,-0.01925,-0.08459,0.142018,0.040107,0.094704,-0.096763,-0.008263,0.010075,-0.148585,-0.023273,-0.06644,0.215335,0.055516,-0.09909,0.026575,-0.044171,0.024918,0.149086,0.006738,0.024719,-0.037127,-0.063755,-0.062754,-0.04457,0.040136,-0.082468,-0.077009,0.041507,-0.668004,0.107942,-0.021505,0.059401,-0.057606,0.037934,-0.140673,0.083927,-0.12401,-0.007633,-0.074748,0.018398,0.059864,0.030987,0.004441,0.050851,-0.014981,0.076751,-0.066101,0.044466,0.131618,-0.031273,-0.141945,0.031706,-0.052039,0.008087,-0.025309,-0.126043,0.071689,0.127243,0.019734,-0.045172,0.003638,0.027942,0.005864,-1.076815,0.008661,0.161463,0.016902,-0.015733,-0.037855,-0.08942,0.049407,0.058271,-0.104935,-0.079656,0.018669,0.123772,-0.019618,-0.044159,-0.01092,-0.119686,-0.031629,-0.051848,-0.128813,-0.005021,0.039796,0.006116,-0.054729,-0.072411,-0.148676,0.060309,-0.065373,0.167171,-0.015165,-0.046184,-0.029678,0.097808,-0.049404,-0.07767,0.021992,-0.121999,0.020912,0.033826,0.02801,0.035644,-0.063254,-0.129518,-0.098008,-0.062779,0.001454,-0.068396,-0.028609,0.03409,-0.002351,-0.043037,-0.05068,-0.099302,0.016488,0.046694,0.127623,0.026915,-0.113566,0.035538,0.101193,-0.0242,-0.104954,-0.085387,0.024815,0.138488,0.093079,0.054861,-0.001541,-0.038315,-0.030454,-0.158155,-0.02442,-0.017727,-0.102206,0.055471,0.079703,...,-0.022024,-0.001316,0.008705,-0.022529,0.129659,-0.044189,-0.034186,-0.139172,-0.055848,-0.089801,-0.052343,-0.129641,-0.086181,0.201801,0.093826,-0.129184,-0.00583,0.127544,0.032208,0.077698,0.059653,0.050267,0.192142,-0.002884,0.070535,0.045991,0.1185,-0.076081,-0.069773,0.212751,-0.012546,0.150264,-0.140694,0.029062,0.02348,-0.171741,-0.040265,-0.065481,0.27864,0.072909,-0.135152,0.038192,-0.007742,0.047436,0.056081,0.011197,0.004131,-0.150117,-0.024931,-0.022846,-0.071349,0.024418,-0.097479,-0.062646,0.109573,-0.847528,0.14969,0.040295,0.009372,-0.041261,0.041759,-0.178167,0.139369,-0.097876,0.040461,-0.026092,0.012655,0.037679,0.056507,0.006227,0.080894,-0.032358,0.000656,-0.002274,0.00669,0.067913,-0.049741,-0.106599,0.044316,0.00897,-0.01292,-0.007877,-0.150276,0.074918,0.074909,0.03858,-0.050342,-0.031836,0.008667,-0.012578,-1.077557,0.026014,0.122965,-0.075748,-0.013735,-0.083887,-0.059964,0.077079,0.054489,-0.065728,-0.037905,0.040605,0.025753,-0.017191,-0.031857,-0.081231,-0.105204,0.002481,-0.008021,-0.11127,-0.001633,0.045311,-0.074138,-0.075448,-0.055051,-0.112408,0.110356,0.009867,0.190417,-0.061,0.009775,0.002245,0.091633,-0.092346,-0.006184,0.062603,-0.065325,0.020182,0.022205,0.048158,0.058879,-0.060296,-0.113716,-0.127915,-0.103132,0.000995,-0.096621,-0.070427,0.048946,0.020875,0.011862,0.011928,-0.062683,0.014262,0.064039,0.097164,-0.010283,-0.086218,0.036937,0.176831,0.02611,-0.132224,-0.104781,0.027497,0.133798,0.097097,0.075096,0.007108,0.034191,0.080983,-0.047864,-0.079154,-0.040492,-0.118335,0.072906,0.146449,-0.065933,0.01019,-0.197598,0.028748,0.014248,-0.042142,-0.086577,0.047323,-0.030462,-0.034574,-0.047483,0.169674,-0.026905,-0.034778,-0.073342,-0.000182,0.096918,0.070087,-0.074222,-0.103697,-0.006189,-0.048093,-0.076708,0.066148,0.094176,0.011528,0.045823,0.15234,0.202885,-0.255834,-0.006191,-0.137879,-0.104558,0.108508,0.074289,-0.098639,-0.050595,0.014111,0.108685,0.230513,0.14594,-0.090462,-0.082804,0.019442,0.088175,0.174671,0.040153,0.085895,0.110488,-0.174428,-0.002492,0.074181,0.407439,-0.032295,0.173604,-0.070134,-0.091411,-0.103979,-0.064275,0.013071,0.014751,0.075767,0.083165,0.181637,0.138995,0.017648,-0.007074,0.00239,-0.01432,-0.10838,0.132448,-0.094853,0.108559,-0.041275,-0.245914,0.071075,0.023381,-0.055131,0.009472,-0.066535,-0.052272,-0.069259,-0.012483,0.124293
1,5687,15031,150.0,748,573.0,835.0,16,0,1,,1,0,1,41.0,32,Kenya,,0,10,8,,1,,,0.0,1,1,46,62,78,1429,My\r\n name is Julius Moturi aged 44 and i run...,My\r\n business revolves around sourcing and s...,The loan will be used to purchase more drugs f...,0.005334,0.19704,-0.147241,-0.129046,0.099042,-0.010735,0.022164,-0.147691,0.045266,2.097052,-0.267564,0.08349,0.034284,-0.064016,-0.093086,-0.054804,-0.041256,1.13311,-0.176089,-0.007717,-0.037049,-0.052533,-0.098345,0.000215,0.059247,-0.005509,-0.065463,-0.03557,0.040065,-0.013519,-0.010093,0.06411,-0.026226,0.00332,0.053918,-0.06886,0.018232,-0.029697,-0.079219,-0.028608,0.016786,0.020905,0.044793,-0.108533,-0.020523,0.084645,-0.135869,-0.02641,0.057315,-0.01647,-0.079196,0.0014,-0.007715,-0.034016,0.078058,0.041547,-0.033194,-0.06248,-0.003992,-0.082701,-0.01609,-0.059694,-0.095857,0.165523,0.042999,-0.137183,0.011763,0.103157,0.012608,0.111246,0.013943,0.056732,0.173298,0.025337,0.116038,0.042948,0.10673,-0.01925,-0.08459,0.142018,0.040107,0.094704,-0.096763,-0.008263,0.010075,-0.148585,-0.023273,-0.06644,0.215335,0.055516,-0.09909,0.026575,-0.044171,0.024918,0.149086,0.006738,0.024719,-0.037127,-0.063755,-0.062754,-0.04457,0.040136,-0.082468,-0.077009,0.041507,-0.668004,0.107942,-0.021505,0.059401,-0.057606,0.037934,-0.140673,0.083927,-0.12401,-0.007633,-0.074748,0.018398,0.059864,0.030987,0.004441,0.050851,-0.014981,0.076751,-0.066101,0.044466,0.131618,-0.031273,-0.141945,0.031706,-0.052039,0.008087,-0.025309,-0.126043,0.071689,0.127243,0.019734,-0.045172,0.003638,0.027942,0.005864,-1.076815,0.008661,0.161463,0.016902,-0.015733,-0.037855,-0.08942,0.049407,0.058271,-0.104935,-0.079656,0.018669,0.123772,-0.019618,-0.044159,-0.01092,-0.119686,-0.031629,-0.051848,-0.128813,-0.005021,0.039796,0.006116,-0.054729,-0.072411,-0.148676,0.060309,-0.065373,0.167171,-0.015165,-0.046184,-0.029678,0.097808,-0.049404,-0.07767,0.021992,-0.121999,0.020912,0.033826,0.02801,0.035644,-0.063254,-0.129518,-0.098008,-0.062779,0.001454,-0.068396,-0.028609,0.03409,-0.002351,-0.043037,-0.05068,-0.099302,0.016488,0.046694,0.127623,0.026915,-0.113566,0.035538,0.101193,-0.0242,-0.104954,-0.085387,0.024815,0.138488,0.093079,0.054861,-0.001541,-0.038315,-0.030454,-0.158155,-0.02442,-0.017727,-0.102206,0.055471,0.079703,...,-0.019095,0.01525,0.00917,-0.028067,0.135085,-0.027728,0.013829,-0.100817,-0.011243,-0.04754,0.033016,-0.096565,-0.075504,0.172572,0.127519,-0.083566,-0.010099,0.089549,0.069007,0.033426,0.029014,0.049568,0.153157,-0.039078,0.048918,0.023526,0.082414,-0.05457,0.039455,0.168569,0.031767,0.132833,-0.121885,-0.014393,-0.019247,-0.172302,-0.132338,0.048891,0.217189,0.060537,-0.146157,0.036169,0.002553,0.037539,0.032072,0.004075,0.040086,-0.076691,-0.07638,-0.087458,-0.056089,0.046456,-0.086354,-0.058618,0.108828,-0.993582,0.047754,-0.019102,0.02043,0.011151,0.03526,-0.149874,0.094765,-0.129747,0.048738,-0.064938,0.071385,0.004592,0.028845,0.020498,0.065116,0.006436,-0.0042,-0.044875,0.035897,0.027421,0.00248,-0.076054,0.085214,0.001337,0.085475,-0.064179,-0.086655,0.027906,0.069509,0.05725,-0.00284,-0.100929,0.002093,0.067589,-0.775282,0.043491,0.182926,-0.061236,0.018388,-0.03899,-0.014483,0.01968,0.052552,-0.03906,-0.045021,0.066654,0.079304,-0.033807,-0.077919,-0.033278,-0.034943,-0.018957,-0.077906,-0.109916,0.022904,0.04977,-0.031838,-0.051977,-0.035436,-0.095585,0.153108,-0.035322,0.146603,-0.03208,0.057034,-0.022705,0.069918,-0.01685,-0.000958,0.082599,-0.042301,-0.006397,0.090079,0.024912,0.060438,-0.000576,-0.154753,-0.061783,-0.045193,0.011366,-0.062761,-0.067246,0.047668,0.023611,-0.043945,0.003889,-0.049562,0.001397,-0.030413,0.086224,-0.059898,-0.067752,-0.016239,0.182457,0.039812,-0.12652,-0.074317,0.043387,0.12944,0.033505,0.079823,0.0077,0.031171,0.024271,-0.029889,-0.109458,-0.026195,-0.11853,0.027125,0.005849,-0.039537,0.082941,-0.144742,-0.009685,-0.001085,-0.03644,-0.089924,-0.023567,-0.01412,0.032882,-0.030575,0.132016,-0.045414,-0.007764,-0.100753,0.003241,0.153813,0.057932,-0.027858,-0.048011,0.037504,-0.137985,-0.055761,0.025189,0.057968,0.010911,0.003485,0.105477,0.21131,-0.167269,-0.093932,-0.118246,-0.143667,0.02719,0.078906,-0.064373,-0.082936,0.003923,0.090131,0.281142,0.103831,-0.06787,-0.07832,0.040056,0.057724,0.136847,-0.01725,0.095606,0.040779,-0.110778,-0.021141,0.005366,0.359668,-0.042812,0.041625,-0.057533,-0.086099,-0.132493,-0.085257,-0.006383,-0.023411,0.169605,0.005877,0.1913,0.076348,0.021219,-0.017894,0.028536,-0.054006,-0.074369,0.13861,-0.147411,0.072227,-0.070429,-0.255438,0.023818,0.013614,-0.046869,-0.043032,-0.01837,-0.017136,-0.108482,-0.055714,0.125604
2,4895,15057,250.0,504,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,,1,41,0,,1,,,0.0,0,0,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,"First and foremost, i would want to take this ...",0.038417,0.164513,-0.111832,-0.081935,0.128803,-0.078371,0.006664,-0.114832,0.031676,2.354455,-0.253142,0.057994,0.106669,-0.06202,-0.102681,-0.030842,-0.057929,1.221631,-0.218381,-0.005046,-0.017087,-0.03551,-0.117706,-0.012467,0.02852,0.053052,-0.066305,-0.041961,0.052694,-0.033234,-0.014114,0.053101,-0.011042,0.051555,0.048095,-0.093969,-0.011782,0.042241,-0.018971,-0.047216,-0.012297,0.077624,0.022149,-0.091148,-0.000921,0.119184,-0.102335,-0.007249,0.024746,-0.007759,-0.042129,-0.005203,0.016776,-0.014619,0.060122,0.028898,-0.019165,-0.076582,0.016074,-0.07626,-0.028666,-0.066493,-0.056011,0.139648,0.071138,-0.058063,0.052449,0.058963,0.022811,0.112718,0.004613,0.013714,0.170754,-0.027473,0.170694,0.07644,0.115822,-0.028939,-0.076933,0.18078,0.036058,0.117405,-0.130482,0.0018,0.003661,-0.153519,-0.111258,-0.000617,0.221764,0.064547,-0.103099,0.012006,-0.037631,-0.005444,0.0634,0.010092,0.05429,-0.056257,-0.073588,-0.014857,-0.0199,0.022013,-0.070672,-0.086591,0.051307,-0.767281,0.122446,-0.004454,0.018477,-0.017697,0.032386,-0.173693,0.130983,-0.075453,0.008979,-0.062223,0.07953,0.025901,0.035396,0.003548,0.132765,0.020993,0.052548,-0.056246,0.05581,0.050288,-0.001898,-0.101173,0.018042,0.001081,0.0015,-0.041642,-0.136178,0.07621,0.104434,0.017809,-0.000749,-0.000122,-0.010682,0.057856,-1.016452,0.057622,0.12217,-0.031639,-0.002283,-0.021854,-0.092124,0.04732,0.01425,-0.084896,-0.063259,0.053076,0.088921,-0.012714,-0.057787,-0.014756,-0.059405,-0.037225,-0.020943,-0.124591,0.008437,0.036292,-0.028754,-0.113359,-0.066873,-0.118796,0.078769,-0.057303,0.151031,-0.031428,-0.021232,-0.052563,0.07962,-0.070862,-0.074706,0.051276,-0.118108,0.027809,0.046322,0.045813,0.007074,-0.047391,-0.107277,-0.073769,-0.001932,-0.027334,-0.06848,-0.041887,0.063737,0.036883,-0.026534,-0.011857,-0.068036,0.014665,-0.002954,0.155614,0.004906,-0.079995,-0.001266,0.117209,0.017416,-0.126173,-0.091509,0.011609,0.19768,0.039106,0.055737,0.003928,-0.00429,-0.008478,-0.100369,-0.101852,-0.033223,-0.117237,0.055541,0.062612,...,-0.084264,0.007923,-0.026053,-0.088599,0.089559,-0.007502,-0.012213,-0.073939,-0.02679,-0.069676,-0.038613,-0.069222,-0.115138,0.196684,0.085332,-0.083998,0.022691,0.117182,0.116077,0.04706,0.105805,0.100738,0.236422,-0.046038,0.111896,0.036752,0.091095,-0.074996,-0.049345,0.166083,-0.02238,0.083555,-0.183497,0.011674,0.009683,-0.175927,-0.064966,-0.101767,0.297643,0.090603,-0.107307,-0.003266,-0.010309,0.146248,0.121465,-0.006985,0.029847,-0.098783,-0.046463,-0.028131,-0.027805,-0.013298,-0.049572,-0.070311,0.121246,-0.928927,0.10999,-0.017258,-0.006624,0.009372,0.039119,-0.162992,0.128491,-0.121845,0.021277,-0.092859,0.013435,0.000346,0.007537,0.012276,0.120032,-0.091078,0.010533,0.061048,0.096909,0.110513,-0.058741,-0.089451,0.03505,0.024912,0.009649,-0.004219,-0.15946,0.106656,0.086233,0.082299,-0.00323,-0.061903,0.009087,0.03603,-1.087809,0.065762,0.092865,-0.055619,0.00844,-0.12454,-0.078246,0.048492,-0.025508,-0.039102,-0.006363,0.039648,0.049517,-0.011435,-0.066486,-0.044979,-0.089192,-0.035131,-0.051342,-0.106646,0.000441,0.054787,-0.069838,-0.028131,-0.066909,-0.169915,0.088803,-0.034351,0.162643,-0.007506,-0.017037,-0.004065,0.153698,-0.135588,-0.031087,0.037768,-0.037589,0.051633,0.041449,0.049604,0.004287,0.001509,-0.103156,-0.053885,-0.02342,-0.019545,-0.078411,-0.075095,-0.014008,0.06767,0.020109,0.010322,-0.052664,-0.021779,0.056689,0.069094,-0.040105,-0.102341,0.012556,0.230285,-0.030626,-0.143035,-0.048921,-0.024056,0.172995,0.088118,0.017212,0.019319,0.033649,0.050245,0.000986,-0.030394,-0.041711,-0.128977,-0.016142,0.171795,-0.027095,0.024978,-0.183755,0.023331,0.021277,-0.055261,-0.085385,0.097107,-0.011071,0.010749,-0.071103,0.139109,-0.002918,-0.035572,-0.137951,-0.003563,0.024734,0.169537,-0.005618,-0.085393,-0.029762,-0.096814,-0.067237,0.129004,0.108298,-0.000169,-0.016136,0.141912,0.19465,-0.185571,-0.092099,-0.093298,-0.129224,0.174771,0.040181,-0.072567,-0.109484,0.015929,0.072133,0.175306,0.146926,-0.065315,-0.05204,0.035814,0.093498,0.147029,0.011992,0.086776,0.074659,-0.106588,-0.028891,0.093391,0.442776,0.050627,0.169051,-0.069812,-0.077934,-0.12544,-0.064273,0.014719,0.056409,0.141672,0.026448,0.189304,0.167604,0.060695,0.006295,0.001526,-0.017919,-0.104837,0.133654,-0.059386,0.147204,-0.006777,-0.207704,0.085733,0.003126,-0.039443,0.083205,-0.095055,-0.031572,-0.072249,0.058814,0.119693
3,10181,15057,244.0,627,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,9.0,1,35,0,,1,,,0.0,0,1,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,I will spend the loan money on purchasing drug...,0.038417,0.164513,-0.111832,-0.081935,0.128803,-0.078371,0.006664,-0.114832,0.031676,2.354455,-0.253142,0.057994,0.106669,-0.06202,-0.102681,-0.030842,-0.057929,1.221631,-0.218381,-0.005046,-0.017087,-0.03551,-0.117706,-0.012467,0.02852,0.053052,-0.066305,-0.041961,0.052694,-0.033234,-0.014114,0.053101,-0.011042,0.051555,0.048095,-0.093969,-0.011782,0.042241,-0.018971,-0.047216,-0.012297,0.077624,0.022149,-0.091148,-0.000921,0.119184,-0.102335,-0.007249,0.024746,-0.007759,-0.042129,-0.005203,0.016776,-0.014619,0.060122,0.028898,-0.019165,-0.076582,0.016074,-0.07626,-0.028666,-0.066493,-0.056011,0.139648,0.071138,-0.058063,0.052449,0.058963,0.022811,0.112718,0.004613,0.013714,0.170754,-0.027473,0.170694,0.07644,0.115822,-0.028939,-0.076933,0.18078,0.036058,0.117405,-0.130482,0.0018,0.003661,-0.153519,-0.111258,-0.000617,0.221764,0.064547,-0.103099,0.012006,-0.037631,-0.005444,0.0634,0.010092,0.05429,-0.056257,-0.073588,-0.014857,-0.0199,0.022013,-0.070672,-0.086591,0.051307,-0.767281,0.122446,-0.004454,0.018477,-0.017697,0.032386,-0.173693,0.130983,-0.075453,0.008979,-0.062223,0.07953,0.025901,0.035396,0.003548,0.132765,0.020993,0.052548,-0.056246,0.05581,0.050288,-0.001898,-0.101173,0.018042,0.001081,0.0015,-0.041642,-0.136178,0.07621,0.104434,0.017809,-0.000749,-0.000122,-0.010682,0.057856,-1.016452,0.057622,0.12217,-0.031639,-0.002283,-0.021854,-0.092124,0.04732,0.01425,-0.084896,-0.063259,0.053076,0.088921,-0.012714,-0.057787,-0.014756,-0.059405,-0.037225,-0.020943,-0.124591,0.008437,0.036292,-0.028754,-0.113359,-0.066873,-0.118796,0.078769,-0.057303,0.151031,-0.031428,-0.021232,-0.052563,0.07962,-0.070862,-0.074706,0.051276,-0.118108,0.027809,0.046322,0.045813,0.007074,-0.047391,-0.107277,-0.073769,-0.001932,-0.027334,-0.06848,-0.041887,0.063737,0.036883,-0.026534,-0.011857,-0.068036,0.014665,-0.002954,0.155614,0.004906,-0.079995,-0.001266,0.117209,0.017416,-0.126173,-0.091509,0.011609,0.19768,0.039106,0.055737,0.003928,-0.00429,-0.008478,-0.100369,-0.101852,-0.033223,-0.117237,0.055541,0.062612,...,-0.047325,0.005186,0.006221,-0.039794,0.056833,0.009812,-0.024757,-0.138363,-0.050875,-0.137443,-0.014637,-0.07604,-0.022134,0.202519,0.145874,-0.104954,-0.045015,0.083879,0.059911,0.015732,0.031884,0.047042,0.21286,-0.038879,0.107402,0.076282,0.136524,-0.048363,0.030751,0.171611,-0.012545,0.066124,-0.121426,0.02937,0.00728,-0.159349,0.053419,-0.074619,0.36116,0.063032,-0.047909,-0.070505,0.015151,-0.009313,0.039609,-0.05318,0.048445,-0.054045,-0.074258,-0.042303,-0.031114,0.039182,-0.085443,-0.017696,0.048315,-0.669447,0.150421,-0.005372,0.00356,0.031739,0.030371,-0.206197,0.146891,-0.076542,0.02785,-0.121265,0.027868,-0.102027,0.053224,0.00857,0.070011,0.008036,0.031634,0.048224,-0.007451,0.084008,-0.013305,-0.06875,0.013641,-0.050421,0.114058,-0.088033,-0.132382,0.078345,0.137894,0.011811,-0.073056,-0.058963,-0.011405,-0.017725,-0.857099,0.122318,0.110496,-0.014863,0.009683,-0.031834,0.007712,0.094548,-0.006873,-0.042445,0.035769,0.03961,0.110877,0.054038,-0.025917,-0.057788,-0.076757,0.00874,-0.078336,-0.09063,0.031117,0.039168,-0.051037,-0.090037,-0.029182,-0.033445,0.13127,-0.051365,0.134262,-0.00414,0.053227,-0.02475,0.040731,-0.023367,-0.044129,0.046712,-0.037563,0.024654,0.053725,0.005986,0.073053,-0.013199,-0.121896,0.002271,-0.048146,-0.003956,-0.070042,-0.063442,-0.007585,0.02855,0.048381,0.03552,-0.05177,-0.035579,-0.006042,0.11562,0.001423,-0.033045,-0.017218,0.096183,0.064408,-0.145542,-0.072704,0.012916,0.097437,0.048224,-0.005682,-0.016486,0.015149,0.03121,-0.028565,-0.060433,-0.057731,-0.10363,0.01062,0.040131,-0.04508,-0.067087,-0.149827,0.104823,-0.057915,-0.010599,-0.140428,-0.045723,-0.042329,0.002648,0.014009,0.099857,-0.009144,-0.044339,-0.076249,0.026002,0.120089,0.078027,-0.02761,-0.085675,0.002765,-0.124841,-0.076962,0.042863,-0.004066,-0.018882,0.007637,0.076479,0.164104,-0.168996,-0.02334,-0.055464,-0.130926,0.106574,0.045066,-0.074815,-0.076123,-0.037801,0.076409,0.231813,0.009032,-0.115723,-0.048555,0.049379,0.087939,0.084619,0.010298,0.102466,0.063274,-0.124404,-0.023333,0.04758,0.316704,-0.02874,0.105763,-0.090604,-0.059083,-0.057665,-0.066831,-0.053071,-0.07269,0.078708,0.009933,0.166282,0.072077,0.000364,0.001178,-0.029351,-0.038755,-0.050794,0.108757,-0.09522,0.100103,0.020906,-0.199675,0.03284,0.025781,-0.012515,-0.009813,0.0086,-0.038415,-0.127675,0.012516,0.054503
4,20720,15057,220.0,627,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,9.0,1,32,0,0.7,1,,31.08,0.0,0,2,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,I will spend the loan money on purchasing drug...,0.038417,0.164513,-0.111832,-0.081935,0.128803,-0.078371,0.006664,-0.114832,0.031676,2.354455,-0.253142,0.057994,0.106669,-0.06202,-0.102681,-0.030842,-0.057929,1.221631,-0.218381,-0.005046,-0.017087,-0.03551,-0.117706,-0.012467,0.02852,0.053052,-0.066305,-0.041961,0.052694,-0.033234,-0.014114,0.053101,-0.011042,0.051555,0.048095,-0.093969,-0.011782,0.042241,-0.018971,-0.047216,-0.012297,0.077624,0.022149,-0.091148,-0.000921,0.119184,-0.102335,-0.007249,0.024746,-0.007759,-0.042129,-0.005203,0.016776,-0.014619,0.060122,0.028898,-0.019165,-0.076582,0.016074,-0.07626,-0.028666,-0.066493,-0.056011,0.139648,0.071138,-0.058063,0.052449,0.058963,0.022811,0.112718,0.004613,0.013714,0.170754,-0.027473,0.170694,0.07644,0.115822,-0.028939,-0.076933,0.18078,0.036058,0.117405,-0.130482,0.0018,0.003661,-0.153519,-0.111258,-0.000617,0.221764,0.064547,-0.103099,0.012006,-0.037631,-0.005444,0.0634,0.010092,0.05429,-0.056257,-0.073588,-0.014857,-0.0199,0.022013,-0.070672,-0.086591,0.051307,-0.767281,0.122446,-0.004454,0.018477,-0.017697,0.032386,-0.173693,0.130983,-0.075453,0.008979,-0.062223,0.07953,0.025901,0.035396,0.003548,0.132765,0.020993,0.052548,-0.056246,0.05581,0.050288,-0.001898,-0.101173,0.018042,0.001081,0.0015,-0.041642,-0.136178,0.07621,0.104434,0.017809,-0.000749,-0.000122,-0.010682,0.057856,-1.016452,0.057622,0.12217,-0.031639,-0.002283,-0.021854,-0.092124,0.04732,0.01425,-0.084896,-0.063259,0.053076,0.088921,-0.012714,-0.057787,-0.014756,-0.059405,-0.037225,-0.020943,-0.124591,0.008437,0.036292,-0.028754,-0.113359,-0.066873,-0.118796,0.078769,-0.057303,0.151031,-0.031428,-0.021232,-0.052563,0.07962,-0.070862,-0.074706,0.051276,-0.118108,0.027809,0.046322,0.045813,0.007074,-0.047391,-0.107277,-0.073769,-0.001932,-0.027334,-0.06848,-0.041887,0.063737,0.036883,-0.026534,-0.011857,-0.068036,0.014665,-0.002954,0.155614,0.004906,-0.079995,-0.001266,0.117209,0.017416,-0.126173,-0.091509,0.011609,0.19768,0.039106,0.055737,0.003928,-0.00429,-0.008478,-0.100369,-0.101852,-0.033223,-0.117237,0.055541,0.062612,...,-0.047325,0.005186,0.006221,-0.039794,0.056833,0.009812,-0.024757,-0.138363,-0.050875,-0.137443,-0.014637,-0.07604,-0.022134,0.202519,0.145874,-0.104954,-0.045015,0.083879,0.059911,0.015732,0.031884,0.047042,0.21286,-0.038879,0.107402,0.076282,0.136524,-0.048363,0.030751,0.171611,-0.012545,0.066124,-0.121426,0.02937,0.00728,-0.159349,0.053419,-0.074619,0.36116,0.063032,-0.047909,-0.070505,0.015151,-0.009313,0.039609,-0.05318,0.048445,-0.054045,-0.074258,-0.042303,-0.031114,0.039182,-0.085443,-0.017696,0.048315,-0.669447,0.150421,-0.005372,0.00356,0.031739,0.030371,-0.206197,0.146891,-0.076542,0.02785,-0.121265,0.027868,-0.102027,0.053224,0.00857,0.070011,0.008036,0.031634,0.048224,-0.007451,0.084008,-0.013305,-0.06875,0.013641,-0.050421,0.114058,-0.088033,-0.132382,0.078345,0.137894,0.011811,-0.073056,-0.058963,-0.011405,-0.017725,-0.857099,0.122318,0.110496,-0.014863,0.009683,-0.031834,0.007712,0.094548,-0.006873,-0.042445,0.035769,0.03961,0.110877,0.054038,-0.025917,-0.057788,-0.076757,0.00874,-0.078336,-0.09063,0.031117,0.039168,-0.051037,-0.090037,-0.029182,-0.033445,0.13127,-0.051365,0.134262,-0.00414,0.053227,-0.02475,0.040731,-0.023367,-0.044129,0.046712,-0.037563,0.024654,0.053725,0.005986,0.073053,-0.013199,-0.121896,0.002271,-0.048146,-0.003956,-0.070042,-0.063442,-0.007585,0.02855,0.048381,0.03552,-0.05177,-0.035579,-0.006042,0.11562,0.001423,-0.033045,-0.017218,0.096183,0.064408,-0.145542,-0.072704,0.012916,0.097437,0.048224,-0.005682,-0.016486,0.015149,0.03121,-0.028565,-0.060433,-0.057731,-0.10363,0.01062,0.040131,-0.04508,-0.067087,-0.149827,0.104823,-0.057915,-0.010599,-0.140428,-0.045723,-0.042329,0.002648,0.014009,0.099857,-0.009144,-0.044339,-0.076249,0.026002,0.120089,0.078027,-0.02761,-0.085675,0.002765,-0.124841,-0.076962,0.042863,-0.004066,-0.018882,0.007637,0.076479,0.164104,-0.168996,-0.02334,-0.055464,-0.130926,0.106574,0.045066,-0.074815,-0.076123,-0.037801,0.076409,0.231813,0.009032,-0.115723,-0.048555,0.049379,0.087939,0.084619,0.010298,0.102466,0.063274,-0.124404,-0.023333,0.04758,0.316704,-0.02874,0.105763,-0.090604,-0.059083,-0.057665,-0.066831,-0.053071,-0.07269,0.078708,0.009933,0.166282,0.072077,0.000364,0.001178,-0.029351,-0.038755,-0.050794,0.108757,-0.09522,0.100103,0.020906,-0.199675,0.03284,0.025781,-0.012515,-0.009813,0.0086,-0.038415,-0.127675,0.012516,0.054503


# Prepare data for DataRobot and Upload

#### Drop irrelevant columns, check size of data

In [37]:
cols_to_drop = ['missed_pmts', 'nonfraud_default', 'sift_labeled_bad', 'sift_science_score', 
                'made_pmts', 'fraud_flag', 'reserve_fee_pct','country_id']
final_df = df.drop(cols_to_drop, axis=1)

In [38]:
#Combine the ids to obtain a unique user id
final_df['unique_id'] = final_df.borrower_id.astype(str) +'__'+ final_df.id.astype(str)
final_df.drop(['borrower_id', 'id'], axis = 1, inplace=True)

#### Save Data

In [None]:
file_path = '/Users/hamelhusain/Google Drive/Team Shared Folder - Zidisha/Data/TextVec_Experiment.gz'
final_df.to_csv(file_path, compression = 'gzip', index=False)

#### Upload to DataRobot

In [None]:
import datarobot as dr
proj = dr.Project.create(file_path, project_name='Text Vector With Text')

In [None]:
proj.set_target(target = 'default_flag', 
                metric = 'AUC', 
                mode = dr.AUTOPILOT_MODE.QUICK,
                worker_count = 25)

#### Alternate version without original text fields

In [None]:
final_df_notext = final_df.drop(['about_me_field', 'about_business_field', 'proposal_field'], axis =1)
proj2 = dr.Project.create(final_df_notext, project_name='Text Vector No Text')
proj2.set_target(target = 'default_flag', 
                metric = 'AUC', 
                mode = dr.AUTOPILOT_MODE.QUICK,
                worker_count = 25)