Background for those reading: http://nlp.stanford.edu/projects/glove/

## Import Libraries and Data

In [7]:
import spacy
import pandas as pd
import numpy as np
import re
from sklearn.decomposition import PCA
#For counting of tokens
from sklearn.feature_extraction.text import CountVectorizer
vect = CountVectorizer(stop_words = 'english', max_df = 2)
#Load english corpus
en_nlp = spacy.load('en')
#Add additional stopwords
more_stopwords = ['ziisha', 'ziidisha', 'zidsisha',
       'zidishq', 'zidishia', 'zidishi', 'zidisher', 'zidishans',
       'zidisaha', 'zidisa', 'zidihsa', 'zididsha', 'zididisha', 'zididha',
       'zidiasha', 'ziddisha', 'zdisha', 'loans', 'loan', 'thank', 'you', 
        'business', 'funding', 'fund']
for word in more_stopwords:
    en_nlp.vocab[word].is_stop = True
#Load data from Stefan
dat = pd.read_csv('Regression_Dataset_20161221(na).csv')
#so you can see more columns
pd.options.display.max_columns = 500
pd.options.display.max_rows = 200

%matplotlib inline

## Define Helper Functions

In [31]:
def get_top_tokens(pandas_series, n=200):
    """
    gives you a count of most common tokens in text field
    """
    
    vect.fit(pandas_series.dropna())
    counts = list(vect.vocabulary_.items())
    df = pd.DataFrame(counts, columns=['token', 'count']).set_index('token').sort_values('count', ascending=False)
    return df.head(n)


def clean_text(text_string):
    """ 
    remove punctuation, convert to lowercase and remove stop words
    """
    doc =  en_nlp(text_string)
    return(' '.join([token.text.lower() for token in doc if token.is_stop == False and token.is_punct ==False]))


def vectorize_text(text_string, clean=True):
    """
    return GloVe vectors for words, document vector is the average of the word vectors
    reference: http://nlp.stanford.edu/projects/glove/
    """
    if type(text_string) != str:
        return np.zeros(300)  # return array of 300 zeros as built-in GloVe embedding has 300 dimensions
    
    if clean:
        text_string = clean_text(text_string)
        
    doc =  en_nlp(text_string)
    return np.round(doc.vector, 3) #reduce size of datatset by reducing significant digits

In [32]:
def attach_vectorized_column(df, column_name_list, clean=True):
    """
    append vectorized GloVe columns to dataframe
    
    warning: will add 300 columns for each text field
    """
    dataframes = []
    dataframes.append(df)
    
    for column_name in column_name_list:
        print('parsing {}'.format(column_name))
        
        col_names = [column_name+'_txtvec_'+str(x) for x in range(1, 301)]
        
        vecdf = pd.DataFrame.from_records((df[column_name].
        apply(lambda x: vectorize_text(x, clean=clean))),
        columns = col_names)
        
        dataframes.append(vecdf)
    
    return pd.concat(dataframes, axis = 1)

In [33]:
def attach_vectorized_column_summed(df, column_name_list, clean=True):
    """
    Sum all the vectors of the text fields as form of dimensionality reduction
    
    will only add 300 features summarizing all text, not 300 for each text field.  
    """
    vecs = []
    new_col_names = ['sum_txtvec_'+str(x) for x in range(1, 301)]

    for column_name in column_name_list:
        print('parsing {}'.format(column_name))
        vecs.append(df[column_name].apply(lambda x: vectorize_text(x, clean=clean)))
    
    combined_vecdf = pd.DataFrame.from_records(sum(vecs), 
                                               columns = new_col_names)
    
    return pd.concat([df, combined_vecdf], axis = 1)

# Preview Dataset

#### See top tokens in proposal field

In [6]:
get_top_tokens(dat.proposal_field, 20)

Unnamed: 0_level_0,count
token,Unnamed: 1_level_1
ön,18308
être,18307
été,18306
énorme,18305
élevage,18304
électricité,18303
écouler,18302
zte,18301
zorgho,18300
zoomtech,18299


# Vectorize Text Fields

Using this technique called Glove to vectorize text:  http://nlp.stanford.edu/projects/glove/

Using this library:  https://spacy.io/  , where GloVe has been pretrained on the Common Crawl corpus

#### Warning:  computing the vectors can take 15-30 minutes, and is fairly memory hungry; if you are playing with this maybe just skip this part and load from csv

In [7]:
df = attach_vectorized_column(dat, ['about_me_field', 'about_business_field', 'proposal_field'])

parsing about_me_field
parsing about_business_field
parsing proposal_field


**Preview dataset with vectorized columns**

In [9]:
df.head()

Unnamed: 0,id,borrower_id,usd_amount,len_proposal,len_about_me,len_about_business,len_address,missing_natl_id,missing_referred_by,application_time,default_flag,fraud_flag,nonfraud_default,friends_count,country_id,name,category_id,invited_flag,made_pmts,missed_pmts,sift_science_score,english_flag,business_years,usd_installment_amount,reserve_fee_pct,sift_labeled_bad,prior_loans,country_internet_users,country_life_expectancy,country_literacy,country_gdppc,about_me_field,about_business_field,proposal_field,about_me_field_txtvec_1,about_me_field_txtvec_2,about_me_field_txtvec_3,about_me_field_txtvec_4,about_me_field_txtvec_5,about_me_field_txtvec_6,about_me_field_txtvec_7,about_me_field_txtvec_8,about_me_field_txtvec_9,about_me_field_txtvec_10,about_me_field_txtvec_11,about_me_field_txtvec_12,about_me_field_txtvec_13,about_me_field_txtvec_14,about_me_field_txtvec_15,about_me_field_txtvec_16,about_me_field_txtvec_17,about_me_field_txtvec_18,about_me_field_txtvec_19,about_me_field_txtvec_20,about_me_field_txtvec_21,about_me_field_txtvec_22,about_me_field_txtvec_23,about_me_field_txtvec_24,about_me_field_txtvec_25,about_me_field_txtvec_26,about_me_field_txtvec_27,about_me_field_txtvec_28,about_me_field_txtvec_29,about_me_field_txtvec_30,about_me_field_txtvec_31,about_me_field_txtvec_32,about_me_field_txtvec_33,about_me_field_txtvec_34,about_me_field_txtvec_35,about_me_field_txtvec_36,about_me_field_txtvec_37,about_me_field_txtvec_38,about_me_field_txtvec_39,about_me_field_txtvec_40,about_me_field_txtvec_41,about_me_field_txtvec_42,about_me_field_txtvec_43,about_me_field_txtvec_44,about_me_field_txtvec_45,about_me_field_txtvec_46,about_me_field_txtvec_47,about_me_field_txtvec_48,about_me_field_txtvec_49,about_me_field_txtvec_50,about_me_field_txtvec_51,about_me_field_txtvec_52,about_me_field_txtvec_53,about_me_field_txtvec_54,about_me_field_txtvec_55,about_me_field_txtvec_56,about_me_field_txtvec_57,about_me_field_txtvec_58,about_me_field_txtvec_59,about_me_field_txtvec_60,about_me_field_txtvec_61,about_me_field_txtvec_62,about_me_field_txtvec_63,about_me_field_txtvec_64,about_me_field_txtvec_65,about_me_field_txtvec_66,about_me_field_txtvec_67,about_me_field_txtvec_68,about_me_field_txtvec_69,about_me_field_txtvec_70,about_me_field_txtvec_71,about_me_field_txtvec_72,about_me_field_txtvec_73,about_me_field_txtvec_74,about_me_field_txtvec_75,about_me_field_txtvec_76,about_me_field_txtvec_77,about_me_field_txtvec_78,about_me_field_txtvec_79,about_me_field_txtvec_80,about_me_field_txtvec_81,about_me_field_txtvec_82,about_me_field_txtvec_83,about_me_field_txtvec_84,about_me_field_txtvec_85,about_me_field_txtvec_86,about_me_field_txtvec_87,about_me_field_txtvec_88,about_me_field_txtvec_89,about_me_field_txtvec_90,about_me_field_txtvec_91,about_me_field_txtvec_92,about_me_field_txtvec_93,about_me_field_txtvec_94,about_me_field_txtvec_95,about_me_field_txtvec_96,about_me_field_txtvec_97,about_me_field_txtvec_98,about_me_field_txtvec_99,about_me_field_txtvec_100,about_me_field_txtvec_101,about_me_field_txtvec_102,about_me_field_txtvec_103,about_me_field_txtvec_104,about_me_field_txtvec_105,about_me_field_txtvec_106,about_me_field_txtvec_107,about_me_field_txtvec_108,about_me_field_txtvec_109,about_me_field_txtvec_110,about_me_field_txtvec_111,about_me_field_txtvec_112,about_me_field_txtvec_113,about_me_field_txtvec_114,about_me_field_txtvec_115,about_me_field_txtvec_116,about_me_field_txtvec_117,about_me_field_txtvec_118,about_me_field_txtvec_119,about_me_field_txtvec_120,about_me_field_txtvec_121,about_me_field_txtvec_122,about_me_field_txtvec_123,about_me_field_txtvec_124,about_me_field_txtvec_125,about_me_field_txtvec_126,about_me_field_txtvec_127,about_me_field_txtvec_128,about_me_field_txtvec_129,about_me_field_txtvec_130,about_me_field_txtvec_131,about_me_field_txtvec_132,about_me_field_txtvec_133,about_me_field_txtvec_134,about_me_field_txtvec_135,about_me_field_txtvec_136,about_me_field_txtvec_137,about_me_field_txtvec_138,about_me_field_txtvec_139,about_me_field_txtvec_140,about_me_field_txtvec_141,about_me_field_txtvec_142,about_me_field_txtvec_143,about_me_field_txtvec_144,about_me_field_txtvec_145,about_me_field_txtvec_146,about_me_field_txtvec_147,about_me_field_txtvec_148,about_me_field_txtvec_149,about_me_field_txtvec_150,about_me_field_txtvec_151,about_me_field_txtvec_152,about_me_field_txtvec_153,about_me_field_txtvec_154,about_me_field_txtvec_155,about_me_field_txtvec_156,about_me_field_txtvec_157,about_me_field_txtvec_158,about_me_field_txtvec_159,about_me_field_txtvec_160,about_me_field_txtvec_161,about_me_field_txtvec_162,about_me_field_txtvec_163,about_me_field_txtvec_164,about_me_field_txtvec_165,about_me_field_txtvec_166,about_me_field_txtvec_167,about_me_field_txtvec_168,about_me_field_txtvec_169,about_me_field_txtvec_170,about_me_field_txtvec_171,about_me_field_txtvec_172,about_me_field_txtvec_173,about_me_field_txtvec_174,about_me_field_txtvec_175,about_me_field_txtvec_176,about_me_field_txtvec_177,about_me_field_txtvec_178,about_me_field_txtvec_179,about_me_field_txtvec_180,about_me_field_txtvec_181,about_me_field_txtvec_182,about_me_field_txtvec_183,about_me_field_txtvec_184,about_me_field_txtvec_185,about_me_field_txtvec_186,about_me_field_txtvec_187,about_me_field_txtvec_188,about_me_field_txtvec_189,about_me_field_txtvec_190,about_me_field_txtvec_191,about_me_field_txtvec_192,about_me_field_txtvec_193,about_me_field_txtvec_194,about_me_field_txtvec_195,about_me_field_txtvec_196,about_me_field_txtvec_197,about_me_field_txtvec_198,about_me_field_txtvec_199,about_me_field_txtvec_200,about_me_field_txtvec_201,about_me_field_txtvec_202,about_me_field_txtvec_203,about_me_field_txtvec_204,about_me_field_txtvec_205,about_me_field_txtvec_206,about_me_field_txtvec_207,about_me_field_txtvec_208,about_me_field_txtvec_209,about_me_field_txtvec_210,about_me_field_txtvec_211,about_me_field_txtvec_212,about_me_field_txtvec_213,about_me_field_txtvec_214,about_me_field_txtvec_215,about_me_field_txtvec_216,...,proposal_field_txtvec_51,proposal_field_txtvec_52,proposal_field_txtvec_53,proposal_field_txtvec_54,proposal_field_txtvec_55,proposal_field_txtvec_56,proposal_field_txtvec_57,proposal_field_txtvec_58,proposal_field_txtvec_59,proposal_field_txtvec_60,proposal_field_txtvec_61,proposal_field_txtvec_62,proposal_field_txtvec_63,proposal_field_txtvec_64,proposal_field_txtvec_65,proposal_field_txtvec_66,proposal_field_txtvec_67,proposal_field_txtvec_68,proposal_field_txtvec_69,proposal_field_txtvec_70,proposal_field_txtvec_71,proposal_field_txtvec_72,proposal_field_txtvec_73,proposal_field_txtvec_74,proposal_field_txtvec_75,proposal_field_txtvec_76,proposal_field_txtvec_77,proposal_field_txtvec_78,proposal_field_txtvec_79,proposal_field_txtvec_80,proposal_field_txtvec_81,proposal_field_txtvec_82,proposal_field_txtvec_83,proposal_field_txtvec_84,proposal_field_txtvec_85,proposal_field_txtvec_86,proposal_field_txtvec_87,proposal_field_txtvec_88,proposal_field_txtvec_89,proposal_field_txtvec_90,proposal_field_txtvec_91,proposal_field_txtvec_92,proposal_field_txtvec_93,proposal_field_txtvec_94,proposal_field_txtvec_95,proposal_field_txtvec_96,proposal_field_txtvec_97,proposal_field_txtvec_98,proposal_field_txtvec_99,proposal_field_txtvec_100,proposal_field_txtvec_101,proposal_field_txtvec_102,proposal_field_txtvec_103,proposal_field_txtvec_104,proposal_field_txtvec_105,proposal_field_txtvec_106,proposal_field_txtvec_107,proposal_field_txtvec_108,proposal_field_txtvec_109,proposal_field_txtvec_110,proposal_field_txtvec_111,proposal_field_txtvec_112,proposal_field_txtvec_113,proposal_field_txtvec_114,proposal_field_txtvec_115,proposal_field_txtvec_116,proposal_field_txtvec_117,proposal_field_txtvec_118,proposal_field_txtvec_119,proposal_field_txtvec_120,proposal_field_txtvec_121,proposal_field_txtvec_122,proposal_field_txtvec_123,proposal_field_txtvec_124,proposal_field_txtvec_125,proposal_field_txtvec_126,proposal_field_txtvec_127,proposal_field_txtvec_128,proposal_field_txtvec_129,proposal_field_txtvec_130,proposal_field_txtvec_131,proposal_field_txtvec_132,proposal_field_txtvec_133,proposal_field_txtvec_134,proposal_field_txtvec_135,proposal_field_txtvec_136,proposal_field_txtvec_137,proposal_field_txtvec_138,proposal_field_txtvec_139,proposal_field_txtvec_140,proposal_field_txtvec_141,proposal_field_txtvec_142,proposal_field_txtvec_143,proposal_field_txtvec_144,proposal_field_txtvec_145,proposal_field_txtvec_146,proposal_field_txtvec_147,proposal_field_txtvec_148,proposal_field_txtvec_149,proposal_field_txtvec_150,proposal_field_txtvec_151,proposal_field_txtvec_152,proposal_field_txtvec_153,proposal_field_txtvec_154,proposal_field_txtvec_155,proposal_field_txtvec_156,proposal_field_txtvec_157,proposal_field_txtvec_158,proposal_field_txtvec_159,proposal_field_txtvec_160,proposal_field_txtvec_161,proposal_field_txtvec_162,proposal_field_txtvec_163,proposal_field_txtvec_164,proposal_field_txtvec_165,proposal_field_txtvec_166,proposal_field_txtvec_167,proposal_field_txtvec_168,proposal_field_txtvec_169,proposal_field_txtvec_170,proposal_field_txtvec_171,proposal_field_txtvec_172,proposal_field_txtvec_173,proposal_field_txtvec_174,proposal_field_txtvec_175,proposal_field_txtvec_176,proposal_field_txtvec_177,proposal_field_txtvec_178,proposal_field_txtvec_179,proposal_field_txtvec_180,proposal_field_txtvec_181,proposal_field_txtvec_182,proposal_field_txtvec_183,proposal_field_txtvec_184,proposal_field_txtvec_185,proposal_field_txtvec_186,proposal_field_txtvec_187,proposal_field_txtvec_188,proposal_field_txtvec_189,proposal_field_txtvec_190,proposal_field_txtvec_191,proposal_field_txtvec_192,proposal_field_txtvec_193,proposal_field_txtvec_194,proposal_field_txtvec_195,proposal_field_txtvec_196,proposal_field_txtvec_197,proposal_field_txtvec_198,proposal_field_txtvec_199,proposal_field_txtvec_200,proposal_field_txtvec_201,proposal_field_txtvec_202,proposal_field_txtvec_203,proposal_field_txtvec_204,proposal_field_txtvec_205,proposal_field_txtvec_206,proposal_field_txtvec_207,proposal_field_txtvec_208,proposal_field_txtvec_209,proposal_field_txtvec_210,proposal_field_txtvec_211,proposal_field_txtvec_212,proposal_field_txtvec_213,proposal_field_txtvec_214,proposal_field_txtvec_215,proposal_field_txtvec_216,proposal_field_txtvec_217,proposal_field_txtvec_218,proposal_field_txtvec_219,proposal_field_txtvec_220,proposal_field_txtvec_221,proposal_field_txtvec_222,proposal_field_txtvec_223,proposal_field_txtvec_224,proposal_field_txtvec_225,proposal_field_txtvec_226,proposal_field_txtvec_227,proposal_field_txtvec_228,proposal_field_txtvec_229,proposal_field_txtvec_230,proposal_field_txtvec_231,proposal_field_txtvec_232,proposal_field_txtvec_233,proposal_field_txtvec_234,proposal_field_txtvec_235,proposal_field_txtvec_236,proposal_field_txtvec_237,proposal_field_txtvec_238,proposal_field_txtvec_239,proposal_field_txtvec_240,proposal_field_txtvec_241,proposal_field_txtvec_242,proposal_field_txtvec_243,proposal_field_txtvec_244,proposal_field_txtvec_245,proposal_field_txtvec_246,proposal_field_txtvec_247,proposal_field_txtvec_248,proposal_field_txtvec_249,proposal_field_txtvec_250,proposal_field_txtvec_251,proposal_field_txtvec_252,proposal_field_txtvec_253,proposal_field_txtvec_254,proposal_field_txtvec_255,proposal_field_txtvec_256,proposal_field_txtvec_257,proposal_field_txtvec_258,proposal_field_txtvec_259,proposal_field_txtvec_260,proposal_field_txtvec_261,proposal_field_txtvec_262,proposal_field_txtvec_263,proposal_field_txtvec_264,proposal_field_txtvec_265,proposal_field_txtvec_266,proposal_field_txtvec_267,proposal_field_txtvec_268,proposal_field_txtvec_269,proposal_field_txtvec_270,proposal_field_txtvec_271,proposal_field_txtvec_272,proposal_field_txtvec_273,proposal_field_txtvec_274,proposal_field_txtvec_275,proposal_field_txtvec_276,proposal_field_txtvec_277,proposal_field_txtvec_278,proposal_field_txtvec_279,proposal_field_txtvec_280,proposal_field_txtvec_281,proposal_field_txtvec_282,proposal_field_txtvec_283,proposal_field_txtvec_284,proposal_field_txtvec_285,proposal_field_txtvec_286,proposal_field_txtvec_287,proposal_field_txtvec_288,proposal_field_txtvec_289,proposal_field_txtvec_290,proposal_field_txtvec_291,proposal_field_txtvec_292,proposal_field_txtvec_293,proposal_field_txtvec_294,proposal_field_txtvec_295,proposal_field_txtvec_296,proposal_field_txtvec_297,proposal_field_txtvec_298,proposal_field_txtvec_299,proposal_field_txtvec_300
0,4863,15031,50.0,422,573.0,835.0,16,0,1,,0,0,0,41.0,32,Kenya,,0,13,0,,1,,,0.0,1,0,46,62,78,1429,My\r\n name is Julius Moturi aged 44 and i run...,My\r\n business revolves around sourcing and s...,The loan will be used in purchasing of drugs a...,0.005334,0.19704,-0.147241,-0.129046,0.099042,-0.010735,0.022164,-0.147691,0.045266,2.097052,-0.267564,0.08349,0.034284,-0.064016,-0.093086,-0.054804,-0.041256,1.13311,-0.176089,-0.007717,-0.037049,-0.052533,-0.098345,0.000215,0.059247,-0.005509,-0.065463,-0.03557,0.040065,-0.013519,-0.010093,0.06411,-0.026226,0.00332,0.053918,-0.06886,0.018232,-0.029697,-0.079219,-0.028608,0.016786,0.020905,0.044793,-0.108533,-0.020523,0.084645,-0.135869,-0.02641,0.057315,-0.01647,-0.079196,0.0014,-0.007715,-0.034016,0.078058,0.041547,-0.033194,-0.06248,-0.003992,-0.082701,-0.01609,-0.059694,-0.095857,0.165523,0.042999,-0.137183,0.011763,0.103157,0.012608,0.111246,0.013943,0.056732,0.173298,0.025337,0.116038,0.042948,0.10673,-0.01925,-0.08459,0.142018,0.040107,0.094704,-0.096763,-0.008263,0.010075,-0.148585,-0.023273,-0.06644,0.215335,0.055516,-0.09909,0.026575,-0.044171,0.024918,0.149086,0.006738,0.024719,-0.037127,-0.063755,-0.062754,-0.04457,0.040136,-0.082468,-0.077009,0.041507,-0.668004,0.107942,-0.021505,0.059401,-0.057606,0.037934,-0.140673,0.083927,-0.12401,-0.007633,-0.074748,0.018398,0.059864,0.030987,0.004441,0.050851,-0.014981,0.076751,-0.066101,0.044466,0.131618,-0.031273,-0.141945,0.031706,-0.052039,0.008087,-0.025309,-0.126043,0.071689,0.127243,0.019734,-0.045172,0.003638,0.027942,0.005864,-1.076815,0.008661,0.161463,0.016902,-0.015733,-0.037855,-0.08942,0.049407,0.058271,-0.104935,-0.079656,0.018669,0.123772,-0.019618,-0.044159,-0.01092,-0.119686,-0.031629,-0.051848,-0.128813,-0.005021,0.039796,0.006116,-0.054729,-0.072411,-0.148676,0.060309,-0.065373,0.167171,-0.015165,-0.046184,-0.029678,0.097808,-0.049404,-0.07767,0.021992,-0.121999,0.020912,0.033826,0.02801,0.035644,-0.063254,-0.129518,-0.098008,-0.062779,0.001454,-0.068396,-0.028609,0.03409,-0.002351,-0.043037,-0.05068,-0.099302,0.016488,0.046694,0.127623,0.026915,-0.113566,0.035538,0.101193,-0.0242,-0.104954,-0.085387,0.024815,0.138488,0.093079,0.054861,-0.001541,-0.038315,-0.030454,-0.158155,-0.02442,-0.017727,-0.102206,0.055471,0.079703,...,-0.022024,-0.001316,0.008705,-0.022529,0.129659,-0.044189,-0.034186,-0.139172,-0.055848,-0.089801,-0.052343,-0.129641,-0.086181,0.201801,0.093826,-0.129184,-0.00583,0.127544,0.032208,0.077698,0.059653,0.050267,0.192142,-0.002884,0.070535,0.045991,0.1185,-0.076081,-0.069773,0.212751,-0.012546,0.150264,-0.140694,0.029062,0.02348,-0.171741,-0.040265,-0.065481,0.27864,0.072909,-0.135152,0.038192,-0.007742,0.047436,0.056081,0.011197,0.004131,-0.150117,-0.024931,-0.022846,-0.071349,0.024418,-0.097479,-0.062646,0.109573,-0.847528,0.14969,0.040295,0.009372,-0.041261,0.041759,-0.178167,0.139369,-0.097876,0.040461,-0.026092,0.012655,0.037679,0.056507,0.006227,0.080894,-0.032358,0.000656,-0.002274,0.00669,0.067913,-0.049741,-0.106599,0.044316,0.00897,-0.01292,-0.007877,-0.150276,0.074918,0.074909,0.03858,-0.050342,-0.031836,0.008667,-0.012578,-1.077557,0.026014,0.122965,-0.075748,-0.013735,-0.083887,-0.059964,0.077079,0.054489,-0.065728,-0.037905,0.040605,0.025753,-0.017191,-0.031857,-0.081231,-0.105204,0.002481,-0.008021,-0.11127,-0.001633,0.045311,-0.074138,-0.075448,-0.055051,-0.112408,0.110356,0.009867,0.190417,-0.061,0.009775,0.002245,0.091633,-0.092346,-0.006184,0.062603,-0.065325,0.020182,0.022205,0.048158,0.058879,-0.060296,-0.113716,-0.127915,-0.103132,0.000995,-0.096621,-0.070427,0.048946,0.020875,0.011862,0.011928,-0.062683,0.014262,0.064039,0.097164,-0.010283,-0.086218,0.036937,0.176831,0.02611,-0.132224,-0.104781,0.027497,0.133798,0.097097,0.075096,0.007108,0.034191,0.080983,-0.047864,-0.079154,-0.040492,-0.118335,0.072906,0.146449,-0.065933,0.01019,-0.197598,0.028748,0.014248,-0.042142,-0.086577,0.047323,-0.030462,-0.034574,-0.047483,0.169674,-0.026905,-0.034778,-0.073342,-0.000182,0.096918,0.070087,-0.074222,-0.103697,-0.006189,-0.048093,-0.076708,0.066148,0.094176,0.011528,0.045823,0.15234,0.202885,-0.255834,-0.006191,-0.137879,-0.104558,0.108508,0.074289,-0.098639,-0.050595,0.014111,0.108685,0.230513,0.14594,-0.090462,-0.082804,0.019442,0.088175,0.174671,0.040153,0.085895,0.110488,-0.174428,-0.002492,0.074181,0.407439,-0.032295,0.173604,-0.070134,-0.091411,-0.103979,-0.064275,0.013071,0.014751,0.075767,0.083165,0.181637,0.138995,0.017648,-0.007074,0.00239,-0.01432,-0.10838,0.132448,-0.094853,0.108559,-0.041275,-0.245914,0.071075,0.023381,-0.055131,0.009472,-0.066535,-0.052272,-0.069259,-0.012483,0.124293
1,5687,15031,150.0,748,573.0,835.0,16,0,1,,1,0,1,41.0,32,Kenya,,0,10,8,,1,,,0.0,1,1,46,62,78,1429,My\r\n name is Julius Moturi aged 44 and i run...,My\r\n business revolves around sourcing and s...,The loan will be used to purchase more drugs f...,0.005334,0.19704,-0.147241,-0.129046,0.099042,-0.010735,0.022164,-0.147691,0.045266,2.097052,-0.267564,0.08349,0.034284,-0.064016,-0.093086,-0.054804,-0.041256,1.13311,-0.176089,-0.007717,-0.037049,-0.052533,-0.098345,0.000215,0.059247,-0.005509,-0.065463,-0.03557,0.040065,-0.013519,-0.010093,0.06411,-0.026226,0.00332,0.053918,-0.06886,0.018232,-0.029697,-0.079219,-0.028608,0.016786,0.020905,0.044793,-0.108533,-0.020523,0.084645,-0.135869,-0.02641,0.057315,-0.01647,-0.079196,0.0014,-0.007715,-0.034016,0.078058,0.041547,-0.033194,-0.06248,-0.003992,-0.082701,-0.01609,-0.059694,-0.095857,0.165523,0.042999,-0.137183,0.011763,0.103157,0.012608,0.111246,0.013943,0.056732,0.173298,0.025337,0.116038,0.042948,0.10673,-0.01925,-0.08459,0.142018,0.040107,0.094704,-0.096763,-0.008263,0.010075,-0.148585,-0.023273,-0.06644,0.215335,0.055516,-0.09909,0.026575,-0.044171,0.024918,0.149086,0.006738,0.024719,-0.037127,-0.063755,-0.062754,-0.04457,0.040136,-0.082468,-0.077009,0.041507,-0.668004,0.107942,-0.021505,0.059401,-0.057606,0.037934,-0.140673,0.083927,-0.12401,-0.007633,-0.074748,0.018398,0.059864,0.030987,0.004441,0.050851,-0.014981,0.076751,-0.066101,0.044466,0.131618,-0.031273,-0.141945,0.031706,-0.052039,0.008087,-0.025309,-0.126043,0.071689,0.127243,0.019734,-0.045172,0.003638,0.027942,0.005864,-1.076815,0.008661,0.161463,0.016902,-0.015733,-0.037855,-0.08942,0.049407,0.058271,-0.104935,-0.079656,0.018669,0.123772,-0.019618,-0.044159,-0.01092,-0.119686,-0.031629,-0.051848,-0.128813,-0.005021,0.039796,0.006116,-0.054729,-0.072411,-0.148676,0.060309,-0.065373,0.167171,-0.015165,-0.046184,-0.029678,0.097808,-0.049404,-0.07767,0.021992,-0.121999,0.020912,0.033826,0.02801,0.035644,-0.063254,-0.129518,-0.098008,-0.062779,0.001454,-0.068396,-0.028609,0.03409,-0.002351,-0.043037,-0.05068,-0.099302,0.016488,0.046694,0.127623,0.026915,-0.113566,0.035538,0.101193,-0.0242,-0.104954,-0.085387,0.024815,0.138488,0.093079,0.054861,-0.001541,-0.038315,-0.030454,-0.158155,-0.02442,-0.017727,-0.102206,0.055471,0.079703,...,-0.019095,0.01525,0.00917,-0.028067,0.135085,-0.027728,0.013829,-0.100817,-0.011243,-0.04754,0.033016,-0.096565,-0.075504,0.172572,0.127519,-0.083566,-0.010099,0.089549,0.069007,0.033426,0.029014,0.049568,0.153157,-0.039078,0.048918,0.023526,0.082414,-0.05457,0.039455,0.168569,0.031767,0.132833,-0.121885,-0.014393,-0.019247,-0.172302,-0.132338,0.048891,0.217189,0.060537,-0.146157,0.036169,0.002553,0.037539,0.032072,0.004075,0.040086,-0.076691,-0.07638,-0.087458,-0.056089,0.046456,-0.086354,-0.058618,0.108828,-0.993582,0.047754,-0.019102,0.02043,0.011151,0.03526,-0.149874,0.094765,-0.129747,0.048738,-0.064938,0.071385,0.004592,0.028845,0.020498,0.065116,0.006436,-0.0042,-0.044875,0.035897,0.027421,0.00248,-0.076054,0.085214,0.001337,0.085475,-0.064179,-0.086655,0.027906,0.069509,0.05725,-0.00284,-0.100929,0.002093,0.067589,-0.775282,0.043491,0.182926,-0.061236,0.018388,-0.03899,-0.014483,0.01968,0.052552,-0.03906,-0.045021,0.066654,0.079304,-0.033807,-0.077919,-0.033278,-0.034943,-0.018957,-0.077906,-0.109916,0.022904,0.04977,-0.031838,-0.051977,-0.035436,-0.095585,0.153108,-0.035322,0.146603,-0.03208,0.057034,-0.022705,0.069918,-0.01685,-0.000958,0.082599,-0.042301,-0.006397,0.090079,0.024912,0.060438,-0.000576,-0.154753,-0.061783,-0.045193,0.011366,-0.062761,-0.067246,0.047668,0.023611,-0.043945,0.003889,-0.049562,0.001397,-0.030413,0.086224,-0.059898,-0.067752,-0.016239,0.182457,0.039812,-0.12652,-0.074317,0.043387,0.12944,0.033505,0.079823,0.0077,0.031171,0.024271,-0.029889,-0.109458,-0.026195,-0.11853,0.027125,0.005849,-0.039537,0.082941,-0.144742,-0.009685,-0.001085,-0.03644,-0.089924,-0.023567,-0.01412,0.032882,-0.030575,0.132016,-0.045414,-0.007764,-0.100753,0.003241,0.153813,0.057932,-0.027858,-0.048011,0.037504,-0.137985,-0.055761,0.025189,0.057968,0.010911,0.003485,0.105477,0.21131,-0.167269,-0.093932,-0.118246,-0.143667,0.02719,0.078906,-0.064373,-0.082936,0.003923,0.090131,0.281142,0.103831,-0.06787,-0.07832,0.040056,0.057724,0.136847,-0.01725,0.095606,0.040779,-0.110778,-0.021141,0.005366,0.359668,-0.042812,0.041625,-0.057533,-0.086099,-0.132493,-0.085257,-0.006383,-0.023411,0.169605,0.005877,0.1913,0.076348,0.021219,-0.017894,0.028536,-0.054006,-0.074369,0.13861,-0.147411,0.072227,-0.070429,-0.255438,0.023818,0.013614,-0.046869,-0.043032,-0.01837,-0.017136,-0.108482,-0.055714,0.125604
2,4895,15057,250.0,504,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,,1,41,0,,1,,,0.0,0,0,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,"First and foremost, i would want to take this ...",0.038417,0.164513,-0.111832,-0.081935,0.128803,-0.078371,0.006664,-0.114832,0.031676,2.354455,-0.253142,0.057994,0.106669,-0.06202,-0.102681,-0.030842,-0.057929,1.221631,-0.218381,-0.005046,-0.017087,-0.03551,-0.117706,-0.012467,0.02852,0.053052,-0.066305,-0.041961,0.052694,-0.033234,-0.014114,0.053101,-0.011042,0.051555,0.048095,-0.093969,-0.011782,0.042241,-0.018971,-0.047216,-0.012297,0.077624,0.022149,-0.091148,-0.000921,0.119184,-0.102335,-0.007249,0.024746,-0.007759,-0.042129,-0.005203,0.016776,-0.014619,0.060122,0.028898,-0.019165,-0.076582,0.016074,-0.07626,-0.028666,-0.066493,-0.056011,0.139648,0.071138,-0.058063,0.052449,0.058963,0.022811,0.112718,0.004613,0.013714,0.170754,-0.027473,0.170694,0.07644,0.115822,-0.028939,-0.076933,0.18078,0.036058,0.117405,-0.130482,0.0018,0.003661,-0.153519,-0.111258,-0.000617,0.221764,0.064547,-0.103099,0.012006,-0.037631,-0.005444,0.0634,0.010092,0.05429,-0.056257,-0.073588,-0.014857,-0.0199,0.022013,-0.070672,-0.086591,0.051307,-0.767281,0.122446,-0.004454,0.018477,-0.017697,0.032386,-0.173693,0.130983,-0.075453,0.008979,-0.062223,0.07953,0.025901,0.035396,0.003548,0.132765,0.020993,0.052548,-0.056246,0.05581,0.050288,-0.001898,-0.101173,0.018042,0.001081,0.0015,-0.041642,-0.136178,0.07621,0.104434,0.017809,-0.000749,-0.000122,-0.010682,0.057856,-1.016452,0.057622,0.12217,-0.031639,-0.002283,-0.021854,-0.092124,0.04732,0.01425,-0.084896,-0.063259,0.053076,0.088921,-0.012714,-0.057787,-0.014756,-0.059405,-0.037225,-0.020943,-0.124591,0.008437,0.036292,-0.028754,-0.113359,-0.066873,-0.118796,0.078769,-0.057303,0.151031,-0.031428,-0.021232,-0.052563,0.07962,-0.070862,-0.074706,0.051276,-0.118108,0.027809,0.046322,0.045813,0.007074,-0.047391,-0.107277,-0.073769,-0.001932,-0.027334,-0.06848,-0.041887,0.063737,0.036883,-0.026534,-0.011857,-0.068036,0.014665,-0.002954,0.155614,0.004906,-0.079995,-0.001266,0.117209,0.017416,-0.126173,-0.091509,0.011609,0.19768,0.039106,0.055737,0.003928,-0.00429,-0.008478,-0.100369,-0.101852,-0.033223,-0.117237,0.055541,0.062612,...,-0.084264,0.007923,-0.026053,-0.088599,0.089559,-0.007502,-0.012213,-0.073939,-0.02679,-0.069676,-0.038613,-0.069222,-0.115138,0.196684,0.085332,-0.083998,0.022691,0.117182,0.116077,0.04706,0.105805,0.100738,0.236422,-0.046038,0.111896,0.036752,0.091095,-0.074996,-0.049345,0.166083,-0.02238,0.083555,-0.183497,0.011674,0.009683,-0.175927,-0.064966,-0.101767,0.297643,0.090603,-0.107307,-0.003266,-0.010309,0.146248,0.121465,-0.006985,0.029847,-0.098783,-0.046463,-0.028131,-0.027805,-0.013298,-0.049572,-0.070311,0.121246,-0.928927,0.10999,-0.017258,-0.006624,0.009372,0.039119,-0.162992,0.128491,-0.121845,0.021277,-0.092859,0.013435,0.000346,0.007537,0.012276,0.120032,-0.091078,0.010533,0.061048,0.096909,0.110513,-0.058741,-0.089451,0.03505,0.024912,0.009649,-0.004219,-0.15946,0.106656,0.086233,0.082299,-0.00323,-0.061903,0.009087,0.03603,-1.087809,0.065762,0.092865,-0.055619,0.00844,-0.12454,-0.078246,0.048492,-0.025508,-0.039102,-0.006363,0.039648,0.049517,-0.011435,-0.066486,-0.044979,-0.089192,-0.035131,-0.051342,-0.106646,0.000441,0.054787,-0.069838,-0.028131,-0.066909,-0.169915,0.088803,-0.034351,0.162643,-0.007506,-0.017037,-0.004065,0.153698,-0.135588,-0.031087,0.037768,-0.037589,0.051633,0.041449,0.049604,0.004287,0.001509,-0.103156,-0.053885,-0.02342,-0.019545,-0.078411,-0.075095,-0.014008,0.06767,0.020109,0.010322,-0.052664,-0.021779,0.056689,0.069094,-0.040105,-0.102341,0.012556,0.230285,-0.030626,-0.143035,-0.048921,-0.024056,0.172995,0.088118,0.017212,0.019319,0.033649,0.050245,0.000986,-0.030394,-0.041711,-0.128977,-0.016142,0.171795,-0.027095,0.024978,-0.183755,0.023331,0.021277,-0.055261,-0.085385,0.097107,-0.011071,0.010749,-0.071103,0.139109,-0.002918,-0.035572,-0.137951,-0.003563,0.024734,0.169537,-0.005618,-0.085393,-0.029762,-0.096814,-0.067237,0.129004,0.108298,-0.000169,-0.016136,0.141912,0.19465,-0.185571,-0.092099,-0.093298,-0.129224,0.174771,0.040181,-0.072567,-0.109484,0.015929,0.072133,0.175306,0.146926,-0.065315,-0.05204,0.035814,0.093498,0.147029,0.011992,0.086776,0.074659,-0.106588,-0.028891,0.093391,0.442776,0.050627,0.169051,-0.069812,-0.077934,-0.12544,-0.064273,0.014719,0.056409,0.141672,0.026448,0.189304,0.167604,0.060695,0.006295,0.001526,-0.017919,-0.104837,0.133654,-0.059386,0.147204,-0.006777,-0.207704,0.085733,0.003126,-0.039443,0.083205,-0.095055,-0.031572,-0.072249,0.058814,0.119693
3,10181,15057,244.0,627,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,9.0,1,35,0,,1,,,0.0,0,1,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,I will spend the loan money on purchasing drug...,0.038417,0.164513,-0.111832,-0.081935,0.128803,-0.078371,0.006664,-0.114832,0.031676,2.354455,-0.253142,0.057994,0.106669,-0.06202,-0.102681,-0.030842,-0.057929,1.221631,-0.218381,-0.005046,-0.017087,-0.03551,-0.117706,-0.012467,0.02852,0.053052,-0.066305,-0.041961,0.052694,-0.033234,-0.014114,0.053101,-0.011042,0.051555,0.048095,-0.093969,-0.011782,0.042241,-0.018971,-0.047216,-0.012297,0.077624,0.022149,-0.091148,-0.000921,0.119184,-0.102335,-0.007249,0.024746,-0.007759,-0.042129,-0.005203,0.016776,-0.014619,0.060122,0.028898,-0.019165,-0.076582,0.016074,-0.07626,-0.028666,-0.066493,-0.056011,0.139648,0.071138,-0.058063,0.052449,0.058963,0.022811,0.112718,0.004613,0.013714,0.170754,-0.027473,0.170694,0.07644,0.115822,-0.028939,-0.076933,0.18078,0.036058,0.117405,-0.130482,0.0018,0.003661,-0.153519,-0.111258,-0.000617,0.221764,0.064547,-0.103099,0.012006,-0.037631,-0.005444,0.0634,0.010092,0.05429,-0.056257,-0.073588,-0.014857,-0.0199,0.022013,-0.070672,-0.086591,0.051307,-0.767281,0.122446,-0.004454,0.018477,-0.017697,0.032386,-0.173693,0.130983,-0.075453,0.008979,-0.062223,0.07953,0.025901,0.035396,0.003548,0.132765,0.020993,0.052548,-0.056246,0.05581,0.050288,-0.001898,-0.101173,0.018042,0.001081,0.0015,-0.041642,-0.136178,0.07621,0.104434,0.017809,-0.000749,-0.000122,-0.010682,0.057856,-1.016452,0.057622,0.12217,-0.031639,-0.002283,-0.021854,-0.092124,0.04732,0.01425,-0.084896,-0.063259,0.053076,0.088921,-0.012714,-0.057787,-0.014756,-0.059405,-0.037225,-0.020943,-0.124591,0.008437,0.036292,-0.028754,-0.113359,-0.066873,-0.118796,0.078769,-0.057303,0.151031,-0.031428,-0.021232,-0.052563,0.07962,-0.070862,-0.074706,0.051276,-0.118108,0.027809,0.046322,0.045813,0.007074,-0.047391,-0.107277,-0.073769,-0.001932,-0.027334,-0.06848,-0.041887,0.063737,0.036883,-0.026534,-0.011857,-0.068036,0.014665,-0.002954,0.155614,0.004906,-0.079995,-0.001266,0.117209,0.017416,-0.126173,-0.091509,0.011609,0.19768,0.039106,0.055737,0.003928,-0.00429,-0.008478,-0.100369,-0.101852,-0.033223,-0.117237,0.055541,0.062612,...,-0.047325,0.005186,0.006221,-0.039794,0.056833,0.009812,-0.024757,-0.138363,-0.050875,-0.137443,-0.014637,-0.07604,-0.022134,0.202519,0.145874,-0.104954,-0.045015,0.083879,0.059911,0.015732,0.031884,0.047042,0.21286,-0.038879,0.107402,0.076282,0.136524,-0.048363,0.030751,0.171611,-0.012545,0.066124,-0.121426,0.02937,0.00728,-0.159349,0.053419,-0.074619,0.36116,0.063032,-0.047909,-0.070505,0.015151,-0.009313,0.039609,-0.05318,0.048445,-0.054045,-0.074258,-0.042303,-0.031114,0.039182,-0.085443,-0.017696,0.048315,-0.669447,0.150421,-0.005372,0.00356,0.031739,0.030371,-0.206197,0.146891,-0.076542,0.02785,-0.121265,0.027868,-0.102027,0.053224,0.00857,0.070011,0.008036,0.031634,0.048224,-0.007451,0.084008,-0.013305,-0.06875,0.013641,-0.050421,0.114058,-0.088033,-0.132382,0.078345,0.137894,0.011811,-0.073056,-0.058963,-0.011405,-0.017725,-0.857099,0.122318,0.110496,-0.014863,0.009683,-0.031834,0.007712,0.094548,-0.006873,-0.042445,0.035769,0.03961,0.110877,0.054038,-0.025917,-0.057788,-0.076757,0.00874,-0.078336,-0.09063,0.031117,0.039168,-0.051037,-0.090037,-0.029182,-0.033445,0.13127,-0.051365,0.134262,-0.00414,0.053227,-0.02475,0.040731,-0.023367,-0.044129,0.046712,-0.037563,0.024654,0.053725,0.005986,0.073053,-0.013199,-0.121896,0.002271,-0.048146,-0.003956,-0.070042,-0.063442,-0.007585,0.02855,0.048381,0.03552,-0.05177,-0.035579,-0.006042,0.11562,0.001423,-0.033045,-0.017218,0.096183,0.064408,-0.145542,-0.072704,0.012916,0.097437,0.048224,-0.005682,-0.016486,0.015149,0.03121,-0.028565,-0.060433,-0.057731,-0.10363,0.01062,0.040131,-0.04508,-0.067087,-0.149827,0.104823,-0.057915,-0.010599,-0.140428,-0.045723,-0.042329,0.002648,0.014009,0.099857,-0.009144,-0.044339,-0.076249,0.026002,0.120089,0.078027,-0.02761,-0.085675,0.002765,-0.124841,-0.076962,0.042863,-0.004066,-0.018882,0.007637,0.076479,0.164104,-0.168996,-0.02334,-0.055464,-0.130926,0.106574,0.045066,-0.074815,-0.076123,-0.037801,0.076409,0.231813,0.009032,-0.115723,-0.048555,0.049379,0.087939,0.084619,0.010298,0.102466,0.063274,-0.124404,-0.023333,0.04758,0.316704,-0.02874,0.105763,-0.090604,-0.059083,-0.057665,-0.066831,-0.053071,-0.07269,0.078708,0.009933,0.166282,0.072077,0.000364,0.001178,-0.029351,-0.038755,-0.050794,0.108757,-0.09522,0.100103,0.020906,-0.199675,0.03284,0.025781,-0.012515,-0.009813,0.0086,-0.038415,-0.127675,0.012516,0.054503
4,20720,15057,220.0,627,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,9.0,1,32,0,0.7,1,,31.08,0.0,0,2,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,I will spend the loan money on purchasing drug...,0.038417,0.164513,-0.111832,-0.081935,0.128803,-0.078371,0.006664,-0.114832,0.031676,2.354455,-0.253142,0.057994,0.106669,-0.06202,-0.102681,-0.030842,-0.057929,1.221631,-0.218381,-0.005046,-0.017087,-0.03551,-0.117706,-0.012467,0.02852,0.053052,-0.066305,-0.041961,0.052694,-0.033234,-0.014114,0.053101,-0.011042,0.051555,0.048095,-0.093969,-0.011782,0.042241,-0.018971,-0.047216,-0.012297,0.077624,0.022149,-0.091148,-0.000921,0.119184,-0.102335,-0.007249,0.024746,-0.007759,-0.042129,-0.005203,0.016776,-0.014619,0.060122,0.028898,-0.019165,-0.076582,0.016074,-0.07626,-0.028666,-0.066493,-0.056011,0.139648,0.071138,-0.058063,0.052449,0.058963,0.022811,0.112718,0.004613,0.013714,0.170754,-0.027473,0.170694,0.07644,0.115822,-0.028939,-0.076933,0.18078,0.036058,0.117405,-0.130482,0.0018,0.003661,-0.153519,-0.111258,-0.000617,0.221764,0.064547,-0.103099,0.012006,-0.037631,-0.005444,0.0634,0.010092,0.05429,-0.056257,-0.073588,-0.014857,-0.0199,0.022013,-0.070672,-0.086591,0.051307,-0.767281,0.122446,-0.004454,0.018477,-0.017697,0.032386,-0.173693,0.130983,-0.075453,0.008979,-0.062223,0.07953,0.025901,0.035396,0.003548,0.132765,0.020993,0.052548,-0.056246,0.05581,0.050288,-0.001898,-0.101173,0.018042,0.001081,0.0015,-0.041642,-0.136178,0.07621,0.104434,0.017809,-0.000749,-0.000122,-0.010682,0.057856,-1.016452,0.057622,0.12217,-0.031639,-0.002283,-0.021854,-0.092124,0.04732,0.01425,-0.084896,-0.063259,0.053076,0.088921,-0.012714,-0.057787,-0.014756,-0.059405,-0.037225,-0.020943,-0.124591,0.008437,0.036292,-0.028754,-0.113359,-0.066873,-0.118796,0.078769,-0.057303,0.151031,-0.031428,-0.021232,-0.052563,0.07962,-0.070862,-0.074706,0.051276,-0.118108,0.027809,0.046322,0.045813,0.007074,-0.047391,-0.107277,-0.073769,-0.001932,-0.027334,-0.06848,-0.041887,0.063737,0.036883,-0.026534,-0.011857,-0.068036,0.014665,-0.002954,0.155614,0.004906,-0.079995,-0.001266,0.117209,0.017416,-0.126173,-0.091509,0.011609,0.19768,0.039106,0.055737,0.003928,-0.00429,-0.008478,-0.100369,-0.101852,-0.033223,-0.117237,0.055541,0.062612,...,-0.047325,0.005186,0.006221,-0.039794,0.056833,0.009812,-0.024757,-0.138363,-0.050875,-0.137443,-0.014637,-0.07604,-0.022134,0.202519,0.145874,-0.104954,-0.045015,0.083879,0.059911,0.015732,0.031884,0.047042,0.21286,-0.038879,0.107402,0.076282,0.136524,-0.048363,0.030751,0.171611,-0.012545,0.066124,-0.121426,0.02937,0.00728,-0.159349,0.053419,-0.074619,0.36116,0.063032,-0.047909,-0.070505,0.015151,-0.009313,0.039609,-0.05318,0.048445,-0.054045,-0.074258,-0.042303,-0.031114,0.039182,-0.085443,-0.017696,0.048315,-0.669447,0.150421,-0.005372,0.00356,0.031739,0.030371,-0.206197,0.146891,-0.076542,0.02785,-0.121265,0.027868,-0.102027,0.053224,0.00857,0.070011,0.008036,0.031634,0.048224,-0.007451,0.084008,-0.013305,-0.06875,0.013641,-0.050421,0.114058,-0.088033,-0.132382,0.078345,0.137894,0.011811,-0.073056,-0.058963,-0.011405,-0.017725,-0.857099,0.122318,0.110496,-0.014863,0.009683,-0.031834,0.007712,0.094548,-0.006873,-0.042445,0.035769,0.03961,0.110877,0.054038,-0.025917,-0.057788,-0.076757,0.00874,-0.078336,-0.09063,0.031117,0.039168,-0.051037,-0.090037,-0.029182,-0.033445,0.13127,-0.051365,0.134262,-0.00414,0.053227,-0.02475,0.040731,-0.023367,-0.044129,0.046712,-0.037563,0.024654,0.053725,0.005986,0.073053,-0.013199,-0.121896,0.002271,-0.048146,-0.003956,-0.070042,-0.063442,-0.007585,0.02855,0.048381,0.03552,-0.05177,-0.035579,-0.006042,0.11562,0.001423,-0.033045,-0.017218,0.096183,0.064408,-0.145542,-0.072704,0.012916,0.097437,0.048224,-0.005682,-0.016486,0.015149,0.03121,-0.028565,-0.060433,-0.057731,-0.10363,0.01062,0.040131,-0.04508,-0.067087,-0.149827,0.104823,-0.057915,-0.010599,-0.140428,-0.045723,-0.042329,0.002648,0.014009,0.099857,-0.009144,-0.044339,-0.076249,0.026002,0.120089,0.078027,-0.02761,-0.085675,0.002765,-0.124841,-0.076962,0.042863,-0.004066,-0.018882,0.007637,0.076479,0.164104,-0.168996,-0.02334,-0.055464,-0.130926,0.106574,0.045066,-0.074815,-0.076123,-0.037801,0.076409,0.231813,0.009032,-0.115723,-0.048555,0.049379,0.087939,0.084619,0.010298,0.102466,0.063274,-0.124404,-0.023333,0.04758,0.316704,-0.02874,0.105763,-0.090604,-0.059083,-0.057665,-0.066831,-0.053071,-0.07269,0.078708,0.009933,0.166282,0.072077,0.000364,0.001178,-0.029351,-0.038755,-0.050794,0.108757,-0.09522,0.100103,0.020906,-0.199675,0.03284,0.025781,-0.012515,-0.009813,0.0086,-0.038415,-0.127675,0.012516,0.054503


In [34]:
df_summed = attach_vectorized_column_summed(dat, ['about_me_field', 'about_business_field', 'proposal_field'])
df_summed.head()

parsing about_me_field
parsing about_business_field
parsing proposal_field


Unnamed: 0,id,borrower_id,usd_amount,len_proposal,len_about_me,len_about_business,len_address,missing_natl_id,missing_referred_by,application_time,default_flag,fraud_flag,nonfraud_default,friends_count,country_id,name,category_id,invited_flag,made_pmts,missed_pmts,sift_science_score,english_flag,business_years,usd_installment_amount,reserve_fee_pct,sift_labeled_bad,prior_loans,country_internet_users,country_life_expectancy,country_literacy,country_gdppc,about_me_field,about_business_field,proposal_field,sum_txtvec_1,sum_txtvec_2,sum_txtvec_3,sum_txtvec_4,sum_txtvec_5,sum_txtvec_6,sum_txtvec_7,sum_txtvec_8,sum_txtvec_9,sum_txtvec_10,sum_txtvec_11,sum_txtvec_12,sum_txtvec_13,sum_txtvec_14,sum_txtvec_15,sum_txtvec_16,sum_txtvec_17,sum_txtvec_18,sum_txtvec_19,sum_txtvec_20,sum_txtvec_21,sum_txtvec_22,sum_txtvec_23,sum_txtvec_24,sum_txtvec_25,sum_txtvec_26,sum_txtvec_27,sum_txtvec_28,sum_txtvec_29,sum_txtvec_30,sum_txtvec_31,sum_txtvec_32,sum_txtvec_33,sum_txtvec_34,sum_txtvec_35,sum_txtvec_36,sum_txtvec_37,sum_txtvec_38,sum_txtvec_39,sum_txtvec_40,sum_txtvec_41,sum_txtvec_42,sum_txtvec_43,sum_txtvec_44,sum_txtvec_45,sum_txtvec_46,sum_txtvec_47,sum_txtvec_48,sum_txtvec_49,sum_txtvec_50,sum_txtvec_51,sum_txtvec_52,sum_txtvec_53,sum_txtvec_54,sum_txtvec_55,sum_txtvec_56,sum_txtvec_57,sum_txtvec_58,sum_txtvec_59,sum_txtvec_60,sum_txtvec_61,sum_txtvec_62,sum_txtvec_63,sum_txtvec_64,sum_txtvec_65,sum_txtvec_66,sum_txtvec_67,sum_txtvec_68,sum_txtvec_69,sum_txtvec_70,sum_txtvec_71,sum_txtvec_72,sum_txtvec_73,sum_txtvec_74,sum_txtvec_75,sum_txtvec_76,sum_txtvec_77,sum_txtvec_78,sum_txtvec_79,sum_txtvec_80,sum_txtvec_81,sum_txtvec_82,sum_txtvec_83,sum_txtvec_84,sum_txtvec_85,sum_txtvec_86,sum_txtvec_87,sum_txtvec_88,sum_txtvec_89,sum_txtvec_90,sum_txtvec_91,sum_txtvec_92,sum_txtvec_93,sum_txtvec_94,sum_txtvec_95,sum_txtvec_96,sum_txtvec_97,sum_txtvec_98,sum_txtvec_99,sum_txtvec_100,sum_txtvec_101,sum_txtvec_102,sum_txtvec_103,sum_txtvec_104,sum_txtvec_105,sum_txtvec_106,sum_txtvec_107,sum_txtvec_108,sum_txtvec_109,sum_txtvec_110,sum_txtvec_111,sum_txtvec_112,sum_txtvec_113,sum_txtvec_114,sum_txtvec_115,sum_txtvec_116,sum_txtvec_117,sum_txtvec_118,sum_txtvec_119,sum_txtvec_120,sum_txtvec_121,sum_txtvec_122,sum_txtvec_123,sum_txtvec_124,sum_txtvec_125,sum_txtvec_126,sum_txtvec_127,sum_txtvec_128,sum_txtvec_129,sum_txtvec_130,sum_txtvec_131,sum_txtvec_132,sum_txtvec_133,sum_txtvec_134,sum_txtvec_135,sum_txtvec_136,sum_txtvec_137,sum_txtvec_138,sum_txtvec_139,sum_txtvec_140,sum_txtvec_141,sum_txtvec_142,sum_txtvec_143,sum_txtvec_144,sum_txtvec_145,sum_txtvec_146,sum_txtvec_147,sum_txtvec_148,sum_txtvec_149,sum_txtvec_150,sum_txtvec_151,sum_txtvec_152,sum_txtvec_153,sum_txtvec_154,sum_txtvec_155,sum_txtvec_156,sum_txtvec_157,sum_txtvec_158,sum_txtvec_159,sum_txtvec_160,sum_txtvec_161,sum_txtvec_162,sum_txtvec_163,sum_txtvec_164,sum_txtvec_165,sum_txtvec_166,sum_txtvec_167,sum_txtvec_168,sum_txtvec_169,sum_txtvec_170,sum_txtvec_171,sum_txtvec_172,sum_txtvec_173,sum_txtvec_174,sum_txtvec_175,sum_txtvec_176,sum_txtvec_177,sum_txtvec_178,sum_txtvec_179,sum_txtvec_180,sum_txtvec_181,sum_txtvec_182,sum_txtvec_183,sum_txtvec_184,sum_txtvec_185,sum_txtvec_186,sum_txtvec_187,sum_txtvec_188,sum_txtvec_189,sum_txtvec_190,sum_txtvec_191,sum_txtvec_192,sum_txtvec_193,sum_txtvec_194,sum_txtvec_195,sum_txtvec_196,sum_txtvec_197,sum_txtvec_198,sum_txtvec_199,sum_txtvec_200,sum_txtvec_201,sum_txtvec_202,sum_txtvec_203,sum_txtvec_204,sum_txtvec_205,sum_txtvec_206,sum_txtvec_207,sum_txtvec_208,sum_txtvec_209,sum_txtvec_210,sum_txtvec_211,sum_txtvec_212,sum_txtvec_213,sum_txtvec_214,sum_txtvec_215,sum_txtvec_216,sum_txtvec_217,sum_txtvec_218,sum_txtvec_219,sum_txtvec_220,sum_txtvec_221,sum_txtvec_222,sum_txtvec_223,sum_txtvec_224,sum_txtvec_225,sum_txtvec_226,sum_txtvec_227,sum_txtvec_228,sum_txtvec_229,sum_txtvec_230,sum_txtvec_231,sum_txtvec_232,sum_txtvec_233,sum_txtvec_234,sum_txtvec_235,sum_txtvec_236,sum_txtvec_237,sum_txtvec_238,sum_txtvec_239,sum_txtvec_240,sum_txtvec_241,sum_txtvec_242,sum_txtvec_243,sum_txtvec_244,sum_txtvec_245,sum_txtvec_246,sum_txtvec_247,sum_txtvec_248,sum_txtvec_249,sum_txtvec_250,sum_txtvec_251,sum_txtvec_252,sum_txtvec_253,sum_txtvec_254,sum_txtvec_255,sum_txtvec_256,sum_txtvec_257,sum_txtvec_258,sum_txtvec_259,sum_txtvec_260,sum_txtvec_261,sum_txtvec_262,sum_txtvec_263,sum_txtvec_264,sum_txtvec_265,sum_txtvec_266,sum_txtvec_267,sum_txtvec_268,sum_txtvec_269,sum_txtvec_270,sum_txtvec_271,sum_txtvec_272,sum_txtvec_273,sum_txtvec_274,sum_txtvec_275,sum_txtvec_276,sum_txtvec_277,sum_txtvec_278,sum_txtvec_279,sum_txtvec_280,sum_txtvec_281,sum_txtvec_282,sum_txtvec_283,sum_txtvec_284,sum_txtvec_285,sum_txtvec_286,sum_txtvec_287,sum_txtvec_288,sum_txtvec_289,sum_txtvec_290,sum_txtvec_291,sum_txtvec_292,sum_txtvec_293,sum_txtvec_294,sum_txtvec_295,sum_txtvec_296,sum_txtvec_297,sum_txtvec_298,sum_txtvec_299,sum_txtvec_300
0,4863,15031,50.0,422,573.0,835.0,16,0,1,,0,0,0,41.0,32,Kenya,,0,13,0,,1,,,0.0,1,0,46,62,78,1429,My\r\n name is Julius Moturi aged 44 and i run...,My\r\n business revolves around sourcing and s...,The loan will be used in purchasing of drugs a...,-0.221,0.334,-0.153,-0.255,-0.204,-0.206,-0.172,-0.09,0.163,5.61,-0.826,0.339,0.038,0.066,0.029,-0.118,-0.116,3.091,-0.176,0.162,-0.112,0.141,-0.365,-0.019,-0.029,0.01,0.116,-0.255,0.325,-0.067,-0.161,-0.219,0.101,0.006999999,0.107,-0.086,0.022,0.046,0.022,0.021,0.033,0.075,0.407,-0.384,-0.18,-0.014,0.003,0.061,0.138,0.2,-0.012,-0.098,0.122,-0.031,0.358,-0.144,-0.058,-0.322,-0.193,-0.147,0.042,-0.428,-0.215,0.178,0.442,-0.259,0.079,0.265,0.194,-0.175,0.175,-0.147,0.291,-0.074,0.249,0.087,0.244,-0.03,0.113,0.295,0.291,0.194,-0.162,0.2,-0.221,-0.111,-0.235,0.341,0.432,0.135,-0.09,0.027,0.038,-0.192,-0.119,-0.164,0.066,-0.254,-0.002,0.018,-0.247,0.097,-0.184,-0.157,0.003,-3.034,0.178,-0.131,-0.033,0.033,0.15,-0.262,0.153,-0.232,0.071,-0.171,0.149,-0.08,0.323,0.234,0.134,0.187,0.102,-0.244,0.059,0.003,0.098,-0.159,0.049,-0.096,0.215,0.085,-0.142,-0.087,0.118,0.035,-0.14,-0.147,0.167,0.157,-1.642,-0.165,0.417,-0.214,-0.178,-0.288,0.028,0.118,0.231,-0.221,-0.146,0.2,-0.029,-0.008,0.085,-0.159,-0.156,-0.072,-0.194,-0.257,-0.122,0.142,-0.228,-0.251,-0.058,0.109,0.368,-0.105,0.377,-0.163,-0.017,0.033,0.193,0.016,0.114,0.088,-0.348,0.181,0.357,0.46,0.318,-0.025,-0.197,-0.211,-0.164,-0.018,0.02,-0.032,-0.098,-0.01,-0.025,-0.019,0.09,-0.162,0.039,-0.103,0.051,-0.014,0.266,0.237,0.237,-0.432,-0.006,0.031,-0.204,0.249,0.136,-0.128,0.109,0.103,-0.136,-0.414,0.067,-0.136,0.298,-0.159,-0.019,-0.034,-0.441,0.044,-0.223,0.129,-0.307,0.048,-0.103,-0.109,-0.087,0.255,-0.258,-0.181,-0.018,-0.078,0.175,-0.15,0.031,-0.209,0.156,-0.123,0.01,0.122,-0.277,-0.101,0.05,0.351,0.293,-0.247,0.016,-0.204,-0.288,0.13,0.234,0.111,-0.065,0.156,0.135,0.45,0.111,-0.187,-0.289,0.163,0.013,0.348,0.04,0.343,0.053,-0.369,0.106,0.074,0.875,-0.109,0.123,0.081,-0.28,-0.292,-0.232,-0.061,-0.122,0.016,0.25,0.327,-0.114,0.22,0.01,0.23,0.093,-0.021,0.26,-0.212,-0.128,-0.021,-0.629,-0.003,-0.02,0.234,-0.142,-0.164,-0.083,-0.363,0.144,0.21
1,5687,15031,150.0,748,573.0,835.0,16,0,1,,1,0,1,41.0,32,Kenya,,0,10,8,,1,,,0.0,1,1,46,62,78,1429,My\r\n name is Julius Moturi aged 44 and i run...,My\r\n business revolves around sourcing and s...,The loan will be used to purchase more drugs f...,-0.199,0.409,-0.117,-0.259,-0.141,-0.216,-0.15,-0.065,0.12,5.714,-0.765,0.15,0.105,0.032,-0.059,-0.093,-0.027,3.114,-0.124,0.025,-0.113,0.087,-0.214,0.019,0.079,-0.057,0.149,-0.163,0.216,0.019,-0.106,-0.11,0.053,-0.185,0.152,-0.152,-0.099,0.015,-0.067,0.156,-0.048,0.082,0.325,-0.276,-0.239,0.083,-0.021,0.05,0.184,0.162,-0.135,-0.047,0.15,-0.083,0.337,-0.066,0.034,-0.289,-0.06,-0.07,0.119,-0.347,-0.22,0.128,0.4,-0.185,0.099,0.218,0.209,-0.119,0.07,-0.094,0.264,-0.104,0.275,0.093,0.195,0.038,0.194,0.224,0.397,0.215,-0.158,0.103,-0.313,-0.193,-0.274,0.328,0.324,0.112,-0.226,0.076,-0.037,-0.203,-0.145,-0.13,0.157,-0.068,-0.131,-0.131,-0.206,0.145,-0.103,-0.127,0.079,-3.14,0.06,-0.259,-0.053,0.058,0.146,-0.3,0.107,-0.287,0.056,-0.169,0.221,-0.067,0.218,0.212,0.09,0.171,0.094,-0.393,0.168,-0.021,0.154,-0.154,0.158,-0.058,0.332,-0.072,-0.068,-0.108,0.158,0.038,-0.042,-0.186,0.095,0.253,-1.536,-0.045,0.479,-0.104,-0.128,-0.186,-0.01,0.111,0.097,-0.174,-0.21,0.236,0.069,-0.115,0.024,-0.1,-0.006,-0.115,-0.264,-0.242,-0.065,0.149,-0.061,-0.188,-0.055,0.111,0.397,-0.211,0.26,-0.092,0.055,-0.102,0.143,0.077,0.069,0.141,-0.317,0.104,0.411,0.367,0.263,0.093,-0.324,-0.053,-0.011,-0.034,0.038,-0.093,-0.041,0.033,-0.12,-0.086,0.036,-0.175,-0.049,-0.026,-0.044,-0.08,0.154,0.238,0.246,-0.456,0.004,0.002,-0.116,0.15,0.173,-0.124,0.08,-0.049,-0.102,-0.424,0.046,-0.109,0.2,-0.417,0.033,0.104,-0.33,-0.011,-0.217,0.063,-0.363,-0.076,-0.124,0.008,0.018,0.199,-0.266,-0.113,-0.096,-0.063,0.358,-0.154,0.141,-0.067,0.211,-0.25,-0.02,0.071,-0.255,-0.048,0.014,0.345,0.327,-0.095,-0.132,-0.105,-0.361,0.047,0.246,0.119,-0.059,0.18,0.165,0.624,-0.022,-0.166,-0.282,0.154,-0.055,0.327,-0.063,0.26,-0.055,-0.287,0.017,0.006,0.745,-0.118,-0.168,0.141,-0.334,-0.35,-0.205,-0.022,-0.177,0.174,0.032,0.353,-0.255,0.134,-0.106,0.254,-0.035,0.01,0.348,-0.295,-0.178,-0.078,-0.542,-0.081,-0.059,0.199,-0.136,-0.114,-0.006,-0.331,-0.022,0.226
2,4895,15057,250.0,504,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,,1,41,0,,1,,,0.0,0,0,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,"First and foremost, i would want to take this ...",-0.161,0.35,-0.168,-0.1,-0.037,-0.284,-0.245,-0.114,0.243,6.058,-0.658,0.162,0.038,0.186,0.111,-0.053,-0.09,3.194,-0.521,0.237,-0.069,0.056,-0.159,-0.325,-0.106,0.27,0.159,-0.254,0.329,-0.135,-0.23,-0.109,0.175,-1.164153e-10,0.094,0.011,0.005,0.126,-0.029,-0.075,-0.003,-0.001,0.213,-0.113,-0.23,0.158,-0.088,0.046,0.035,0.088,-0.166,0.027,0.216,-0.233,0.136,-0.094,0.005,-0.258,-0.026,-0.311,-0.042,-0.289,-0.176,0.237,0.602,-0.154,0.042,0.27,0.266,-0.113,0.089,-0.109,0.431,-0.237,0.394,0.213,0.337,-0.05,0.125,0.378,0.224,0.03,-0.292,0.2,-0.303,-0.13,-0.347,0.263,0.718,0.185,-0.057,-0.096,-0.047,-0.059,-0.074,-0.202,0.173,-0.21,-0.061,-0.066,-0.12,-0.023,0.034,-0.063,0.168,-3.162,0.347,-0.075,-0.219,0.166,0.179,-0.425,0.284,-0.062,0.153,-0.345,0.183,-0.155,0.13,0.154,0.334,-0.04,0.078,0.129,0.204,0.044,0.004,-0.108,0.014,-0.094,0.24,-0.066,-0.26,0.184,0.314,0.022,-0.12,-0.192,-0.012,0.137,-1.888,0.161,0.175,-0.111,0.02,-0.402,-0.009,0.236,-0.151,0.025,0.063,0.166,0.026,0.037,-0.006,-0.043,-0.048,-0.089,-0.143,-0.347,-0.045,0.166,-0.232,-0.394,-0.03,-0.045,0.296,-0.2,0.221,-0.148,0.049,-0.104,0.186,-0.165,-0.071,0.041,-0.244,0.241,0.328,0.388,0.213,0.047,-0.226,0.067,0.013,0.04,-0.06,-0.197,-0.266,0.113,0.035,0.009,0.064,-0.096,-0.071,0.106,-0.016,0.022,0.161,0.323,0.313,-0.543,-0.07,-0.077,-0.114,0.227,-0.066,-0.039,0.085,0.181,0.017,-0.209,-0.053,-0.199,0.167,-0.108,0.096,-0.075,-0.45,0.098,-0.2,-0.139,-0.496,0.095,-0.138,0.089,-0.083,0.208,-0.068,-0.156,-0.239,0.007,0.247,0.015,0.197,-0.113,0.006,-0.325,-0.063,0.247,-0.212,-0.17,0.128,0.267,0.339,-0.226,-0.079,-0.009,-0.416,0.194,0.1,-0.083,-0.204,0.027,0.129,0.448,0.025,-0.21,-0.104,0.207,0.128,0.378,0.093,0.155,0.064,-0.421,-0.047,0.063,0.921,0.128,0.112,-0.136,-0.218,-0.319,-0.181,-0.042,0.001,0.085,0.131,0.489,-0.135,0.174,0.06,0.225,-0.02,-0.163,0.115,-0.217,0.027,0.14,-0.434,0.04,0.106,0.095,0.129,-0.182,-0.121,-0.343,0.17,0.132
3,10181,15057,244.0,627,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,9.0,1,35,0,,1,,,0.0,0,1,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,I will spend the loan money on purchasing drug...,-0.265,0.474,0.055,-0.163,-0.109,-0.392,-0.426,-0.045,0.24,5.367,-0.713,0.249,-0.003,0.249,0.329,-0.109,0.007,3.319,-0.321,0.203,-0.202,0.133,-0.254,-0.241,-0.121,0.129,0.085,-0.203,0.381,0.122,-0.264,-0.267,0.221,0.015,0.077,0.103,-0.067,0.1,-0.04,0.011,0.064,-0.097,0.346,-0.098,-0.235,-0.002,-0.035,-0.056,0.056,0.129,-0.221,-0.006,0.282,-0.179,0.187,-0.098,-0.032,-0.351,-0.203,-0.439,0.04,-0.282,-0.169,0.243,0.666,-0.19,-0.109,0.277,0.182,-0.227,0.019,-0.078,0.503,-0.216,0.407,0.165,0.465,-0.012,0.268,0.351,0.302,0.075,-0.226,0.209,-0.291,-0.16,-0.147,0.264,0.671,0.139,-0.091,-0.256,-0.01,-0.366,-0.168,-0.169,0.299,-0.218,-0.076,-0.239,-0.127,0.186,-0.044,0.021,0.054,-2.812,0.478,-0.108,-0.188,0.076,0.244,-0.541,0.307,0.067,0.138,-0.367,0.114,-0.238,0.281,0.273,0.258,0.054,0.085,0.003,-0.012,0.015,0.064,-0.094,0.039,-0.179,0.312,-0.231,-0.239,0.199,0.525,-0.139,-0.241,-0.215,-0.051,0.065,-1.678,0.234,0.194,-0.062,-0.026,-0.265,0.087,0.361,-0.041,-0.067,0.159,0.138,0.053,0.016,0.09,-0.126,-0.054,0.057,-0.156,-0.377,0.066,0.153,-0.144,-0.453,0.034,0.163,0.407,-0.235,0.143,-0.161,0.246,-0.139,0.092,0.068,-0.035,-0.026,-0.267,0.223,0.369,0.229,0.298,0.024,-0.299,0.161,-0.054,0.058,-0.109,-0.232,-0.206,-0.001,0.135,0.022,0.052,-0.072,-0.133,0.216,0.055,0.081,0.063,0.011,0.467,-0.654,-0.111,-0.068,-0.166,0.212,0.006,-0.101,0.084,0.132,-0.087,-0.183,-0.098,-0.189,0.163,-0.33,0.078,-0.163,-0.329,0.2,-0.369,0.005,-0.598,-0.209,-0.243,0.125,-0.011,0.244,-0.124,-0.169,-0.179,0.031,0.426,-0.176,0.186,-0.158,0.194,-0.293,-0.111,0.105,-0.412,-0.188,0.116,0.209,0.431,-0.184,0.015,-0.007,-0.455,-0.037,0.166,-0.05,-0.109,-0.029,0.12,0.657,-0.159,-0.257,-0.088,0.272,0.084,0.268,-0.052,0.133,0.113,-0.436,-0.058,0.015,0.723,-0.028,0.064,-0.165,-0.192,-0.109,-0.137,-0.157,-0.203,0.0,0.096,0.358,-0.43,0.178,-0.052,0.203,-0.12,-0.036,0.173,-0.265,-0.049,0.157,-0.447,-0.022,0.222,0.169,-0.043,-0.02,-0.172,-0.411,0.15,0.098
4,20720,15057,220.0,627,1374.0,627.0,15,0,1,,0,0,0,,32,Kenya,9.0,1,32,0,0.7,1,,31.08,0.0,0,2,46,62,78,1429,I grew up in Nyamira Count in the Republic of ...,I will spend the loan money on purchasing drug...,I will spend the loan money on purchasing drug...,-0.265,0.474,0.055,-0.163,-0.109,-0.392,-0.426,-0.045,0.24,5.367,-0.713,0.249,-0.003,0.249,0.329,-0.109,0.007,3.319,-0.321,0.203,-0.202,0.133,-0.254,-0.241,-0.121,0.129,0.085,-0.203,0.381,0.122,-0.264,-0.267,0.221,0.015,0.077,0.103,-0.067,0.1,-0.04,0.011,0.064,-0.097,0.346,-0.098,-0.235,-0.002,-0.035,-0.056,0.056,0.129,-0.221,-0.006,0.282,-0.179,0.187,-0.098,-0.032,-0.351,-0.203,-0.439,0.04,-0.282,-0.169,0.243,0.666,-0.19,-0.109,0.277,0.182,-0.227,0.019,-0.078,0.503,-0.216,0.407,0.165,0.465,-0.012,0.268,0.351,0.302,0.075,-0.226,0.209,-0.291,-0.16,-0.147,0.264,0.671,0.139,-0.091,-0.256,-0.01,-0.366,-0.168,-0.169,0.299,-0.218,-0.076,-0.239,-0.127,0.186,-0.044,0.021,0.054,-2.812,0.478,-0.108,-0.188,0.076,0.244,-0.541,0.307,0.067,0.138,-0.367,0.114,-0.238,0.281,0.273,0.258,0.054,0.085,0.003,-0.012,0.015,0.064,-0.094,0.039,-0.179,0.312,-0.231,-0.239,0.199,0.525,-0.139,-0.241,-0.215,-0.051,0.065,-1.678,0.234,0.194,-0.062,-0.026,-0.265,0.087,0.361,-0.041,-0.067,0.159,0.138,0.053,0.016,0.09,-0.126,-0.054,0.057,-0.156,-0.377,0.066,0.153,-0.144,-0.453,0.034,0.163,0.407,-0.235,0.143,-0.161,0.246,-0.139,0.092,0.068,-0.035,-0.026,-0.267,0.223,0.369,0.229,0.298,0.024,-0.299,0.161,-0.054,0.058,-0.109,-0.232,-0.206,-0.001,0.135,0.022,0.052,-0.072,-0.133,0.216,0.055,0.081,0.063,0.011,0.467,-0.654,-0.111,-0.068,-0.166,0.212,0.006,-0.101,0.084,0.132,-0.087,-0.183,-0.098,-0.189,0.163,-0.33,0.078,-0.163,-0.329,0.2,-0.369,0.005,-0.598,-0.209,-0.243,0.125,-0.011,0.244,-0.124,-0.169,-0.179,0.031,0.426,-0.176,0.186,-0.158,0.194,-0.293,-0.111,0.105,-0.412,-0.188,0.116,0.209,0.431,-0.184,0.015,-0.007,-0.455,-0.037,0.166,-0.05,-0.109,-0.029,0.12,0.657,-0.159,-0.257,-0.088,0.272,0.084,0.268,-0.052,0.133,0.113,-0.436,-0.058,0.015,0.723,-0.028,0.064,-0.165,-0.192,-0.109,-0.137,-0.157,-0.203,0.0,0.096,0.358,-0.43,0.178,-0.052,0.203,-0.12,-0.036,0.173,-0.265,-0.049,0.157,-0.447,-0.022,0.222,0.169,-0.043,-0.02,-0.172,-0.411,0.15,0.098


# Prepare data for DataRobot and Upload

helper function that is not used until later

In [44]:
import datarobot as dr

def send_to_datarobot(df, proj_name):
    """
    helper function that takes data and uploads to DataRobot
    """
    df = df.copy()
    
    #drop irrelevant columns
    cols_to_drop = ['missed_pmts', 'nonfraud_default', 'sift_labeled_bad', 'sift_science_score', 
                'made_pmts', 'fraud_flag', 'reserve_fee_pct','country_id']
    final_df = df.drop(cols_to_drop, axis=1)
    #Combine the ids to obtain a unique user id
    final_df['unique_id'] = final_df.borrower_id.astype(str) +'__'+ final_df.id.astype(str)
    final_df.drop(['borrower_id', 'id'], axis = 1, inplace=True)
    
    #save file to disk, with compression
    file_path = '/Users/hamelhusain/Google Drive/Team Shared Folder - Zidisha/DR_'+proj_name+'.gz'
    print('Saving {}'.format(file_path))
    final_df.to_csv(file_path, compression = 'gzip', index=False)
    
    print('Beginning DR Upload')
    proj = dr.Project.create(file_path, project_name=proj_name)
    proj.set_target(target = 'default_flag', 
                metric = 'AUC', 
                #mode = dr.AUTOPILOT_MODE.QUICK,
                worker_count = 30)
    
    return proj

### Experiment 1:  Raw Text + Embeddings
https://app.datarobot.com/projects/58660cbbc808916efd0947d6/models



#### Drop irrelevant columns, check size of data

In [37]:
cols_to_drop = ['missed_pmts', 'nonfraud_default', 'sift_labeled_bad', 'sift_science_score', 
                'made_pmts', 'fraud_flag', 'reserve_fee_pct','country_id']
final_df = df.drop(cols_to_drop, axis=1)

In [38]:
#Combine the ids to obtain a unique user id
final_df['unique_id'] = final_df.borrower_id.astype(str) +'__'+ final_df.id.astype(str)
final_df.drop(['borrower_id', 'id'], axis = 1, inplace=True)

#### Save Data

In [47]:
file_path = '/Users/hamelhusain/Google Drive/Team Shared Folder - Zidisha/Data/TextVec_Experiment.gz'
final_df.to_csv(file_path, compression = 'gzip', index=False)

#### Upload to DataRobot

In [48]:
proj = dr.Project.create(file_path, project_name='Text Vector With Text')

In [50]:
proj.set_target(target = 'default_flag', 
                metric = 'AUC', 
                mode = dr.AUTOPILOT_MODE.QUICK,
                worker_count = 15)

ClientError: 422 client error: {'message': 'Target default_flag was already selected.'}

### Experiment 2: Remove Original Text Fields and Only Keep Embeddings

https://app.datarobot.com/projects/58660cbbc808916efd0947d6/models

In [112]:
filepath2 = '/Users/hamelhusain/Google Drive/Team Shared Folder - Zidisha/Data/TextVec_Experiment_NOTEXT.gz'
final_df_notext = final_df.drop(['about_me_field', 'about_business_field', 'proposal_field'], axis =1)
final_df_notext.to_csv(filepath2, compression = 'gzip', index=False)

In [125]:
proj2 = dr.Project.create(filepath2, project_name='Text Vector No Text')
proj2.set_target(target = 'default_flag', 
                metric = 'AUC', 
                worker_count = 10)

Project(Text Vector No Text)

In [None]:
proj.get_leaderboard_ui_permalink()

### Experiment 3: Cleaned Text, Summed Vectors From Three Fields

https://app.datarobot.com/manage-projects

cleaned text by removing stopwords, punctuation and converting to lower-case
Will have these three different feature lists:

1. raw text only
2. summed vectors without raw text
3. both


In [None]:
proj3 = send_to_datarobot(df_summed, 'Text_SummedVec')

Saving /Users/hamelhusain/Google Drive/Team Shared Folder - Zidisha/DR_Text_SummedVec.gz
Beginning DR Upload


# Appendix (Things I Tried But Didn't Work)

### Experiment - PCA on Word Emebeddings -> reduced from 300 to 25 dimensions

https://app.datarobot.com/projects/58660cbbc808916efd0947d6/models

25 dimensions captured 90% of the variance.  

didn't work well, noticable loss in accuracy 



In [115]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
df = final_df_notext.copy()
train_df, test_df, _, _ = train_test_split(df, df.default_flag, test_size=0.4, train_size=.6, random_state=1234)
val_df, holdout_df, _, _ = train_test_split(test_df, test_df.default_flag, 
                                            test_size = .5, train_size =.5, random_state=54)
val_df['Part'] = 'v'
train_df['Part'] = 't'
holdout_df['Part'] = 'h'
df = pd.concat([train_df, val_df, holdout_df])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [126]:
def embeddingPCA(df, column_names):
    df_temp = df.copy()
    
    pca = PCA(n_components=100)
    transformed_dfs = []
    
    for column_name in column_names:
        #compute names to drop and new names after pca
        names = [column_name+'_txtvec_'+str(x) for x in range(1, 301)]
        new_names = [column_name+'_pcavec_'+str(x) for x in range(1, 101)]
        
        #perform pca on training set and apply to entire dataset
        pca.fit(df_temp[df_temp.Part == 't'][names])
        pca_df = pd.DataFrame(pca.transform(df_temp[names]), columns=new_names)
        
        #collect components
        transformed_dfs.append(pca_df)
        #drop original vectors
        df_temp.drop(names, axis =1, inplace = True)

    #assemble all pieces
    return(pd.concat([df_temp] + transformed_dfs, axis = 1))

In [127]:
compressed_vec_df = embeddingPCA(df, ['about_me_field', 'about_business_field', 'proposal_field'])

In [128]:
compressed_vec_df.head()

Unnamed: 0,usd_amount,len_proposal,len_about_me,len_about_business,len_address,missing_natl_id,missing_referred_by,application_time,default_flag,friends_count,name,category_id,invited_flag,english_flag,business_years,usd_installment_amount,prior_loans,country_internet_users,country_life_expectancy,country_literacy,country_gdppc,unique_id,Part,about_me_field_pcavec_1,about_me_field_pcavec_2,about_me_field_pcavec_3,about_me_field_pcavec_4,about_me_field_pcavec_5,about_me_field_pcavec_6,about_me_field_pcavec_7,about_me_field_pcavec_8,about_me_field_pcavec_9,about_me_field_pcavec_10,about_me_field_pcavec_11,about_me_field_pcavec_12,about_me_field_pcavec_13,about_me_field_pcavec_14,about_me_field_pcavec_15,about_me_field_pcavec_16,about_me_field_pcavec_17,about_me_field_pcavec_18,about_me_field_pcavec_19,about_me_field_pcavec_20,about_me_field_pcavec_21,about_me_field_pcavec_22,about_me_field_pcavec_23,about_me_field_pcavec_24,about_me_field_pcavec_25,about_me_field_pcavec_26,about_me_field_pcavec_27,about_me_field_pcavec_28,about_me_field_pcavec_29,about_me_field_pcavec_30,about_me_field_pcavec_31,about_me_field_pcavec_32,about_me_field_pcavec_33,about_me_field_pcavec_34,about_me_field_pcavec_35,about_me_field_pcavec_36,about_me_field_pcavec_37,about_me_field_pcavec_38,about_me_field_pcavec_39,about_me_field_pcavec_40,about_me_field_pcavec_41,about_me_field_pcavec_42,about_me_field_pcavec_43,about_me_field_pcavec_44,about_me_field_pcavec_45,about_me_field_pcavec_46,about_me_field_pcavec_47,about_me_field_pcavec_48,about_me_field_pcavec_49,about_me_field_pcavec_50,about_me_field_pcavec_51,about_me_field_pcavec_52,about_me_field_pcavec_53,about_me_field_pcavec_54,about_me_field_pcavec_55,about_me_field_pcavec_56,about_me_field_pcavec_57,about_me_field_pcavec_58,about_me_field_pcavec_59,about_me_field_pcavec_60,about_me_field_pcavec_61,about_me_field_pcavec_62,about_me_field_pcavec_63,about_me_field_pcavec_64,about_me_field_pcavec_65,about_me_field_pcavec_66,about_me_field_pcavec_67,about_me_field_pcavec_68,about_me_field_pcavec_69,about_me_field_pcavec_70,about_me_field_pcavec_71,about_me_field_pcavec_72,about_me_field_pcavec_73,about_me_field_pcavec_74,about_me_field_pcavec_75,about_me_field_pcavec_76,about_me_field_pcavec_77,about_me_field_pcavec_78,about_me_field_pcavec_79,about_me_field_pcavec_80,about_me_field_pcavec_81,about_me_field_pcavec_82,about_me_field_pcavec_83,about_me_field_pcavec_84,about_me_field_pcavec_85,about_me_field_pcavec_86,about_me_field_pcavec_87,about_me_field_pcavec_88,about_me_field_pcavec_89,about_me_field_pcavec_90,about_me_field_pcavec_91,about_me_field_pcavec_92,about_me_field_pcavec_93,about_me_field_pcavec_94,about_me_field_pcavec_95,about_me_field_pcavec_96,about_me_field_pcavec_97,about_me_field_pcavec_98,about_me_field_pcavec_99,about_me_field_pcavec_100,about_business_field_pcavec_1,about_business_field_pcavec_2,about_business_field_pcavec_3,about_business_field_pcavec_4,about_business_field_pcavec_5,about_business_field_pcavec_6,about_business_field_pcavec_7,about_business_field_pcavec_8,about_business_field_pcavec_9,about_business_field_pcavec_10,about_business_field_pcavec_11,about_business_field_pcavec_12,about_business_field_pcavec_13,about_business_field_pcavec_14,about_business_field_pcavec_15,about_business_field_pcavec_16,about_business_field_pcavec_17,about_business_field_pcavec_18,about_business_field_pcavec_19,about_business_field_pcavec_20,about_business_field_pcavec_21,about_business_field_pcavec_22,about_business_field_pcavec_23,about_business_field_pcavec_24,about_business_field_pcavec_25,about_business_field_pcavec_26,about_business_field_pcavec_27,about_business_field_pcavec_28,about_business_field_pcavec_29,about_business_field_pcavec_30,about_business_field_pcavec_31,about_business_field_pcavec_32,about_business_field_pcavec_33,about_business_field_pcavec_34,about_business_field_pcavec_35,about_business_field_pcavec_36,about_business_field_pcavec_37,about_business_field_pcavec_38,about_business_field_pcavec_39,about_business_field_pcavec_40,about_business_field_pcavec_41,about_business_field_pcavec_42,about_business_field_pcavec_43,about_business_field_pcavec_44,about_business_field_pcavec_45,about_business_field_pcavec_46,about_business_field_pcavec_47,about_business_field_pcavec_48,about_business_field_pcavec_49,about_business_field_pcavec_50,about_business_field_pcavec_51,about_business_field_pcavec_52,about_business_field_pcavec_53,about_business_field_pcavec_54,about_business_field_pcavec_55,about_business_field_pcavec_56,about_business_field_pcavec_57,about_business_field_pcavec_58,about_business_field_pcavec_59,about_business_field_pcavec_60,about_business_field_pcavec_61,about_business_field_pcavec_62,about_business_field_pcavec_63,about_business_field_pcavec_64,about_business_field_pcavec_65,about_business_field_pcavec_66,about_business_field_pcavec_67,about_business_field_pcavec_68,about_business_field_pcavec_69,about_business_field_pcavec_70,about_business_field_pcavec_71,about_business_field_pcavec_72,about_business_field_pcavec_73,about_business_field_pcavec_74,about_business_field_pcavec_75,about_business_field_pcavec_76,about_business_field_pcavec_77,about_business_field_pcavec_78,about_business_field_pcavec_79,about_business_field_pcavec_80,about_business_field_pcavec_81,about_business_field_pcavec_82,about_business_field_pcavec_83,about_business_field_pcavec_84,about_business_field_pcavec_85,about_business_field_pcavec_86,about_business_field_pcavec_87,about_business_field_pcavec_88,about_business_field_pcavec_89,about_business_field_pcavec_90,about_business_field_pcavec_91,about_business_field_pcavec_92,about_business_field_pcavec_93,about_business_field_pcavec_94,about_business_field_pcavec_95,about_business_field_pcavec_96,about_business_field_pcavec_97,about_business_field_pcavec_98,about_business_field_pcavec_99,about_business_field_pcavec_100,proposal_field_pcavec_1,proposal_field_pcavec_2,proposal_field_pcavec_3,proposal_field_pcavec_4,proposal_field_pcavec_5,proposal_field_pcavec_6,proposal_field_pcavec_7,proposal_field_pcavec_8,proposal_field_pcavec_9,proposal_field_pcavec_10,proposal_field_pcavec_11,proposal_field_pcavec_12,proposal_field_pcavec_13,proposal_field_pcavec_14,proposal_field_pcavec_15,proposal_field_pcavec_16,proposal_field_pcavec_17,proposal_field_pcavec_18,proposal_field_pcavec_19,proposal_field_pcavec_20,proposal_field_pcavec_21,proposal_field_pcavec_22,proposal_field_pcavec_23,proposal_field_pcavec_24,proposal_field_pcavec_25,proposal_field_pcavec_26,proposal_field_pcavec_27,proposal_field_pcavec_28,proposal_field_pcavec_29,proposal_field_pcavec_30,proposal_field_pcavec_31,proposal_field_pcavec_32,proposal_field_pcavec_33,proposal_field_pcavec_34,proposal_field_pcavec_35,proposal_field_pcavec_36,proposal_field_pcavec_37,proposal_field_pcavec_38,proposal_field_pcavec_39,proposal_field_pcavec_40,proposal_field_pcavec_41,proposal_field_pcavec_42,proposal_field_pcavec_43,proposal_field_pcavec_44,proposal_field_pcavec_45,proposal_field_pcavec_46,proposal_field_pcavec_47,proposal_field_pcavec_48,proposal_field_pcavec_49,proposal_field_pcavec_50,proposal_field_pcavec_51,proposal_field_pcavec_52,proposal_field_pcavec_53,proposal_field_pcavec_54,proposal_field_pcavec_55,proposal_field_pcavec_56,proposal_field_pcavec_57,proposal_field_pcavec_58,proposal_field_pcavec_59,proposal_field_pcavec_60,proposal_field_pcavec_61,proposal_field_pcavec_62,proposal_field_pcavec_63,proposal_field_pcavec_64,proposal_field_pcavec_65,proposal_field_pcavec_66,proposal_field_pcavec_67,proposal_field_pcavec_68,proposal_field_pcavec_69,proposal_field_pcavec_70,proposal_field_pcavec_71,proposal_field_pcavec_72,proposal_field_pcavec_73,proposal_field_pcavec_74,proposal_field_pcavec_75,proposal_field_pcavec_76,proposal_field_pcavec_77,proposal_field_pcavec_78,proposal_field_pcavec_79,proposal_field_pcavec_80,proposal_field_pcavec_81,proposal_field_pcavec_82,proposal_field_pcavec_83,proposal_field_pcavec_84,proposal_field_pcavec_85,proposal_field_pcavec_86,proposal_field_pcavec_87,proposal_field_pcavec_88,proposal_field_pcavec_89,proposal_field_pcavec_90,proposal_field_pcavec_91,proposal_field_pcavec_92,proposal_field_pcavec_93,proposal_field_pcavec_94,proposal_field_pcavec_95,proposal_field_pcavec_96,proposal_field_pcavec_97,proposal_field_pcavec_98,proposal_field_pcavec_99,proposal_field_pcavec_100
0,50.0,422,573.0,835.0,16,0,1,,0,41.0,Kenya,,0,1,,,0,46,62,78,1429,15031__4863,t,-0.452464,-0.020609,0.038686,0.044421,-0.057978,-0.164204,0.015314,0.119357,-0.033161,0.018883,-0.052689,0.068519,0.022803,-0.020316,0.020145,0.027101,-0.033742,-0.01456,-0.003503,-0.087936,0.028989,-0.093624,-0.060633,-0.006939,-0.01197,0.036576,-0.059451,0.023272,-0.019035,-0.017879,0.036198,0.026767,0.01141,-0.04492,-0.001969,-0.014069,-0.000949,-0.00477,-0.012332,0.009808,-0.008921,0.005152,0.021469,0.033321,0.040138,-0.009453,-0.007417,-0.006656,0.006049,0.031206,0.02211,0.007811,-0.035129,0.011244,0.012239,-0.022703,-0.055776,-0.010699,0.00658,-0.042116,0.036484,-0.001732,0.010159,0.002646,3.5e-05,-0.018241,0.034784,-0.006028,0.011344,-0.003633,0.00605,-0.000521,-0.031909,-0.001193,-0.024856,0.027692,-0.022164,0.018939,-0.012609,-0.024571,0.002448,0.027277,-0.014159,0.001861,-0.020909,-0.01253,0.004513,0.004784,-0.001925,-0.004383,0.025213,0.022066,-0.014488,0.000199,-0.001656,8.8e-05,0.004603,-0.002479,-0.017315,-0.004263,-0.61379,0.00435,0.200959,-0.223262,-0.215368,-0.045924,0.033283,0.112771,0.014753,-0.139537,-0.174319,-9.6e-05,0.093512,-0.042669,0.068677,0.080001,0.069259,0.085027,0.003164,-0.208367,-0.034253,-0.071778,0.011648,0.074885,-0.012808,0.035622,0.018686,0.039983,0.027684,-0.022721,0.023809,-0.010624,-0.001683,-0.010726,-0.028356,-0.095093,-0.039695,-0.001786,-0.052328,0.007543,0.005626,-0.024839,0.007222,-0.012841,0.065839,-0.002725,0.017082,0.019077,-0.026916,0.007311,0.038188,0.047176,-0.001866,0.009316,-0.028507,-0.009296,0.01372,0.035044,-0.025747,0.025107,0.005188,0.002091,0.030301,0.007802,-0.059314,-0.012074,0.030199,-0.027865,-0.035101,-0.023814,-0.053293,0.039744,0.067393,0.006533,-0.001514,-0.001318,-0.002835,0.03444,-0.020053,-0.05656,-0.004786,0.014546,-0.018505,-0.002994,-0.020395,0.004723,0.027396,0.021039,-0.004472,-0.00027,0.001404,0.002798,-0.015976,-0.016927,-0.000107,0.005814,-0.018412,-0.006278,-0.020494,-0.010608,-0.392802,0.057965,-0.073519,0.01042,-0.285315,0.061431,0.011518,0.09646,-0.013832,-0.059726,0.128249,0.036704,0.090707,-0.009193,0.021461,0.087355,-0.040879,-0.033872,0.068984,0.013334,-0.032753,-0.080364,0.027995,-0.070825,-0.061312,0.016107,0.064169,0.063159,-0.018859,-0.1116,-0.018056,-0.033097,-0.109559,-0.021725,-0.013776,0.025086,-0.024255,-0.010034,-0.022526,0.047835,0.009365,-0.065829,0.010054,-0.006689,0.01297,0.011818,-0.058661,0.009519,0.046394,-0.022114,0.006237,-0.006436,0.001834,-0.021277,0.007047,-0.037167,-0.024178,-0.022581,-0.000655,0.051444,0.002606,0.01704,0.024285,-0.015225,-0.021961,-0.023757,-0.000671,0.01207,-0.026496,0.006242,-0.035156,-0.011218,0.004099,0.006017,-0.024985,0.009205,-0.023626,-0.013945,-0.003109,-0.015425,-0.026744,-0.038656,0.023047,0.003492,0.023095,-0.023388,-0.013779,-0.00639,-0.030645,0.003001,0.029019,0.022896,-0.026172,0.005735,-0.029785,0.0228,0.002596,0.02627,-0.009233,0.037127
1,150.0,748,573.0,835.0,16,0,1,,1,41.0,Kenya,,0,1,,,1,46,62,78,1429,15031__5687,t,-0.152005,0.052372,0.074781,-0.214973,-0.021547,-0.03203,0.087804,-0.058905,0.064238,-0.068226,-0.075392,0.042595,-0.040662,-0.028318,-0.019337,-0.008281,-0.03312,-0.019434,0.049275,-0.024889,-0.018772,0.022156,0.018968,-0.017212,0.084284,0.028598,-0.03834,0.024859,0.009805,-0.033665,0.040726,0.005874,0.014688,0.020857,-0.048201,-0.00145,-0.06653,0.008113,0.040005,0.005045,0.020354,0.035814,-0.010017,0.00591,0.00988,0.013337,-0.033966,-0.004215,-0.016033,-0.00883,-0.034419,0.013858,0.042582,-0.010634,-0.025758,-0.004348,-0.025581,3.2e-05,-0.012602,0.016956,0.005912,-0.022414,0.044867,-0.010144,-0.018246,-0.019079,0.006555,0.020697,0.030234,-0.02328,0.002668,0.028282,0.022808,0.00156,-0.002802,-0.009735,0.001371,-0.002741,-0.007167,0.011522,0.004541,0.006759,0.006065,-0.028072,0.024072,-0.018241,-0.016671,0.012817,0.006859,0.008559,-0.026432,-0.008754,-0.001446,-0.008615,-0.018956,-0.015342,0.026479,0.025159,-0.017808,0.00552,-0.065251,0.146739,-0.080042,-0.140603,-0.01179,-0.327025,0.247713,0.095539,-0.151304,-0.248473,0.129528,0.081438,-0.003988,0.122655,0.036475,-0.051013,0.060403,-0.07197,0.018819,-0.171977,-0.138978,0.008683,0.070663,-0.05859,-0.067785,-0.063399,-0.002881,0.022423,0.045085,0.027469,0.02347,-0.05034,0.024981,-0.005542,0.063567,0.053421,0.029612,-0.020452,0.001659,-0.063287,0.071145,-0.063967,0.064827,0.029919,-0.041056,-0.0008,-0.000598,0.005126,-0.048463,-0.00566,-0.04313,-0.009575,0.04096,-0.029217,0.044303,0.002146,-0.014033,0.002597,0.009945,0.001099,0.034391,0.041638,-0.018939,-0.014071,0.001237,-0.001412,-0.000389,-0.016247,0.013849,-0.00464,0.03079,-0.014463,-0.037764,0.018889,0.027509,0.009909,0.009252,0.021504,-0.009369,-0.035546,0.007943,0.037858,-0.017852,-0.021238,-0.003674,-0.022635,0.056493,0.00634,0.007083,0.036886,0.028353,0.000921,0.015935,0.033895,-1e-06,-0.045349,0.004948,0.015517,0.013999,-0.019304,-0.414585,0.043387,-0.001076,-0.199352,-0.190527,-0.065506,-0.001672,-0.028102,-0.073051,0.125895,0.087138,0.056677,0.108592,-0.002977,0.007787,-0.011851,0.019034,0.076799,-0.028478,-0.012292,0.068289,-0.067795,-0.051673,-0.049482,0.016352,-0.142066,0.071355,0.035123,-0.066781,0.016694,-0.045278,-0.073956,0.009749,0.044169,0.053613,0.08108,0.055791,-0.045286,0.088333,-0.006182,-0.003746,0.029965,-0.032154,0.015316,-0.03865,0.038755,0.016307,0.018698,-0.085566,0.013636,0.02987,-0.081516,0.04006,0.053474,0.014281,-0.019908,-0.01611,0.022554,0.073581,-0.000927,0.05331,0.001102,-0.011917,-0.003226,-0.033074,-0.036643,-0.008097,-0.045532,-0.018837,0.0164,0.014352,0.027564,0.05775,-0.013946,-0.047556,-0.007872,-0.032609,0.02539,0.046738,-0.00093,-0.022679,0.010183,0.001767,0.002596,-0.008116,0.026935,0.044489,-0.014083,-0.008585,-0.036861,-0.001984,5.2e-05,0.004342,0.017061,0.002391,-0.01604,-0.000841,-0.012864,0.004203,-0.016436
2,250.0,504,1374.0,627.0,15,0,1,,0,,Kenya,,1,1,,,0,46,62,78,1429,15057__4895,t,-0.289466,-0.027736,0.068674,-0.117728,-0.115345,-0.118819,0.113467,0.082218,-0.208501,0.052228,-0.072231,-0.031127,0.032414,-0.050952,0.082485,-0.004854,-0.043707,-0.021453,0.001695,-0.031649,0.000328,-0.01024,0.062597,-0.016148,-0.036498,0.057864,0.02848,-0.027678,0.02397,-0.006794,-0.04473,0.021124,0.051651,-0.019604,0.011945,0.019432,0.01771,-0.02276,0.04647,-0.01137,-0.016438,0.033032,-0.000523,-0.036013,0.040428,0.004598,-0.034123,0.018454,0.042698,-0.019875,0.053662,0.030002,-0.009543,-0.004098,0.014553,-0.000584,0.01734,-0.03514,0.001303,0.01662,0.007835,0.020114,-0.012892,-0.019218,-0.009218,0.025709,-0.007066,-0.033493,-0.008288,0.021546,-0.008245,-0.026873,0.000661,-0.000324,0.012303,-0.027275,0.00483,-0.017272,0.019197,-0.006419,0.021724,0.010719,0.00297,-0.018016,0.019328,-0.009408,-0.00137,0.015365,0.024001,0.001874,0.004593,-0.020377,0.023846,0.003572,0.040432,-0.005208,0.012094,0.003913,-0.024008,0.011968,-0.407253,-0.057035,-0.016527,-0.152227,-0.012603,-0.195259,-0.097927,0.17475,-0.152338,0.057433,-0.016203,0.017347,-0.099809,-0.056612,0.057837,0.103783,0.075957,0.00606,0.163461,-0.087272,0.071732,-0.06407,0.019328,-0.043732,0.057685,0.042413,-0.029681,0.042145,-0.041232,-0.018369,-0.043768,0.04652,0.046019,-0.007596,0.062913,-0.010657,0.082577,0.039165,-0.006342,0.059977,0.05406,-0.009417,0.002741,-0.02985,0.00464,-0.02755,0.01839,-0.029776,0.003554,-0.044091,0.027098,-0.071092,-0.002621,0.000991,-0.024982,0.015687,-0.014217,-0.00839,0.028631,-0.033182,-0.007913,-0.018751,0.018439,-0.019799,0.036832,-0.001743,-0.028781,-0.005392,-0.012208,0.019666,0.027886,-0.018626,-0.02058,0.059815,0.022513,0.036261,-0.004561,0.023926,-0.008766,0.040072,0.0039,-0.021954,0.011676,-0.013213,-0.021241,-0.039721,0.001748,-0.039466,-0.004843,-0.037135,0.000557,-0.013738,-0.005985,-0.007164,-0.017584,0.007937,-0.003532,0.007145,0.031598,-0.020038,-0.464549,0.054439,-0.378479,0.075,-0.37624,-0.106626,-0.116493,0.202044,-0.097025,-0.179078,-0.018095,-0.021532,0.049588,-0.014046,-0.008553,0.075906,0.001438,0.031024,-0.088429,-0.019089,-0.056044,0.107365,-0.043628,0.011863,0.02832,0.06023,-0.001877,-0.051299,0.001893,-0.04637,0.078225,-0.058885,-0.031514,0.053628,0.010223,-0.01092,0.020082,0.062111,0.014332,0.046544,0.003895,0.02634,0.041581,0.025273,0.015671,-0.059533,-0.03168,0.027761,-0.04027,0.034421,-0.092713,0.019423,-0.035796,-0.075727,-0.026989,-0.035205,0.015887,-0.032482,-0.0052,0.019051,-0.032315,-0.01598,-0.036479,-0.026755,0.043801,0.072673,0.04001,0.005046,-0.019042,-0.012524,0.029987,0.022723,0.030271,0.034863,-0.013874,0.006399,-0.006866,-0.008418,0.011616,0.014168,0.002153,0.034943,-0.004975,0.015473,-0.033888,0.040012,-0.010579,0.007846,-0.022998,0.017968,0.019916,0.041293,-0.013948,0.027211,-0.030351,0.015756,0.046405,0.042268,-0.004636,-0.044094
3,244.0,627,1374.0,627.0,15,0,1,,0,,Kenya,9.0,1,1,,,1,46,62,78,1429,15057__10181,t,0.273833,0.09515,-0.256238,-0.237596,0.12631,-0.002383,0.036198,-0.114631,-0.069998,-0.041154,-0.124872,0.024483,0.03503,-0.089591,-0.008872,0.012603,-0.028575,0.035448,0.003503,0.035311,0.003838,0.107558,-0.031035,-0.038494,-0.006403,0.082892,0.070131,-0.068527,-0.031423,0.038271,-0.010385,-0.014333,-0.09292,0.063249,-0.022175,-0.036794,0.055485,0.017835,0.006699,-0.065126,0.100192,-0.032453,0.042961,0.012975,0.048865,0.032219,0.066084,0.05391,0.031544,-0.013139,0.00794,0.019441,-0.080982,-0.008064,0.008952,0.019556,0.014447,0.009728,0.04622,0.004563,0.000414,-0.010286,0.059171,0.031521,-0.00106,-0.023508,0.036692,0.016776,0.014027,0.002244,-0.012021,-0.005776,0.003129,-0.004522,-0.034231,0.012487,-0.022518,0.011739,-0.038008,-0.003673,0.005401,-0.010654,-0.018505,-0.003186,-0.0334,0.013285,0.024607,0.014573,-0.035257,-0.027552,0.003181,0.017688,0.05065,-0.004221,-0.015362,-0.008776,0.009792,-0.010869,0.015535,-0.010789,0.166095,0.079909,-0.126642,-0.227441,0.082981,-0.002618,0.085472,0.025483,0.074966,-0.0992,-0.117575,-0.033071,0.025337,0.007985,0.07586,-0.058456,0.092345,-0.060526,-0.037042,0.031192,-0.009224,-0.018835,-0.015556,0.121166,0.044162,0.032028,-0.014466,-0.05702,-0.061215,0.087582,0.01003,0.018575,0.110423,-0.03593,0.024296,-0.04893,-0.038938,0.013566,0.106595,0.030813,-0.023734,-0.067961,-0.001089,-0.016437,0.008819,0.046047,0.012232,0.046393,0.046928,0.00853,0.036671,-0.009597,-0.003034,-0.061061,0.015063,-0.007201,0.022941,-0.023958,-0.016962,-0.000776,-0.015152,-0.056832,0.01602,0.041625,0.033075,-0.008864,0.023574,-0.025569,-0.050242,0.00978,0.029446,0.015826,-0.010131,-0.011077,-0.013404,0.045993,0.007648,0.020848,0.022828,0.019469,-0.000825,0.026934,-0.02984,0.004295,0.019261,-0.024328,-0.01802,0.040311,-0.021252,0.009234,-0.021533,0.018539,-0.019583,-0.002893,0.01148,-0.015619,0.013968,-0.048324,-0.007368,0.021542,-0.084682,-0.080408,-0.205179,-0.385472,0.041819,0.203539,0.209979,0.147918,0.107055,0.08777,0.060008,-0.027597,0.021866,0.049128,-0.064317,0.142743,0.048358,-0.0886,0.059089,-0.09458,0.033668,-0.044638,0.078952,0.012037,-0.039284,0.000552,-0.051093,-0.071176,-0.043612,0.025779,-0.079159,-0.008032,0.014659,0.061144,0.075128,0.029187,0.068442,0.083237,0.018054,0.083005,-0.080639,0.007942,0.01532,0.016216,-0.055214,0.015748,0.082535,0.043535,0.047913,-0.015584,-0.039473,-0.016582,-0.038106,-0.069785,-0.031654,0.008247,-0.030286,0.012932,-0.005405,-0.041363,-0.114042,-0.053723,-0.061009,0.018432,-0.04973,0.021259,-0.004088,-0.060623,-0.016888,-0.013954,-0.024464,-0.052817,0.00419,-0.022546,-0.029971,0.056029,-0.010158,-0.062169,-0.018739,0.002946,-0.017932,0.007862,-0.061455,0.034301,-0.017832,0.032798,0.015261,0.001556,0.020797,0.000118,-0.024582,0.031817,0.006462,0.015015,-0.017952,-0.01971,0.052,0.023646,-0.047722,-9.5e-05
4,220.0,627,1374.0,627.0,15,0,1,,0,,Kenya,9.0,1,1,,31.08,2,46,62,78,1429,15057__20720,v,-0.182544,0.010394,0.078445,-0.107128,0.035597,-0.099861,0.055537,-0.150695,0.070401,-0.14956,-0.026528,-0.171145,0.194121,-0.01392,-0.024681,-0.025653,-0.019596,0.071863,-0.01108,-0.049268,0.025978,0.104103,0.03122,0.032526,-0.24344,0.004974,0.02061,-0.047635,-0.031334,0.050454,-0.008986,-0.163598,0.052497,0.062778,0.094869,-0.082205,-0.110566,-0.024318,-0.04878,-0.032714,0.014794,0.029561,-0.017363,0.036391,0.005651,0.040324,-0.034946,0.001684,-0.00763,0.031903,-0.017757,-0.015671,0.039483,0.013832,0.008511,0.031405,0.055134,0.04634,-0.043831,-0.00309,-0.040846,-0.001786,-0.029581,0.041361,-0.042387,-0.004028,-0.014838,-0.042919,0.018735,-0.038617,-0.017557,-0.024076,-0.049742,-0.024863,0.007078,0.064059,-0.009144,-0.003816,0.001042,-0.000655,-0.013719,0.026498,0.013689,-0.009054,-0.045548,-0.009788,-0.026168,-0.018043,-0.014496,0.023254,0.016874,-0.011108,0.022241,0.006453,-0.012488,0.019816,-0.025071,-0.003824,-0.016347,-0.008629,-0.22778,-0.006978,0.147215,-0.261975,0.12961,0.09859,0.266354,-0.011106,0.119286,0.168701,-0.031464,-0.104033,-0.057917,-0.013642,-0.03431,-0.03823,0.067915,-0.0367,-0.032694,0.019126,-0.00251,0.011403,0.034781,-0.030624,0.129169,-0.042874,-0.063671,-0.05308,0.02781,-0.033134,-0.114776,0.010801,-0.11891,0.024748,-0.01135,-0.028425,-0.008229,0.00364,-0.012578,-0.029734,0.003888,-0.004546,-0.045512,-0.045801,0.060498,-0.054215,-0.021878,0.062002,-0.007418,-0.027718,-0.010949,-0.024296,0.00124,-0.0532,0.021888,0.042134,0.015181,-0.01259,-0.019807,0.000737,-0.01853,0.021578,0.044547,0.009846,0.002906,-0.014371,0.050933,-0.024002,0.026571,0.026848,0.012002,-0.002054,0.009136,-0.009964,0.005723,-0.00314,0.039335,0.004752,-0.020572,-0.031156,0.028973,-0.021757,-0.008125,-0.00905,-0.016718,0.00587,0.024118,-0.016082,0.00184,-0.000477,0.025676,0.006081,0.000319,-0.006366,0.022879,0.003574,-0.009262,-0.012606,0.000949,0.00243,0.180893,0.236161,0.132738,-0.06732,-0.209655,0.057666,0.186585,0.104413,0.012831,0.047229,0.102579,-0.067855,-0.058273,-0.085543,-0.145312,-0.010649,0.082806,-0.010391,-0.021148,-0.127642,-0.078779,-0.004486,-0.033445,0.016043,0.00423,0.02195,0.050688,0.007513,0.006962,-0.041979,-0.008964,0.01091,-0.029806,-0.022645,0.009284,0.061199,0.090248,-0.005789,-0.014616,0.009512,-0.028027,0.002587,-0.001093,-0.016502,-0.0605,-0.035726,0.015724,0.009891,0.004111,0.027037,-0.018776,-0.000353,0.011743,-0.009229,-0.039648,-0.006482,-0.035666,-0.018349,-0.005901,-0.012393,-0.008333,-0.051455,-0.055554,0.014352,0.021094,-0.000669,0.030948,0.003407,0.039151,-0.018769,0.013656,-0.019263,0.045163,-0.034423,-0.01107,0.023355,-0.021954,-0.054622,0.023834,0.007709,0.012707,-0.004082,-0.022583,0.014831,0.020435,-0.014325,-0.03674,-0.039277,-0.030332,0.002403,-0.011158,0.012851,0.025787,-0.008878,-0.031715,0.015525,0.016976,0.037019,0.03236,0.01582


In [129]:
tvh = dr.UserTVH(user_partition_col='Part', training_level='t', validation_level='v', holdout_level='h')
proj3 = dr.Project.create(compressed_vec_df, project_name='Text Vector PCA')

In [130]:
proj3.set_target(target = 'default_flag', 
                metric = 'AUC', 
                partitioning_method = tvh,
                mode = dr.AUTOPILOT_MODE.QUICK,
                worker_count = 10)

Project(Text Vector PCA)