In [9]:
!pip install joblib pandas xgboost scikit-learn gcsfs

Collecting gcsfs
[?25l  Downloading https://files.pythonhosted.org/packages/76/19/68ab4e6570a7882698058be8ecf1b195b0b784b838ac1b0ea82c422c0f5a/gcsfs-0.2.2.tar.gz (52kB)
[K    100% |████████████████████████████████| 61kB 2.9MB/s ta 0:00:011
Collecting google-auth-oauthlib (from gcsfs)
  Downloading https://files.pythonhosted.org/packages/74/a2/1323b1bce9935ac948cd4863509de16cf852cd80b12dd29e648c65fea93d/google_auth_oauthlib-0.4.0-py2.py3-none-any.whl
Building wheels for collected packages: gcsfs
  Building wheel for gcsfs (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/jovyan/.cache/pip/wheels/9f/0f/b9/5bc5222756d121ccace51ab3084a1c733380908a4e2f939038
Successfully built gcsfs
Installing collected packages: google-auth-oauthlib, gcsfs
Successfully installed gcsfs-0.2.2 google-auth-oauthlib-0.4.0
[33mYou are using pip version 19.0.1, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [55]:
import argparse
import logging
import joblib
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier
import datetime
import pickle
from sklearn.preprocessing import OneHotEncoder
import gcsfs

In [8]:
logging.basicConfig(format='%(message)s')
logging.getLogger().setLevel(logging.INFO)

In [113]:
PROJECT = 'mwpmltr'
BUCKET = 'mock_lc'
TRAIN_DATA_PATH = 'gs://{}/data/train.csv'.format(BUCKET)
TEST_DATA_PATH = 'gs://{}/data/test.csv'.format(BUCKET)
MODEL_PATH = '{}/models/lc_randomforest.pkl'.format(BUCKET)

RESPONSE = 'is_bad'

CAT_ENCODER_BASE = 'onehotencoder_'

PARAMS = {'max_depth': 50,
          'max_features': 25,
          'max_leaf_nodes': 445,
          'min_samples_leaf': 1,
          'min_samples_split': 0.000125,
          'min_weight_fraction_leaf': 0.0,
          'n_estimators': 2500}
RANDOM_SEED = 42

In [46]:
def read_data(path):
    df = pd.read_csv(path)
    y_df = df[RESPONSE]
    X_df = df.drop(RESPONSE, axis=1)
    return X_df, y_df

In [70]:
def process_dates(df):
        logging.info("Processing date column. Shape={}".format(df.shape))
        date_column = 'earliest_cr_line'

        a = datetime.datetime.now() # Should be fixed date in production
        df["earliest_cr_fill"] = df[date_column].fillna(str(datetime.datetime.now().strftime("%m/%d/%Y")))
        df["earliest_cr_fill"] = df["earliest_cr_fill"].apply(lambda x: expand_year(x))
        df["days_to_earliest_cr"] = df.earliest_cr_line.apply(
            lambda x: (pd.to_datetime(x, format="%m/%d/%y") - pd.datetime(1900, 1, 1)).days)

        df.drop([date_column, "earliest_cr_fill"], axis=1, inplace=True)
        return df
    
def process_text(df):
    logging.info("Processing text columns. Shape={}".format(df.shape))
    text_columns = ['Notes', 'purpose', 'emp_title']

    # Character counts
    df_with_processed_text = df.copy()
    for t in text_columns:
        df_with_processed_text[t + "_char_count"] = df[t].apply(lambda x: len(str(x)))

    df_with_processed_text.drop(text_columns, axis=1, inplace=True)
    return df_with_processed_text

def add_other_features(df):
    logging.info("Adding log features. Shape={}".format(df.shape))
    df['log_annual_inc'] = np.log(df['annual_inc'] + 1)
    df['log_mths_since_last_record'] = np.log(df['mths_since_last_record'] + 1)
    df['log_revol_bal'] = np.log(df['revol_bal'] + 1)

    return df

def process_categorical(df, is_train=True):
    logging.info("Processing categorical columns. Shape={}".format(df.shape))
    categorical_columns = ['home_ownership', 'verification_status', 'pymnt_plan', 'purpose_cat',
                           'addr_state', 'initial_list_status', 'policy_code']

    fs = gcsfs.GCSFileSystem(project=PROJECT)
    # One hot encoding
    df_new = df.copy()
    if is_train:
        for col in categorical_columns:
            enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
            enc.fit(df.loc[:, [col]])
            new_onehot_features = enc.categories_[0]
            encoded = pd.DataFrame(enc.transform(df.loc[:, [col]]))
            encoded.columns = list([col + f for f in new_onehot_features])
            df_new = pd.concat([df_new, encoded], axis=1)   

            with fs.open(BUCKET + "/encoders/" + CAT_ENCODER_BASE + col + ".pkl", 'wb') as f:
                pickle.dump(enc, f)
    else:
        for col in categorical_columns:
            with fs.open(BUCKET + "/encoders/" + CAT_ENCODER_BASE + col + ".pkl", 'rb') as f:
                enc = pickle.load(f)
                new_onehot_features = enc.categories_[0]
                encoded = pd.DataFrame(enc.transform(df.loc[:, [col]]), columns=[col + f for f in new_onehot_features])
                df_new = pd.concat([df_new, encoded], axis=1)

    df_new.drop(categorical_columns, axis=1, inplace=True)

    return df_new

def process_na(df):
    def flag_nulls(df):
        logging.info("Flagging null values")
        df_with_nulls = df.copy()
        for col in df.columns:
            if df[col].isna().sum() > 0:
                df_with_nulls[col + "_null"] = df[col].isna()
        return df_with_nulls

    logging.info("Processing nulls. Shape={}".format(df.shape))
    df = flag_nulls(df)
    df.fillna(-9999, inplace=True)
    df.emp_length = df.emp_length.apply(lambda x: 0 if x == 'na' else x)

    return df

def drop_extra(df):
    df.drop("zip_code", axis=1, inplace=True)
    return df

def expand_year(x):
    """Converts YY -> YYYY

    Assumes there are no records in the current year, which will be returned as-is
    """
    this_year = datetime.datetime.now().strftime("%y")
    if int(x[::-1][:2][::-1]) < int(this_year):
        return x[0:len(x) - 2] + "20" + x[len(x) - 2:]
    elif int(x[::-1][:2][::-1]) > int(this_year):
        return x[0:len(x) - 2] + "19" + x[len(x) - 2:]
    else:
        return x
    
def fill_columns(df, is_train):
    fs = gcsfs.GCSFileSystem(project=PROJECT)
    if is_train:
        with fs.open(BUCKET + "/encoders/colnames.pkl", 'wb') as f:
            pickle.dump(df.columns.values, f)
        return df
    else:
        with fs.open(BUCKET + "/encoders/colnames.pkl", 'rb') as f:
            features = pickle.load(f)
            
            test_cols = list(df.columns)
            cols_not_in_test = {c: 0 for c in features if c not in test_cols}
            df = df.assign(**cols_not_in_test)
            return df
        

In [71]:
def preprocess(df, is_train=True):
    try:
        df = df.drop(['Id', 'Unnamed: 0'], axis=1)
        df = process_dates(df)
        df = process_text(df)
        df = add_other_features(df)
        df = process_categorical(df, is_train)
        df = process_na(df)
        df = drop_extra(df)
        df = fill_columns(df, is_train)
        return df
    except Exception as e:
        logging.error("Could not preprocess data due to error: {}".format(e))

In [48]:
X_train, y_train = read_data(TRAIN_DATA_PATH)
X_test, y_test = read_data(TEST_DATA_PATH)

In [99]:
def train_model(X_train, y_train, X_test, y_test, params=PARAMS):
    """Train the model using Random Forest Classifier using tuning params from Bayesian Optimization"""
    model = RandomForestClassifier(random_state=RANDOM_SEED, verbose=2, n_jobs=-1, **params)
    
    logging.info("Training model...")
    model.fit(X_train, y_train)
    
    return model

def eval_model(model, X_test, y_test):
    """Evaluate test AUC"""
    predictions = model.predict_proba(X_test)
    logging.info("AUC={:0.2f}".format(roc_auc_score(y_test, predictions[:, 1])))
    

def save_model(model, model_path):
    """Store model object on GCS"""
    fs = gcsfs.GCSFileSystem(project=PROJECT)
    with fs.open(model_path, 'wb') as f:
        pickle.dump(model, f)

In [124]:
class LendingClubModel(object):
    def __init__(self):
        self.train_path = TRAIN_DATA_PATH
        self.test_path = TEST_DATA_PATH
        self.model_path = MODEL_PATH
        self.model = None
        
    def train(self):
        X_train, y_train = read_data(self.train_path)
        X_test, y_test = read_data(self.test_path)
        
        X_train = preprocess(X_train, True).to_numpy()
        X_test = preprocess(X_test, False).to_numpy()
        
        self.model = train_model(X_train, y_train, X_test, y_test)
        eval_model(self.model, X_test, y_test)
        save_model(self.model, self.model_path)
        
    def predict(self, X):
        if not self.model:
            fs = gcsfs.GCSFileSystem(project=PROJECT)
            with fs.open(self.model_path, 'rb') as f:
                self.model = joblib.load(f)
                
#         X_processed = preprocess(X, False)
        
        prediction = self.model.predict(data=X)
        
        return [[prediction.item(0), prediction.item[0]]]

In [123]:
m = LendingClubModel()
m.train()

Processing date column. Shape=(8000, 26)
Processing text columns. Shape=(8000, 26)
Adding log features. Shape=(8000, 26)
Processing categorical columns. Shape=(8000, 29)
Processing nulls. Shape=(8000, 114)
Flagging null values
Processing date column. Shape=(2000, 26)
Processing text columns. Shape=(2000, 26)
Adding log features. Shape=(2000, 26)
Processing categorical columns. Shape=(2000, 29)
Processing nulls. Shape=(2000, 114)
Flagging null values
Training model...
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.2s


building tree 1 of 2500
building tree 2 of 2500
building tree 3 of 2500
building tree 4 of 2500
building tree 5 of 2500
building tree 6 of 2500
building tree 7 of 2500
building tree 8 of 2500
building tree 9 of 2500
building tree 10 of 2500
building tree 11 of 2500
building tree 12 of 2500building tree 13 of 2500

building tree 14 of 2500
building tree 15 of 2500
building tree 16 of 2500
building tree 17 of 2500
building tree 18 of 2500
building tree 19 of 2500
building tree 20 of 2500
building tree 21 of 2500
building tree 22 of 2500
building tree 23 of 2500
building tree 24 of 2500
building tree 25 of 2500
building tree 26 of 2500
building tree 27 of 2500
building tree 28 of 2500
building tree 29 of 2500
building tree 30 of 2500
building tree 31 of 2500
building tree 32 of 2500
building tree 33 of 2500
building tree 34 of 2500
building tree 35 of 2500building tree 36 of 2500

building tree 37 of 2500
building tree 38 of 2500
building tree 39 of 2500
building tree 40 of 2500
building 

[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    0.8s


building tree 191 of 2500
building tree 192 of 2500
building tree 193 of 2500
building tree 194 of 2500
building tree 195 of 2500
building tree 196 of 2500
building tree 197 of 2500
building tree 198 of 2500
building tree 199 of 2500
building tree 200 of 2500building tree 201 of 2500

building tree 202 of 2500
building tree 203 of 2500
building tree 204 of 2500
building tree 205 of 2500
building tree 206 of 2500
building tree 207 of 2500
building tree 208 of 2500
building tree 209 of 2500
building tree 210 of 2500building tree 211 of 2500

building tree 212 of 2500building tree 213 of 2500

building tree 214 of 2500
building tree 215 of 2500
building tree 216 of 2500
building tree 217 of 2500
building tree 218 of 2500
building tree 219 of 2500
building tree 220 of 2500
building tree 221 of 2500
building tree 222 of 2500building tree 223 of 2500

building tree 224 of 2500
building tree 225 of 2500
building tree 226 of 2500
building tree 227 of 2500
building tree 228 of 2500
building tre

[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:    1.8s


building tree 365 of 2500building tree 366 of 2500


building tree 367 of 2500building tree 368 of 2500

building tree 369 of 2500
building tree 370 of 2500
building tree 371 of 2500
building tree 372 of 2500
building tree 373 of 2500
building tree 374 of 2500
building tree 375 of 2500
building tree 376 of 2500
building tree 377 of 2500
building tree 378 of 2500
building tree 379 of 2500
building tree 380 of 2500
building tree 381 of 2500building tree 382 of 2500

building tree 383 of 2500
building tree 384 of 2500
building tree 385 of 2500
building tree 386 of 2500
building tree 387 of 2500
building tree 388 of 2500
building tree 389 of 2500
building tree 390 of 2500
building tree 391 of 2500
building tree 392 of 2500
building tree 393 of 2500
building tree 394 of 2500
building tree 395 of 2500
building tree 396 of 2500
building tree 397 of 2500
building tree 398 of 2500
building tree 399 of 2500
building tree 400 of 2500
building tree 401 of 2500
building tree 402 of 2500
building tr

[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:    3.5s


building tree 649 of 2500
building tree 650 of 2500
building tree 651 of 2500
building tree 652 of 2500
building tree 653 of 2500
building tree 654 of 2500
building tree 655 of 2500
building tree 656 of 2500
building tree 657 of 2500
building tree 658 of 2500
building tree 659 of 2500
building tree 660 of 2500
building tree 661 of 2500
building tree 662 of 2500
building tree 663 of 2500
building tree 664 of 2500
building tree 665 of 2500
building tree 666 of 2500
building tree 667 of 2500
building tree 668 of 2500building tree 669 of 2500
building tree 670 of 2500

building tree 671 of 2500
building tree 672 of 2500
building tree 673 of 2500
building tree 674 of 2500
building tree 675 of 2500
building tree 676 of 2500
building tree 677 of 2500
building tree 678 of 2500
building tree 679 of 2500
building tree 680 of 2500
building tree 681 of 2500
building tree 682 of 2500
building tree 683 of 2500building tree 684 of 2500

building tree 685 of 2500
building tree 686 of 2500
building tre

building tree 973 of 2500
building tree 974 of 2500
building tree 975 of 2500
building tree 976 of 2500
building tree 977 of 2500
building tree 978 of 2500
building tree 979 of 2500
building tree 980 of 2500
building tree 981 of 2500
building tree 982 of 2500
building tree 983 of 2500
building tree 984 of 2500
building tree 985 of 2500
building tree 986 of 2500
building tree 987 of 2500
building tree 988 of 2500
building tree 989 of 2500
building tree 990 of 2500
building tree 991 of 2500
building tree 992 of 2500
building tree 993 of 2500
building tree 994 of 2500
building tree 995 of 2500
building tree 996 of 2500
building tree 997 of 2500
building tree 998 of 2500
building tree 999 of 2500
building tree 1000 of 2500
building tree 1001 of 2500
building tree 1002 of 2500
building tree 1003 of 2500
building tree 1004 of 2500
building tree 1005 of 2500building tree 1006 of 2500

building tree 1007 of 2500


[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed:    5.8s


building tree 1008 of 2500
building tree 1009 of 2500
building tree 1010 of 2500
building tree 1011 of 2500
building tree 1012 of 2500building tree 1013 of 2500

building tree 1014 of 2500
building tree 1015 of 2500
building tree 1016 of 2500
building tree 1017 of 2500
building tree 1018 of 2500
building tree 1019 of 2500
building tree 1020 of 2500
building tree 1021 of 2500
building tree 1022 of 2500
building tree 1023 of 2500
building tree 1024 of 2500building tree 1025 of 2500

building tree 1026 of 2500
building tree 1027 of 2500
building tree 1028 of 2500
building tree 1029 of 2500
building tree 1030 of 2500
building tree 1031 of 2500
building tree 1032 of 2500
building tree 1033 of 2500
building tree 1034 of 2500
building tree 1035 of 2500
building tree 1036 of 2500
building tree 1037 of 2500
building tree 1038 of 2500
building tree 1039 of 2500building tree 1040 of 2500

building tree 1041 of 2500
building tree 1042 of 2500
building tree 1043 of 2500
building tree 1044 of 2500
b

building tree 1333 of 2500
building tree 1334 of 2500
building tree 1335 of 2500building tree 1336 of 2500

building tree 1337 of 2500
building tree 1338 of 2500
building tree 1339 of 2500
building tree 1340 of 2500
building tree 1341 of 2500
building tree 1342 of 2500
building tree 1343 of 2500
building tree 1344 of 2500
building tree 1345 of 2500
building tree 1346 of 2500
building tree 1347 of 2500
building tree 1348 of 2500
building tree 1349 of 2500
building tree 1350 of 2500
building tree 1351 of 2500
building tree 1352 of 2500
building tree 1353 of 2500
building tree 1354 of 2500
building tree 1355 of 2500
building tree 1356 of 2500
building tree 1357 of 2500
building tree 1358 of 2500
building tree 1359 of 2500
building tree 1360 of 2500
building tree 1361 of 2500
building tree 1362 of 2500
building tree 1363 of 2500
building tree 1364 of 2500
building tree 1365 of 2500
building tree 1366 of 2500
building tree 1367 of 2500
building tree 1368 of 2500
building tree 1369 of 2500
b

[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed:    8.7s


building tree 1473 of 2500
building tree 1474 of 2500
building tree 1475 of 2500
building tree 1476 of 2500
building tree 1477 of 2500
building tree 1478 of 2500
building tree 1479 of 2500building tree 1480 of 2500
building tree 1481 of 2500

building tree 1482 of 2500
building tree 1483 of 2500
building tree 1484 of 2500
building tree 1485 of 2500
building tree 1486 of 2500building tree 1487 of 2500

building tree 1488 of 2500
building tree 1489 of 2500
building tree 1490 of 2500
building tree 1491 of 2500
building tree 1492 of 2500
building tree 1493 of 2500
building tree 1494 of 2500building tree 1495 of 2500

building tree 1496 of 2500
building tree 1497 of 2500
building tree 1498 of 2500
building tree 1499 of 2500
building tree 1500 of 2500
building tree 1501 of 2500
building tree 1502 of 2500building tree 1503 of 2500

building tree 1504 of 2500
building tree 1505 of 2500
building tree 1506 of 2500
building tree 1507 of 2500
building tree 1508 of 2500
building tree 1509 of 2500
b

building tree 1778 of 2500
building tree 1779 of 2500
building tree 1780 of 2500
building tree 1781 of 2500
building tree 1782 of 2500
building tree 1783 of 2500
building tree 1784 of 2500
building tree 1785 of 2500
building tree 1786 of 2500
building tree 1787 of 2500
building tree 1788 of 2500
building tree 1789 of 2500
building tree 1790 of 2500
building tree 1791 of 2500
building tree 1792 of 2500
building tree 1793 of 2500
building tree 1794 of 2500
building tree 1795 of 2500
building tree 1796 of 2500
building tree 1797 of 2500
building tree 1798 of 2500
building tree 1799 of 2500
building tree 1800 of 2500
building tree 1801 of 2500
building tree 1802 of 2500
building tree 1803 of 2500
building tree 1804 of 2500
building tree 1805 of 2500
building tree 1806 of 2500
building tree 1807 of 2500
building tree 1808 of 2500
building tree 1809 of 2500
building tree 1810 of 2500
building tree 1811 of 2500
building tree 1812 of 2500
building tree 1813 of 2500
building tree 1814 of 2500
b

[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed:   12.2s



building tree 1979 of 2500
building tree 1980 of 2500
building tree 1981 of 2500
building tree 1982 of 2500
building tree 1983 of 2500
building tree 1984 of 2500building tree 1985 of 2500

building tree 1986 of 2500
building tree 1987 of 2500
building tree 1988 of 2500
building tree 1989 of 2500
building tree 1990 of 2500
building tree 1991 of 2500
building tree 1992 of 2500
building tree 1993 of 2500
building tree 1994 of 2500
building tree 1995 of 2500
building tree 1996 of 2500
building tree 1997 of 2500
building tree 1998 of 2500
building tree 1999 of 2500
building tree 2000 of 2500
building tree 2001 of 2500building tree 2002 of 2500

building tree 2003 of 2500
building tree 2004 of 2500
building tree 2005 of 2500
building tree 2006 of 2500
building tree 2007 of 2500
building tree 2008 of 2500
building tree 2009 of 2500
building tree 2010 of 2500
building tree 2011 of 2500
building tree 2012 of 2500
building tree 2013 of 2500
building tree 2014 of 2500
building tree 2015 of 2500


building tree 2293 of 2500
building tree 2294 of 2500
building tree 2295 of 2500
building tree 2296 of 2500building tree 2297 of 2500

building tree 2298 of 2500
building tree 2299 of 2500
building tree 2300 of 2500
building tree 2301 of 2500
building tree 2302 of 2500
building tree 2303 of 2500
building tree 2304 of 2500
building tree 2305 of 2500
building tree 2306 of 2500
building tree 2307 of 2500
building tree 2308 of 2500
building tree 2309 of 2500
building tree 2310 of 2500
building tree 2311 of 2500building tree 2312 of 2500

building tree 2313 of 2500
building tree 2314 of 2500
building tree 2315 of 2500
building tree 2316 of 2500
building tree 2317 of 2500
building tree 2318 of 2500
building tree 2319 of 2500
building tree 2320 of 2500
building tree 2321 of 2500
building tree 2322 of 2500building tree 2323 of 2500

building tree 2324 of 2500
building tree 2325 of 2500building tree 2326 of 2500

building tree 2327 of 2500
building tree 2328 of 2500
building tree 2329 of 2500
b

[Parallel(n_jobs=-1)]: Done 2500 out of 2500 | elapsed:   15.7s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 349 tasks      | elapsed:    0.2s
[Parallel(n_jobs=8)]: Done 632 tasks      | elapsed:    0.3s
[Parallel(n_jobs=8)]: Done 997 tasks      | elapsed:    0.5s
[Parallel(n_jobs=8)]: Done 1442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=8)]: Done 1969 tasks      | elapsed:    0.9s
[Parallel(n_jobs=8)]: Done 2500 out of 2500 | elapsed:    1.1s finished
AUC=0.69


In [103]:
import os
import fairing

DOCKER_REGISTRY = 'gcr.io/{}/lending-club-job'.format(PROJECT)
PY_VERSION = '.'.join([str(x) for x in sys.version_info[0:3]])
BASE_IMAGE = 'python:{}'.format(PY_VERSION)

In [104]:
# Can use this to deploy as image, train on GKE backend
from fairing import TrainJob
from fairing.backends import KubeflowGKEBackend

In [107]:
train_job = TrainJob(LendingClubModel,
                    BASE_IMAGE,
                    input_files=['requirements.txt'],
                    docker_registry=DOCKER_REGISTRY,
                    backend=KubeflowGKEBackend())

train_job.submit()

Using preprocessor: <class 'fairing.preprocessors.function.FunctionPreProcessor'>
Using docker registry: gcr.io/mwpmltr/lending-club-job
Using builder: <class 'fairing.builders.cluster.cluster.ClusterBuilder'>
/opt/conda/lib/python3.6/site-packages/fairing/__init__.py already exists in Fairing context, skipping...
/opt/conda/lib/python3.6/site-packages/fairing/__init__.py already exists in Fairing context, skipping...
Waiting for fairing-builder-rhrm6 to start...
Waiting for fairing-builder-rhrm6 to start...
Waiting for fairing-builder-rhrm6 to start...
Pod started running True


[36mINFO[0m[0000] Downloading base image python:3.6.7
ERROR: logging before flag.Parse: E0621 16:11:35.436050       1 metadata.go:159] while reading 'google-dockercfg-url' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg-url
ERROR: logging before flag.Parse: E0621 16:11:35.447296       1 metadata.go:142] while reading 'google-dockercfg' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg
2019/06/21 16:11:35 No matching credentials were found, falling back on anonymous
[36mINFO[0m[0000] Executing 0 build triggers
[36mINFO[0m[0000] Unpacking rootfs as cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi requires it.
[36mINFO[0m[0018] Taking snapshot of full filesystem...
[36mINFO[0m[0026] Skipping paths under /dev, as it is a whitelisted directory
[36mINFO[0m[0026] S

You are using pip version 18.1, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
[36mINFO[0m[0053] Taking snapshot of full filesystem...
[36mINFO[0m[0063] Skipping paths under /dev, as it is a whitelisted directory
[36mINFO[0m[0063] Skipping paths under /etc/secrets, as it is a whitelisted directory
[36mINFO[0m[0063] Skipping paths under /kaniko, as it is a whitelisted directory
[36mINFO[0m[0063] Skipping paths under /proc, as it is a whitelisted directory
[36mINFO[0m[0063] Skipping paths under /sys, as it is a whitelisted directory
[36mINFO[0m[0063] Skipping paths under /var/run, as it is a whitelisted directory
[36mINFO[0m[0076] Using files from context: [/kaniko/buildcontext/app]
[36mINFO[0m[0076] COPY /app/ /app/
[36mINFO[0m[0076] Taking snapshot of files...
2019/06/21 16:12:52 existing blob: sha256:5d71636fb824265e30ff34bf20737c9cdc4f5af28b6bce86f08215c55b89bfab
2019/06/21 16:12:52 existing blob: sh

Training job fairing-job-52k58 launched.
Waiting for fairing-job-52k58-pbxff to start...
Waiting for fairing-job-52k58-pbxff to start...
Waiting for fairing-job-52k58-pbxff to start...
Pod started running True


Processing date column. Shape=(8000, 26)
Processing text columns. Shape=(8000, 26)
Adding log features. Shape=(8000, 26)
Processing categorical columns. Shape=(8000, 29)
Processing nulls. Shape=(8000, 114)
Flagging null values
Processing date column. Shape=(2000, 26)
Processing text columns. Shape=(2000, 26)
Adding log features. Shape=(2000, 26)
Processing categorical columns. Shape=(2000, 29)
Processing nulls. Shape=(2000, 114)
Flagging null values
Training model...
building tree 1 of 2500
building tree 2 of 2500
building tree 3 of 2500
building tree 4 of 2500
building tree 5 of 2500
building tree 6 of 2500
building tree 7 of 2500
building tree 8 of 2500
building tree 9 of 2500
building tree 10 of 2500
building tree 11 of 2500
building tree 12 of 2500
building tree 13 of 2500
building tree 14 of 2500
building tree 15 of 2500
building tree 16 of 2500
building tree 17 of 2500
building tree 18 of 2500
building tree 19 of 2500
building tree 20 of 2500
building tree 21 of 2500
building tre

building tree 320 of 2500building tree 321 of 2500
building tree 322 of 2500

building tree 323 of 2500
building tree 324 of 2500
building tree 325 of 2500
building tree 326 of 2500
building tree 327 of 2500
building tree 328 of 2500
building tree 329 of 2500
building tree 330 of 2500
building tree 331 of 2500
building tree 332 of 2500
building tree 333 of 2500
building tree 334 of 2500
building tree 335 of 2500
building tree 336 of 2500
building tree 337 of 2500
building tree 338 of 2500
building tree 339 of 2500
building tree 340 of 2500
building tree 341 of 2500
building tree 342 of 2500
building tree 343 of 2500
building tree 344 of 2500
building tree 345 of 2500
building tree 346 of 2500
building tree 347 of 2500
building tree 348 of 2500
building tree 349 of 2500
building tree 350 of 2500
building tree 351 of 2500
building tree 352 of 2500
building tree 353 of 2500
building tree 354 of 2500
building tree 355 of 2500
building tree 356 of 2500
building tree 357 of 2500
building tre

building tree 636 of 2500
building tree 637 of 2500
building tree 638 of 2500
building tree 639 of 2500
building tree 640 of 2500
building tree 641 of 2500
building tree 642 of 2500
building tree 643 of 2500
building tree 644 of 2500
building tree 645 of 2500
building tree 646 of 2500
building tree 647 of 2500
building tree 648 of 2500
building tree 649 of 2500
building tree 650 of 2500
building tree 651 of 2500
building tree 652 of 2500
building tree 653 of 2500
building tree 654 of 2500
building tree 655 of 2500
building tree 656 of 2500
building tree 657 of 2500
building tree 658 of 2500
building tree 659 of 2500
building tree 660 of 2500
building tree 661 of 2500
building tree 662 of 2500
building tree 663 of 2500
building tree 664 of 2500
building tree 665 of 2500
building tree 666 of 2500
building tree 667 of 2500
building tree 668 of 2500
building tree 669 of 2500
building tree 670 of 2500
building tree 671 of 2500
building tree 672 of 2500
building tree 673 of 2500
building tre

building tree 952 of 2500
building tree 953 of 2500
building tree 954 of 2500
building tree 955 of 2500
building tree 956 of 2500
building tree 957 of 2500
building tree 958 of 2500
building tree 959 of 2500
building tree 960 of 2500
building tree 961 of 2500
building tree 962 of 2500
building tree 963 of 2500
building tree 964 of 2500
building tree 965 of 2500
building tree 966 of 2500
building tree 967 of 2500
building tree 968 of 2500
building tree 969 of 2500
building tree 970 of 2500
building tree 971 of 2500
building tree 972 of 2500
building tree 973 of 2500
building tree 974 of 2500
building tree 975 of 2500
building tree 976 of 2500
building tree 977 of 2500
building tree 978 of 2500
building tree 979 of 2500
building tree 980 of 2500
building tree 981 of 2500
building tree 982 of 2500
building tree 983 of 2500
building tree 984 of 2500
building tree 985 of 2500
building tree 986 of 2500
building tree 987 of 2500
building tree 988 of 2500
building tree 989 of 2500
building tre

building tree 1258 of 2500
building tree 1259 of 2500
building tree 1260 of 2500
building tree 1261 of 2500
building tree 1262 of 2500
building tree 1263 of 2500
building tree 1264 of 2500
building tree 1265 of 2500
building tree 1266 of 2500
building tree 1267 of 2500
building tree 1268 of 2500
building tree 1269 of 2500
building tree 1270 of 2500
building tree 1271 of 2500
building tree 1272 of 2500
building tree 1273 of 2500
building tree 1274 of 2500
building tree 1275 of 2500
building tree 1276 of 2500
building tree 1277 of 2500
building tree 1278 of 2500
building tree 1279 of 2500
building tree 1280 of 2500
building tree 1281 of 2500
building tree 1282 of 2500
building tree 1283 of 2500
building tree 1284 of 2500
building tree 1285 of 2500
building tree 1286 of 2500
building tree 1287 of 2500
building tree 1288 of 2500
building tree 1289 of 2500
building tree 1290 of 2500
building tree 1291 of 2500
building tree 1292 of 2500
building tree 1293 of 2500
building tree 1294 of 2500
b

building tree 1562 of 2500
building tree 1563 of 2500
building tree 1564 of 2500
building tree 1565 of 2500
building tree 1566 of 2500
building tree 1567 of 2500
building tree 1568 of 2500
building tree 1569 of 2500
building tree 1570 of 2500
building tree 1571 of 2500
building tree 1572 of 2500
building tree 1573 of 2500
building tree 1574 of 2500
building tree 1575 of 2500
building tree 1576 of 2500
building tree 1577 of 2500
building tree 1578 of 2500
building tree 1579 of 2500
building tree 1580 of 2500
building tree 1581 of 2500
building tree 1582 of 2500
building tree 1583 of 2500
building tree 1584 of 2500
building tree 1585 of 2500
building tree 1586 of 2500
building tree 1587 of 2500
building tree 1588 of 2500
building tree 1589 of 2500
building tree 1590 of 2500
building tree 1591 of 2500
building tree 1592 of 2500
building tree 1593 of 2500
building tree 1594 of 2500
building tree 1595 of 2500
building tree 1596 of 2500
building tree 1597 of 2500
building tree 1598 of 2500
b

building tree 1866 of 2500
building tree 1867 of 2500
building tree 1868 of 2500
building tree 1869 of 2500
building tree 1870 of 2500
building tree 1871 of 2500
building tree 1872 of 2500
building tree 1873 of 2500
building tree 1874 of 2500
building tree 1876 of 2500
building tree 1875 of 2500
building tree 1877 of 2500
building tree 1878 of 2500
building tree 1879 of 2500
building tree 1880 of 2500
building tree 1881 of 2500
building tree 1882 of 2500
building tree 1883 of 2500
building tree 1884 of 2500
building tree 1885 of 2500
building tree 1886 of 2500
building tree 1887 of 2500
building tree 1888 of 2500
building tree 1889 of 2500
building tree 1890 of 2500
building tree 1891 of 2500
building tree 1892 of 2500
building tree 1893 of 2500
building tree 1894 of 2500
building tree 1895 of 2500
building tree 1896 of 2500
building tree 1897 of 2500
building tree 1898 of 2500
building tree 1899 of 2500
building tree 1900 of 2500
building tree 1901 of 2500
building tree 1902 of 2500
b

building tree 2170 of 2500
building tree 2171 of 2500
building tree 2172 of 2500
building tree 2173 of 2500
building tree 2174 of 2500
building tree 2175 of 2500
building tree 2176 of 2500
building tree 2177 of 2500
building tree 2178 of 2500
building tree 2179 of 2500
building tree 2180 of 2500
building tree 2181 of 2500
building tree 2182 of 2500
building tree 2183 of 2500
building tree 2184 of 2500
building tree 2185 of 2500
building tree 2186 of 2500
building tree 2187 of 2500
building tree 2188 of 2500
building tree 2189 of 2500
building tree 2190 of 2500
building tree 2191 of 2500
building tree 2192 of 2500
building tree 2193 of 2500
building tree 2194 of 2500
building tree 2195 of 2500
building tree 2196 of 2500
building tree 2197 of 2500
building tree 2198 of 2500
building tree 2199 of 2500
building tree 2200 of 2500
building tree 2201 of 2500
building tree 2202 of 2500
building tree 2203 of 2500
building tree 2204 of 2500
building tree 2205 of 2500
building tree 2206 of 2500
b

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed:    9.8s
[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed:   13.2s
[Parallel(n_jobs=-1)]: Done 2500 out of 2500 | elapsed:   17.0s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 349 tasks      | elapsed:    0.2s
[Parallel(n_jobs=8)]: Done 632 tasks      | elapsed:    0.3s
[Parallel(n_jobs=8)]: Done 997 tasks      | elapsed:    0.5s
[Parallel(n_jobs=8)]: Don

Cleaning up job fairing-job-52k58...


In [129]:
from fairing import PredictionEndpoint
from fairing.backends import KubeflowGKEBackend

endpoint = PredictionEndpoint(LendingClubModel,
                             BASE_IMAGE,
                             input_files=['requirements.txt'],
                             docker_registry=DOCKER_REGISTRY,
                             backend=KubeflowGKEBackend())

endpoint.create()

Using preprocessor: <class 'fairing.preprocessors.function.FunctionPreProcessor'>
Using docker registry: gcr.io/mwpmltr/lending-club-job
Using builder: <class 'fairing.builders.cluster.cluster.ClusterBuilder'>
/opt/conda/lib/python3.6/site-packages/fairing/__init__.py already exists in Fairing context, skipping...
/opt/conda/lib/python3.6/site-packages/fairing/__init__.py already exists in Fairing context, skipping...
Waiting for fairing-builder-kjpc2 to start...
Waiting for fairing-builder-kjpc2 to start...
Pod started running True


[36mINFO[0m[0000] Downloading base image python:3.6.7
ERROR: logging before flag.Parse: E0621 16:34:15.460544       1 metadata.go:142] while reading 'google-dockercfg' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg
ERROR: logging before flag.Parse: E0621 16:34:15.464537       1 metadata.go:159] while reading 'google-dockercfg-url' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg-url
2019/06/21 16:34:15 No matching credentials were found, falling back on anonymous
[36mINFO[0m[0000] Executing 0 build triggers
[36mINFO[0m[0000] Unpacking rootfs as cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi requires it.
[36mINFO[0m[0019] Taking snapshot of full filesystem...
[36mINFO[0m[0027] Skipping paths under /dev, as it is a whitelisted directory
[36mINFO[0m[0027] S

You are using pip version 18.1, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
[36mINFO[0m[0054] Taking snapshot of full filesystem...
[36mINFO[0m[0063] Skipping paths under /dev, as it is a whitelisted directory
[36mINFO[0m[0063] Skipping paths under /etc/secrets, as it is a whitelisted directory
[36mINFO[0m[0063] Skipping paths under /kaniko, as it is a whitelisted directory
[36mINFO[0m[0063] Skipping paths under /proc, as it is a whitelisted directory
[36mINFO[0m[0063] Skipping paths under /sys, as it is a whitelisted directory
[36mINFO[0m[0064] Skipping paths under /var/run, as it is a whitelisted directory
[36mINFO[0m[0077] Using files from context: [/kaniko/buildcontext/app]
[36mINFO[0m[0077] COPY /app/ /app/
[36mINFO[0m[0077] Taking snapshot of files...
2019/06/21 16:35:33 existing blob: sha256:54f7e8ac135a5f502a6ee9537ef3d64b1cd2fa570dc0a40b4d3b6f7ac81e7486
2019/06/21 16:35:33 existing blob: sh

Endpoint fairing-deployer-22rdw launched.


Waiting for prediction endpoint to come up...


Prediction endpoint: http://34.66.116.252:5000/predict


In [121]:
test = pd.read_csv(TEST_DATA_PATH)
test_X = test.drop(RESPONSE, axis=1)

In [127]:
X_test = preprocess(test_X, False)
X_test = X_test.to_numpy()

Processing date column. Shape=(2000, 26)
Processing text columns. Shape=(2000, 26)
Adding log features. Shape=(2000, 26)
Processing categorical columns. Shape=(2000, 29)
Processing nulls. Shape=(2000, 114)
Flagging null values


In [131]:
predictions = endpoint.predict_nparray(X_test)
predictions.head()

ConnectionError: HTTPConnectionPool(host='34.66.116.252', port=5000): Max retries exceeded with url: /predict (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f9a1a96d6d8>: Failed to establish a new connection: [Errno 111] Connection refused',))