In [1]:
#%reset -f
%matplotlib inline
import os
import sys
import pandas as pd
import numpy as np
import feather
import time
import h2oaiglm as h2oaiglm


Loaded H2OAIGLM CPU library

Loaded H2OAIGLM GPU library.


### Import Data Frame and create raw X and y arrays

In [2]:
t0 = time.time()
from os.path import expanduser
home = str(expanduser("~"))
filepostfix="/h2oai-prototypes/glm-bench/ipums.feather"
df = feather.read_dataframe(home+filepostfix)
#df = pd.read_csv("../R/data.csv")
t1 = time.time()
print("Time to read data via feather: %r" % (t1-t0))

Time to read data via feather: 2.157763957977295


In [3]:
target = df.columns[-1] ## last column is the response
cols = [c for c in df.columns if c != target]

In [4]:
X = np.array(df.ix[:,cols], order='f').astype('float32')
y = np.array(df[target].values, dtype='float32')
print(X.shape)
print(y.shape)

(55776, 9732)
(55776,)


### H2O AI GLM using the GPU Pointers

In [7]:
nGPUs=4
nlambda=100
nalpha=16
validFraction=0.2

# set solver cpu/gpu according to input args
if((nGPUs>0) and (h2oaiglm.ElasticNetSolverGPU is None)):
    print("\nGPU solver unavailable, using CPU solver\n")
    nGPUs=0

sharedA = 0
sourceme = 0
sourceDev = 0
nThreads = 1 if(nGPUs==0) else nGPUs # not required number of threads, but normal.  Bit more optimal to use 2 threads for CPU, but 1 thread per GPU is optimal.
intercept = 1
standardize = 0
lambda_min_ratio = 1e-7
nLambdas = nlambda
nAlphas = nalpha

if standardize:
    print ("implement standardization transformer")
    exit()

# Setup Train/validation Set Split
morig = X.shape[0]
norig = X.shape[1]
print("Original m=%d n=%d" % (morig,norig))
fortran = X.flags.f_contiguous
print("fortran=%d" % (fortran))


# Do train/valid split
HO=int(validFraction*morig)
H=morig-HO
print("Size of Train rows=%d valid rows=%d" % (H,HO))
trainX = np.copy(X[0:H,:])
trainY = np.copy(y[0:H])
validX = np.copy(X[H:-1,:])
validY = np.copy(y[H:-1])
trainW = np.copy(trainY)*0.0 + 1.0 # constant unity weight

mTrain = trainX.shape[0]
mvalid = validX.shape[0]
print("mTrain=%d mvalid=%d" % (mTrain,mvalid))

## TODO: compute these in C++ (CPU or GPU)
sdtrainY = np.sqrt(np.var(trainY))
print("sdtrainY: " + str(sdtrainY))
meantrainY = np.mean(trainY)
print("meantrainY: " + str(meantrainY))

## TODO: compute these in C++ (CPU or GPU)
sdvalidY = np.sqrt(np.var(validY))
print("sdvalidY: " + str(sdvalidY))
meanvalidY = np.mean(validY)
print("meanvalidY: " + str(meanvalidY))

## TODO: compute this in C++ (CPU or GPU)
# compute without intercept column

weights = 1./mTrain
#weights = 1. # like current cpp driver
if intercept==1:
    lambda_max0 = weights * max(abs(trainX.T.dot(trainY-meantrainY)))
else:
    lambda_max0 = weights * max(abs(trainX.T.dot(trainY)))
print("lambda_max0: " + str(lambda_max0))

if intercept==1:
    trainX = np.hstack([trainX, np.ones((trainX.shape[0],1),dtype=trainX.dtype)])
    validX = np.hstack([validX, np.ones((validX.shape[0],1),dtype=validX.dtype)])
    n = trainX.shape[1]
    print("New n=%d" % (n))

Original m=55776 n=9732
fortran=1
Size of Train rows=44621 valid rows=11155
mTrain=44621 mvalid=11154
sdtrainY: 39926.0
meantrainY: 32721.7
sdvalidY: 42661.9
meanvalidY: 33843.7
lambda_max0: 7947.98718092
New n=9733


In [8]:
print("Setting up Solver")
Solver = h2oaiglm.ElasticNetSolverGPU if(nGPUs>0) else h2oaiglm.ElasticNetSolverCPU
#  Solver = h2oaiglm.ElasticNetSolverCPU
assert Solver != None, "Couldn't instantiate ElasticNetSolver"
enet = Solver(sharedA, nThreads, nGPUs, 'c' if fortran else 'r', intercept, standardize, lambda_min_ratio, nLambdas, nAlphas)


## First, get backend pointers
print("Uploading")
print(trainX.dtype)
print(trainY.dtype)
print(validX.dtype)
print(validY.dtype)
print(trainW.dtype)
t0 = time.time()
a,b,c,d,e = enet.upload_data(sourceDev, trainX, trainY, validX, validY, trainW)
t1 = time.time()
print("Time to ingest data: %r" % (t1-t0))


## Solve
print("Solving")
t0 = time.time()
enet.fit(sourceDev, mTrain, n, mvalid, intercept, standardize, a, b, c, d, e)
t1 = time.time()
print("Done Solving")
print("Time to train H2O AI GLM: %r" % (t1-t0))

Setting up Solver
Uploading
float32
float32
float32
float32
float32
Detected np.float32
c_void_p(1121219248128)
c_void_p(1122958362112)
c_void_p(1122959884288)
c_void_p(1121213827584)
c_void_p(1122957787136)
Time to ingest data: 0.7800250053405762
Solving
single precision fit
Done Solving
Time to train H2O AI GLM: 119.09090614318848
