Skip to content

Commit

Permalink
Update sklearn auto-backend (#76)
Browse files Browse the repository at this point in the history
* Update sklearn auto-backend using register.dat to override sklearn classes with our own.
  • Loading branch information
pseudotensor committed Aug 31, 2017
1 parent 3c128f2 commit 5b489a2
Show file tree
Hide file tree
Showing 56 changed files with 243 additions and 250 deletions.
51 changes: 40 additions & 11 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# OSX #
*.DS_STORE

# Ignore artifacts made from make, h2ogpuml
# Ignore artifacts made from make, h2o4gpu
src/interface_py/py3nvml
src/interface_py/xgboost
deps/
Expand All @@ -27,22 +27,22 @@ cmake-build-debug/
env/
examples/cpp/cpuall
examples/cpp/gpuall
examples/cpp/h2ogpuml-glm-cpu
examples/cpp/h2ogpuml-glm-cpu-orig
examples/cpp/h2ogpuml-glm-cpu-ptr
examples/cpp/h2ogpuml-glm-gpu
examples/cpp/h2ogpuml-glm-gpu-orig
examples/cpp/h2ogpuml-glm-gpu-ptr
examples/cpp/h2ogpuml-kmeans-cpu
examples/cpp/h2ogpuml-kmeans-gpu
examples/cpp/h2o4gpu-glm-cpu
examples/cpp/h2o4gpu-glm-cpu-orig
examples/cpp/h2o4gpu-glm-cpu-ptr
examples/cpp/h2o4gpu-glm-gpu
examples/cpp/h2o4gpu-glm-gpu-orig
examples/cpp/h2o4gpu-glm-gpu-ptr
examples/cpp/h2o4gpu-kmeans-cpu
examples/cpp/h2o4gpu-kmeans-gpu
examples/py/1
examples/py/@
examples/py/Untitled.ipynb
examples/py/Untitled2.ipynb
examples/py/alpha
examples/py/preds
src/interface_c/ch2ogpuml_cpu
src/interface_c/ch2ogpuml_gpu
src/interface_c/ch2o4gpu_cpu
src/interface_c/ch2o4gpu_gpu
src/py3nvml/
tmp/

Expand Down Expand Up @@ -95,3 +95,32 @@ tmp/
# Temp space
./tmp

## sklearn stuff
sklearn/
src/interface_py/h2o4gpu/__check_build/
src/interface_py/h2o4gpu/__init__.py
src/interface_py/h2o4gpu/_build_utils/
src/interface_py/h2o4gpu/cluster/
src/interface_py/h2o4gpu/covariance/
src/interface_py/h2o4gpu/cross_decomposition/
src/interface_py/h2o4gpu/datasets/
src/interface_py/h2o4gpu/decomposition/
src/interface_py/h2o4gpu/ensemble/
src/interface_py/h2o4gpu/externals/
src/interface_py/h2o4gpu/feature_extraction/
src/interface_py/h2o4gpu/feature_selection/
src/interface_py/h2o4gpu/gaussian_process/
src/interface_py/h2o4gpu/linear_model/
src/interface_py/h2o4gpu/manifold/
src/interface_py/h2o4gpu/metrics/
src/interface_py/h2o4gpu/mixture/
src/interface_py/h2o4gpu/model_selection/
src/interface_py/h2o4gpu/neighbors/
src/interface_py/h2o4gpu/neural_network/
src/interface_py/h2o4gpu/preprocessing/
src/interface_py/h2o4gpu/semi_supervised/
src/interface_py/h2o4gpu/setup.py
src/interface_py/h2o4gpu/svm/
src/interface_py/h2o4gpu/tests/
src/interface_py/h2o4gpu/tree/
src/interface_py/h2o4gpu/utils/
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
[submodule "py3nvml"]
path = py3nvml
url = https://github.com/h2oai/py3nvml
[submodule "scikit-learn"]
path = scikit-learn
url = https://github.com/h2oai/scikit-learn.git
14 changes: 9 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ c:
py:
$(MAKE) -j all -C src/interface_py

pyinstall: py
pyinstall:
$(MAKE) -j install -C src/interface_py

##############################################
Expand Down Expand Up @@ -117,8 +117,8 @@ cleanpy:

# uses https://github.com/Azure/fast_retraining
testxgboost: # liblightgbm (assumes one installs lightgbm oneself or runs make liblightgbm)
sh testsxgboost/runtestxgboost.sh
sh testsxgboost/extracttestxgboost.sh
bash testsxgboost/runtestxgboost.sh
bash testsxgboost/extracttestxgboost.sh
bash tests/showresults.sh # same for all tests

################
Expand Down Expand Up @@ -190,8 +190,12 @@ liblightgbm: # only done if user directly requests, never an explicit dependency
rm -rf LightGBM ; result=`git clone --recursive https://github.com/Microsoft/LightGBM`
cd LightGBM && mkdir build ; cd build && cmake .. -DUSE_GPU=1 -DOpenCL_LIBRARY=$(CUDA_HOME)/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=$(CUDA_HOME)/include/ && make -j && cd ../python-package ; python setup.py install --precompile --gpu && cd ../ && pip install arff tqdm keras runipy h5py --upgrade

apply_sklearn:
mkdir -p sklearn && cd sklearn && pip install -U sklearn --target=.
libsklearn: # assumes the scikit-learn submodule has already been checked out
scripts/prepare_sklearn.sh # repeated calls don't hurt
mkdir -p sklearn && cd scikit-learn && python setup.py sdist bdist_wheel

apply_sklearn: libsklearn
bash ./scripts/apply_sklearn.sh


#################### Jenkins specific
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,6 @@ widgetsnbextension==2.0.0
XlsxWriter==0.9.8
cmake==0.8.0
feather-format==0.4.0
Cython==0.25.2


1 change: 1 addition & 0 deletions scikit-learn
Submodule scikit-learn added at d8c363
20 changes: 20 additions & 0 deletions scripts/apply_sklearn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# Install the locally built h2o4gpu wheel from scikit-learn/dist into ./sklearn,
# link its contents into src/interface_py/h2o4gpu, rebuild the package
# __init__.py from __init__.base.py plus the installed __init__.py, and
# finally run the class-registration step.
#
# Must be run from the repository root, after the wheel has been built in
# scikit-learn/dist.

# Stop at the first failing step so later steps never operate on a
# half-prepared tree (e.g. linking from a ./sklearn that pip failed to fill).
set -e

# apply sklearn
rm -rf sklearn
(
    cd scikit-learn

    # Locate the wheel by pattern instead of hard-coding one Python/ABI tag,
    # so the script works for whichever interpreter built it.
    wheels=(dist/h2o4gpu-*.whl)
    if [ "${#wheels[@]}" -ne 1 ] || [ ! -e "${wheels[0]}" ]; then
        echo "apply_sklearn.sh: expected exactly one dist/h2o4gpu-*.whl in scikit-learn/" >&2
        exit 1
    fi

    pip install "${wheels[0]}" --upgrade --target ../sklearn/
)

# link-up recursively
bash ./scripts/importsklearn.sh

# handle base __init__.py file appending
pkgdir=src/interface_py/h2o4gpu

# The link-up step leaves __init__.py as a symlink into ./sklearn; remove it
# first so the redirection below creates a regular file instead of writing
# through the link.
rm -rf "$pkgdir/__init__.py"

# Strip the installed package's __version__ line; the version comes from
# __init__.base.py. Use '>' (not '>>') so a stale temp file left behind by an
# interrupted run cannot be duplicated into the result.
sed 's/__version__.*//g' sklearn/h2o4gpu/__init__.py > "$pkgdir/__init__.py.2"

cat "$pkgdir/__init__.base.py" "$pkgdir/__init__.py.2" > "$pkgdir/__init__.py"
rm -rf "$pkgdir/__init__.py.2"

# register
bash ./scripts/post_apply_sklearn.sh
Empty file modified scripts/checkcommithistory.sh
100644 → 100755
Empty file.
Empty file modified scripts/gitrename.sh
100644 → 100755
Empty file.
4 changes: 2 additions & 2 deletions scripts/gitshallow_submodules.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ for i in $(git submodule | awk '{print $2}'); do
surl=$(git config -f .gitmodules --get submodule.$i.url)
echo "submodule:" $i $spath $surl
# if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] || [ $spath == "scikit-learn || [ $spath == "xgboost" ] ; then
# if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] || [ $spath == "scikit-learn" ] ; then # can't add xgboost because not pulling from master
if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] ; then # can't add xgboost because not pulling from master
if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] || [ $spath == "scikit-learn" ] ; then # can't add xgboost because not pulling from master
# if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] ; then # can't add xgboost because not pulling from master
git submodule update --depth 1 $spath
else
git submodule update $spath
Expand Down
41 changes: 41 additions & 0 deletions scripts/importsklearn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash
# Mirror the installed sklearn/h2o4gpu package tree into
# src/interface_py/h2o4gpu: every directory is recreated there and every file
# is replaced by a relative symlink to its counterpart under sklearn/h2o4gpu.
# Any path containing "pycache" is ignored.
#
# Must be run from the repository root.

# Bail out if the target directory is missing; otherwise the mkdir/ln calls
# below would run relative to the wrong directory.
cd src/interface_py/h2o4gpu || {
    echo "importsklearn.sh: cannot cd to src/interface_py/h2o4gpu" >&2
    exit 1
}

srcroot=../../../sklearn/h2o4gpu
if [ ! -d "$srcroot" ]; then
    echo "importsklearn.sh: $srcroot does not exist" >&2
    exit 1
fi

# PATHS
# -mindepth 1 skips the root itself. mkdir -p creates missing parents, so the
# directories can be processed in whatever order find reports them.
find "$srcroot" -mindepth 1 -type d | grep -v pycache | while IFS= read -r srcdir
do
    # Path of the directory relative to the package root, e.g. ./cluster/tests
    newdir=./${srcdir#"$srcroot"/}

    echo "mkdir: " "$newdir"
    mkdir -p "$newdir"
done

# FILES
find "$srcroot" -type f | grep -v pycache | while IFS= read -r srcfile
do
    newfile=./${srcfile#"$srcroot"/}

    echo "$srcfile" "->" "$newfile"

    # -f replaces an existing file/link, -r makes the link target relative
    ln -sfr "$srcfile" "$newfile"
done
54 changes: 54 additions & 0 deletions scripts/post_apply_sklearn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash
# Apply the class overrides listed in src/interface_py/h2o4gpu/register.dat.
#
# Every line of register.dat that does not contain '#' is read as five
# whitespace-separated fields:
#   skfile     file in which the class is renamed
#   initfile   file in which references are renamed and to which the command
#              is appended
#   classorig  original class name
#   classnew   replacement class name
#   command    remainder of the line (optionally wrapped in double quotes),
#              appended verbatim to initfile
#
# Must be run from the repository root.

# Without this guard a failed cd would create register.clean.dat, and run the
# in-place edits, relative to the wrong directory.
cd src/interface_py/h2o4gpu/ || {
    echo "post_apply_sklearn.sh: cannot cd to src/interface_py/h2o4gpu/" >&2
    exit 1
}

# now register any override of existing classes
# e.g. from grep -R 'from \.' * |grep KMeans
infile="register.clean.dat"
# Note: this drops every line containing '#' anywhere, not just comment lines.
grep -v "#" register.dat > "$infile"

# NOTE(review): read is used without -r, so backslashes in register.dat are
# treated as escapes; left as-is to keep existing entries parsed the same way.
while read skfile initfile classorig classnew command
do

    # Blank lines would otherwise call sed -i with no file argument.
    if [ -z "$skfile" ]; then
        continue
    fi

    # A line with fewer than four fields cannot describe a rename.
    if [ -z "$classnew" ]; then
        echo "post_apply_sklearn.sh: skipping malformed line: $skfile $initfile $classorig" >&2
        continue
    fi

    # Quoted so that a '*' in the command is printed, not glob-expanded.
    echo "$skfile"
    echo "$initfile"
    echo "$classorig"
    echo "$classnew"
    echo "$command"

    # The substitutions below are order-dependent; do not reorder them.

    # rename class
    echo "1"
    sed -i "s/class $classorig(/class $classnew(/g" "$skfile"
    echo "2"
    sed -i "s/($classorig)/($classnew)/g" "$skfile"
    echo "3"
    # Collapse a doubled suffix. Presumably classnew is classorig plus a
    # "_sklearn" suffix, so overlapping renames can yield "sklearn_sklearn"
    # -- verify against register.dat.
    sed -i "s/sklearn_sklearn/sklearn/g" "$skfile"
    echo "4"
    # [^[a-z] matches any character other than '[' or a lowercase letter.
    # NOTE(review): possibly meant [^a-z]; left unchanged to preserve behavior.
    sed -i "s/\ $classorig\([^[a-z]\)/ $classnew\1/g" "$skfile"
    echo "5"
    # Step 4 can re-match names that were already renamed, so collapse again.
    sed -i "s/sklearn_sklearn/sklearn/g" "$skfile"

    # rename class in init file
    echo "6"
    sed -i "s/\ $classorig\([^[a-z]\)/ $classnew\1/g" "$initfile"
    echo "7"
    sed -i "s/sklearn_sklearn/sklearn/g" "$initfile"

    # append our class: a blank line goes to skfile, then the command (with one
    # leading and one trailing double quote stripped) goes to initfile
    echo "" >> "$skfile"
    command=`sed -e 's/^"//' -e 's/"$//' <<< "$command"`
    echo "$command" >> "$initfile"

done < "$infile"

rm -rf "$infile"
36 changes: 36 additions & 0 deletions scripts/prepare_sklearn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
# Rebrand the scikit-learn checkout in ./scikit-learn as h2o4gpu:
#   1. remove previous build/ and dist/ output,
#   2. rename every directory whose name contains "sklearn" or "scikit-learn",
#   3. replace both strings with "h2o4gpu" inside every file.
# Paths containing "pycache" or ".git" are left untouched.
#
# Must be run from the repository root.

# This guard is essential: without it a missing checkout would make the
# rm -rf, the renames and the in-place edits below run in the caller's
# directory instead.
cd scikit-learn || {
    echo "prepare_sklearn.sh: cannot cd to scikit-learn" >&2
    exit 1
}
rm -rf build dist

########## DIRECTORIES
# Snapshot the list deepest-first (-depth) and rename only the last path
# component. Children are handled before their parents, so every path in the
# snapshot still exists when it is reached, and nested directories that carry
# the name themselves are renamed too.
mapfile -t paths < <(find . -depth -type d | grep -v pycache)

for dir in "${paths[@]}"
do
    echo "$dir"
    if [[ "$dir" == *".git"* ]]
    then
        echo "avoid .git"
        continue
    fi

    parent=${dir%/*}
    name=${dir##*/}
    newname=${name//sklearn/h2o4gpu}
    newname=${newname//scikit-learn/h2o4gpu}

    # Nothing to do for directories whose own name is unaffected (this also
    # covers "." itself).
    if [ "$name" != "$newname" ]
    then
        echo "$dir" "->" "$parent/$newname"
        mv "$dir" "$parent/$newname"
    fi
done

########## FILES
# Listed after the directory renames so the paths are current.
mapfile -t files < <(find . -type f | grep -v pycache)

for fil in "${files[@]}"
do
    echo "$fil"
    if [[ "$fil" == *".git"* ]]
    then
        echo "avoid .git"
    else
        # Both substitutions in one pass; sed applies them in order on each line.
        sed -i -e 's/sklearn/h2o4gpu/g' -e 's/scikit-learn/h2o4gpu/g' "$fil"
    fi
done

1 change: 0 additions & 1 deletion src/interface_py/h2o4gpu/__check_build

This file was deleted.

15 changes: 15 additions & 0 deletions src/interface_py/h2o4gpu/__init__.base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
__version__ = "0.0.4"


from h2o4gpu.types import FunctionVector
from h2o4gpu.solvers.pogs import Pogs
from h2o4gpu.solvers.elastic_net import GLM
from h2o4gpu.solvers.logistic import LogisticRegression
from h2o4gpu.solvers.lasso import Lasso
from h2o4gpu.solvers.ridge import Ridge
from h2o4gpu.solvers.kmeans import KMeans
from h2o4gpu.util import typechecks
from h2o4gpu.util import compatibility
from h2o4gpu import h2o4gpu_exceptions
from h2o4gpu.util import metrics
from h2o4gpu.util import import_data
Loading

0 comments on commit 5b489a2

Please sign in to comment.