Permalink
Please sign in to comment.
Showing
with
5 additions
and 63,561 deletions.
- +5 −0 .gitignore
- +0 −8 acc/__init__.py
- BIN acc/build/temp.linux-x86_64-2.7/cuda_code.o
- BIN acc/build/temp.linux-x86_64-2.7/cuda_solver.o
- BIN acc/build/temp.linux-x86_64-2.7/gpu_code.o
- BIN acc/build/temp.linux-x86_64-2.7/gpu_solver.o
- BIN acc/cuda_code
- +0 −152 acc/cuda_code (copy).cpp
- +0 −138 acc/cuda_code.cpp
- +0 −12 acc/cuda_code.h
- +0 −1 acc/cuda_solver.c
- +0 −6,983 acc/cuda_solver.cpp
- +0 −62 acc/cuda_solver.pyx
- BIN acc/cuda_solver.so
- +0 −256 acc/gpu_code.cpp
- +0 −12 acc/gpu_code.h
- +0 −7,680 acc/gpu_solver.cpp
- +0 −83 acc/gpu_solver.pyx
- +0 −237 acc/mic_code.cpp
- +0 −12 acc/mic_code.h
- +0 −83 acc/mic_solver.pyx
- +0 −7 acc/script.sh
- +0 −136 acc/setup_cuda.py
- +0 −51 acc/setup_gpu.py
- +0 −96 acc/try_cuda.py
- +0 −120 acc/try_gpu.py
- BIN develop/Elm october.pdf
- +0 −127 develop/XXXXelm_big.py
- +0 −310 develop/XXXXhpelm.py
- BIN develop/_Accelerators/CrayWorkshop/A.bin
- +0 −1 develop/_Accelerators/CrayWorkshop/C.bin
- BIN develop/_Accelerators/CrayWorkshop/H.bin
- +0 −1 develop/_Accelerators/CrayWorkshop/INSTALL
- BIN develop/_Accelerators/CrayWorkshop/Math Forum Discussions - AT_X_A.pdf
- BIN develop/_Accelerators/CrayWorkshop/SC13-MAGMA.pdf
- +0 −1 develop/_Accelerators/CrayWorkshop/U.bin
- BIN develop/_Accelerators/CrayWorkshop/VT.bin
- BIN develop/_Accelerators/CrayWorkshop/W.bin
- BIN develop/_Accelerators/CrayWorkshop/Y.bin
- BIN develop/_Accelerators/CrayWorkshop/build/temp.linux-x86_64-2.7/f_apply.o
- BIN develop/_Accelerators/CrayWorkshop/build/temp.linux-x86_64-2.7/mp_func.o
- BIN develop/_Accelerators/CrayWorkshop/build/temp.linux-x86_64-2.7/nl_func.o
- +0 −6,077 develop/_Accelerators/CrayWorkshop/clapack.h
- +0 −5,813 develop/_Accelerators/CrayWorkshop/f_apply.c
- BIN develop/_Accelerators/CrayWorkshop/f_apply.so
- +0 −15 develop/_Accelerators/CrayWorkshop/mp_func.c
- +0 −3 develop/_Accelerators/CrayWorkshop/mp_func.h
- +0 −137 develop/_Accelerators/CrayWorkshop/my_svd.c
- BIN develop/_Accelerators/CrayWorkshop/mysvd
- +0 −21 develop/_Accelerators/CrayWorkshop/setup.py
- +0 −24 develop/_Accelerators/CrayWorkshop/test.py
- +0 −106 develop/_Accelerators/CrayWorkshop/test_copy.py
- +0 −62 develop/_Accelerators/CrayWorkshop/test_small_nn.py
- BIN develop/_Accelerators/magma_ssyrk.png
- +0 −1 develop/_Cython_func/INSTALL.TXT
- BIN develop/_Cython_func/build/temp.macosx-10.5-x86_64-2.7/f_apply.o
- BIN develop/_Cython_func/build/temp.macosx-10.5-x86_64-2.7/mp_func.o
- +0 −5,799 develop/_Cython_func/f_apply.c
- BIN develop/_Cython_func/f_apply.so
- +0 −15 develop/_Cython_func/mp_func.c
- +0 −3 develop/_Cython_func/mp_func.h
- +0 −21 develop/_Cython_func/setup.py
- +0 −21 develop/_Cython_func/test.py
- +0 −107 develop/_Distributed_HH/test_mpi_HH.py
- BIN develop/_ELM_JMA/elm-jma.pdf
- BIN develop/_ELM_JMA/elm-jma.zip
- +0 −97 develop/_ELM_JMA/elm-jma/auxBuildKernelMatrix.m
- +0 −4 develop/_ELM_JMA/elm-jma/auxGaussianFcn.m
- +0 −57 develop/_ELM_JMA/elm-jma/combineJackknife.m
- +0 −44 develop/_ELM_JMA/elm-jma/elmSimulate_loo.m
- +0 −88 develop/_ELM_JMA/elm-jma/elmTrain_loo.m
- BIN develop/_Feature_selection/IJUFKS-D-14-00134.pdf
- BIN develop/_Feature_selection/NEW-OP-ELM.pdf
- BIN develop/_HannanQuinn/Akaike and BIC.pdf
- +0 −1 develop/_Imbalance_learning/Weighted ELM.txt
- +0 −99 develop/_Incremental_Timeseries/MG_opium.py
- +0 −29 develop/_Incremental_Timeseries/OPIUM.py
- BIN develop/_Incremental_Timeseries/opium.pdf
- BIN develop/_ML-ELM_Multilayer/extreme-learning-machines.pdf
- +0 −31 develop/_Math_solution/ldlmnls_Emil_solution.m
- BIN develop/_Math_solution/normal equations 1.png
- BIN develop/_Math_solution/normal equations 2.png
- BIN develop/_Math_solution/stable_Cholesky.pdf
- BIN develop/_Reproducible_research/Reproducible Research | University of Western Sydney (UWS).webarchive
- +0 −177 develop/_Sphinx/doc/Makefile
- +0 −336 develop/_Sphinx/doc/conf.py
- +0 −27 develop/_Sphinx/doc/index.rst
- +0 −242 develop/_Sphinx/doc/make.bat
- +0 −291 develop/compare_optimization_functions.py
- +0 −269 develop/elm_small.py
- +0 −68 develop/generator_what_gets_executed.py
- +0 −43 develop/gpu/compile
- +0 −182 develop/gpu/example.c
- +0 −3 develop/gpu/example.h
- +0 −226 develop/gpu/gpu_solver.cpp
- +0 −9 develop/gpu/gpu_solver.h
- +0 −6,447 develop/gpu/magma_solver.cpp
- +0 −82 develop/gpu/magma_solver.pyx
- +0 −76 develop/gpu/setup_gpu.py
- +0 −61 develop/gpu/test.py
- BIN develop/gpu/test.py.lprof
- +0 −20 develop/gpu/tryconfig.py
- +0 −38 develop/gpu_cpp/Rectangle.cpp
- +0 −13 develop/gpu_cpp/Rectangle.h
- BIN develop/gpu_cpp/build/temp.linux-x86_64-2.7/Rectangle.o
- BIN develop/gpu_cpp/build/temp.linux-x86_64-2.7/rect.o
- +0 −1,859 develop/gpu_cpp/rect.cpp
- +0 −19 develop/gpu_cpp/rect.pyx
- BIN develop/gpu_cpp/rect.so
- +0 −13 develop/gpu_cpp/setup.py
- +0 −604 develop/hp_elm_multiprocessing_async.py
- BIN develop/incr_std.py.lprof
- +0 −65 develop/incremental_variance_computation.py
- +0 −7 develop/modules/__init__
- +0 −317 develop/modules/data_loader.py
- +0 −47 develop/modules/h5tools.py
- BIN develop/one_neuron_at_a_time.py.lprof
- +0 −18 develop/overwrite_inherited.py
- +0 −65 develop/parallel_cdist.py
- BIN develop/parallel_cdist.py.lprof
- +0 −322 develop/parallel_cdist_projection.py
- +0 −29 develop/parallel_h5_write.py
- +0 −73 develop/parallel_mpi_iris.py
- +0 −143 develop/press_classification.py
- +0 −39 develop/reikna_test.py
- +0 −38 develop/replace_normalization.py
- +0 −40 develop/semi_Tikhonov_regularizations.py
- +0 −55 develop/setup_gpu.py
- +0 −76 develop/translate_dictionary.py
- +0 −77 develop_HtH/idea.txt
- +0 −247 develop_HtH/over_sampling.py
- +0 −283 develop_HtH/singlesample.py
- +0 −7 develop_os/OS-ELM/HardlimActFun.m
- +0 −205 develop_os/OS-ELM/OSELM.m
- +0 −206 develop_os/OS-ELM/OSELM_VARY.m
- +0 −12 develop_os/OS-ELM/RBFun.m
- +0 −7 develop_os/OS-ELM/SigActFun.m
- +0 −7 develop_os/OS-ELM/SinActFun.m
- +0 −55 develop_os/testMYELM.py
- +0 −58 develop_os/testOSELM.py
- +0 −1,643 ...lver/When to Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums.htm
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/07991327102941AD9C26775F2917C54C.gif
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/11B2B3C272C5480F8F739EF7B33C81C9.jpg
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/1FBDE1D67CCE4065B93122208DC722CD.jpg
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/239335867EEB4F5DB82A8D04839548C9.jpg
- BIN ... Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/3star.gif
- BIN ...Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/638326.png
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/656C149DA46F4FF882ED56E767F0FED4.jpg
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/74A7CD6B48A642BFBF81BD6AD7178FB2.jpg
- BIN ... Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/8star.gif
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/A4FFFE3E75CC4C7B9AD0EE3A5FC6B7DD.jpg
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/A6EC4944196040D4A71CFDB2BE61F468.jpg
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/A7441CDCFA75433E82FE9352D6E554A3.jpg
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/A79E37D55D1A485E96D67F78326097A9.jpg
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/B84FF32121CD45128E7280F80314444C.jpg
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/D2C158CCBA204C3688BBEE4EF465F732.jpg
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/E4D6C3103D93422891793B3B028C59BB.jpg
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/EB45543C92B446FC86FF65FCE51F0774.jpg
- BIN ...Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/EVGA_small.png
- BIN ... NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/F0A6B78C06F741BE964867289E968FB6.jpg
- +0 −1,832 ... Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/Javascript.js
- +0 −29 ...ble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/Javascript_002.js
- BIN ...recision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/MemHome_button_off.jpg
- BIN ...Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/Update.gif
- +0 −566 ...Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/WebResource.js
- +0 −2 ...Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/absolutebm.htm
- BIN ...e Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/arrowOne.png
- BIN ...le Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/arrowOne_black.png
- BIN ...Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/arrowThree.png
- BIN ... Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/arrowThree_black.png
- BIN ...e Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/arrowTwo.png
- BIN ...le Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/arrowTwo_black.png
- BIN ... Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/blank.gif
- BIN ... Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/close_off.png
- +0 −16 ...ouble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/combinedCss.css
- +0 −1,464 ...e Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/combinedCss_002.css
- +0 −3,893 ... Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/combinedJs.js
- +0 −1,911 ...ble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/combinedJs_002.js
- BIN ...cision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/community_button_off.jpg
- BIN ...e Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_002.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_003.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_004.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_005.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_006.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_007.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_008.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_009.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_010.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_011.jpg
- BIN ...uble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/download_012.jpg
- BIN ...to Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/egc.jpg
- BIN ...o Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/feed.png
- +0 −251 ...e Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/forumMenu.js
- +0 −2 ... Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/jquery.js
- +0 −127 ... Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/jquery_002.js
- +0 −1,398 ...n to Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/ln.js
- BIN ... Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/login.gif
- BIN ... to Use Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/m1.gif
- +0 −4 ...e Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/modernizr.js
- +0 −56 ...e Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/prettify.css
- BIN ...ecision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/products_button_off.jpg
- BIN ...se Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/sig-392.png
- BIN ...se Double Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/sig-767.png
- BIN ...ouble Precision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/sm_mods_off.gif
- BIN ...recision under NVIDIA Control Panel Manage 3D Settings - EVGA Forums_files/support_button_off.jpg
- +0 −200 develop_solver/datas_43.txt
- BIN develop_solver/plot_1000.pdf
- BIN develop_solver/plot_3000.pdf
- BIN develop_solver/plot_big.pdf
- BIN develop_solver/plot_largeN.pdf
- BIN develop_solver/plot_small.pdf
- BIN develop_solver/plot_tiny.pdf
- +0 −83 develop_solver/profiling_speen_vs_N.txt
- +0 −268 develop_solver/solver.py
- BIN develop_solver/solver.py.lprof
- +0 −100 develop_solver/solver_numbapro.py
- +0 −134 develop_solver/solver_python.py
- +0 −5 develop_solver/solver_python.py.lprof
- +0 −233 develop_solver/solver_skcuda.py
- +0 −3 develop_solver/solver_skcuda.py.lprof
- BIN develop_solver/test.py.lprof
- +0 −144 develop_solver/test_gpu_project.py
- BIN develop_solver/test_gpu_project.py.lprof
- +0 −201 develop_solver/test_numbapro.py
- +0 −149 develop_solver/test_skcuda.py
- BIN develop_solver/test_skcuda.py.lprof
| @@ -1,8 +0,0 @@ | ||
| -# -*- coding: utf-8 -*- | ||
| -""" | ||
| -Created on Sat Feb 21 20:46:31 2015 | ||
| - | ||
| -@author: akusok | ||
| -""" | ||
| - | ||
| -#from gpu_solver import GPUSolver, gpu_solve |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
acc/cuda_code
Binary file not shown.
| @@ -1,152 +0,0 @@ | ||
| -#include <stdio.h> | ||
| -#include <stdlib.h> | ||
| -#include <cuda_runtime.h> | ||
| -#include "cublas_v2.h" | ||
| - | ||
| -// H_(N,L), T_(N,C), HH(L,L), HT(L,C) | ||
| - | ||
| -class CudaSolver { | ||
| - int L, C; | ||
| - double* devHH; | ||
| - double* devHT; | ||
| - double* one; | ||
| - cublasHandle_t handle; | ||
| - public: | ||
| - CudaSolver( int, int, double*, double* ); | ||
| - void get_corr( double* HH, double* HT ); | ||
| - void add_data( int N, double* H, double* T ); | ||
| - void finalize(); | ||
| -}; | ||
| - | ||
| - | ||
| -// init a zero matrix on GPU to store X'*X, add normalization | ||
| -CudaSolver::CudaSolver ( int nL, int nC, double* HH, double* HT ) { | ||
| - cudaError_t cudaStat; | ||
| - cublasStatus_t stat; | ||
| - double* ones; | ||
| - | ||
| - L = nL; | ||
| - C = nC; | ||
| - | ||
| - | ||
| - cudaStat = cudaMalloc((void**)&devHH, L*L*sizeof(*HH)); | ||
| - if (cudaStat != cudaSuccess) { printf ("devHH device memory allocation failed"); } | ||
| - fprintf(stdout, "Allocating devHH success\n"); | ||
| - | ||
| - cudaStat = cudaMalloc((void**)&devHT, L*C*sizeof(*HT)); | ||
| - if (cudaStat != cudaSuccess) { printf ("devHT device memory allocation failed"); } | ||
| - fprintf(stdout, "Allocating devHT success\n"); | ||
| - | ||
| - stat = cublasCreate(&handle); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("CUBLAS initialization failed\n"); } | ||
| - fprintf(stdout, "CUBLAS initialization success\n"); | ||
| - | ||
| - stat = cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("Setting pointer mode failed"); } | ||
| - fprintf(stdout, "Setting pointer mode success\n"); | ||
| - | ||
| - stat = cublasSetMatrix (L, L, sizeof(*HH), HH, L, devHH, L); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devHH upload failed"); } | ||
| - fprintf(stdout, "devHH upload success\n"); | ||
| - | ||
| - stat = cublasSetMatrix (C, L, sizeof(*HT), HT, C, devHT, C); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devHT upload failed"); } | ||
| - fprintf(stdout, "devHT upload success\n"); | ||
| - | ||
| - | ||
| - ones = (double*) malloc (1*sizeof(double)); | ||
| - ones[0] = 1.0; | ||
| - cudaMalloc((void**)&one, 1*sizeof(double)); | ||
| - cublasSetVector(1, sizeof(*ones), ones, 1, one, 1); | ||
| - fprintf(stdout, "here\n"); | ||
| - | ||
| -}; | ||
| - | ||
| - | ||
| -// update covariance matrices with new data | ||
| -void CudaSolver::add_data ( int N, double* H, double* T ) { | ||
| - cudaError_t cudaStat; | ||
| - cublasStatus_t stat; | ||
| - double* devH; | ||
| - double* devT; | ||
| - | ||
| - cudaStat = cudaMalloc((void**)&devH, N*L*sizeof(*H)); | ||
| - if (cudaStat != cudaSuccess) { printf ("devH device memory allocation failed"); goto cleanup; } | ||
| - fprintf(stdout, "Allocating devH success\n"); | ||
| - | ||
| - cudaStat = cudaMalloc((void**)&devT, N*C*sizeof(*T)); | ||
| - if (cudaStat != cudaSuccess) { printf ("devT device memory allocation failed"); goto cleanup; } | ||
| - fprintf(stdout, "Allocating devT success\n"); | ||
| - | ||
| - stat = cublasSetMatrix (N, L, sizeof(*H), H, N, devH, N); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devH upload failed"); } | ||
| - fprintf(stdout, "devH upload success\n"); | ||
| - | ||
| - stat = cublasSetMatrix (N, C, sizeof(*T), T, N, devT, N); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devT upload failed"); } | ||
| - fprintf(stdout, "devT upload success\n"); | ||
| - | ||
| - | ||
| - stat = cublasDsyrk(handle, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, | ||
| - L, N, one, devH, L, one, devHH, L); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("H'H update failed"); } | ||
| - fprintf(stdout, "H'H update success\n"); | ||
| - | ||
| -// | ||
| -// stat = cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_T, | ||
| -// L, C, N, | ||
| -// one, devH, L,//L | ||
| -// devT, C,//C | ||
| -// one, devHT, L); | ||
| - | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("H'T update failed\n"); } | ||
| - fprintf(stdout, "H'T update success\n"); | ||
| - | ||
| - | ||
| - cleanup: | ||
| - cudaFree (devH); | ||
| - fprintf(stdout, "devH freed success\n"); | ||
| - cudaFree (devT); | ||
| - fprintf(stdout, "devT freed success\n"); | ||
| - | ||
| - | ||
| -}; | ||
| - | ||
| - | ||
| -// return current covariance matrices | ||
| -void CudaSolver::get_corr ( double* HH, double* HT ) { | ||
| - cublasStatus_t stat; | ||
| - | ||
| - stat = cublasGetMatrix (L, L, sizeof(*HH), devHH, L, HH, L); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devHH download failed"); } | ||
| - | ||
| - stat = cublasGetMatrix (L, C, sizeof(*HT), devHT, L, HT, L); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devHT download failed"); } | ||
| -}; | ||
| - | ||
| - | ||
| -// free memory | ||
| -void CudaSolver::finalize( ) { | ||
| - fprintf(stdout, "Solver finalized\n"); | ||
| - cudaFree (devHH); | ||
| - cudaFree (devHT); | ||
| - cudaFree (one); | ||
| - cublasDestroy(handle); | ||
| -} | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - |
| @@ -1,138 +0,0 @@ | ||
| -#include <stdio.h> | ||
| -#include <stdlib.h> | ||
| -#include <cuda_runtime.h> | ||
| -#include "cublas_v2.h" | ||
| - | ||
| -// H_(N,L), T_(N,C), HH(L,L), HT(L,C) | ||
| - | ||
| -class CudaSolver { | ||
| - int L, C; | ||
| - double* devHH; | ||
| - double* devHT; | ||
| - double* one; | ||
| - double* ones; | ||
| - cublasHandle_t handle; | ||
| - public: | ||
| - CudaSolver( int, int, double*, double* ); | ||
| - void get_corr( double* HH, double* HT ); | ||
| - void add_data( int N, double* H, double* T ); | ||
| - void finalize(); | ||
| -}; | ||
| - | ||
| - | ||
| -// init a zero matrix on GPU to store X'*X, add normalization | ||
| -CudaSolver::CudaSolver ( int nL, int nC, double* HH, double* HT ) { | ||
| - cudaError_t cudaStat; | ||
| - cublasStatus_t stat; | ||
| - | ||
| - L = nL; | ||
| - C = nC; | ||
| - | ||
| - | ||
| - cudaStat = cudaMalloc((void**)&devHH, L*L*sizeof(*HH)); | ||
| - if (cudaStat != cudaSuccess) { printf ("devHH device memory allocation failed"); } | ||
| - fprintf(stdout, "Allocating devHH success\n"); | ||
| - | ||
| - cudaStat = cudaMalloc((void**)&devHT, L*C*sizeof(*HT)); | ||
| - if (cudaStat != cudaSuccess) { printf ("devHT device memory allocation failed"); } | ||
| - fprintf(stdout, "Allocating devHT success\n"); | ||
| - | ||
| - stat = cublasCreate(&handle); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("CUBLAS initialization failed\n"); } | ||
| - fprintf(stdout, "CUBLAS initialization success\n"); | ||
| - | ||
| - stat = cublasSetMatrix (L, L, sizeof(*HH), HH, L, devHH, L); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devHH upload failed"); } | ||
| - fprintf(stdout, "devHH upload success\n"); | ||
| - | ||
| - stat = cublasSetMatrix (C, L, sizeof(*HT), HT, C, devHT, C); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devHT upload failed"); } | ||
| - fprintf(stdout, "devHT upload success\n"); | ||
| -}; | ||
| - | ||
| - | ||
| -// update covariance matrices with new data | ||
| -void CudaSolver::add_data ( int N, double* H, double* T ) { | ||
| - cudaError_t cudaStat; | ||
| - cublasStatus_t stat; | ||
| - double* devH; | ||
| - double* devT; | ||
| - double one = 1.0; | ||
| - | ||
| - cudaStat = cudaMalloc((void**)&devH, N*L*sizeof(*H)); | ||
| - if (cudaStat != cudaSuccess) { printf ("devH device memory allocation failed"); goto cleanup1; } | ||
| -// fprintf(stdout, "Allocating devH success\n"); | ||
| - | ||
| - cudaStat = cudaMalloc((void**)&devT, N*C*sizeof(*T)); | ||
| - if (cudaStat != cudaSuccess) { printf ("devT device memory allocation failed"); goto cleanup2; } | ||
| -// fprintf(stdout, "Allocating devT success\n"); | ||
| - | ||
| - stat = cublasSetMatrix (N, L, sizeof(*H), H, N, devH, N); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devH upload failed"); } | ||
| -// fprintf(stdout, "devH upload success\n"); | ||
| - | ||
| - stat = cublasSetMatrix (N, C, sizeof(*T), T, N, devT, N); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devT upload failed"); } | ||
| -// fprintf(stdout, "devT upload success\n"); | ||
| - | ||
| - | ||
| - stat = cublasDsyrk(handle, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_T, | ||
| - L, N, &one, devH, N, &one, devHH, L); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("H'H update failed\n"); } | ||
| -// fprintf(stdout, "H'H update success\n"); | ||
| - | ||
| - stat = cublasDgemm(handle, CUBLAS_OP_T, CUBLAS_OP_N, | ||
| - L, C, N, | ||
| - &one, devH, N, | ||
| - devT, N, | ||
| - &one, devHT, L); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("H'T update failed\n"); } | ||
| -// fprintf(stdout, "H'T update success\n"); | ||
| - | ||
| - | ||
| - cleanup2: | ||
| - cudaFree (devT); | ||
| -// fprintf(stdout, "devT freed success\n"); | ||
| - cleanup1: | ||
| - cudaFree (devH); | ||
| -// fprintf(stdout, "devH freed success\n"); | ||
| - | ||
| -}; | ||
| - | ||
| - | ||
| -// return current covariance matrices | ||
| -void CudaSolver::get_corr ( double* HH, double* HT ) { | ||
| - cublasStatus_t stat; | ||
| - | ||
| - stat = cublasGetMatrix (L, L, sizeof(*HH), devHH, L, HH, L); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devHH download failed"); } | ||
| - | ||
| - stat = cublasGetMatrix (L, C, sizeof(*HT), devHT, L, HT, L); | ||
| - if (stat != CUBLAS_STATUS_SUCCESS) { printf ("devHT download failed"); } | ||
| -}; | ||
| - | ||
| - | ||
| -// free memory | ||
| -void CudaSolver::finalize( ) { | ||
| - fprintf(stdout, "Solver finalized\n"); | ||
| - cudaFree (devHH); | ||
| - cudaFree (devHT); | ||
| - cublasDestroy(handle); | ||
| -} | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - | ||
| - |
| @@ -1,12 +0,0 @@ | ||
| -#include <stdio.h> | ||
| -#include <stdlib.h> | ||
| -#include <cuda_runtime.h> | ||
| -#include "cublas_v2.h" | ||
| - | ||
| -class CudaSolver { | ||
| - public: | ||
| - CudaSolver( int, int, double* HH, double* HT ); | ||
| - void get_corr( double* HH, double* HT ); | ||
| - void add_data( int N, double* H, double* T ); | ||
| - void finalize(); | ||
| -}; |
| @@ -1 +0,0 @@ | ||
| -#error Do not use this file, it is the result of a failed Cython compilation. |
Oops, something went wrong.
0 comments on commit
f30a3ba