-
-
Notifications
You must be signed in to change notification settings - Fork 85
/
cuda.scroll
264 lines (231 loc) · 15.9 KB
/
cuda.scroll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
import ../code/conceptPage.scroll
id cuda
name CUDA
appeared 2007
tags pl
website https://developer.nvidia.com/cuda-zone
blog https://developer.nvidia.com/blog/tag/cuda/
releaseNotes https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
download https://developer.nvidia.com/cuda-downloads
latestVersion 12.5
standsFor Compute Unified Device Architecture
isOpenSource true
clocExtensions cu cuh
fileType text
documentation https://docs.nvidia.com/cuda/
faq https://developer.nvidia.com/cuda-faq
centralPackageRepositoryCount 0
originCommunity Nvidia
helloWorldCollection Compute Unified Device Architecture
// Hello world in CUDA
#include <stdio.h>
const int N = 16;
const int blocksize = 16;
__global__
void hello(char *a, int *b)
{
a[threadIdx.x] += b[threadIdx.x];
}
int main()
{
char a[N] = "Hello \0\0\0\0\0\0";
int b[N] = {15, 10, 6, 0, -11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
char *ad;
int *bd;
const int csize = N*sizeof(char);
const int isize = N*sizeof(int);
printf("%s", a);
cudaMalloc( (void**)&ad, csize );
cudaMalloc( (void**)&bd, isize );
cudaMemcpy( ad, a, csize, cudaMemcpyHostToDevice );
cudaMemcpy( bd, b, isize, cudaMemcpyHostToDevice );
dim3 dimBlock( blocksize, 1 );
dim3 dimGrid( 1, 1 );
hello<<<dimGrid, dimBlock>>>(ad, bd);
cudaMemcpy( a, ad, csize, cudaMemcpyDeviceToHost );
cudaFree( ad );
cudaFree( bd );
printf("%s\n", a);
return EXIT_SUCCESS;
}
pygmentsHighlighter CUDA
filename c_like.py
fileExtensions cu cuh
leachim6 Cuda
filepath c/Cuda.cu
fileExtensions cu
example
#include <stdio.h>
__global__ void hello_world(){
printf("Hello World\n");
}
int main() {
hello_world<<<1,1>>>();
return 0;
}
meetup https://www.meetup.com/topics/cuda
memberCount 9400
groupCount 32
multiLineCommentTokens /* */
printToken printf
hasPrintDebugging true
hasMultiLineComments true
/* A comment
*/
hasComments true
/* A comment
*/
hasSemanticIndentation false
hasCaseInsensitiveIdentifiers false
wikipedia https://en.wikipedia.org/wiki/CUDA
example
import numpy
from pycublas import CUBLASMatrix
A = CUBLASMatrix( numpy.mat([[1,2,3]],[[4,5,6]],numpy.float32) )
B = CUBLASMatrix( numpy.mat([[2,3]],[4,5],[[6,7]],numpy.float32) )
C = A*B
print C.np_mat()
related linux c fortran opengl opencl llvmir python perl java ruby lua haskell r matlab idl mathematica common-lisp f-sharp
summary CUDA is a parallel computing platform and application programming interface (API) model created by Nvidia. It allows software developers and software engineers to use a CUDA-enabled graphics processing unit (GPU) for general purpose processing – an approach termed GPGPU (General-Purpose computing on Graphics Processing Units). The CUDA platform is a software layer that gives direct access to the GPU's virtual instruction set and parallel computational elements, for the execution of compute kernels. The CUDA platform is designed to work with programming languages such as C, C++, and Fortran. This accessibility makes it easier for specialists in parallel programming to use GPU resources, in contrast to prior APIs like Direct3D and OpenGL, which required advanced skills in graphics programming. Also, CUDA supports programming frameworks such as OpenACC and OpenCL. When it was first introduced by Nvidia, the name CUDA was an acronym for Compute Unified Device Architecture, but Nvidia subsequently dropped the use of the acronym.
pageId 7933386
dailyPageViews 1966
created 2006
backlinksCount 444
revisionCount 1315
appeared 2007
githubBigQuery Cuda
repos 4379
users 3764
linguistGrammarRepo https://github.com/harrism/sublimetext-cuda-cpp
firstCommit 2012
lastCommit 2017
committerCount 3
commitCount 25
sampleCount 2
example
#include <stdio.h>
#include <cuda_runtime.h>
/**
* CUDA Kernel Device code
*
* Computes the vector addition of A and B into C. The 3 vectors have the same
* number of elements numElements.
*/
__global__ void
vectorAdd(const float *A, const float *B, float *C, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < numElements)
{
C[i] = A[i] + B[i];
}
}
/**
* Host main routine
*/
int
main(void)
{
// Error code to check return values for CUDA calls
cudaError_t err = cudaSuccess;
// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 256;
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);
err = cudaGetLastError();
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to launch vectorAdd kernel (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
// Reset the device and exit
err = cudaDeviceReset();
return 0;
}
isbndb 33
year|publisher|title|authors|isbn13
2010|Addison-Wesley Professional|CUDA by Example: An Introduction to General-Purpose GPU Programming|Sanders / Kandrot, Jason|9780131387683
2013|Pearson|Cuda Handbook|Nicholas Wilt|9780133261493
2014|Machinery Industry Press|CUDA Programming: A Developers Guide to Parallel Computing with GPUs(Chinese Edition)|[ MEI ] KU KE ( Shane Cook )|9787111448617
2010|Addison-Wesley Professional|CUDA by Example: An Introduction to General-Purpose GPU Programming|Sanders, Jason and Kandrot, Edward|9780132180139
2019-09-27T00:00:01Z|Packt Publishing|Learn CUDA Programming: A beginner's guide to GPU programming and parallel computing with CUDA 10.x and C/C++|Han, Jaegeun and Sharma, Bharatkumar|9781788996242
2018|Packt Publishing|Hands-On GPU Programming with Python and CUDA: Explore high-performance parallel computing with CUDA|Tuomanen, Dr. Brian|9781788993913
2014|Wrox|Professional CUDA C Programming|Cheng, John and Grossman, Max and McKercher, Ty|9781118739327
2014|Wrox|Professional CUDA C Programming|Cheng, John and Grossman, Max and McKercher, Ty|9781118739310
2013|Addison-Wesley Professional|The CUDA Handbook: A Comprehensive Guide to GPU Programming|Wilt, Nicholas Wilt|9780321809469
2013|Morgan Kaufmann|CUDA Fortran for Scientists and Engineers: Best Practices for Efficient CUDA Fortran Programming|Ruetsch, Gregory and Fatica, Massimiliano|9780124169708
2012|Morgan Kaufmann|CUDA Programming: A Developer's Guide to Parallel Computing with GPUs (Applications of Gpu Computing)|Cook, Shane|9780124159884
2011|Morgan Kaufmann|CUDA Application Design and Development|Farber, Rob|9780123884329
2013|Addison-Wesley Professional|CUDA Handbook, The: A Comprehensive Guide to GPU Programming|Wilt, Nicholas|9780133261509
2012|Morgan Kaufmann|CUDA Programming: A Developer's Guide to Parallel Computing with GPUs (Applications of Gpu Computing)|Cook, Shane|9780124159334
2011|Morgan Kaufmann|CUDA Application Design and Development|Farber, Rob|9780123884268
2018|Apress|Deep Belief Nets in C++ and CUDA C: Volume 2: Autoencoding in the Complex Domain|Masters, Timothy|9781484236468
2012|LAP LAMBERT Academic Publishing|Image Processing Using CUDA: Designing an object oriented framework for CUDA based image processing|Shete, Pritam and Bose, Surojit Kumar|9783659135569
2016|Createspace Independent Publishing Platform|Cuda Winner|Charles Brown|9781540660251
2016|Createspace Independent Publishing Platform|Cuda For Newbies|Dylan Skinner|9781540604323
2015|Addison-Wesley Longman, Incorporated|Cuda For Engineers|Duane Storti|9780134177519
2010|Pearson|CUDA by Example|Jason Sanders and Edward Kandrot|9780132180146
2019-09-27|Packt Publishing|Learn CUDA Programming|Jaegeun Han and Bharatkumar Sharma|9781788991292
20151102|Pearson Technology Group|CUDA for Engineers|Duane Storti; Mete Yurtoglu|9780134177557
2014-09-02|Wiley Professional Development (P&T)|Professional CUDA C Programming|John Cheng, Max Grossman, Ty McKercher|9781118739273
20220602|Cambridge University Press|Programming in Parallel with CUDA Programming in Parallel with CUDA|Richard Ansorge|9781108858885
09/2013|Elsevier S & T|CUDA Fortran for Scientists and Engineers: Best Practices for Efficient CUDA Fortran Programming|Ruetsch, Gregory; Fatica, Massimiliano|9780124169722
20180119|Taylor & Francis|GPU Parallel Program Development Using CUDA|Tolga Soyata|9781498750806
27-11-2018|Packt Publishing|Hands-On GPU Programming with Python and CUDA|Dr. Brian Tuomanen|9781788995221
|Wrox|Nvidia Gpu Programming: Massively Parallel Programming With Cuda|Cook and Shane|9780470939055
2013|Addison-wesley|The Cuda Handbook: A Comprehensive Guide To Gpu Programming|Wilt, Nicholas , 1970-|9780133261516
20180704|Springer Nature|Deep Belief Nets in C and CUDA C: Volume 3|Timothy Masters|9781484237212
20180423|Springer Nature|Deep Belief Nets in C and CUDA C: Volume 1|Timothy Masters|9781484235911
2019-01-23|LAP LAMBERT Academic Publishing|Novel Open Source Morphology Using GPU Processing With LTU- CUDA|Jagannathan Gnanasekaran|9786139444151
githubLanguage Cuda
fileExtensions cu cuh
trendingProjects
author name avatar url language languageColor stars forks currentPeriodStars description
rapidsai cudf https://github.com/rapidsai.png https://github.com/rapidsai/cudf Cuda #3A4E3A 1931 274 187 "cuDF - GPU DataFrame Library"
DeepGraphLearning graphvite https://github.com/DeepGraphLearning.png https://github.com/DeepGraphLearning/graphvite Cuda #3A4E3A 433 47 285 "A general and high-performance graph embedding system for various applications"
trendingProjectsCount 3
type programming
aceMode c_cpp
codemirrorMode clike
codemirrorMimeType text/x-c++src
tmScope source.cuda-c++
repos 18135
indeedJobs cuda engineer
2017 483
linkedInSkill cuda
2018 28572
semanticScholar 29
year|title|doi|citations|influentialCitations|authors|paperId
2010|CUDA by Example: An Introduction to General-Purpose GPU Programming|10.12694/SCPE.V11I4.663|1084|118|Jie Cheng|64ce52ec9f550ddd980e209ca68ff38947cf9061
2012|accULL: An OpenACC Implementation with CUDA and OpenCL Support|10.1007/978-3-642-32820-6_86|84|2|Ruymán Reyes and I. López-Rodríguez and J. Fumero and F. Sande|871d9641582562f9a83ed785ce3051f3e9e95483
2011|GPU programming in a high level language: compiling X10 to CUDA|10.1145/2212736.2212744|59|8|D. Cunningham and R. Bordawekar and V. Saraswat|c0f1c45ef7c9fb9751fdcc268daac62b70a7bd78
2009|GPU-accelerated SART reconstruction using the CUDA programming environment|10.1117/12.811559|49|5|B. Keck and H. Hofmann and H. Scherl and M. Kowarschik and J. Hornegger|e7b73201f2763e2e7b6d828b6dfa95fdbe31ba17
2016|CAMPARY: Cuda Multiple Precision Arithmetic Library and Applications|10.1007/978-3-319-42432-3_29|37|4|M. Joldes and J. Muller and V. Popescu and W. Tucker|1f324e5b66a3710250b44db80506fd8fde4c712f
2012|Overview and comparison of OpenCL and CUDA technology for GPGPU|10.1109/APCCAS.2012.6419068|34|1|Ching-Lung Su and Po-Yu Chen and Chun-Chieh Lan and Lung-Sheng Huang and Kuo-Hsuan Wu|f88d8ee763f8fc5e80a59be045926f6df13ac9fc
2017|BARRACUDA: binary-level analysis of runtime RAces in CUDA programs|10.1145/3062341.3062342|25|7|Ariel Eizenberg and Yuanfeng Peng and Toma Pigli and William Mansky and Joseph Devietti|069794b44b81c8b0651c8ea39594a91cd6081142
2013|Efficient compilation of CUDA kernels for high-performance computing on FPGAs|10.1145/2514641.2514652|22|0|Alexandros Papakonstantinou and Karthik Gururaj and J. Stratton and Deming Chen and J. Cong and W. Hwu|f24f326226d8143a1ff0afed7042edcd85534a3b
2011|Evolving CUDA PTX programs by quantum inspired linear genetic programming|10.1145/2001858.2002026|14|0|L. F. Cupertino and C. D. Silva and D. M. Dias and M. Pacheco and C. Bentes|78c1cb63859f9ea84c772c8ec4fc72c7791a2a7c
2013|CUDA Expression Templates for Electromagnetic Applications on GPUs [EM Programmer's Notebook]|10.1109/MAP.2013.6735497|12|0|A. Breglia and A. Capozzoli and C. Curcio and A. Liseno|becd4d7bad6d6b316e755b4038fa3cccd00662f0
2014|C2CU : A CUDA C Program Generator for Bulk Execution of a Sequential Algorithm|10.1007/978-3-319-11194-0_14|11|0|Daisuke Takafuji and K. Nakano and Yasuaki Ito|827cf47256651fc955ce880efc65e8292d445401
2014|Parallelized Seeded Region Growing Using CUDA|10.1155/2014/856453|9|1|Seongjin Park and Jeongjin Lee and Hyunna Lee and Juneseuk Shin and Jinwook Seo and K. Lee and Y. Shin and B. H. Kim|9c2bc31d176bbea810a7c1b654054271efd75135
2020|Porting a Legacy CUDA Stencil Code to oneAPI|10.1109/IPDPSW50202.2020.00070|9|0|Steffen Christgau and T. Steinke|8a91d5e27422f66ecbf4d24965484a7a778e74f9
2020|Computer vision algorithms acceleration using graphic processors NVIDIA CUDA|10.1007/s10586-020-03090-6|9|0|Mouna Afif and Yahia Said and M. Atri|e3adb20131eedbbdb31befa59e40a1d32a3c4847
2015|SciPAL: Expression Templates and Composition Closure Objects for High Performance Computational Physics with CUDA and OpenMP|10.1145/2686886|8|0|S. Kramer and J. Hagemann|ad752065baa739eac4144fc98ce595cd6a68dfa2
2019|Real-time moving human detection using HOG and Fourier descriptor based on CUDA implementation|10.1007/s11554-019-00935-1|7|0|Haythem Bahri and Marwa Chouchene and F. Sayadi and Mohamed Atri|5eea36b60acc51b215442d4e04875d97066b59b6
2011|Using a commercial graphical processing unit and the CUDA programming language to accelerate scientific image processing applications|10.1117/12.872217|7|0|R. Broussard and R. Ives|af5e6a48632822ddff4d961f97a79bfedb58d4aa
2018|Efficient 2D Convolution Filters Implementations on Graphics Processing Unit Using NVIDIA CUDA|10.5815/IJIGSP.2018.08.01|6|0|Mouna Afif and Yahia Said and Mohamed Atri|29958dcd1f577c4961d495c203871028c8b23538
2016|Breast Cancer Prediction by Logistic Regression with CUDA Parallel Programming Support|10.4172/2572-4118.1000111|5|0|Aless and R. Peretti and F. Amenta|d28520dd4a74a1768a205fc0cedaae33a2a81758
2018|Efficient implementation of integrall image algorithm on NVIDIA CUDA|10.1109/ASET.2018.8379824|3|0|Mouna Afif and Yahia Said and Mohamed Atri|92787e77b59c0a25b8b39d18f33981a12cd50748
2014|Document clustering using Multi-Objective Genetic Algorithms with parallel programming based on CUDA|10.5220/0005057502800287|3|0|Jung Song Lee and Soon-cheol Park and Jong-Joo Lee and Han-hee Ham|4d06b455d105dd62d35197dc7b5df463f7a25ca4
2015|Programming in CUDA for Kepler and Maxwell Architecture|10.22456/2175-2745.56384|3|1|E. Clua and M. Zamith|a3f9bb343703d6cb8a5f772bc99fa0c9013b1ecd
2018|Research on Matrix Multiplication Based on the Combination of OpenACC and CUDA|10.1007/978-981-13-7025-0_10|2|0|Yuexing Wang|ad94df1c7457f50fde21feda7b646a3d681c10b0
2016|A Performance Study of Random Neural Network as Supervised Learning Tool Using CUDA|10.6138/JIT.2016.17.4.20141014D|2|0|S. Basterrech and J. Janousek and V. Snášel|c175a68fbf783a77d34357ae0977ecf1824aaf5c
2017|GPU accelerated foreground segmentation using CodeBook model and shadow removal using CUDA|10.1109/CCAA.2017.8229924|2|0|Praveen Gudivaka and N. Mishra and A. Agrawal|088708e87e9e34445bcccd414ab4b729acd9219c
2021|Impact of CUDA and OpenCL on Parallel and Distributed Computing|10.1109/ICEEE52452.2021.9415927|2|0|A. Asaduzzaman and Alec Trent and S. Osborne and C. Aldershof and F. Sibai|b8dd58407502f25fdc07b2ae83659e247c4b1f9b
2019|Cuda Parallelization of Commit Framework for Efficient Microstructure-Informed Tractography|10.1109/ISBI.2019.8759098|1|0|Erick Hernandez-Gutierrez and Alonso Ramirez-Manzanares and J. Marroquín and Mario Ocampo-Pineda and Alessandro Daducci|65c7d25ad189d1d58232eb304035f22f0ee3c59e
2019|Detecting Undefined Behaviors in CUDA C|10.1109/ACCESS.2019.2954143|1|0|Wentao Li and Jianhua Sun and Hao Chen|00add03c6fa715baf0ad2798848ffc1817bf6a7e
2014|A Compiler Translate Directive-Based Language to Optimized CUDA|10.1109/HPCC.2014.162|1|0|Feng Li and Hong An and Weihao Liang and Xiaoqiang Li and Yichao Cheng and Xia Jiang|2b948c35fdb64183cc0a88fc9a84960187a15d6d
goodreads
title|year|author|goodreadsId|rating|ratings|reviews
Cuda by Example: An Introduction to General-Purpose Gpu Programming|2010|Jason Sanders|12911195|4.03|131|13
Professional Cuda C Programming|2014|John Cheng|39965022|4.14|7|0