<a href="https://colab.research.google.com/github/ellariona/labHPC/blob/lab1/Lab0_Serebryakova_6133.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pycuda

Collecting pycuda
  Downloading pycuda-2021.1.tar.gz (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 4.2 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting mako
  Downloading Mako-1.1.5-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 4.3 MB/s 
[?25hCollecting pytools>=2011.2
  Downloading pytools-2021.2.8.tar.gz (63 kB)
[K     |████████████████████████████████| 63 kB 2.0 MB/s 
Building wheels for collected packages: pycuda, pytools
  Building wheel for pycuda (PEP 517) ... [?25l[?25hdone
  Created wheel for pycuda: filename=pycuda-2021.1-cp37-cp37m-linux_x86_64.whl size=627558 sha256=c87a5bb39ebef5de5dec984fb5cd4dfb41d97cf296ac0d972b5c9fe78355d153
  Stored in directory: /root/.cache/pip/wheels/c4/ef/49/dc6a5feb8d980b37c83d465ecab24949a6aa19458522a9e001
  Building wheel for pytools (setup.py) ... [?25l[?25hdo

In [3]:
import pycuda.autoinit

from pycuda.tools import make_default_context
make_default_context().get_device().name()

'Tesla K80'

In [2]:
!lscpu |grep 'Model name'

Model name:          Intel(R) Xeon(R) CPU @ 2.30GHz


In [4]:
import numpy as np
from numpy import linalg as la
from pycuda import driver, compiler, gpuarray, tools
import pycuda.autoinit
import time
import seaborn as sns
import pandas as pd


matrix_size = 100

#Вычисляем GPU
def matmul_GPU(a_gpu,b_gpu,matrix_size=matrix_size):
    kernel_code_template = """
    __global__ void MatrixMulKernel(float *A, float *B, float *C)
    {
const int row =  threadIdx.y + blockIdx.y * blockDim.y;
                const int column = threadIdx.x + blockIdx.x * blockDim.x;
                const int N = 128;
                for(int i = 0; i < N; i++){
                        C[column + row * N] += A[i + row * N] * B[column + i * N];              
                }       
        }
"""

    t_size = 2
    b_size = t_size

    kernel_code = kernel_code_template % {
        'matrix_size': matrix_size,
        'b_size': b_size,
        }

    mod = compiler.SourceModule(kernel_code)
    
    c_gpu = gpuarray.empty((matrix_size, matrix_size), np.float32)

    matrixmul = mod.get_function("MatrixMulKernel")

    matrixmul(
        a_gpu, b_gpu,
        c_gpu,
        grid = (matrix_size // t_size, matrix_size // t_size),
        block = (t_size, t_size, 1),
        )

    return c_gpu

#Вычисляем, CPU
def matmul_CPU(A, B): #A - первая матрица; #B - вторая матрица.
    C = np.zeros(shape=(A.shape[0], B.shape[1]))
    for i in range(len(A)):
        for j in range(len(B[0])):
            for k in range(len(B)):
                C[i][j] += A[i][k] * B[k][j]
    return C

In [5]:
cpu_time = []
gpu_time = []
diffs = []

for size in [128, 256, 512]:
      a_cpu = np.random.randn(size, size).astype(np.float32)
      b_cpu = np.random.randn(size, size).astype(np.float32)
      #Измерение времени CPU
      startCPU = time.time()
      c_cpu = matmul_CPU(a_cpu, b_cpu)
      timeCPU = time.time() -startCPU
      cpu_time.append(timeCPU)
      #Измерение времени GPU
      a_gpu = gpuarray.to_gpu(a_cpu)
      b_gpu = gpuarray.to_gpu(b_cpu)
      startGPU = time.time()
      c_gpu = matmul_GPU(a_gpu, b_gpu, size)
      timeGPU = time.time()-startGPU
      gpu_time.append(timeGPU)
      #Работаем с разницей CPU-GPU
      differensetime = timeCPU-timeGPU
      diffs.append(differensetime)

      print("Размерность матрицы:", size)
      print("CPU:", timeCPU)
      print("GPU:", timeGPU)
      print("CPU-GPU:", differensetime)
      print("\n")




Размерность матрицы: 128
CPU: 3.0748579502105713
GPU: 0.9866104125976562
CPU-GPU: 2.088247537612915


Размерность матрицы: 256
CPU: 24.28925085067749
GPU: 0.0068547725677490234
CPU-GPU: 24.28239607810974


Размерность матрицы: 512
CPU: 192.6665575504303
GPU: 0.02336859703063965
CPU-GPU: 192.64318895339966


