In [1]:
import torch

# Environment sanity check: report the PyTorch build, whether CUDA is usable,
# and (if it is) run one small matmul on the GPU.
print("Torch version:", torch.__version__)
print("CUDA runtime (torch):", torch.version.cuda)
print("CUDA available:", torch.cuda.is_available())
print("GPU count:", torch.cuda.device_count())

if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))
    x = torch.randn(1000, 1000, device="cuda")
    y = torch.matmul(x, x)
    # CUDA kernels are launched asynchronously. Wait for the matmul to finish so
    # that a failing kernel raises here, before the success message is printed.
    torch.cuda.synchronize()
    print("Computation OK, tensor device:", y.device)

Torch version: 2.0.1
CUDA runtime (torch): 11.8
CUDA available: True
GPU count: 1
GPU name: Quadro M2000
Computation OK, tensor device: cuda:0


In [7]:
import torch
import time

# Matrix-multiplication benchmark: time a single N x N matmul on the CPU and,
# when a CUDA device is present, on the GPU.

# Use a fairly large matrix; sizes that are too small give unstable measurements.
N = 4000

# CPU
x_cpu = torch.randn(N, N)
# perf_counter() is a monotonic, high-resolution clock meant for timing intervals;
# time.time() is a wall clock and can jump if the system time is adjusted.
start = time.perf_counter()
y_cpu = torch.matmul(x_cpu, x_cpu)
cpu_time = time.perf_counter() - start
print("CPU time:", cpu_time)

# GPU (skipped when no CUDA device is available; gpu_time then stays None)
gpu_time = None
if torch.cuda.is_available():
    x_gpu = torch.randn(N, N, device="cuda")

    # Warm-up: the first GPU call carries one-off start-up overhead.
    # The result is deliberately not kept, so it does not hold GPU memory.
    torch.matmul(x_gpu, x_gpu)
    torch.cuda.synchronize()

    start = time.perf_counter()
    y_gpu = torch.matmul(x_gpu, x_gpu)
    # Kernels run asynchronously; wait for completion before stopping the clock.
    torch.cuda.synchronize()
    gpu_time = time.perf_counter() - start
    print("GPU time:", gpu_time)

    print("Speedup (CPU/GPU):", cpu_time / gpu_time)
else:
    print("CUDA not available; skipping GPU benchmark.")

CPU time: 0.4007446765899658
GPU time: 0.14691758155822754
Speedup (CPU/GPU): 2.727683591981396


In [8]:
import torch
import time
import torch.nn.functional as F

# Convolution benchmark: time repeated 5x5 conv2d calls over a batch of
# 16 single-channel 512x512 tensors on the CPU and, when available, on the GPU.

# Simulated image stack
x_cpu = torch.randn(16, 1, 512, 512)
kernel = torch.randn(1, 1, 5, 5)

# Number of timed convolutions; the same count is used for CPU and GPU.
n_repeats = 50

# CPU
# Warm-up call so the CPU measurement is treated the same way as the GPU one.
F.conv2d(x_cpu, kernel, padding=2)

# perf_counter() is a monotonic, high-resolution clock meant for timing intervals.
start = time.perf_counter()
for _ in range(n_repeats):
    F.conv2d(x_cpu, kernel, padding=2)
cpu_time = time.perf_counter() - start
print("CPU conv time:", cpu_time)

# GPU (skipped when no CUDA device is available; gpu_time then stays None)
gpu_time = None
if torch.cuda.is_available():
    x_gpu = x_cpu.cuda()
    kernel_gpu = kernel.cuda()

    # Warm-up
    F.conv2d(x_gpu, kernel_gpu, padding=2)
    torch.cuda.synchronize()

    start = time.perf_counter()
    for _ in range(n_repeats):
        F.conv2d(x_gpu, kernel_gpu, padding=2)
    # Kernels run asynchronously; wait for all of them before stopping the clock.
    torch.cuda.synchronize()
    gpu_time = time.perf_counter() - start
    print("GPU conv time:", gpu_time)

    print("Speedup:", cpu_time / gpu_time)
else:
    print("CUDA not available; skipping GPU benchmark.")

CPU conv time: 1.9020602703094482
GPU conv time: 0.3018076419830322
Speedup: 6.302227000654884


In [3]:
import torch
import time

# Larger matrix-multiplication benchmark: time a single N x N matmul on the CPU
# and, when a CUDA device is present, on the GPU.

cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)
# get_device_name(0) raises when there is no CUDA device, so only query it
# after the availability check.
if cuda_available:
    print("GPU:", torch.cuda.get_device_name(0))

N = 12000

# -------- CPU test --------
x_cpu = torch.randn(N, N)
# perf_counter() is a monotonic, high-resolution clock meant for timing intervals.
start = time.perf_counter()
y_cpu = torch.matmul(x_cpu, x_cpu)
cpu_time = time.perf_counter() - start
print("CPU time:", cpu_time)

# -------- GPU test --------
# Skipped when no CUDA device is available; gpu_time then stays None.
gpu_time = None
if cuda_available:
    x_gpu = torch.randn(N, N, device="cuda")

    # Warm-up. The result is deliberately not kept, so the extra N x N tensor
    # does not stay allocated on the GPU during the timed run.
    torch.matmul(x_gpu, x_gpu)
    torch.cuda.synchronize()

    start = time.perf_counter()
    y_gpu = torch.matmul(x_gpu, x_gpu)
    # Kernels run asynchronously; wait for completion before stopping the clock.
    torch.cuda.synchronize()
    gpu_time = time.perf_counter() - start
    print("GPU time:", gpu_time)

    print("Speedup (CPU/GPU):", cpu_time / gpu_time)
else:
    print("CUDA not available; skipping GPU benchmark.")

CUDA available: True
GPU: Quadro M2000
CPU time: 9.478413105010986
GPU time: 2.6541922092437744
Speedup (CPU/GPU): 3.5711102880946033


In [5]:
import suite2p
from suite2p.registration import register

In [6]:
# List the attribute names of the `register` module imported from
# suite2p.registration (the cell output shows the resulting list).
dir(register)

['Any',
 'Dict',
 'TqdmToLogger',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'assign_reg_io',
 'bidi',
 'check_offsets',
 'compute_crop',
 'compute_filters_and_norm',
 'compute_reference',
 'compute_shifts',
 'default_settings',
 'device',
 'logger',
 'logging',
 'medfilt',
 'nonrigid',
 'normalize_reference_image',
 'np',
 'os',
 'path',
 'pick_initial_reference',
 'register_frames',
 'registration_outputs_to_dict',
 'registration_wrapper',
 'rigid',
 'save_tiff',
 'shift_frames',
 'shift_frames_and_write',
 'time',
 'torch',
 'trange',
 'utils',
 'warn']