In [None]:
import torch
import time
import numpy as np
import timm
import psutil
import os
import gc

# --- 1. check systeme ---
print("üìä --- diagnostic systeme ---")
ram_sys = psutil.virtual_memory()
print(f"ram totale: {ram_sys.total / 1e9:.2f} go")
print(f"ram dispo: {ram_sys.available / 1e9:.2f} go")

if torch.cuda.is_available():
    print(f"gpu: {torch.cuda.get_device_name(0)}")
    print(f"vram allou√©e: {torch.cuda.memory_allocated() / 1e9:.2f} go")
else:
    print("‚ùå alerte: pas de gpu detect√© !")
    exit()

device = torch.device('cuda')

# --- 2. test vitesse ram (cpu) ---
print("\nüß† --- test 1: lecture/ecriture ram (numpy) ---")
try:
    # creation bloc 1go
    size_mb = 1000
    print(f"creation array {size_mb}mo...", end="")
    t0 = time.time()
    dummy_data = np.random.randint(0, 255, (2000, 336, 336, 3), dtype=np.uint8)
    dt = time.time() - t0
    print(f" ok ({dt:.3f}s)")
    
    # lecture
    print("lecture aleatoire...", end="")
    t0 = time.time()
    _ = dummy_data[0:500] + 1 # operation simple
    dt = time.time() - t0
    speed = (500 * 336 * 336 * 3 / 1e6) / dt # mb/s
    print(f" ok | vitesse: {speed:.0f} mb/s")
except Exception as e:
    print(f"\n‚ùå echec ram: {e}")

# --- 3. test bande passante (ram -> gpu) ---
print("\nüöÄ --- test 2: transfert pcie (ram -> gpu) ---")
try:
    # on prend un batch de 64 images (taille standard)
    batch_cpu = torch.from_numpy(dummy_data[0:64])
    
    # warmup
    _ = batch_cpu.to(device, non_blocking=True)
    torch.cuda.synchronize()
    
    t0 = time.time()
    # on boucle 100 fois pour moyenner
    for _ in range(100):
        batch_gpu = batch_cpu.to(device, non_blocking=True)
    torch.cuda.synchronize()
    dt = time.time() - t0
    
    avg_time = dt / 100
    print(f"temps moyen transfert (batch 64): {avg_time*1000:.1f} ms")
    if avg_time > 0.1: print("‚ö†Ô∏è alerte: transfert tr√®s lent (>100ms). probleme drivers ou bus satur√©.")
    else: print("‚úÖ transfert ok (rapide).")
    
except Exception as e:
    print(f"\n‚ùå echec transfert: {e}")

# --- 4. test calcul gpu (convnext tiny) ---
print("\nüî• --- test 3: calcul brut gpu (forward/backward) ---")
try:
    model = timm.create_model('convnextv2_tiny.fcmae_ft_in22k_in1k', pretrained=False, num_classes=27).to(device)
    model.train()
    
    # input gpu deja pret (float)
    x = torch.rand(64, 3, 336, 336, device=device)
    y = torch.randint(0, 27, (64,), device=device)
    optimizer = torch.optim.Adam(model.parameters())
    criterion = torch.nn.CrossEntropyLoss()
    
    # warmup
    _ = model(x)
    torch.cuda.synchronize()
    
    print("mesure vitesse calcul pure (10 iters)...")
    t0 = time.time()
    for _ in range(10):
        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
    torch.cuda.synchronize()
    dt = time.time() - t0
    
    img_per_sec = (64 * 10) / dt
    print(f"vitesse calcul: {img_per_sec:.0f} img/s")
    
    if img_per_sec < 50: print("‚ö†Ô∏è alerte: gpu tres lent. probleme frequence ou chauffe.")
    else: print("‚úÖ gpu operationnel.")

except Exception as e:
    print(f"\n‚ùå echec gpu: {e}")

print("\nüèÅ --- fin du diagnostic ---")

  from .autonotebook import tqdm as notebook_tqdm


üìä --- diagnostic systeme ---
ram totale: 137.37 go
ram dispo: 121.37 go
gpu: NVIDIA GeForce RTX 4070
vram allou√©e: 0.00 go

üß† --- test 1: lecture/ecriture ram (numpy) ---
 ok (1.656s)ay 1000mo...
lecture aleatoire... ok | vitesse: 1568 mb/s

üöÄ --- test 2: transfert pcie (ram -> gpu) ---
temps moyen transfert (batch 64): 1.9 ms
‚úÖ transfert ok (rapide).

üî• --- test 3: calcul brut gpu (forward/backward) ---
mesure vitesse calcul pure (10 iters)...


In [1]:
import psutil
import torch
import gc

# --- check nettoyage ---
# on verifie juste que le reboot a march√©
ram = psutil.virtual_memory()
total = ram.total / 1e9
dispo = ram.available / 1e9

print(f"üìä etat ram apres reboot:")
print(f"   -> total: {total:.1f} go")
print(f"   -> libre: {dispo:.1f} go")

if dispo < 50:
    print("‚ùå alerte: quelque chose consomme encore ta ram !")
else:
    print("‚úÖ feu vert: le terrain est propre.")
    
if torch.cuda.is_available():
    print(f"‚úÖ gpu detect√©: {torch.cuda.get_device_name(0)}")
else:
    print("‚ùå gpu non detect√©")

üìä etat ram apres reboot:
   -> total: 137.4 go
   -> libre: 121.5 go
‚úÖ feu vert: le terrain est propre.
‚úÖ gpu detect√©: NVIDIA GeForce RTX 4070
