# Solving Eigenproblem

In this notebook we build the Hessian and solve for its first 64 normal modes using Inching-JDM.
Note that the first-ever run of Inching on your computer may take additional time for compilation. On a laptop with an RTX3060, `1jj2` (a fairly small system with about 98k atoms) takes around 4 minutes. The list `pdbavail` should contain the paths of the PDB files whose normal modes you want to compute. The string `Benchmarking_folder` should contain the path of the folder where the output will be stored.

In [3]:
import glob
import platform

# Structures to process. Each entry is the path of one PDB file; list several
# paths here to compute normal modes for more than one structure in a run.
pdbavail = [
    '../../DataRepo/PdbByAtomCount/1jj2.pdb',
]

# Folder that receives the pickled results written by this notebook.
Benchmarking_folder = "../../DataRepo/VisualizationExample1jj2/"


|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------------------|
| GPU reserved memory   |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------------------|
| Non-releasable memory |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------

100%|██████████| 99/99 [00:01<00:00, 58.84it/s]


N_neighbor within 8.0 angstrom Mean 99.97464051226368, Std 24.62011211610712


  0%|          | 0/98543 [00:05<?, ?it/s]


start eigsh cupy


100%|██████████| 1024/1024 [00:00<?, ?it/s]
100%|██████████| 1024/1024 [00:03<00:00, 287.27it/s]


Mean number of Gaps > 100 is 17.998046875. Mean Gap Length Given Gap is 386.32246337493217
Max number of Gaps > 100 is 41. Max Gap Length Given Gap is 5318
Median number of Gaps > 100 is 19.0. Median Gap Length Given Gap is 224.0
Total Entry Savings 684354138 which is 66.2311271111961 percent of a Rectangular Batch
Nnz in Hessian (L+D) is 44776548.0. This will occupy 0.33030736818909645 GB for (L+D) data and at max 0.33030736818909645 GB for all indexings. Acceptable?


  self.frontal_gap_offset[i] = torch.tensor(frontal_gap_offset,dtype=torch.int32, device='cpu')#.clone().detach().cpu().requires_grad_(False) #hare_memory_()
100%|██████████| 1024/1024 [00:20<00:00, 49.76it/s]


(44473934,)
Start JDM Coarse Iter
0, 12.079529687981827, 33.97494059313371, 1e-06, 0
100, 0.8544897032975662, 2.4012954144571603, 1e-06, 0
200, 0.013569297995192934, 1.0001799676113279, 1e-06, 1
300, 1.9770407220015484e-13, 1.0056930046637298, 1e-06, 6
300, 0.0002642042465916983, 1.0075024546461189, 1e-06, 7
400, 5.278819739649337e-08, 1.0208027799014208, 1e-06, 14
500, 2.2888660685547714e-06, 1.0398542605385386, 1e-06, 22
600, 2.3168573798527376e-09, 1.055265600617888, 1e-06, 29
700, 5.0290971209272616e-12, 1.067388727076183, 1e-06, 36
800, 3.7464454936677796e-06, 1.084422735452767, 1e-06, 44
900, 1.1141221281211578e-07, 1.0990411620366534, 1e-06, 51
1000, 1.3675753682272196e-07, 1.1152570115371419, 1e-06, 58
DONE. We went through 1079 coarse iter, 64 eigval converged
RUNNNTIME 245.26331233978271
0.9999999999999947 6.331547367522808e-13
0.9999999999999988 3.099684976223204e-13
0.9999999999999998 4.730767706650559e-13
0.9999999999999999 3.2407877044927103e-13
1.0000000000000004 3.47687

In [None]:

# Operating-system name reported by the standard library
# ("Windows", "Darwin" or "Linux"); it is embedded in the output file names.
User_Platform = platform.system()

# --- Input format -----------------------------------------------------------
PDBCIF = "Pdb"              # tag used in the name of the atom-count cache file

# --- Hessian construction ---------------------------------------------------
User_rc_Gamma = 8.0         # neighbour cutoff, forwarded as rc_Gamma
User_maxleafsize = 100      # forwarded as maxleafsize to the Hessian builder
User_PlusI = 1.0            # forwarded to the Hessian builder and subtracted
                            # again from the eigenvalues before they are saved

# --- Eigenproblem size and iteration limits ---------------------------------
User_n_mode = 64            # number of modes requested from the solver (k)
User_MaxIter = 15000        # forwarded as maxiter to the eigensolver
User_tol = 1e-15            # not referenced elsewhere in this notebook

# --- JDM Params -------------------------------------------------------------
User_GapEstimate = 1e-6
User_SolverName = 'gmres'   # not referenced elsewhere in this notebook
User_SolverMaxIter = 20     # forwarded as User_CorrectionSolverMaxiter
User_EigTolerance = 1e-12   # forwarded as tol to the eigensolver



# PART00: import every dependency of the notebook and put torch / cupy into a
# deterministic, double-precision, no-autograd configuration on GPU 0.
PART00_Import = True
if PART00_Import:
    # --- Standard library ---
    import os
    import gc
    import sys
    import pickle
    import time
    import platform

    # --- Third party (relative order kept as in the original cell) ---
    import numpy as np
    import tqdm
    import torch
    import cupy
    from cupy import cublas
    from scipy.spatial import cKDTree

    # --- Project: extend sys.path so that InchingLite can be imported when
    # the notebook is run from inside the repository tree ---
    sys.path.append('..')
    sys.path.append('../../')
    sys.path.append('../../InchingLite/Burn/')

    import InchingLite.util
    import InchingLite.Fuel.Coordinate.T1
    import InchingLite.Fuel.Coordinate.T2
    import InchingLite.Burn.Coordinate.T1
    import InchingLite.Burn.Coordinate.T3

    from InchingLite.Fuel.T1 import Xnumpy_SparseCupyMatrixUngappped

    import InchingLite.Burn.Visualisation.T1
    import InchingLite.Burn.Visualisation.T2

    # ============================
    # Some torch speed up tips
    # =============================

    # Turn on cuda optimizer
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # disable debugs NOTE use only after debugging
    # The autograd profilers (profiler.profile / profiler.emit_nvtx) are
    # context managers that only act while entered, so merely constructing
    # them with enabled=False did nothing; those calls were dropped.
    torch.autograd.set_detect_anomaly(False)

    # Disable gradient tracking
    # torch.no_grad() / torch.inference_mode() are context managers as well:
    # creating them without `with` leaves autograd switched on. Calling
    # set_grad_enabled as a plain function changes the mode for good.
    torch.set_grad_enabled(False)

    # Fixed seeds for reproducible runs.
    torch.manual_seed(0)
    cupy.random.seed(seed = 0)

    # NOTE In case any error showup
    # NOTE(review): CUDA reads this variable when the device context is
    # created; the cupy seeding above may already have initialised the device,
    # in which case the setting has no effect - confirm before relying on it.
    os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

    # Reset Cuda and Torch
    device = torch.device(0)
    torch.set_default_dtype(torch.float64)
    # NOTE(review): set_default_tensor_type is deprecated in recent PyTorch
    # releases; kept because the minimum supported version is not known here.
    torch.set_default_tensor_type(torch.cuda.DoubleTensor)

    try:
        InchingLite.util.TorchEmptyCache()
    except RuntimeError:
        print("The GPU is free to use. THere is no existing occupant")

    try:
        print(torch.cuda.memory_summary(device = 0, abbreviated=True))
    except KeyError:
        print("The GPU is free to use. THere is no existing occupant")


# =======================
# Determine N_atoms
# ==============================
# NOTE 2 minutes
# Optional pre-pass (switched off by default): count the atoms of every input
# file, reorder `pdbavail` by ascending atom count and cache the result as a
# pickle inside Benchmarking_folder so the next run can simply reload it.
PART01_ListOfPDB = False
if PART01_ListOfPDB:
    size_cache_fn = "%s/%sSize.pkl" %(Benchmarking_folder, PDBCIF)

    if not os.path.exists(size_cache_fn):
        # No cache yet: load each structure once just to read its atom count.
        pdbavail = [InchingLite.util.WinFileDirLinux(i) for i in pdbavail]

        size = []
        for pdbfn in tqdm.tqdm(pdbavail):
            X_df, X_top = InchingLite.util.BasicPdbCifLoading(pdbfn)
            protein_xyz = X_df[['x','y','z']].to_numpy().astype(np.float64)
            size.append(protein_xyz.shape[0])
            # Drop the coordinates right away; only the count is needed.
            del X_df, protein_xyz
            gc.collect()

        # Smallest structure first.
        ascending = np.argsort(size).tolist()
        pdbavail = [pdbavail[i] for i in ascending]
        print("Ranked file size in atom number")

        # Map "file name without directory and extension" -> atom count.
        # Both sequences are in ascending size order, so they line up.
        pdbids = [i.split("/")[-1].split(".")[0] for i in pdbavail]
        sizedict = dict(zip(pdbids, sorted(size)))
        print(sizedict)

        with open(size_cache_fn, "wb") as fn:
            pickle.dump((pdbavail, sizedict), fn, protocol=4)
    else:
        # Reuse the ordering and sizes computed by an earlier run.
        with open(size_cache_fn, "rb") as fn:
            pdbavail, sizedict = pickle.load(fn)




# Main driver: for every input structure build the sparse Hessian on the GPU,
# solve for the lowest User_n_mode eigenpairs with the Jacobi-Davidson solver,
# write eigenvalues / eigenvectors / per-mode residuals to Benchmarking_folder
# and release the GPU memory before the next structure.

# The solver import and the GPU name do not depend on the structure being
# processed, so they are resolved once instead of on every iteration.
from InchingLite.Burn.JacobiDavidsonHotellingDeflation.T1 import S_HeigvalJDMHD_HeigvecJDMHD

devices_ = list(range(torch.cuda.device_count()))
device_names_ = [torch.cuda.get_device_name(d) for d in devices_]
User_Device = device_names_[0]

for pdbfn in pdbavail:

    # File name without directory and extension; used in output file names.
    pdbid = pdbfn.split("/")[-1].split(".")[0]

    X_df, X_top = InchingLite.util.BasicPdbCifLoading(pdbfn)
    protein_xyz = X_df[['x','y','z']].to_numpy().astype(np.float64)
    # NOTE PDB format digit decimal do no destroy collinearity!
    n_atoms = protein_xyz.shape[0]

    # ===============================================
    # K-d Cuthill (NOTE CPU np array)
    # ===================================
    # NOTE Cuthill Order and Undo
    # The reported runtime is measured from here, i.e. it covers reordering,
    # Hessian construction and the eigensolve but not the file loading above.
    st = time.time()
    cuthill_order, cuthill_undoorder = InchingLite.Fuel.Coordinate.T1.X_KdCuthillMckeeOrder(
        protein_xyz,
        rc_Gamma = User_rc_Gamma,
        Reverse = True,
    )
    protein_xyz = protein_xyz[cuthill_order,:]
    # NOTE(review): this tree is not used again in the visible code; it is
    # kept in case a later cell relies on it.
    protein_tree = cKDTree(protein_xyz, leafsize=16, compact_nodes=True,
                           copy_data=False, balanced_tree=True, boxsize=None)

    print('start eigsh cupy')

    # Handles on CuPy's default device / pinned-host memory pools.
    mempool = cupy.get_default_memory_pool()
    pinned_mempool = cupy.get_default_pinned_memory_pool()

    # ==================
    # Cupy hessian
    # =====================
    PART03_MakeCupyHessian = True
    if PART03_MakeCupyHessian:
        # NOTE Nnz neighborhood after cuthill
        NnzMinMaxDict, HalfNnz = InchingLite.Fuel.Coordinate.T1.X_KdUngappedMinMaxNeighbor(
            protein_xyz,
            rc_Gamma = User_rc_Gamma,
            maxleafsize = User_maxleafsize,
            CollectStat = False,
            User_ReturnHalfNnz = True,
            SliceForm = True,
        )

        # NOTE The coordinates are handed over as a numpy array rather than a
        # torch tensor: the tensor route costs extra memory when dlpack is
        # called and leaked memory.
        X = protein_xyz
        Xnumpy_SparseCupyMatrixUngapppedC = Xnumpy_SparseCupyMatrixUngappped(
            X,
            batch_head = None,
            maxleafsize = User_maxleafsize,
            rc_Gamma = User_rc_Gamma,
            device = torch.device(0),
            User_PlusI = User_PlusI,
            dtype_temp = torch.float64,
            X_precision = torch.cuda.DoubleTensor,
            NnzMinMaxDict = NnzMinMaxDict,
        )

        # A is the lower triangle of the Hessian including its diagonal.
        A = Xnumpy_SparseCupyMatrixUngapppedC.ReturnCupyHLowerTriangle(
            User_MaxHalfNnzBufferSize = HalfNnz)
        print(A.data.shape)

        # Hand unused pool blocks back before the solver allocates.
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        gc.collect()

    PART04_CalcualteEig = True
    if PART04_CalcualteEig:
        eigval, eigvec = S_HeigvalJDMHD_HeigvecJDMHD(
            A,
            k = User_n_mode,
            tol = User_EigTolerance,
            maxiter = User_MaxIter,
            User_CorrectionSolverMaxiter = User_SolverMaxIter,
            User_HalfMemMode = True,
            User_IntermediateConvergenceTol = 1e-3, # NOTE Do not touch for this problem
            User_GapEstimate = User_GapEstimate, # NOTE This will be used for theta - gap_estimate
            User_FactoringToleranceOnCorrection = 1e-4, # NOTE Do not touch for this problem
            User_HD_Eigvec = None,
            User_HD_Eigval = None,
            User_HotellingShift = 40, # NOTE 40 is generally safe for first 64 modes, of course if you want to guarentee it you know a norm
        )

        # Measured once so that the printed and the stored runtime agree.
        runtime = time.time() - st
        print("RUNNNTIME %s" %(runtime))
        # Bytes currently in use in CuPy's default pool, in MiB. Despite the
        # name this is a snapshot taken after the solve, not a tracked peak.
        peak_mem = mempool.used_bytes() / 1024 / 1024

        # Eigenvalues are stored with User_PlusI subtracted again.
        with open("%s/Eigval_InchingJDM_%s_%s_%s.pkl" %(
                    Benchmarking_folder, pdbid, User_Platform,
                    User_Device.replace(" ","")),"wb") as fn:
            pickle.dump(cupy.asnumpy(eigval) - User_PlusI ,fn, protocol=4)

        # Prepare the array before the file is opened, so a failing reshape
        # cannot leave an empty, truncated pickle behind.
        # One (n_atoms, 3) displacement field per mode, put back into the
        # atom order of the input file via cuthill_undoorder.
        tempeigvec = cupy.asnumpy(eigvec).T
        tempeigvec = tempeigvec.reshape((int(User_n_mode),int(n_atoms),int(3)))
        tempeigvec = tempeigvec[:,cuthill_undoorder,:]
        with open("%s/Eigvec_InchingJDM_%s_%s_%s.pkl" %(
                    Benchmarking_folder, pdbid, User_Platform,
                    User_Device.replace(" ","")),"wb") as fn:
            pickle.dump(tempeigvec ,fn, protocol=4)

        del tempeigvec
        gc.collect()

    PART05_Performance = True
    if PART05_Performance:
        #===================================
        # Check correct
        # =====================================
        # Residual norm of every eigenpair. Only the lower triangle A is
        # stored, so the full product is A v + A^T v - diag(A) v (the
        # diagonal would otherwise be counted twice).
        hessian_diag = A.diagonal(k=0)  # loop-invariant, computed once
        delta_lambda_list = []
        for jj in range(User_n_mode):
            B = (A@eigvec[:,jj] + A.T@eigvec[:,jj]
                 - cupy.multiply(hessian_diag, eigvec[:,jj])
                 - eigval[jj]* eigvec[:,jj])
            residual_norm = cupy.asnumpy(cublas.nrm2(B))
            delta_lambda_list.append(residual_norm)
            if jj < 20:
                print(eigval[jj], residual_norm)

        eigval = cupy.asnumpy(eigval)

        GPU = "%s %s" %(User_Platform, User_Device.replace(" GPU", ""))

        # Columns shared by every row of the per-mode performance table.
        performance = ["Inching (JDM %s)" %(GPU), pdbfn, n_atoms,
                        runtime, peak_mem,
                        User_Platform, User_Device,
                        User_maxleafsize]

        # One row per mode: shared columns + mode index, residual norm and
        # eigenvalue with User_PlusI subtracted.
        longperformance = [
            performance + [i, delta_lambda, eigval[i] - User_PlusI]
            for i, delta_lambda in enumerate(delta_lambda_list)
        ]

        with open("%s/PerformanceList_InchingJDM_%s_%s_%s.pkl" %(Benchmarking_folder,
            pdbid, User_Platform, User_Device.replace(" ","")),"wb") as fn:
            pickle.dump(longperformance,fn, protocol=4)

        # ---- Release everything tied to this structure ----
        del X_df, protein_xyz, X
        gc.collect()

        # Detach the arrays from the sparse matrix before dropping it so the
        # device buffers are released even if something still references A.
        B = None
        A.data = None
        A.indices = None
        A.indptr = None
        del A.data, A.indices, A.indptr
        del A, B, hessian_diag

        Xnumpy_SparseCupyMatrixUngapppedC.X, Xnumpy_SparseCupyMatrixUngapppedC.X_unsqueezed = None, None
        del Xnumpy_SparseCupyMatrixUngapppedC.X, Xnumpy_SparseCupyMatrixUngapppedC.X_unsqueezed
        del Xnumpy_SparseCupyMatrixUngapppedC
        del eigvec, eigval

        # Return the now-unused blocks of both CuPy pools and torch's cache,
        # then reset torch's peak-memory counters for the next structure.
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats(0)
