In [1]:
# Start from a clean interactive namespace so that no state from earlier runs leaks in
# (-s: soft reset that keeps the input history, -f: skip the confirmation prompt).
%reset -sf

In [1]:
import torch
from torch.utils.checkpoint import checkpoint
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm.auto import tqdm
from opt_einsum import contract
from torch.profiler import profile, record_function, ProfilerActivity
def _toN(t):
    return t.detach().cpu().numpy()
def printDiff(name,value,ref):
    """Print ``name: value``; when ``ref`` is not None also print ``value-ref``."""
    if ref is None:
        print(name+':',value)
        return
    print(name+':',value,'diff(abs):',value-ref)
    
import ast
def eval_np_array_literal(array_string):
    """Parse the text form of a printed NumPy array back into an array.

    The input is the whitespace-separated layout that printing an array
    produces (for example ``[ 1.5 -2.5]`` or a nested, multi-line matrix).
    Whitespace between elements is turned into commas and the result is
    evaluated with ``ast.literal_eval``.

    Parameters
    ----------
    array_string : str
        Printed array; elements are separated by whitespace, not commas.

    Returns
    -------
    numpy.ndarray built from the parsed nested list.

    Raises
    ------
    ValueError or SyntaxError (from ``ast.literal_eval``) when the text is
    not a valid literal after the conversion.
    """
    import re  # local import: only this helper needs it

    # NumPy right-aligns columns, so an opening bracket can be followed by
    # any amount of padding (e.g. "[  1 100]"). Removing only a single space
    # would leave a stray token and produce "[,1,100]".
    array_string = re.sub(r'\[\s+', '[', array_string)
    array_string = ','.join(array_string.split())
    return np.array(ast.literal_eval(array_string))

In [2]:
# SRG BaseClass, can be used for HOTRG-like and XTRG-like contraction
class SRG(torch.nn.Module):
    def __init__(self,params,options):
        super(SRG,self).__init__()
        self.dtype={'float64':torch.float64,'float32':torch.float32}[options.get('dtype','float64')]
        self.device=options.get('device','cpu')
        self.max_dim=options.get('max_dim',16)
        self.nLayers=options.get('nLayers',20)
        self.use_checkpoint=options.get('use_checkpoint',True)
        self.observable_checkerboard=False
        
        self.params=torch.nn.ParameterDict({
            k:torch.nn.Parameter(torch.tensor(v,dtype=self.dtype,device=self.device)) for k,v in params.items()
        })
        self.persistent={}
        self.persistent['logZ']=0
        
    def __str__(self):
        rtval=""
        for k,v in self.params.items():
            rtval+=k+':'+v+'\n'
        rtval+='dtype:'+self.dtype+'\n'
        rtval+='device:'+self.device+'\n'
        rtval+='max_dim:'+self.max_dim+'\n'
        rtval+='nLayers:'+self.nLayers+'\n'
        rtval+='nSite:'+2**self.nLayers+'\n'
        
    def set_params(self,params):
        self.params=torch.nn.ParameterDict({
            k:torch.nn.Parameter(torch.tensor(v,dtype=self.dtype,device=self.device)) for k,v in params.items()
        })
        
    def toT(self,t):
        return torch.tensor(t,dtype=self.dtype,device=self.device)
    
    def generate_random_Isometry(self,dim1,dim2):
        dim=max(dim1,dim2)
        A=torch.randn(dim,dim,dtype=self.dtype,device=self.device)
        U=torch.matrix_exp(A-A.t())
        U=U[:dim1,:dim2]
        return U
    
    def TRG_same_T(self,T,*w):
        return self.TRG(T,T,*w)
    
    def _checkpoint(self,F,*ww):
        requires_grad=False
        for w in ww:
            if w.requires_grad:
                requires_grad=True
        if self.use_checkpoint and requires_grad:
            return torch.utils.checkpoint.checkpoint(F,*ww)
        else:
            return F(*ww)
    
    def forward_tensor(self,nLayers):
        logTotal=0
        T=self.get_T0()
        for i in range(nLayers):
            w=self.ws[(i*self.w_per_layer):((i+1)*self.w_per_layer)]
            T=self._checkpoint(self.TRG_same_T,T,*w)
                
            norm=torch.linalg.norm(T)
            T=T/norm
            logTotal=2*logTotal+torch.log(norm)
        return T,logTotal
    
    def forward_tensor_with_observable(self,T_op,nLayers,contract_method=None,start_layer=0):
        T=self.get_T0()
        for i in range(start_layer):
            w=self.ws[(i*self.w_per_layer):((i+1)*self.w_per_layer)]
            T=self._checkpoint(self.TRG_same_T,T,*w)
            
        logTotal=0
        print(nLayers)
        contracted=torch.zeros((int(nLayers),))
        Ts,T_ops=[T],[T_op]
        for i in tqdm(range(start_layer,nLayers)):
            w=self.ws[(i*self.w_per_layer):((i+1)*self.w_per_layer)]
            T1=self._checkpoint(self.TRG_same_T,T,*w)
            T2=self._checkpoint(self.TRG,T,T_op,*w)
            T3=self._checkpoint(self.TRG,T_op,T,*w)
            if self.observable_checkerboard and i<self.spacial_dim:
                T3=-T3

            T,T_op=T1,(T2+T3)/2
            norm=torch.linalg.norm(T)
            T,T_op=T/norm,T_op/norm
            logTotal=2*logTotal+torch.log(norm)
            
            if contract_method is not None:
                Z=contract(T,contract_method)
                Z_op=contract(T_op,contract_method)
                contracted[i]=Z_op/Z

            Ts.append(T);T_ops.append(T_op)

        model.Ts=Ts;model.T_ops=T_ops
            
        return T,T_op,logTotal,contracted
    
    
    #def dlogZ(self,param):
    #    self.requires_grad_(False)
    #    self.params[param].requires_grad_(True)
    #    self.zero_grad()
    #    logZ=self.forward(self.nLayers+self.nLayers_HOSVD)
    #    logZ.backward()
    #    result=_toN(self.params[param].grad)
    #    self.params[param].requires_grad_(False)
    #    return result
    
    def update_single_layer(self,layer):
        """Re-derive the isometries of one layer from the gradient of ``self.forward``.

        All gradients are switched off except for the ``w_per_layer`` entries of
        ``self.ws`` that belong to ``layer``. ``self.forward(self.nLayers)`` is then
        evaluated and back-propagated, and each of those isometries is replaced by
        ``U@Vh`` taken from the SVD of its gradient reshaped into a matrix.

        Side effects: overwrites the selected ``self.ws[i]`` and stores a change
        measure for each of them in ``self.ws_diff[i]`` (``self.ws_diff`` must
        already exist when this is called).

        Returns the value of ``self.forward(self.nLayers)`` computed before the
        update, converted with ``_toN``.
        """
        # freeze everything, then enable gradients only for this layer's isometries
        self.requires_grad_(False)
        
        for i in range(layer*self.w_per_layer,(layer+1)*self.w_per_layer):
            self.ws[i].requires_grad_(True)
        self.zero_grad()
        
        logZ=self.forward(self.nLayers)
        logZ.backward()
        
        with torch.no_grad():
            for i in range(layer*self.w_per_layer,(layer+1)*self.w_per_layer):
                E=self.ws[i].grad
                # the reshape below treats the first two axes as both having size dim1
                dim1,dim2=E.shape[0],E.shape[2]
                E=E.reshape(dim1*dim1,dim2)
                U,S,Vh=torch.linalg.svd(E,full_matrices=False)
                # only the product of the singular-vector factors is kept; S is discarded
                UVh=U@Vh
                #UVh=svd2UVh(E)
                del U,S,Vh,E
                
                #calculate diff
                UVh_old=self.ws[i].reshape(dim1*dim1,dim2)
                # norm of (old^T new new^T old - identity): zero when the update
                # leaves the product equal to the identity
                self.ws_diff[i]=_toN(torch.norm(UVh_old.t()@UVh@UVh.t()@UVh_old-torch.eye(dim2,device=UVh.device)))
                del UVh_old
                    
                # store the new isometry in the original three-index layout
                self.ws[i].data=UVh.reshape(dim1,dim1,dim2)
                del UVh
                torch.cuda.empty_cache()
        return _toN(logZ)
        
    def optimize(self,nIter):
        self.ws_diff=np.zeros(len(self.ws))
        
        torch.cuda.empty_cache()
        if nIter>1:
            pbar2=tqdm(range(nIter), leave=False)
            pbar2.set_postfix({k:_toN(v) for k,v in self.params.items()})
        else:
            pbar2=range(nIter)
        for i in pbar2:
            pbar=tqdm([*range(self.nLayers-1,-1,-1)]+[*range(self.nLayers)], leave=False)
            for j in pbar:
                ws_shape=self.ws[j*self.w_per_layer].shape
                if ws_shape[0]**2>ws_shape[2]:
                    self.logZ=self.update_single_layer(j)
                #else:
                #    print(f'Skip layer {j} shape={ws_shape}')
        #lock all grads
        for param in self.params.values(): 
            param.requires_grad_(False)
        for i in range(self.nLayers): #slightly faster
            self.ws[i].requires_grad_(False)
        
        self.logZ_diff=np.abs(self.persistent['logZ']-self.logZ)
        self.persistent['logZ']=self.logZ
        
        # normalized by layer weight, number of elements in tensor
        # NOT USED but multiply by number of elements in last tensor to better match the effects in output
        self.ws_diff_normalized=np.zeros(len(self.ws))
        for i in range(self.nLayers):
            for j in range(self.w_per_layer):
                ij=i*self.w_per_layer+j
                self.ws_diff_normalized[ij]=self.ws_diff[ij]/2**i#/torch.numel(self.ws[ij])*torch.numel(self.ws[-1])
        # ignore the last layers we take trace directly
        # use 10-norm so layers of large error has beter contribution       
        self.ws_diff_total=np.average(self.ws_diff_normalized[:-self.w_per_layer*self.spacial_dim])

In [3]:
class HOTRG(SRG):
    def __init__(self,params,options):
        """Initialise the SRG base and add the HOTRG-specific state.

        Reads the extra option 'nLayers_HOSVD' (default 0) and starts the
        persistent 'magnetization' and 'energy' entries at 0.
        """
        super(HOTRG,self).__init__(params,options)
        self.nLayers_HOSVD=options.get('nLayers_HOSVD',0)
        for observable in ('magnetization','energy'):
            self.persistent[observable]=0
    
    def create_isometries(self,start_dim,spacial_dim):
        """Allocate random isometries for every layer and select the 2D/3D kernels.

        For each of the ``nLayers+nLayers_HOSVD`` layers, ``spacial_dim-1``
        isometries of shape ``(d,d,min(d**2,max_dim))`` are drawn, where ``d`` is
        the current size of the bond being merged; the bond sizes are rotated by
        one position after every layer. The isometries are stored as the
        ParameterList ``self.ws``.

        Also sets ``w_per_layer``, ``spacial_dim``, ``TRG``, ``HOSVD`` and resets
        ``ws_diff``, ``ws_diff_normalized`` and ``ws_diff_total``.
        Only ``spacial_dim`` 2 and 3 have kernels; other values raise KeyError.
        """
        isometries=[]
        dims=[start_dim for _ in range(spacial_dim)]
        for _layer in range(self.nLayers+self.nLayers_HOSVD):
            for axis in range(1,spacial_dim):
                d_in=dims[axis]
                d_out=min(d_in**2,self.max_dim)
                flat=self.generate_random_Isometry(d_in**2,d_out)
                isometries.append(flat.view(d_in,d_in,d_out).detach())
                dims[axis]=d_out
            # rotate the bond sizes: the first entry moves to the back
            dims=dims[1:]+[dims[0]]
        self.ws=torch.nn.ParameterList([torch.nn.Parameter(iso) for iso in isometries])
        self.w_per_layer=spacial_dim-1
        self.spacial_dim=spacial_dim
        trg_kernels={2:self.HOTRG2D,3:self.HOTRG3D}
        self.TRG=trg_kernels[self.spacial_dim]
        hosvd_kernels={2:self.HOSVD2D,3:self.HOSVD3D}
        self.HOSVD=hosvd_kernels[self.spacial_dim]
        n_isometries=len(self.ws)
        self.ws_diff_normalized=np.zeros(n_isometries)
        self.ws_diff=np.zeros(n_isometries)
        self.ws_diff_total=0
        
        
    def HOTRG2D(self,T1,T2,w):
        """Merge two four-index tensors and compress the doubled bonds with ``w``.

        Axis 1 of ``T1`` is summed against axis 0 of ``T2``. Axes 2 of both
        tensors are fused through ``w``, and so are axes 3. The fused axes come
        first in the result, followed by axis 0 of ``T1`` and axis 1 of ``T2``.
        """
        pattern='ijkl,jmno,kna,lob->abim'  # contract and rotate
        return contract(pattern,T1,T2,w,w)
    
    def HOTRG3D(self,T1,T2,w1,w2):
        """Merge two six-index tensors and compress the doubled bonds.

        Axis 1 of ``T1`` is summed against axis 0 of ``T2``. Axes 2 and 3 of the
        pair are fused through ``w1``, axes 4 and 5 through ``w2``. The four
        fused axes come first in the result, followed by axis 0 of ``T1`` and
        axis 1 of ``T2``.
        """
        pattern='ijklmn,jopqrs,kpa,lqb,mrc,nsd->abcdio'  # contract and rotate
        isometries=(w1,w1,w2,w2)
        return contract(pattern,T1,T2,*isometries)
    
    def HOSVD2D(self,T1,T2):
        """Build an isometry from an eigen-decomposition of the T1-T2 pair and apply it.

        Returns ``(T,[w])``. ``T`` is the same contraction that ``HOTRG2D``
        performs with ``w``; ``w`` has shape ``(T1.shape[2],T2.shape[2],k)``
        where ``k`` is at most ``self.max_dim``.
        """
        # T1,T2 contracted with their conjugates over every index except axis 2 of
        # each tensor (k,n and the conjugate partners q,r), flattened to a square matrix
        MM1=contract('ijkl,jmno,ipql,pmro->knqr',T1,T2,T1.conj(),T2.conj()).reshape(T1.shape[2]*T2.shape[2],-1)
        S1,U1=torch.linalg.eigh(MM1) #S1 ascending U S Uh=MM
        # sum of |eigenvalue| over the last max_dim entries, i.e. the ones that are kept
        eps1=torch.sum(torch.abs(S1[-self.max_dim:])) # might be slightly minus due to numerical error
        
        # same construction with axes 2 and 3 of every tensor swapped
        MM2=contract('ijkl,jmno,ipql,pmro->knqr',T1.transpose(2,3),T2.transpose(2,3),T1.conj().transpose(2,3),T2.conj().transpose(2,3)).reshape(T1.shape[3]*T2.shape[3],-1)
        S2,U2=torch.linalg.eigh(MM2)
        eps2=torch.sum(torch.abs(S2[-self.max_dim:]))

        # NOTE(review): eps1/eps2 sum the retained eigenvalues, so this picks the side
        # with the smaller retained weight — confirm that this is the intended criterion.
        S,U=(S1,U1) if eps1<eps2 else (S2,U2)
        # NOTE(review): this line unconditionally overrides the selection above, so
        # MM2/S2/U2/eps2 never influence the result — confirm whether it is a leftover.
        S,U=S1,U1
        # keep the eigenvectors of the last max_dim eigenvalues and split the row index
        w=U[:,-self.max_dim:].reshape(T1.shape[2],T2.shape[2],-1)
        return contract('ijkl,jmno,kna,lob->abim',T1,T2,w,w),[w]
    
    def HOSVD3D(self,T1,T2):
        """Build two isometries from eigen-decompositions of the T1-T2 pair and apply them.

        ``w1`` is derived from axes 2/3 and ``w2`` from axes 4/5 of the tensors.
        Returns ``(T,[w1,w2])`` where ``T`` is the same contraction that
        ``HOTRG3D`` performs with these isometries. Each isometry keeps at most
        ``self.max_dim`` eigenvectors.
        """
        #print(T1.shape)
        # pair contracted with its conjugate over everything except axis 2 of each tensor
        MM1=contract('ijklmn,jopqrs,itulmn,tovqrs->kpuv',T1,T2,T1.conj(),T2.conj()).reshape(T1.shape[2]*T2.shape[2],-1)
        S1,U1=torch.linalg.eigh(MM1) #S1 ascending U S Uh=MM
        # sum of |eigenvalue| over the last max_dim entries, i.e. the ones that are kept
        eps1=torch.sum(torch.abs(S1[-self.max_dim:])) # might be slightly minus due to numerical error
        
        # same construction with axes 2 and 3 of every tensor swapped
        MM2=contract('ijklmn,jopqrs,itulmn,tovqrs->kpuv',T1.transpose(2,3),T2.transpose(2,3),T1.conj().transpose(2,3),T2.conj().transpose(2,3)).reshape(T1.shape[3]*T2.shape[3],-1)
        S2,U2=torch.linalg.eigh(MM2)
        eps2=torch.sum(torch.abs(S2[-self.max_dim:]))

        # NOTE(review): eps1/eps2 sum the retained eigenvalues, so this picks the side
        # with the smaller retained weight — confirm that this is the intended criterion.
        S,U=(S1,U1) if eps1<eps2 else (S2,U2)
        # NOTE(review): the reshape uses the axis-2 sizes even when U2 (built from
        # axis 3) was selected — assumes shape[2]==shape[3]; TODO confirm.
        w1=U[:,-self.max_dim:].reshape(T1.shape[2],T2.shape[2],-1)
        
        # second isometry: everything except axis 4 of each tensor is contracted
        MM1=contract('ijklmn,jopqrs,itklun,topqvs->mruv',T1,T2,T1.conj(),T2.conj()).reshape(T1.shape[4]*T2.shape[4],-1)
        S1,U1=torch.linalg.eigh(MM1) #S1 ascending U S Uh=MM
        eps1=torch.sum(torch.abs(S1[-self.max_dim:])) # might be slightly minus due to numerical error
        
        # same construction with axes 4 and 5 of every tensor swapped
        MM2=contract('ijklmn,jopqrs,itklun,topqvs->mruv',T1.transpose(4,5),T2.transpose(4,5),T1.conj().transpose(4,5),T2.conj().transpose(4,5)).reshape(T1.shape[5]*T2.shape[5],-1)
        S2,U2=torch.linalg.eigh(MM2)
        eps2=torch.sum(torch.abs(S2[-self.max_dim:]))

        S,U=(S1,U1) if eps1<eps2 else (S2,U2)
        # NOTE(review): as for w1, the axis-4 sizes are used even when U2 (axis 5)
        # was selected — assumes shape[4]==shape[5]; TODO confirm.
        w2=U[:,-self.max_dim:].reshape(T1.shape[4],T2.shape[4],-1)
        #print(w1.shape,w2.shape)
        
        return contract('ijklmn,jopqrs,kpa,lqb,mrc,nsd->abcdio',T1,T2,w1,w1,w2,w2),[w1,w2]
    
    
    def generate_isometries_HOSVD(self):
        """Initialise every isometry with ``self.HOSVD`` and record the resulting logZ.

        Starting from ``self.get_T0()``, each of the ``nLayers+nLayers_HOSVD``
        layers is built by ``self.HOSVD(T,T)``; the returned isometries overwrite
        the corresponding entries of ``self.ws`` and the tensor is normalised
        after every layer.

        Side effects: sets ``self.ws[...]``, ``self.Ts`` (the tensor before the
        first layer and after every layer), ``self.logZ`` and
        ``self.persistent['logZ']``. Runs without gradient tracking.
        """
        with torch.no_grad():
            total_layers=self.nLayers+self.nLayers_HOSVD
            logTotal=0
            T=self.get_T0()
            Ts=[T]
            for i in tqdm(range(total_layers), leave=False):
                T,ww=self.HOSVD(T,T)
                for j in range(self.w_per_layer):
                    self.ws[i*self.w_per_layer+j].data=ww[j]
                norm=torch.linalg.norm(T)
                T=T/norm
                logTotal=2*logTotal+torch.log(norm)
                Ts.append(T)

            # index list [0,0,1,1,...]: consecutive axis pairs share one label
            contract_all=[i for i in range(len(T.shape)//2) for j in range(2)]
            Z=contract(T,contract_all)
            # logTotal was doubled once per applied layer, so the normalisation must
            # use the number of layers actually applied (as forward() does), not
            # nLayers alone; the two agree only when nLayers_HOSVD is 0.
            self.persistent['logZ']=self.logZ=_toN((torch.log(Z)+logTotal)/2**total_layers)
            self.Ts=Ts
    
    def forward(self,nLayers):
        """Return ``(log Z + accumulated log-norms)/2**nLayers`` after ``nLayers`` layers.

        ``Z`` is obtained by passing the final tensor to ``contract`` with the
        index list ``[0,0,1,1,...]``, which gives consecutive axis pairs one label.
        """
        T,logTotal=self.forward_tensor(nLayers)
        n_pairs=len(T.shape)//2
        pair_labels=[label for label in range(n_pairs) for _ in range(2)]
        Z=contract(T,pair_labels)
        log_value=torch.log(Z)+logTotal
        return log_value/2**nLayers
    
    def forward_with_observable(self,T_op,nLayers,start_layer=0):
        """Evaluate the observable ratio per layer for the operator tensor ``T_op``.

        Delegates to ``forward_tensor_with_observable`` with the index list
        ``[0,0,1,1,...]`` as contraction method.

        Returns ``(contracted,contracted[-1])``: all recorded ratios and the last one.
        """
        n_pairs=len(T_op.shape)//2
        pair_labels=[label for label in range(n_pairs) for _ in range(2)]
        _T,_T_op,_logTotal,contracted=self.forward_tensor_with_observable(
            T_op,nLayers,contract_method=pair_labels,start_layer=start_layer)
        final_ratio=contracted[-1]
        return contracted,final_ratio
    
    #def forward_and_HOTRG(self):
    #    T,logTotal=self.forward_tensor()
    #    for i in range(self.nLayers_HOSVD):
    #        #T,_=self.HOSVD(T,T)
    #        ws=
    #        T=self.TRG(T,T,*ws)
    #        norm=torch.linalg.norm(T)
    #        T=T/norm
    #        logTotal=2*logTotal+torch.log(norm)
    #    contract_all=[i for i in range(len(T.shape)//2) for j in range(2)]
    #    Z=contract(T,contract_all)
    #    return (torch.log(Z)+logTotal)/2**(self.nLayers+self.nLayers_HOSVD)
    #
    #def forward_with_observable_and_HOTRG(self,T_op,start_layer=0):
    #    contract_method=[i for i in range(len(T_op.shape)//2) for j in range(2)]
    #    T,T_op,logTotal,contracted=self.forward_tensor_with_observable(T_op,contract_method=contract_method,start_layer=start_layer)
    #    
    #    for i in range(self.nLayers_HOSVD):
    #        T1,ws=self.HOSVD(T,T)
    #        T2=self.TRG(T,T_op,*ws)
    #        T3=self.TRG(T_op,T,*ws)
    #        
    #        T,T_op=T1,(T2+T3)/2
    #        norm=torch.linalg.norm(T)
    #        T,T_op=T/norm,T_op/norm
    #        logTotal=2*logTotal+torch.log(norm)
    #        
    #    Z=contract(T,contract_method)
    #    Z_op=contract(T_op,contract_method)
    #    
    #    return contracted,Z_op/Z
    
    def calc_logZ(self):
        """Recompute ``self.logZ`` over all layers and track its change.

        Stores the absolute difference to the previously remembered value in
        ``self.logZ_diff`` and then remembers the new value in ``self.persistent``.
        """
        with torch.no_grad():
            total_layers=self.nLayers+self.nLayers_HOSVD
            current=_toN(self.forward(total_layers))
            self.logZ=current
            previous=self.persistent['logZ']
            self.logZ_diff=np.abs(previous-current)
            self.persistent['logZ']=current
    
    def calc_magnetization(self):
        """Evaluate the observable built by ``self.get_SZT0()`` over all layers.

        Sets ``magnetization_per_layer`` and ``magnetization`` (absolute values),
        ``magnetization_diff`` (change against the remembered value) and updates
        ``self.persistent['magnetization']``. Prints the two layer counts first.
        """
        with torch.no_grad():
            print(self.nLayers,self.nLayers_HOSVD)
            T_op=self.get_SZT0()
            total_layers=self.nLayers+self.nLayers_HOSVD
            per_layer,final=self.forward_with_observable(T_op,total_layers)
            self.magnetization_per_layer=_toN(torch.abs(per_layer))
            self.magnetization=_toN(torch.abs(final))
            previous=self.persistent['magnetization']
            self.magnetization_diff=np.abs(previous-self.magnetization)
            self.persistent['magnetization']=self.magnetization
            
    def calc_energy(self):
        """Evaluate the observable built by ``self.get_ET1()``, entering at layer 1.

        Sets ``energy_per_layer``, ``energy``, ``energy_diff`` (change against the
        remembered value) and updates ``self.persistent['energy']``.
        ``get_ET1`` has to be provided by the concrete model class.
        """
        with torch.no_grad():
            T_op=self.get_ET1()
            total_layers=self.nLayers+self.nLayers_HOSVD
            per_layer,final=self.forward_with_observable(T_op,total_layers,start_layer=1)
            self.energy_per_layer=_toN(per_layer)
            self.energy=_toN(final)
            previous=self.persistent['energy']
            self.energy_diff=np.abs(previous-self.energy)
            self.persistent['energy']=self.energy
            
            
def NewRow(model,params,options):
    """Collect parameters, options and the model's current results into one flat dict.

    Entries of ``params`` come first, then ``options``, then the result fields
    read from ``model``. The array-valued fields are copied so that later
    changes on the model do not alter the row.
    """
    copied_fields=('magnetization_per_layer','energy_per_layer','ws_diff','ws_diff_normalized')
    result_fields=(
        'logZ','logZ_diff',
        'magnetization','magnetization_diff','magnetization_per_layer',
        'energy','energy_diff','energy_per_layer',
        'ws_diff_total','ws_diff','ws_diff_normalized',
    )
    row={**params,**options}
    for field in result_fields:
        value=getattr(model,field)
        row[field]=value.copy() if field in copied_fields else value
    return row

In [4]:
from scipy.special import comb
def get_CG_no_normalization(n):
    """Return the 0/1 table that groups ``n`` binary indices by their sum.

    The result has shape ``(n+1,2,...,2)``; entry ``[m,b1,...,bn]`` is 1 when
    ``b1+...+bn == m`` and 0 otherwise. For ``n==0`` a 1x1 identity is returned.
    """
    if n==0:
        return np.eye(1)
    table=np.zeros((n+1,)+(2,)*n)
    # visit every combination of the n binary indices once
    for bits in np.ndindex(*((2,)*n)):
        table[(sum(bits),)+bits]=1
    return table
def get_CG(n):
    """Return the table that groups ``n`` binary indices by their sum, normalised.

    The result has shape ``(n+1,2,...,2)``; entry ``[m,b1,...,bn]`` equals
    ``1/sqrt(comb(n,m))`` when ``b1+...+bn == m`` and 0 otherwise.
    For ``n==0`` a 1x1 identity is returned.
    """
    if n==0:
        return np.eye(1)
    table=np.zeros((n+1,)+(2,)*n)
    # visit every combination of the n binary indices once
    for bits in np.ndindex(*((2,)*n)):
        total=sum(bits)
        table[(total,)+bits]=1/np.sqrt(comb(n,total))
    return table
def get_Singlet():
    """Return the 2x2 antisymmetric matrix ``[[0,1],[-1,0]]`` as a float array."""
    rows=[[0,1.],[-1.,0]]
    return np.array(rows)


class AKLT2D(HOTRG):
    """Two-dimensional model on top of HOTRG (four-index tensors, index order UDLR).

    Parameters 'a1' and 'a2' weight the five components of the node's first
    index symmetrically as ``[a2,a1,1,a1,a2]``.
    """
    default_params={'a1':np.sqrt(6)/2,'a2':np.sqrt(6)}
    def __init__(self,params,options):
        """Create the model with bond size 4 and two spatial dimensions."""
        super(AKLT2D,self).__init__(params,options)
        self.create_isometries(start_dim=4,spacial_dim=2)
        self.observable_checkerboard=True

    def _deformed_node(self):
        """Return the five-index node shared by ``get_T0`` and ``get_SZT0``.

        The table from ``get_CG_no_normalization(4)`` is weighted along its
        first index by ``[a2,a1,1,a1,a2]``, and the 2x2 matrix ``[[0,-1],[1,0]]``
        is applied to its second and fourth indices.
        """
        projector=self.toT(get_CG_no_normalization(4))
        singlet=self.toT([[0,-1],[1,0]])
        ac0,ac1,ac2=self.toT(1),self.params['a1'],self.params['a2']
        deform=torch.stack([ac2,ac1,ac0,ac1,ac2])
        #deform=torch.stack([ac2*(1-2e-6),ac1*(1-1e-6),ac0,ac1*(1+1e-6),ac2*(1+2e-6)])
        return contract('aijkl,im,kn,a->amjnl',projector,singlet,singlet,deform)

    def get_T0(self):
        """Return the initial tensor: two nodes summed over their first index.

        The four remaining index pairs are merged into four axes of size 4.
        """
        node=self._deformed_node()
        return contract('aijkl,amnop->imjnkolp',node,node).reshape(4,4,4,4)#UDLR

    def get_SZT0(self):
        """Return the initial tensor with the weights ``[2,1,0,-1,-2]`` inserted.

        Same contraction as ``get_T0`` with the summed first index additionally
        weighted by ``op``.
        """
        node=self._deformed_node()
        op=self.toT([2,1,0,-1,-2])
        return contract('aijkl,amnop,a->imjnkolp',node,node,op).reshape(4,4,4,4)#UDLR
    

class AKLT3D(HOTRG):
    """Three-dimensional model on top of HOTRG (six-index tensors, index order UDLRFB).

    Parameters 'a1', 'a2' and 'a3' weight the seven components of the node's
    first index symmetrically as ``[a3,a2,a1,1,a1,a2,a3]``.
    """
    default_params={'a1':np.sqrt(20/15),'a2':np.sqrt(20/6),'a3':np.sqrt(20/1)}
    def __init__(self,params,options):
        """Create the model with bond size 4 and three spatial dimensions."""
        super(AKLT3D,self).__init__(params,options)
        self.create_isometries(start_dim=4,spacial_dim=3)
        self.observable_checkerboard=True

    def _deformed_node(self):
        """Return the seven-index node shared by ``get_T0`` and ``get_SZT0``.

        The table from ``get_CG_no_normalization(6)`` is weighted along its
        first index by ``[a3,a2,a1,1,a1,a2,a3]``, and the 2x2 matrix
        ``[[0,-1],[1,0]]`` is applied to its second, fourth and sixth indices.
        """
        projector=self.toT(get_CG_no_normalization(6))
        singlet=self.toT([[0,-1],[1,0]])
        ac0,ac1,ac2,ac3=self.toT(1),self.params['a1'],self.params['a2'],self.params['a3']
        deform=torch.stack([ac3,ac2,ac1,ac0,ac1,ac2,ac3])
        #deform=torch.stack([ac3*(1-3e-6),ac2*(1-2e-6),ac1*(1-1e-6),ac0,ac1*(1+1e-6),ac2*(1+2e-6),ac3*(1+3e-6)])
        return contract('aijklmn,io,kp,mq,a->aojplqn',projector,singlet,singlet,singlet,deform)

    def get_T0(self):
        """Return the initial tensor: two nodes summed over their first index.

        The six remaining index pairs are merged into six axes of size 4.
        """
        node=self._deformed_node()
        return contract('aijklmn,aopqrst->iojpkqlrmsnt',node,node).reshape(4,4,4,4,4,4)#UDLRFB

    def get_SZT0(self):
        """Return the initial tensor with the weights ``[3,2,1,0,-1,-2,-3]`` inserted.

        Same contraction as ``get_T0`` with the summed first index additionally
        weighted by ``op``.
        """
        node=self._deformed_node()
        op=self.toT([3,2,1,0,-1,-2,-3])
        return contract('aijklmn,aopqrst,a->iojpkqlrmsnt',node,node,op).reshape(4,4,4,4,4,4)#UDLRFB

In [5]:
# Run configuration for the 3D model; keys that are left out fall back to the
# defaults read in SRG.__init__ / HOTRG.__init__.
options={
    'dtype':'float64',
    'device':'cuda:0', # first CUDA device; this cell needs a GPU as written
    'max_dim':10, # 10 discussed with wei
    'nLayers':15, # 30
    'use_checkpoint':True
}
# start from the class defaults and shift a1 by 0.008
params=AKLT3D.default_params.copy()
params['a1']+=.008
model=AKLT3D(params,options)
# overwrite the random initial isometries with the ones produced by HOSVD
model.generate_isometries_HOSVD()

# model.calc_logZ()
# prints the layer counts and stores model.magnetization / magnetization_per_layer
model.calc_magnetization()
# print(model.magnetization)

  0%|          | 0/15 [00:00<?, ?it/s]

15 0
15


  0%|          | 0/15 [00:00<?, ?it/s]

In [6]:

# Make double-precision CUDA tensors the default for tensors created from here on.
# NOTE(review): this runs after the model in the previous cell was built and
# evaluated, so it does not apply to those results — confirm the intended cell order.
# NOTE(review): recent PyTorch releases deprecate this call in favour of
# torch.set_default_dtype / torch.set_default_device — confirm against the installed version.
torch.set_default_tensor_type(torch.cuda.DoubleTensor)

In [7]:
# Display the final-layer magnetization stored by model.calc_magnetization().
model.magnetization

array(3.5552844e-06, dtype=float32)

In [8]:
from HOTRG import trace_two_tensors
# For every stored layer: the values returned by trace_two_tensors (imported from the
# external HOTRG module, not shown here) for the plain tensor and the operator tensor.
tt=[(trace_two_tensors(T).item(),trace_two_tensors(T_op).item()) for T,T_op in zip(model.Ts,model.T_ops)]
# Rows are (operator/plain ratio, plain value, operator value); the ratio is
# reported as 0 when the plain value is exactly zero.
[((b/a if abs(a)>0 else 0),a,b) for a,b in tt]

[(0, 0.0, 0.0),
 (-2.7897324047749574, 0.04779658942265784, -0.13333969435011256),
 (-1.821997652876098, 1.0153369211760674, -1.8499414872612385),
 (1.4392149863065409, 1.192677601453903, 1.716519477844597),
 (1.0142565934901129, 1.030020219631409, 1.0447047991892908),
 (0.8214542841935257, 0.8949524798322831, 0.7351625487078489),
 (0.72428624019466, 1.030275604994425, 0.7462144443056908),
 (0.8787220902480029, 0.9880340700172763, 0.8682073632418226),
 (1.0420737876185537, 0.9505671077898153, 0.990561066400147),
 (1.0852982191023912, 0.9742598610125194, 1.0573624920998306),
 (1.0945176855210959, 0.9899900614161253, 1.083561630710065),
 (1.0951954357030456, 0.993951124144083, 1.088570734474511),
 (1.0952695158500627, 0.9955685004135967, 1.090415829443573),
 (1.0952769848202044, 0.9967958559812493, 1.0917675596204175),
 (1.0952768607987813, 0.9973498644867291, 1.0923742286931146),
 (1.0952768238304655, 0.9974733446743865, 1.0925094368105133)]

In [9]:
from HOTRG import HOTRGLayer
# Initial plain tensor and initial operator tensor of the model
T0=model.get_T0()
T0_op=model.get_SZT0()
# One HOTRGLayer per model layer, fed the two isometries stored for that layer.
# The indices 2*i and 2*i+1 hard-code two isometries per layer (w_per_layer==2,
# which is what create_isometries sets for spacial_dim=3).
layers=[HOTRGLayer(tensor_shape=model.Ts[i].shape,ww=[model.ws[2*i],model.ws[2*i+1]]) for i in range(model.nLayers)]

In [10]:
from HOTRG import forward_observable_tensor
# Push the initial plain/operator tensors through the HOTRGLayer list using the
# external HOTRG module.
# NOTE(review): the saved output of this cell is a CUDA out-of-memory error; the
# tensors kept alive on `model` by earlier cells still occupy GPU memory at this point.
Ts,T_ops,logTotals=forward_observable_tensor(T0,T0_op,layers)

0it [00:00, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 764.00 MiB (GPU 0; 11.17 GiB total capacity; 9.37 GiB already allocated; 524.25 MiB free; 10.24 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF