<a href="https://colab.research.google.com/github/achanhon/AdversarialModel/blob/master/Untitled20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## quantification
L'objectif de ce notebook est d'apprendre un réseau qui pourra être utilisé en uint8.

### speed up
Regardons d'abord quelle est l'accélération qu'on peut espérer.

In [None]:
import torch
import time
import torchvision
import matplotlib.pyplot as plt


# Micro-benchmark: five ways of computing the same (2000 x 128) @ (128 x 1000)
# product on the GPU, starting from uint8 operands.  Only the timings are kept;
# the numerical results are discarded and never compared with each other.


def _timed_on_gpu(fn, *args):
    """Run ``fn(*args)`` and return ``(result, elapsed_seconds)``.

    CUDA kernels are launched asynchronously, so the device is synchronised
    before the clock starts and again before it stops; without this the
    measurement would mostly reflect kernel-launch overhead.
    """
    torch.cuda.synchronize()
    start = time.perf_counter()
    result = fn(*args)
    torch.cuda.synchronize()
    return result, time.perf_counter() - start


def _matmul_float32(A, B):
    """float32 matmul, result cast back to uint8."""
    return torch.matmul(A[:, 0, :].float(), B[0].float().t()).to(dtype=torch.uint8)


def _matmul_int8(A, B):
    """int8 matmul through the private ``torch._int_mm`` kernel."""
    return torch._int_mm(
        A[:, 0, :].to(dtype=torch.int8), B[0].t().to(dtype=torch.int8)
    ).to(dtype=torch.uint8)


def _broadcast_float32(A, B):
    """Broadcast multiply + sum in float32."""
    return (A.float() * B.float()).sum(-1).to(dtype=torch.uint8)


def _broadcast_float16(A, B):
    """Broadcast multiply + sum in float16."""
    return (A.half() * B.half()).sum(-1, dtype=torch.half).to(dtype=torch.uint8)


def _broadcast_uint8(A, B):
    """Broadcast multiply + sum directly in uint8."""
    return (A * B).sum(-1, dtype=torch.uint8)


# Column order of the timing table T.
_VARIANTS = (
    _matmul_float32,
    _matmul_int8,
    _broadcast_float32,
    _broadcast_float16,
    _broadcast_uint8,
)

with torch.no_grad():
    T = torch.zeros(100, len(_VARIANTS))
    for i in range(100):
        # Fresh random operands for every repetition.
        A = torch.randint(0, 256, (2000, 1, 128), dtype=torch.uint8).cuda()
        B = torch.randint(0, 256, (1, 1000, 128), dtype=torch.uint8).cuda()

        for k, variant in enumerate(_VARIANTS):
            C, T[i, k] = _timed_on_gpu(variant, A, B)


# Mean time per variant (seconds), averaged over the 100 repetitions.
print(T.mean(0))

tensor([1.4892e-04, 8.6820e-05, 7.7779e-05, 6.6257e-05, 2.7828e-05])



On observe que le gain à espérer n'est pas du tout impressionnant - pire, la variance est très forte (ça semble dépendre du contenu de la matrice).
Néanmoins, testons cela quand même.

### architecture

Le point central du réseau sera que
- si $x$ est de dimension 8 avec $\|x\|_\infty \le 5$
- si $\|w\|_\infty \le 3$
- alors $|w \cdot x| \le 3 \times 5 \times 8 = 120$
- et comme $120 \le 127$, ce produit scalaire tient dans un entier 8 bits sans débordement.

Donc si les poids sont des entiers dans [0,255], alors l'opération de produit suivie de cycle donnera le même résultat en float ou en uint8.

on peut donc construire le module "MyLinear"

In [None]:
def activation(x):
    """Clamp ``x`` to ``[-17, 81]`` and divide the result by 16.

    The clamp is done element-wise with ``torch.clamp`` so that tensors of
    any shape are accepted (Python's built-in ``min``/``max`` only work on
    single-element tensors).

    Args:
        x: input tensor.

    Returns:
        A tensor of the same shape as ``x``.  For ``torch.int8`` input the
        division is a floor division and the result stays ``int8``; for any
        other dtype it is a true division.
    """
    clamped = torch.clamp(x, min=-17, max=81)
    if x.dtype == torch.int8:
        # Integer path: floor division keeps the result in int8.
        return torch.div(clamped, 16, rounding_mode="floor")
    return clamped / 16

class MyLinear(torch.nn.Module):
    """Block-diagonal linear layer that can be switched from float to uint8.

    The input features are cut into ``innerS`` consecutive slices of
    ``inS // innerS`` features.  Slice ``i`` is multiplied by its own weight
    block ``data[i]`` and produces ``outS // innerS`` outputs.  The per-slice
    outputs are concatenated, the bias is added and the result is reduced
    modulo 256.  With integer-valued weights and inputs the float path then
    gives the same values as uint8 wrap-around arithmetic, as long as the
    float sums stay exactly representable.

    Args:
        inS: number of input features (must be a multiple of ``innerS``).
        outS: number of output features (must be a multiple of ``innerS``).
        innerS: number of independent blocks.  Defaults to 8.
        device: device for the parameters.  Defaults to CUDA when available,
            otherwise CPU.
    """

    def __init__(self, inS, outS, innerS=8, device=None):
        super().__init__()
        assert inS % innerS == 0 and outS % innerS == 0
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.inS, self.outS, self.innerS = inS, outS, innerS
        self.I = inS // innerS  # input features per block
        self.J = outS // innerS  # output features per block
        # NOTE(review): the scale factor was missing from the source (the
        # line ended on a dangling `*`).  255 matches the stated [0, 255]
        # weight range -- confirm.
        weights = torch.rand(innerS, 1, self.J, self.I, device=device) * 255
        bias = torch.randint(0, 256, (1, outS), device=device).float()
        self.data = torch.nn.Parameter(weights)
        self.bias = torch.nn.Parameter(bias)
        self.floatmode = True

    def goINT(self):
        """Freeze the layer: replace float parameters by uint8 buffers.

        Weights and bias are rounded and reduced modulo 256 *before* the
        cast, because casting a float outside [0, 255] to uint8 is not well
        defined.  Calling this method more than once is a no-op.
        """
        if not self.floatmode:
            return
        with torch.no_grad():
            weights = torch.round(self.data.detach()).remainder(256).to(dtype=torch.uint8)
            bias = torch.round(self.bias.detach()).remainder(256).to(dtype=torch.uint8)
        # The trainable parameters are dropped; the frozen tensors are kept
        # as buffers so they still follow the module in state_dict()/to().
        del self.data, self.bias
        self.register_buffer("data", weights)
        self.register_buffer("bias", bias)
        self.floatmode = False

    @staticmethod
    def _cycle(v):
        """Wrap ``v`` into [0, 256).

        uint8 tensors already wrap on overflow and are returned unchanged.
        NOTE(review): no `cycle` function is defined in the visible source;
        modulo 256 is assumed from the accompanying text -- confirm.
        """
        if v.dtype == torch.uint8:
            return v
        return torch.remainder(v, 256)

    def mm(self, i, x):
        """Apply weight block ``i`` to ``x`` of shape ``(batch, I)``.

        Returns a ``(batch, J)`` tensor with the dtype of ``x``.
        """
        # (1, J, I) * (batch, 1, I) -> (batch, J, I), then reduce over I.
        return ((self.data[i] * x.unsqueeze(1)).sum(-1)).to(dtype=x.dtype)

    def forward(self, x):
        """Map ``x`` of shape ``(batch, inS)`` to ``(batch, outS)``.

        ``x`` must be uint8 once :meth:`goINT` has been called, and must not
        be uint8 before.
        """
        assert not (self.floatmode and x.dtype == torch.uint8)
        assert not (not self.floatmode and x.dtype != torch.uint8)
        assert x.shape[1] == self.inS

        blocks = [
            self.mm(i, x[:, i * self.I:(i + 1) * self.I])
            for i in range(self.innerS)
        ]
        return self._cycle(torch.cat(blocks, dim=1) + self.bias)

    # A hand-written sign-gradient update (weights moved by -sign(grad),
    # then reduced modulo 256) used to be sketched here as dead code.

# Sanity check: run the same layer in float mode and in uint8 mode on the same
# integer-valued input, then print the total absolute difference and both
# timings.
layer = MyLinear(32, 64)
x = torch.randint(0, 256, (128, 32)).float().cuda()

# CUDA kernels run asynchronously: synchronise around each timed call so the
# clock covers the actual GPU work.
# NOTE(review): the first timed call may also include one-time GPU warm-up.
torch.cuda.synchronize()
t0 = time.time()
y = layer(x)
torch.cuda.synchronize()
t1 = time.time() - t0

layer.goINT()

torch.cuda.synchronize()
t0 = time.time()
z = layer(x.to(dtype=torch.uint8)).float()
torch.cuda.synchronize()
t2 = time.time() - t0

print((y - z).abs().sum(), t1, t2)

tensor(0., device='cuda:0', grad_fn=<SumBackward0>) 0.001730203628540039 0.0014481544494628906


bon comme on a la flemme de faire une myconvolution, ben on va faire du transformer...
(de toute façon, la durée d'inférence est quasiment identique !)

### baseline
construisons maintenant un petit réseau qui pourrait absorber des poids (en partie) uint8

In [None]:
def channelPool(x):
    """Halve dimension 1 by keeping the larger value of each adjacent pair.

    Entries ``2k`` and ``2k + 1`` along dimension 1 are compared element-wise
    and the maximum is kept, so the output has half as many entries along
    that dimension as the input.
    """
    even_entries = x[:, 0::2]
    odd_entries = x[:, 1::2]
    return torch.max(even_entries, odd_entries)

class SwinLike(torch.nn.Module):
    """Small patch-based classifier built from ``MyLinear`` layers.

    The top-left 32x32 region of the input is cut into an 8x8 grid of 4x4
    patches.  Every patch goes through ``m1`` and ``m2`` (each followed by
    ``channelPool``), the resulting 64-channel 8x8 map is flattened and fed
    through ``m3``, ``m4`` and ``m5``, and a regular float ``Linear`` layer
    produces the 10 output scores.
    """

    def __init__(self):
        super().__init__()
        # Per-patch layers.
        self.m1 = MyLinear(48, 64)
        self.m2 = MyLinear(80, 128)

        # Layers applied on the flattened patch grid.
        self.m3 = MyLinear(4096, 4096, innerS=64)
        self.m4 = MyLinear(2048, 2048, innerS=32)
        self.m5 = MyLinear(1024, 1024, innerS=16)

        # The classification head always stays in float.
        self.final = torch.nn.Linear(1024, 10).cuda()
        self.floatmode = True

    def goINT(self):
        """Switch every ``MyLinear`` sub-layer to integer mode (only once)."""
        if self.floatmode:
            for layer in (self.m1, self.m2, self.m3, self.m4, self.m5):
                layer.goINT()
            self.floatmode = False

    def forward(self, x_):
        """Return class scores of shape ``(batch, 10)``.

        ``x_`` must be uint8 after :meth:`goINT` has been called and must
        not be uint8 before.
        """
        assert not (self.floatmode and x_.dtype == torch.uint8)
        assert not (not self.floatmode and x_.dtype != torch.uint8)

        batch, channels = x_.shape[0], x_.shape[1]

        # Cut the 8x8 grid of 4x4 patches in one go:
        # (B, C, 32, 32) -> (B, C, 8, 8, 4, 4) -> (B, 8, 8, C, 4, 4)
        # -> (B*64, C*16).  Each row holds one patch flattened in
        # (channel, row, col) order, i.e. what
        # x_[:, :, 4i:4i+4, 4j:4j+4].flatten(1) yields for grid cell (i, j).
        patches = x_[:, :, :32, :32].unfold(2, 4, 4).unfold(3, 4, 4)
        patches = patches.permute(0, 2, 3, 1, 4, 5).reshape(batch * 64, channels * 16)

        # All patches are processed by m1/m2 in a single batched call
        # instead of 64 separate small calls.
        local = channelPool(self.m1(patches))
        feats = channelPool(self.m2(torch.cat([patches, local], dim=1)))

        # (B*64, F) -> (B, 8, 8, F) -> (B, F, 8, 8): channels first, then the
        # grid position, before flattening.
        x = feats.reshape(batch, 8, 8, feats.shape[1]).permute(0, 3, 1, 2)

        x = channelPool(self.m3(x.flatten(1)))
        x = channelPool(self.m4(x))

        x = self.m5(x).float()
        return self.final(x / 256)

In [None]:
# CIFAR-10 train/test splits, stored under ./build and downloaded on first use.
# Images are converted to tensors with ToTensor().
_loader_options = {"batch_size": 128, "shuffle": True, "num_workers": 2}

trainset = torchvision.datasets.CIFAR10(
    root="build",
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
trainloader = torch.utils.data.DataLoader(trainset, **_loader_options)

testset = torchvision.datasets.CIFAR10(
    root="build",
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
testloader = torch.utils.data.DataLoader(testset, **_loader_options)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Train the network in float mode with plain SGD.  Every 100 batches the mean
# loss over those batches and the running training accuracy are printed; at
# the end of each epoch the accuracy on the test split is printed.
# NOTE(review): the images are fed as produced by the data loader, whereas the
# uint8 evaluation further down rescales them to 8-bit integers -- confirm
# that both modes are meant to see the same input scale.
net = SwinLike()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.0001)
meanloss = []
nb, nbOK = 0, 0  # running counters for the *training* accuracy only
for i in range(60):
    print("######", i, "######")
    for x, y in trainloader:
        x, y = x.cuda(), y.cuda()
        z = net(x)
        loss = criterion(z, y.long())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        meanloss.append(float(loss))
        nb += y.shape[0]
        nbOK += (z.argmax(1) == y).sum().item()
        if len(meanloss) == 100:
            print(sum(meanloss) / 100, nbOK / nb)
            meanloss = []

    # Held-out evaluation with its own counters, so the reported number is
    # not mixed with the training statistics accumulated above.
    evalNb, evalOK = 0, 0
    with torch.no_grad():
        for x, y in testloader:
            y = y.cuda()
            pred = net(x.cuda()).argmax(1)
            evalNb += y.shape[0]
            evalOK += (pred == y).sum().item()
    print("eval :", evalOK / evalNb)

###### 0 ######
2.3129075598716735 0.102890625
torch.float32 tensor(33357384., device='cuda:0', grad_fn=<SumBackward0>)
2.315036644935608 0.1005859375
torch.float32 tensor(33359692., device='cuda:0', grad_fn=<SumBackward0>)
2.312225079536438 0.09997395833333333
torch.float32 tensor(33358964., device='cuda:0', grad_fn=<SumBackward0>)
eval : 0.10043
###### 1 ######
2.3100241899490355 0.10037369503321733
torch.float32 tensor(33360920., device='cuda:0', grad_fn=<SumBackward0>)
2.309630353450775 0.10034926986801461
torch.float32 tensor(33359568., device='cuda:0', grad_fn=<SumBackward0>)
2.3095856142044067 0.1006374652865438
torch.float32 tensor(33359048., device='cuda:0', grad_fn=<SumBackward0>)
2.307660541534424 0.10085846136207292
torch.float32 tensor(33360716., device='cuda:0', grad_fn=<SumBackward0>)
eval : 0.100155
###### 2 ######
2.310331916809082 0.10006722556153116
torch.float32 tensor(33360376., device='cuda:0', grad_fn=<SumBackward0>)
2.3098173570632934 0.10006787414459982
torch.f

KeyboardInterrupt: ignored

In [None]:
# Accuracy and wall-clock time of one pass over trainloader in float mode.
t0 = time.time()
# Start from fresh counters so the accuracy only covers this pass.
nb, nbOK = 0, 0
with torch.no_grad():
    for x, y in trainloader:
        y = y.cuda()
        pred = net(x.cuda()).argmax(1)
        nb += y.shape[0]
        # .item() copies the count to the host, which also waits for the GPU.
        nbOK += (pred == y).sum().item()
print("eval :", nbOK / nb, time.time() - t0)

In [None]:
# Accuracy and wall-clock time of one pass over trainloader in uint8 mode.
# The one-time weight conversion is done before the clock starts so that only
# inference is timed.
net.goINT()
torch.cuda.synchronize()
t0 = time.time()
# Start from fresh counters so the accuracy only covers this pass.
nb, nbOK = 0, 0
with torch.no_grad():
    for x, y in trainloader:
        y = y.cuda()
        # ToTensor() scales 8-bit pixels into [0, 1] by dividing by 255, so
        # multiplying by 255 recovers the integer values.  A factor of 256
        # would map 1.0 to 256, which does not fit in uint8.
        pixels = torch.round(x.cuda() * 255).to(dtype=torch.uint8)
        pred = net(pixels).argmax(1)
        nb += y.shape[0]
        # .item() copies the count to the host, which also waits for the GPU.
        nbOK += (pred == y).sum().item()
print("eval :", nbOK / nb, time.time() - t0)

ben c'est vraiment pas terrible :-( et c'est pas plus rapide ?????