In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

# Preprocessing shared by both splits: convert each image to a tensor, then
# normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 train and test splits stored under ./data (download requested).
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)

# Mini-batches of 128 images; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=128, shuffle=True
)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=128, shuffle=False
)


class SimpleCNN(nn.Module):
    """Two-block convolutional classifier that emits 10 logits per image.

    The feature extractor stacks two ``conv 3x3 (padding 1) -> ReLU ->
    2x2 max-pool`` blocks (3 -> 32 -> 64 channels). The classifier head
    expects a flattened feature map of ``64 * 8 * 8`` values, which is what a
    32x32 input produces after the two pooling steps.
    """

    def __init__(self):
        super().__init__()

        feature_layers = [
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.conv_layers = nn.Sequential(*feature_layers)

        head_layers = [
            nn.Linear(in_features=64 * 8 * 8, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=10),
        ]
        self.fc_layers = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the conv stack, flatten all non-batch dims, and return logits."""
        features = self.conv_layers(x)
        flat = torch.flatten(features, start_dim=1)
        return self.fc_layers(flat)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)


# Cross-entropy on the raw logits, optimized with Adam at a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


epochs = 300
start_time = time.time()

for epoch in range(epochs):
    # Select training mode explicitly at the start of every epoch so the loop
    # does not depend on the module's default mode and stays correct if
    # mode-dependent layers (dropout, batch norm) are added later.
    model.train()
    running_loss = 0.0

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # .item() converts the loss to a Python number before accumulating.
        running_loss += loss.item()

    # Mean per-batch training loss for this epoch.
    print(f"Epoch {epoch+1}/{epochs} | Loss: {running_loss/len(trainloader):.4f}")

training_time = time.time() - start_time
print(f"\nTotal Training Time: {training_time:.2f} seconds")



# Top-1 accuracy on the test set.
correct = 0
total = 0
model.eval()

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Gradients are already disabled inside no_grad(), so take the argmax
        # of the logits directly instead of going through the legacy `.data`.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")


100%|██████████| 170M/170M [00:03<00:00, 43.7MB/s]


Epoch 1/300 | Loss: 1.3964
Epoch 2/300 | Loss: 1.0250
Epoch 3/300 | Loss: 0.8576
Epoch 4/300 | Loss: 0.7355
Epoch 5/300 | Loss: 0.6287
Epoch 6/300 | Loss: 0.5295
Epoch 7/300 | Loss: 0.4304
Epoch 8/300 | Loss: 0.3396
Epoch 9/300 | Loss: 0.2543
Epoch 10/300 | Loss: 0.1823
Epoch 11/300 | Loss: 0.1260
Epoch 12/300 | Loss: 0.0938
Epoch 13/300 | Loss: 0.0722
Epoch 14/300 | Loss: 0.0635
Epoch 15/300 | Loss: 0.0510
Epoch 16/300 | Loss: 0.0550
Epoch 17/300 | Loss: 0.0506
Epoch 18/300 | Loss: 0.0544
Epoch 19/300 | Loss: 0.0365
Epoch 20/300 | Loss: 0.0376
Epoch 21/300 | Loss: 0.0438
Epoch 22/300 | Loss: 0.0430
Epoch 23/300 | Loss: 0.0323
Epoch 24/300 | Loss: 0.0395
Epoch 25/300 | Loss: 0.0353
Epoch 26/300 | Loss: 0.0196
Epoch 27/300 | Loss: 0.0343
Epoch 28/300 | Loss: 0.0360
Epoch 29/300 | Loss: 0.0355
Epoch 30/300 | Loss: 0.0314
Epoch 31/300 | Loss: 0.0255
Epoch 32/300 | Loss: 0.0365
Epoch 33/300 | Loss: 0.0216
Epoch 34/300 | Loss: 0.0191
Epoch 35/300 | Loss: 0.0352
Epoch 36/300 | Loss: 0.0258
E

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time


# Preprocessing shared by both splits: tensor conversion followed by
# per-channel normalization with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 train and test splits stored under ./data (download requested).
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)

# Mini-batches of 128 images; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=128, shuffle=True
)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=128, shuffle=False
)


class CNN_3Layer(nn.Module):
    """Three-block convolutional classifier that emits 10 logits per image.

    Each block is ``conv 3x3 (padding 1) -> ReLU -> 2x2 max-pool`` and the
    channel widths go 3 -> 32 -> 64 -> 128. The classifier head expects a
    flattened feature map of ``128 * 4 * 4`` values, which is what a 32x32
    input produces after the three pooling steps.
    """

    def __init__(self):
        super().__init__()

        # Build the three conv blocks in order so the Sequential indices
        # (0..8) match the hand-written layout.
        feature_layers = []
        for c_in, c_out in ((3, 32), (32, 64), (64, 128)):
            feature_layers.append(nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
            feature_layers.append(nn.ReLU())
            feature_layers.append(nn.MaxPool2d(2))
        self.conv_layers = nn.Sequential(*feature_layers)

        self.fc_layers = nn.Sequential(
            nn.Linear(in_features=128 * 4 * 4, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=10),
        )

    def forward(self, x):
        """Run the conv stack, flatten all non-batch dims, and return logits."""
        features = self.conv_layers(x)
        flat = torch.flatten(features, start_dim=1)
        return self.fc_layers(flat)


# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN_3Layer().to(device)


# Cross-entropy on the raw logits, optimized with Adam at a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


epochs = 300
start = time.time()

for epoch in range(epochs):
    # Select training mode explicitly at the start of every epoch so the loop
    # does not depend on the module's default mode and stays correct if
    # mode-dependent layers (dropout, batch norm) are added later.
    model.train()
    running_loss = 0.0

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # .item() converts the loss to a Python number before accumulating.
        running_loss += loss.item()

    # Mean per-batch training loss for this epoch.
    print(f"Epoch {epoch+1}/{epochs} | Loss: {running_loss/len(trainloader):.4f}")

train_time = time.time() - start
print(f"\nTraining Time: {train_time:.2f} seconds")


# Top-1 accuracy on the test set, with gradients disabled.
correct = 0
total = 0
model.eval()

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")


Epoch 1/300 | Loss: 1.4421
Epoch 2/300 | Loss: 1.0373
Epoch 3/300 | Loss: 0.8517
Epoch 4/300 | Loss: 0.7177
Epoch 5/300 | Loss: 0.6153
Epoch 6/300 | Loss: 0.5193
Epoch 7/300 | Loss: 0.4349
Epoch 8/300 | Loss: 0.3666
Epoch 9/300 | Loss: 0.2914
Epoch 10/300 | Loss: 0.2299
Epoch 11/300 | Loss: 0.1703
Epoch 12/300 | Loss: 0.1397
Epoch 13/300 | Loss: 0.1090
Epoch 14/300 | Loss: 0.0875
Epoch 15/300 | Loss: 0.0906
Epoch 16/300 | Loss: 0.0702
Epoch 17/300 | Loss: 0.0649
Epoch 18/300 | Loss: 0.0655
Epoch 19/300 | Loss: 0.0696
Epoch 20/300 | Loss: 0.0524
Epoch 21/300 | Loss: 0.0589
Epoch 22/300 | Loss: 0.0486
Epoch 23/300 | Loss: 0.0510
Epoch 24/300 | Loss: 0.0539
Epoch 25/300 | Loss: 0.0414
Epoch 26/300 | Loss: 0.0536
Epoch 27/300 | Loss: 0.0427
Epoch 28/300 | Loss: 0.0547
Epoch 29/300 | Loss: 0.0396
Epoch 30/300 | Loss: 0.0502
Epoch 31/300 | Loss: 0.0391
Epoch 32/300 | Loss: 0.0362
Epoch 33/300 | Loss: 0.0347
Epoch 34/300 | Loss: 0.0436
Epoch 35/300 | Loss: 0.0417
Epoch 36/300 | Loss: 0.0397
E

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time


class ResidualBlock(nn.Module):
    """Residual unit: two 3x3 conv + batch-norm layers plus an additive skip.

    The skip path is an empty ``nn.Sequential`` (identity) unless the block
    changes resolution (``stride != 1``) or width
    (``in_channels != out_channels``); in that case it is a 1x1 convolution
    with the same stride followed by batch norm.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main path: only the first convolution applies the stride.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, 3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels, 3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip path: identity when shapes already match, projection otherwise.
        if stride == 1 and in_channels == out_channels:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        hidden = torch.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))
        residual += self.shortcut(x)
        return torch.relu(residual)


class ResNet10(nn.Module):
    """Residual classifier: 3x3 stem, three stages of ``ResidualBlock``
    modules, global average pooling and a 10-way linear layer.

    Stage layout (width / blocks / stride of first block):
    16 / 4 / 1, 32 / 3 / 2, 64 / 3 / 2.
    """

    def __init__(self):
        super().__init__()

        # Channel count entering the next residual block; _make_stage reads
        # it and leaves it at the stage's output width.
        self.in_channels = 16

        # Stem: 3 input channels -> 16 feature maps.
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=16, kernel_size=3,
            stride=1, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(16)

        self.stage1 = self._make_stage(out_channels=16, num_blocks=4, stride=1)
        self.stage2 = self._make_stage(out_channels=32, num_blocks=3, stride=2)
        self.stage3 = self._make_stage(out_channels=64, num_blocks=3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, 10)

    def _make_stage(self, out_channels, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use 1."""
        stage_blocks = []
        channels_in = self.in_channels
        for block_stride in [stride] + (num_blocks - 1) * [1]:
            stage_blocks.append(
                ResidualBlock(channels_in, out_channels, block_stride)
            )
            channels_in = out_channels
        self.in_channels = channels_in
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)``."""
        feats = torch.relu(self.bn1(self.conv1(x)))
        for stage in (self.stage1, self.stage2, self.stage3):
            feats = stage(feats)
        pooled = self.avgpool(feats)
        pooled = pooled.view(pooled.size(0), -1)
        return self.fc(pooled)


# Preprocessing shared by both splits: tensor conversion followed by
# per-channel normalization with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 train and test splits stored under ./data (download requested).
trainset = torchvision.datasets.CIFAR10(
    root="./data", train=True, transform=transform, download=True
)
testset = torchvision.datasets.CIFAR10(
    root="./data", train=False, transform=transform, download=True
)

# Mini-batches of 128 images; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=128, shuffle=True
)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=128, shuffle=False
)


# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNet10()
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


epochs = 300
start_time = time.time()

for epoch in range(epochs):
    # Training mode is selected at the start of every epoch.
    model.train()
    running_loss = 0.0

    for images, labels in trainloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean per-batch training loss for this epoch.
    print(f"Epoch {epoch+1}/{epochs} | Loss: {running_loss/len(trainloader):.4f}")

training_time = time.time() - start_time
print(f"\nTraining Time: {training_time:.2f} seconds")


# Top-1 accuracy on the test set, in eval mode and with gradients disabled.
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        predicted = torch.max(outputs, dim=1).indices
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")


Epoch 1/300 | Loss: 1.3548
Epoch 2/300 | Loss: 0.9212
Epoch 3/300 | Loss: 0.7429
Epoch 4/300 | Loss: 0.6382
Epoch 5/300 | Loss: 0.5565
Epoch 6/300 | Loss: 0.4957
Epoch 7/300 | Loss: 0.4454
Epoch 8/300 | Loss: 0.4024
Epoch 9/300 | Loss: 0.3627
Epoch 10/300 | Loss: 0.3266
Epoch 11/300 | Loss: 0.2900
Epoch 12/300 | Loss: 0.2584
Epoch 13/300 | Loss: 0.2330
Epoch 14/300 | Loss: 0.2008
Epoch 15/300 | Loss: 0.1880
Epoch 16/300 | Loss: 0.1590
Epoch 17/300 | Loss: 0.1432
Epoch 18/300 | Loss: 0.1291
Epoch 19/300 | Loss: 0.1175
Epoch 20/300 | Loss: 0.1033
Epoch 21/300 | Loss: 0.0993
Epoch 22/300 | Loss: 0.0939
Epoch 23/300 | Loss: 0.0849
Epoch 24/300 | Loss: 0.0819
Epoch 25/300 | Loss: 0.0720
Epoch 26/300 | Loss: 0.0674
Epoch 27/300 | Loss: 0.0709
Epoch 28/300 | Loss: 0.0675
Epoch 29/300 | Loss: 0.0578
Epoch 30/300 | Loss: 0.0608
Epoch 31/300 | Loss: 0.0606
Epoch 32/300 | Loss: 0.0525
Epoch 33/300 | Loss: 0.0523
Epoch 34/300 | Loss: 0.0520
Epoch 35/300 | Loss: 0.0538
Epoch 36/300 | Loss: 0.0439
E

Part 2B: ResNet with L2 regularization (weight decay)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped by an additive skip connection.

    When ``stride`` is not 1 or the channel count changes, the skip path is a
    1x1 convolution (same stride) followed by batch norm; otherwise it is an
    empty ``nn.Sequential``, which passes its input through unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main path: only the first convolution applies the stride.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip path: identity when shapes already match, projection otherwise.
        if stride == 1 and in_channels == out_channels:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_channels, out_channels,
                    kernel_size=1, stride=stride, bias=False,
                ),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        hidden = torch.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))
        residual += self.shortcut(x)
        return torch.relu(residual)

class ResNet10(nn.Module):
    """Residual classifier: 3x3 stem, three stages of ``ResidualBlock``
    modules, global average pooling and a 10-way linear layer.

    Stage layout (width / blocks / stride of first block):
    16 / 4 / 1, 32 / 3 / 2, 64 / 3 / 2.
    """

    def __init__(self):
        super().__init__()

        # Channel count entering the next residual block; make_stage reads it
        # and leaves it at the stage's output width.
        self.in_channels = 16

        # Stem: 3 input channels -> 16 feature maps.
        self.conv1 = nn.Conv2d(
            3, 16, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(16)

        self.stage1 = self.make_stage(out_channels=16, num_blocks=4, stride=1)
        self.stage2 = self.make_stage(out_channels=32, num_blocks=3, stride=2)
        self.stage3 = self.make_stage(out_channels=64, num_blocks=3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, 10)

    def make_stage(self, out_channels, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use 1."""
        stage_blocks = []
        channels_in = self.in_channels
        for block_stride in [stride] + (num_blocks - 1) * [1]:
            stage_blocks.append(
                ResidualBlock(channels_in, out_channels, block_stride)
            )
            channels_in = out_channels
        self.in_channels = channels_in
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)``."""
        feats = torch.relu(self.bn1(self.conv1(x)))
        for stage in (self.stage1, self.stage2, self.stage3):
            feats = stage(feats)
        pooled = self.avgpool(feats)
        pooled = pooled.view(pooled.size(0), -1)
        return self.fc(pooled)

# Preprocessing shared by both splits: tensor conversion followed by
# per-channel normalization with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 train and test splits stored under ./data (download requested).
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, transform=transform, download=True
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, transform=transform, download=True
)

# Mini-batches of 128 images; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=128, shuffle=True
)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=128, shuffle=False
)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNet10().to(device)

criterion = nn.CrossEntropyLoss()
# weight_decay is the coefficient of Adam's L2 penalty on the parameters.
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

epochs = 300
start = time.time()

for epoch in range(epochs):
    # Select training mode explicitly each epoch: the batch-norm layers behave
    # differently in train and eval mode, so the loop should not depend on the
    # module's default mode.
    model.train()
    loss_value = 0
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # .item() converts the loss to a Python number before accumulating.
        loss_value += loss.item()
    # Epoch number and mean per-batch training loss.
    print(epoch+1, loss_value/len(trainloader))

# Total training wall-clock time in seconds.
print(time.time()-start)

# Top-1 accuracy on the test set, in eval mode and with gradients disabled.
correct = 0
total = 0
model.eval()

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Test accuracy as a percentage.
print(100*correct/total)


100%|██████████| 170M/170M [00:13<00:00, 12.5MB/s]


1 1.34564039073027
2 0.9060933730181526
3 0.725706082704427
4 0.6275252693754327
5 0.5481794509284027
6 0.49428434620427963
7 0.4411477628342636
8 0.3902010237012068
9 0.35330462329985235
10 0.3136383768390207
11 0.28392247318306846
12 0.25223422507800713
13 0.2237092358300753
14 0.19788213929785486
15 0.1811166701986052
16 0.1627510543674459
17 0.14765328166010738
18 0.1365276743726962
19 0.12381856927119406
20 0.11721470100266854
21 0.09973119047310804
22 0.11097677999064136


KeyboardInterrupt: 

ResNet with dropout

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped by an additive skip connection.

    When ``stride`` is not 1 or the channel count changes, the skip path is a
    1x1 convolution (same stride) followed by batch norm; otherwise it is an
    empty ``nn.Sequential``, which passes its input through unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main path: only the first convolution applies the stride.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip path: identity when shapes already match, projection otherwise.
        if stride == 1 and in_channels == out_channels:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_channels, out_channels,
                    kernel_size=1, stride=stride, bias=False,
                ),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        hidden = torch.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))
        residual += self.shortcut(x)
        return torch.relu(residual)

class ResNet10(nn.Module):
    """Residual classifier with dropout before the final linear layer.

    Layout: 3x3 stem, three stages of ``ResidualBlock`` modules
    (width / blocks / stride of first block: 16 / 4 / 1, 32 / 3 / 2,
    64 / 3 / 2), global average pooling, ``Dropout(0.3)`` and a 10-way
    linear layer.
    """

    def __init__(self):
        super().__init__()

        # Channel count entering the next residual block; make_stage reads it
        # and leaves it at the stage's output width.
        self.in_channels = 16

        # Stem: 3 input channels -> 16 feature maps.
        self.conv1 = nn.Conv2d(
            3, 16, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(16)

        self.stage1 = self.make_stage(out_channels=16, num_blocks=4, stride=1)
        self.stage2 = self.make_stage(out_channels=32, num_blocks=3, stride=2)
        self.stage3 = self.make_stage(out_channels=64, num_blocks=3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(64, 10)

    def make_stage(self, out_channels, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use 1."""
        stage_blocks = []
        channels_in = self.in_channels
        for block_stride in [stride] + (num_blocks - 1) * [1]:
            stage_blocks.append(
                ResidualBlock(channels_in, out_channels, block_stride)
            )
            channels_in = out_channels
        self.in_channels = channels_in
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)``."""
        feats = torch.relu(self.bn1(self.conv1(x)))
        for stage in (self.stage1, self.stage2, self.stage3):
            feats = stage(feats)
        pooled = self.avgpool(feats)
        pooled = pooled.view(pooled.size(0), -1)
        # Dropout acts on the pooled 64-d feature vector, before the classifier.
        pooled = self.dropout(pooled)
        return self.fc(pooled)

# Preprocessing shared by both splits: tensor conversion followed by
# per-channel normalization with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 train and test splits stored under ./data (download requested).
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, transform=transform, download=True
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, transform=transform, download=True
)

# Mini-batches of 128 images; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=128, shuffle=True
)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=128, shuffle=False
)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNet10().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 300
start = time.time()

for epoch in range(epochs):
    # Select training mode explicitly each epoch: dropout is only active and
    # batch norm only uses batch statistics in training mode, so the loop
    # should not depend on the module's default mode.
    model.train()
    loss_value = 0
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # .item() converts the loss to a Python number before accumulating.
        loss_value += loss.item()
    # Epoch number and mean per-batch training loss.
    print(epoch+1, loss_value/len(trainloader))

# Total training wall-clock time in seconds.
print(time.time()-start)

# Top-1 accuracy on the test set; eval mode turns dropout off.
correct = 0
total = 0
model.eval()

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Test accuracy as a percentage.
print(100*correct/total)


1 1.4226526712517604
2 1.0028401959277784
3 0.8169270017567802
4 0.699581999608013
5 0.6207336869538592
6 0.5480870617472607
7 0.5006152335030344
8 0.4507504332705837
9 0.4112901256974701
10 0.3708884808642175
11 0.3311370444648406
12 0.3066494017458328
13 0.2700009365444598
14 0.24589617331238353
15 0.22188837427998442
16 0.2061799835518498
17 0.18280285209074348
18 0.17726460356465387
19 0.1569375036203343
20 0.14500575695577486
21 0.12596051251072712
22 0.1192762791024297
23 0.11520477810688794
24 0.11179160611594424
25 0.10666920942113832


ResNet with data augmentation

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped by an additive skip connection.

    When ``stride`` is not 1 or the channel count changes, the skip path is a
    1x1 convolution (same stride) followed by batch norm; otherwise it is an
    empty ``nn.Sequential``, which passes its input through unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main path: only the first convolution applies the stride.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip path: identity when shapes already match, projection otherwise.
        if stride == 1 and in_channels == out_channels:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_channels, out_channels,
                    kernel_size=1, stride=stride, bias=False,
                ),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        hidden = torch.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))
        residual += self.shortcut(x)
        return torch.relu(residual)

class ResNet10(nn.Module):
    """Residual classifier: 3x3 stem, three stages of ``ResidualBlock``
    modules, global average pooling and a 10-way linear layer.

    Stage layout (width / blocks / stride of first block):
    16 / 4 / 1, 32 / 3 / 2, 64 / 3 / 2.
    """

    def __init__(self):
        super().__init__()

        # Channel count entering the next residual block; make_stage reads it
        # and leaves it at the stage's output width.
        self.in_channels = 16

        # Stem: 3 input channels -> 16 feature maps.
        self.conv1 = nn.Conv2d(
            3, 16, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(16)

        self.stage1 = self.make_stage(out_channels=16, num_blocks=4, stride=1)
        self.stage2 = self.make_stage(out_channels=32, num_blocks=3, stride=2)
        self.stage3 = self.make_stage(out_channels=64, num_blocks=3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, 10)

    def make_stage(self, out_channels, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use 1."""
        stage_blocks = []
        channels_in = self.in_channels
        for block_stride in [stride] + (num_blocks - 1) * [1]:
            stage_blocks.append(
                ResidualBlock(channels_in, out_channels, block_stride)
            )
            channels_in = out_channels
        self.in_channels = channels_in
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)``."""
        feats = torch.relu(self.bn1(self.conv1(x)))
        for stage in (self.stage1, self.stage2, self.stage3):
            feats = stage(feats)
        pooled = self.avgpool(feats)
        pooled = pooled.view(pooled.size(0), -1)
        return self.fc(pooled)

# Training-time preprocessing: random horizontal flip, then a random 32x32
# crop taken after 4 pixels of padding, then tensor conversion and
# per-channel normalization with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(size=32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# The training split uses the augmenting pipeline above.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, transform=transform, download=True
)
# The test split gets its own pipeline without the random flip/crop.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        ]
    ),
    download=True,
)

# Mini-batches of 128 images; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=128, shuffle=True
)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=128, shuffle=False
)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNet10().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 300
start = time.time()

for epoch in range(epochs):
    # Select training mode explicitly each epoch: the batch-norm layers behave
    # differently in train and eval mode, so the loop should not depend on the
    # module's default mode.
    model.train()
    loss_value = 0
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # .item() converts the loss to a Python number before accumulating.
        loss_value += loss.item()
    # Epoch number and mean per-batch training loss.
    print(epoch+1, loss_value/len(trainloader))

# Total training wall-clock time in seconds.
print(time.time()-start)

# Top-1 accuracy on the test set, in eval mode and with gradients disabled.
correct = 0
total = 0
model.eval()

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Test accuracy as a percentage.
print(100*correct/total)


KeyboardInterrupt: 