In [34]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import torch
import torch.nn as nn
from torch.optim import Adam, SGD
import torch.optim.lr_scheduler as scheduler
from torch.utils.data import random_split,DataLoader

import torchvision
from torchvision.datasets import CIFAR10,CIFAR100
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor,Normalize

In [35]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
print(torch.cuda.current_device())
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))
print(torch.cuda.device(0))

cuda
0
1
GeForce RTX 3090
<torch.cuda.device object at 0x0000019F18600C48>


In [36]:
USE_CUDA = torch.cuda.is_available()
print(USE_CUDA)

device = torch.device('cuda:0' if USE_CUDA else 'cpu')
print('학습을 진행하는 기기:',device)

True
학습을 진행하는 기기: cuda:0


In [37]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [38]:
ds10=CIFAR10(root="./data",download=True,train=True,transform=transform)
ts10=CIFAR10(root="./data",download=True,train=False,transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [39]:
ds10[0]

(tensor([[[-0.5373, -0.6627, -0.6078,  ...,  0.2392,  0.1922,  0.1608],
          [-0.8745, -1.0000, -0.8588,  ..., -0.0353, -0.0667, -0.0431],
          [-0.8039, -0.8745, -0.6157,  ..., -0.0745, -0.0588, -0.1451],
          ...,
          [ 0.6314,  0.5765,  0.5529,  ...,  0.2549, -0.5608, -0.5843],
          [ 0.4118,  0.3569,  0.4588,  ...,  0.4431, -0.2392, -0.3490],
          [ 0.3882,  0.3176,  0.4039,  ...,  0.6941,  0.1843, -0.0353]],
 
         [[-0.5137, -0.6392, -0.6235,  ...,  0.0353, -0.0196, -0.0275],
          [-0.8431, -1.0000, -0.9373,  ..., -0.3098, -0.3490, -0.3176],
          [-0.8118, -0.9451, -0.7882,  ..., -0.3412, -0.3412, -0.4275],
          ...,
          [ 0.3333,  0.2000,  0.2627,  ...,  0.0431, -0.7569, -0.7333],
          [ 0.0902, -0.0353,  0.1294,  ...,  0.1608, -0.5137, -0.5843],
          [ 0.1294,  0.0118,  0.1137,  ...,  0.4431, -0.0745, -0.2784]],
 
         [[-0.5059, -0.6471, -0.6627,  ..., -0.1529, -0.2000, -0.1922],
          [-0.8431, -1.0000,

In [40]:
tr10,vs10,_=random_split(ds10,[10000,10000,30000],torch.manual_seed(1))

In [41]:
class_count={}
for i in tr10:
    if i[1] not in class_count:
        class_count[i[1]]=0
    class_count[i[1]]+=1
print(class_count)

{0: 1010, 1: 1011, 6: 978, 3: 1002, 4: 1011, 8: 1048, 7: 998, 2: 999, 5: 982, 9: 961}


In [42]:
class MyCNN(nn.Module):
    def __init__(self):
        super(MyCNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3,out_channels=6,kernel_size=5,stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
            )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=6,out_channels=16,kernel_size=5,stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
            )
        self.layer3 = nn.Sequential(
            nn.Linear(16*5*5, 120),
            nn.ReLU(),
            nn.Linear(120,84),
            nn.ReLU(),
            nn.Linear(84,10),
            )
        
    def forward(self,x):
        out = self.layer1(x)
        out = self.layer2(out)
        #out = out.view(out.size(0),-1) #Flatten feature mapes to one-dimension vector
        out = out.view(-1,16*5*5)
        out = self.layer3(out)
        return out

myCnn=MyCNN().to(device)

In [43]:
learning_rate=5e-4
training_epochs=75
batch_size=4

In [44]:
obj=nn.CrossEntropyLoss().to(device)
opt=Adam(myCnn.parameters(),lr=0.001)

In [45]:
sch=scheduler.StepLR(optimizer=opt,step_size=40,gamma=0.1,verbose=True)

Adjusting learning rate of group 0 to 1.0000e-03.


In [46]:
tr10_loader=DataLoader(dataset=tr10,
                      batch_size=batch_size,
                      shuffle=True,
                      drop_last=True)

vs10_loader=DataLoader(dataset=vs10,
                      batch_size=batch_size,
                      shuffle=False)

In [47]:
tr10_batch_num=len(tr10_loader)
tr10_batch_num

2500

In [48]:
vs10_batch_num=len(vs10_loader)
vs10_batch_num

2500

In [49]:
training_loss_history=[]
validation_loss_history=[]

for epoch in range(training_epochs):
    training_loss=0.0
    validation_loss=0.0
    for X,y in tr10_loader:
        X=X.to(device)
        y=y.to(device)
        
        opt.zero_grad()
        output=myCnn(X) 
        cost=obj(output,y)
        cost.backward()
        opt.step()
        
        training_loss+=cost.item()/batch_size
        
    with torch.no_grad():
        for X,y in vs10_loader:
            X=X.to(device)
            y=y.to(device)
            
            output=myCnn(X)
            cost=obj(output,y)
            
            validation_loss+=cost.item()/batch_size
            
#    if epoch==39:        
#        sch.step()        
    training_loss_history.append(training_loss/(10000/batch_size))
    validation_loss_history.append(validation_loss/(10000/batch_size))
    print("[ Epoch: ",epoch+1," loss :",training_loss/(10000/batch_size)," validation :",validation_loss/(10000/batch_size),"]")

[ Epoch:  1  loss : 1110.397786334157  validation : 934.6314012482762 ]
[ Epoch:  2  loss : 935.4422144219279  validation : 847.9473419934511 ]
[ Epoch:  3  loss : 854.4651643186808  validation : 768.4901445321739 ]


KeyboardInterrupt: 

In [None]:
sch.get_last_lr()

In [None]:
df1=pd.Series(training_loss)
df1

In [None]:
ts10_loader=DataLoader(dataset=ts10,
                      batch_size=batch_size,
                      shuffle=False, num_workers=2)

In [None]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in ts10_loader:
        images, labels = data[0].to(device), data[1].to(device)
        # calculate outputs by running images through the network
        outputs = myCnn(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
