In [107]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchmetrics.functional as metrics
from torch.utils.data import Dataset, DataLoader, random_split
import cv2
import matplotlib.pyplot as plt
from torchvision import transforms
from torchvision.datasets import ImageFolder
import tqdm


In [108]:
# Root directory of the image dataset (handed to ImageFolder further down).
image_path = "Image/"


In [109]:
# Per-image preprocessing pipeline:
#   1. resize every image to 64x32 (height x width),
#   2. convert it to a tensor,
#   3. normalize each of the three channels with mean 0.5 and std 0.5.
trans = transforms.Compose(
    [
        transforms.Resize(size=(64, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
    ]
)

In [110]:
# Build the dataset from the image folder, applying the preprocessing
# pipeline to each image as it is loaded.
datasets = ImageFolder(image_path, transform=trans)

# Bare expression so the notebook displays the dataset summary.
datasets

Dataset ImageFolder
    Number of datapoints: 756977
    Root location: Image/
    StandardTransform
Transform: Compose(
               Resize(size=(64, 32), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
               Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
           )

In [111]:
# Fixed-seed generator so the split is identical on every run.
seed_gen = torch.Generator()
seed_gen.manual_seed(42)

# 70% / 10% / 20% train / validation / test split (fractions of the dataset).
trainDS, validDS, testDS = random_split(
    datasets,
    lengths=[0.7, 0.1, 0.2],
    generator=seed_gen,
)

In [112]:
# Show the number of samples in each split as (train, valid, test).
tuple(len(split) for split in (trainDS, validDS, testDS))


(529884, 75698, 151395)

In [113]:
# Mini-batch size shared by all three loaders.
BATCH_SIZE = 9

# Only the training loader shuffles: without it every epoch visits the samples
# in exactly the same order, so the batches never change composition. A seeded
# CPU generator keeps the shuffle order reproducible across kernel restarts.
TRAIN_DL = DataLoader(
    trainDS,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
)

# Validation and test loaders stay in a fixed order; shuffling has no effect
# on evaluation metrics.
VALID_DL = DataLoader(validDS, batch_size=BATCH_SIZE)
TEST_DL = DataLoader(testDS, batch_size=BATCH_SIZE)


In [114]:
# Pick the best available backend instead of hard-coding 'mps', so the
# notebook also runs on machines without Apple Metal support.
# `device` stays a plain string, exactly as before.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


# Original-size model (원본 사이즈 모델)

In [115]:


# Earlier variant of the CNN for the original (un-resized) images, kept
# commented out for reference. Its shape comments and fc1 size correspond to
# the "After pool2: torch.Size([9, 64, 18, 32])" debug output noted below.
# class CNN(nn.Module):
#     def __init__(self, num_classes):
#         super(CNN, self).__init__()
#         self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
#         self.relu1 = nn.ReLU()
#         self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # Output size: (32, 36, 64)

#         self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
#         self.relu2 = nn.ReLU()
#         self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # Output size: (64, 18, 32)

#         self.fc1 = nn.Linear(64 * 18 * 32, 128)  # After pool2: torch.Size([9, 64, 18, 32]), so the flattened size must be 64 * 18 * 32
#         self.relu3 = nn.ReLU()
#         self.fc2 = nn.Linear(128, num_classes)

#     def forward(self, x):
#         x = self.conv1(x)
#         x = self.relu1(x)
#         x = self.pool1(x)
#         print("After pool1:", x.shape)  # Debug: print the shape


#         x = self.conv2(x)
#         x = self.relu2(x)
#         x = self.pool2(x)
#         print("After pool2:", x.shape)  # Debug: print the shape

#         x = x.view(x.size(0), -1)
#         print("Before FC:", x.shape)  # Debug: print the shape

#         x = self.fc1(x)
#         x = self.relu3(x)
#         x = self.fc2(x)
#         return x


# Model for images resized to 64×32

In [116]:


class CNN(nn.Module):
    """Small two-stage convolutional classifier for 3-channel images.

    Architecture: (Conv3x3 -> ReLU -> MaxPool2x2) twice, followed by two
    fully connected layers. Each convolution uses padding=1 and so keeps the
    spatial size; each pooling layer halves height and width (rounding down).

    Args:
        num_classes: Number of output logits (one per class).
        input_size: ``(height, width)`` of the images fed to ``forward``.
            Defaults to ``(64, 32)``, the size the notebook resizes images
            to, which yields the original ``64 * 16 * 8`` flattened features.

    Raises:
        ValueError: If either spatial dimension is smaller than 4, because
            nothing would be left after the two 2x2 poolings.
    """

    def __init__(self, num_classes, input_size=(64, 32)):
        super().__init__()
        height, width = input_size
        if height < 4 or width < 4:
            raise ValueError(
                f"input_size must be at least 4x4 to survive two 2x2 poolings, got {input_size}"
            )

        # Stage 1: (3, H, W) -> (32, H // 2, W // 2)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: (32, H // 2, W // 2) -> (64, H // 4, W // 4)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Flattened feature count after pool2; 64 * 16 * 8 for the default
        # 64x32 input. Derived from input_size so other resolutions work too.
        flat_features = 64 * (height // 2 // 2) * (width // 2 // 2)
        self.fc1 = nn.Linear(flat_features, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, height, width), where height and
                width match the ``input_size`` given at construction.
        """
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))

        # Flatten everything except the batch dimension. torch.flatten also
        # handles non-contiguous tensors, where .view() would raise.
        x = torch.flatten(x, 1)

        x = self.relu3(self.fc1(x))
        return self.fc2(x)


In [117]:
# Training hyperparameters.
num_epochs = 20
learning_rate = 0.001

# Cross-entropy loss over the raw logits produced by the network.
criterion = nn.CrossEntropyLoss()

# Five-way classifier, moved to the selected device, optimized with AdamW.
model = CNN(num_classes=5).to(device)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

In [118]:
from tqdm import tqdm

# File the model weights are written to at the end of every training epoch.
CHECKPOINT_PATH = 'dancer.pth'


def _run_epoch(model, loader, criterion, device, optimizer=None, desc=''):
    """Run one full pass over ``loader`` and return per-sample metrics.

    Args:
        model: Network to train or evaluate.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss function returning the batch-mean loss (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to before the forward pass.
        optimizer: When given, the model is put in train mode and updated
            after every batch; when ``None`` the pass is evaluation-only and
            runs without gradient tracking.
        desc: Label shown on the progress bar.

    Returns:
        ``(mean_loss, accuracy)`` averaged over samples, not over batches, so
        a shorter final batch is not over-weighted. Both are ``nan`` if the
        loader yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    progress_bar = tqdm(loader, desc=desc, leave=False)

    with torch.set_grad_enabled(is_training):
        for inputs, labels in progress_bar:
            inputs = inputs.to(device)
            labels = labels.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            batch_loss = loss.item()
            batch_correct = (outputs.argmax(dim=1) == labels).sum().item()

            # Weight the batch-mean loss by the batch size so the final
            # figure is a true per-sample mean.
            loss_sum += batch_loss * batch_size
            correct += batch_correct
            seen += batch_size

            progress_bar.set_postfix(loss=batch_loss, acc=batch_correct / batch_size)

    if seen == 0:
        return float('nan'), float('nan')
    return loss_sum / seen, correct / seen


for epoch in range(num_epochs):
    train_loss, train_accuracy = _run_epoch(
        model, TRAIN_DL, criterion, device,
        optimizer=optimizer,
        desc=f"Training Epoch {epoch+1}/{num_epochs}",
    )

    # Checkpoint once per epoch. Writing the file after every batch costs
    # tens of thousands of disk writes per epoch for no benefit.
    torch.save(model.state_dict(), CHECKPOINT_PATH)

    valid_loss, valid_accuracy = _run_epoch(
        model, VALID_DL, criterion, device,
        desc=f"Validation Epoch {epoch+1}/{num_epochs}",
    )

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}')


Training Epoch 1/20:   0%|          | 0/58876 [00:00<?, ?it/s]

                                                                                                   

Epoch [1/20], Train Loss: 0.6015, Train Accuracy: 0.7674, Valid Loss: 0.3590, Valid Accuracy: 0.8686


                                                                                                 

KeyboardInterrupt: 

In [None]:
# Final evaluation on the held-out test split: no weight updates and no
# gradient tracking.
model.eval()
test_loss_sum = 0.0
test_correct = 0
test_samples = 0

with torch.no_grad():
    for inputs, labels in TEST_DL:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)

        batch_size = labels.size(0)
        # criterion returns the batch-mean loss; weight it by the batch size
        # so the shorter final batch is not over-weighted.
        test_loss_sum += criterion(outputs, labels).item() * batch_size
        test_correct += (outputs.argmax(dim=1) == labels).sum().item()
        test_samples += batch_size

# Per-sample averages; nan if the test loader yielded no samples.
test_loss = test_loss_sum / test_samples if test_samples else float('nan')
test_accuracy = test_correct / test_samples if test_samples else float('nan')

print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')