# 방향
- 이중분류 할것이다: 사진이 주어졌을때 이게 개냐 ~ 아니냐 ~

# 할거
- 3개의 함수 : 학습, 검증 및 테스트, 예측
- 모델 : 층, 층별 노드, 퍼셉트론 수 조절한게 있어야 함

# 30 * 30 으로 resize 된 사진으로 모델 돌림

In [2]:
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml 
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import torch
import torch.nn.functional as F 
import torch.nn as nn 
from torch.utils.data import Dataset, DataLoader, random_split
import torch.optim as optim 
import torchmetrics.functional as metrics

In [3]:
cat_file = 'resize_cat_df.csv'
dog_file = 'resize_dog_df.csv'
cat_data = pd.read_csv(cat_file)
dog_data = pd.read_csv(dog_file)
data = pd.concat([cat_data, dog_data])

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 891 entries, 0 to 438
Columns: 901 entries, Unnamed: 0 to 899
dtypes: int64(901)
memory usage: 6.1 MB


In [9]:
cat_data.shape

(452, 901)

In [10]:
dog_data.shape

(439, 901)

In [5]:
data.shape

(891, 901)

In [7]:
col_list = []
for i in range(1, 902):
    col_list.append(f"pixel{i}")
data.columns = col_list
data.columns

Index(['pixel1', 'pixel2', 'pixel3', 'pixel4', 'pixel5', 'pixel6', 'pixel7',
       'pixel8', 'pixel9', 'pixel10',
       ...
       'pixel892', 'pixel893', 'pixel894', 'pixel895', 'pixel896', 'pixel897',
       'pixel898', 'pixel899', 'pixel900', 'pixel901'],
      dtype='object', length=901)

In [8]:
data.index

Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
       ...
       429, 430, 431, 432, 433, 434, 435, 436, 437, 438],
      dtype='int64', length=891)

In [11]:
type_list = []
for i in range(cat_data.shape[0]):
    type_list.append('cat')
for i in range(dog_data.shape[0]):
    type_list.append('dog')
# type_list

data['type'] = type_list

In [12]:
data.columns

Index(['pixel1', 'pixel2', 'pixel3', 'pixel4', 'pixel5', 'pixel6', 'pixel7',
       'pixel8', 'pixel9', 'pixel10',
       ...
       'pixel893', 'pixel894', 'pixel895', 'pixel896', 'pixel897', 'pixel898',
       'pixel899', 'pixel900', 'pixel901', 'type'],
      dtype='object', length=902)

In [13]:
data.shape

(891, 902)

# 피처, 타겟 분리 & 정규화

In [14]:
feature = data[data.columns[:-1]]
target = data[data.columns[-1]]

In [15]:
print(f"feature.shape: {feature.shape}, target.shape: {target.shape}")

feature.shape: (891, 901), target.shape: (891,)


In [16]:
# cat을 0으로 dog를 1로 변경
target.replace('cat',0, inplace = True)
target.replace('dog',1, inplace = True)
scaled_target = target

In [17]:
# 피처를 정규화 시킴
scaled_feature = feature/512.

In [18]:
scaled_feature.shape

(891, 901)

# 학습 데이터셋 준비 (학습용, 검증용, 테스트용)

In [19]:
# 사용자 정의 데이터셋 및 전체 데이터셋 준비

class all_dataset(Dataset):
    def __init__(self, feature_data, target_data):
        super().__init__()  # 어미를 상속받았다
        # 해당 클래스의 인스턴스 초기화함
        self.data = torch.FloatTensor(feature_data)
        self.target = torch.FloatTensor(target_data)
        
    def __len__(self):
        return self.target.shape[0]
    
    def __getitem__(self, index):
        return self.data[index], self.target[index]

In [20]:
# 전체 DataSet 생성
feature_np = np.array(scaled_feature)
target_np = np.array(scaled_target)

animal_dataset = all_dataset(feature_np, target_np)

print(f"feature.shape: {animal_dataset.data.shape}, target.shape: {animal_dataset.target.shape}")

feature.shape: torch.Size([891, 901]), target.shape: torch.Size([891])


In [21]:
# 학습용, 검증용, 테스트용 데이터셋 분리

In [22]:
seed_generator = torch.Generator().manual_seed(11)

train_size, valid_size, test_size = 0.7, 0.1, 0.2

train_dataset, valid_dataset, test_dataset = random_split(animal_dataset, [train_size, valid_size, test_size], generator = seed_generator)

In [23]:
print(f"train dataset length : {len(train_dataset)}")
print(f"valid dataset length : {len(valid_dataset)}")
print(f"test dataset length : {len(test_dataset)}")

train dataset length : 624
valid dataset length : 89
test dataset length : 178


# 데이터 로더 생성

In [24]:
batch = 5

train_loader = DataLoader(train_dataset, batch_size = batch)
val_loader = DataLoader(valid_dataset, batch_size = batch)
test_loader = DataLoader(test_dataset, batch_size = batch)

print(train_loader, val_loader, test_loader, sep='\n\n')

<torch.utils.data.dataloader.DataLoader object at 0x0000021710070490>

<torch.utils.data.dataloader.DataLoader object at 0x0000021710070E50>

<torch.utils.data.dataloader.DataLoader object at 0x00000217100756A0>


# 모델 준비

In [28]:
# 전방향 학습용 모델 클래스를 생성해보자
# 이중 분류  | 입력 피처 수 : 512 * 512  |  출력 피처 수 : 1 (0 or 1)

class myModel(nn.Module):
    def __init__(self, input_num, output_num):
        super().__init__()
        self.layer1 = nn.Linear(input_num, 800)
        self.layer2 = nn.Linear(800, 600)
        self.layer3 = nn.Linear(600, 400)
        self.layer4 = nn.Linear(400, 300)
        self.layer5 = nn.Linear(300, 200)
        self.layer6 = nn.Linear(100, 50)
        self.layer7 = nn.Linear(50, output_num)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        x = self.relu(x)
        x = self.layer3(x)
        x = self.relu(x)
        x = self.layer4(x)
        x = self.relu(x)
        x = self.layer5(x)
        x = self.relu(x)
        x = self.layer6(x)
        x = self.relu(x)
        x = self.layer7(x)
        return x

### Q : 은닉층이면 relu 밖에 안됨? sigmoid,,, 흠,,, 왜 활성함수로 저거를 쓴 이유가 있어야할듯

# 함수 정의 : training, testing, predicting

In [29]:
# 디바이스 설정
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cpu


In [31]:
# 학습 모델 인스턴스 생성
dogmodel = myModel(feature_np.shape[1], 2).to(device)
dogmodel

myModel(
  (layer1): Linear(in_features=901, out_features=800, bias=True)
  (layer2): Linear(in_features=800, out_features=600, bias=True)
  (layer3): Linear(in_features=600, out_features=400, bias=True)
  (layer4): Linear(in_features=400, out_features=300, bias=True)
  (layer5): Linear(in_features=300, out_features=200, bias=True)
  (layer6): Linear(in_features=100, out_features=50, bias=True)
  (layer7): Linear(in_features=50, out_features=2, bias=True)
  (relu): ReLU()
)

In [35]:
dogmodel.parameters()
for param in dogmodel.parameters():
    print(len(param))

800
800
600
600
400
400
300
300
200
200
50
50
2
2


In [36]:
# 최적화 인스턴스 생성
optimizer = optim.SGD(dogmodel.parameters())

### Q. 왜 이 optimizer를 썻는가?

In [40]:
# 손실함수
loss_fn = nn.BCELoss().to(device)

# https://pytorch.org/docs/stable/generated/torch.nn.BCELoss.html#torch.nn.BCELoss
# Creates a criterion that measures the Binary Cross Entropy between the target and the input probabilities:

In [41]:
# 학습 횟수
epoch = 10

In [43]:
# 분류 갯수
class_num = 2

In [45]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x21710070490>

In [46]:
# 학습용 함수

def training():
    myModel.train()
    
    loss_list = []
    evaluation = {'accuracy_list':[], 'recall_list':[], 'precision_list':[], 'f1_score_list':[]}
    for (f, t) in train_loader:
        feature, target = f.to(device), t.to(device)
        
        # 학습
        train_predict = myModel(feature)
        
        # 손실 계산
        loss = loss_fn(train_predict, target)
        loss_list.append(loss)
        
        # 성능 평가
        accuracy = metrics.accuracy(train_predict, target, task = 'binary', num_classes = class_num)
        recall = metrics.recall(train_predict, target, task = 'binary', num_classes = class_num)
        precision = metrics.precision(train_predict, target, task = 'binary', num_classes = class_num)
        f1_score = metrics.f1_score(train_predict, target, task = 'binary', num_classes = class_num)
        
        evaluation['accuracy_list'].append(accuracy)
        evaluation['recall_list'].append(recall)
        evaluation['precision_list'].append(precision)
        evaluation['f1_score_list'].append(f1_score)
        
        # 최적화 (역방향 학습 + 업데이트)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print('  학  습  중')
        
    loss_total = (sum(loss_list) / len(loss_list))
    accuracy_total= (sum(evaluation['accuracy_list']) / len(evaluation['accuracy_list']))
    recall_total = (sum(evaluation['recall_list'])/len(evaluation['recall_list']))
    precision_total = (sum(evaluation['precision_list'])/len(evaluation['precision_list']))
    f1_score_total = (sum(evaluation['f1_score_list'])/len(evaluation['f1_score_list']))
    
    print('[ TRAINING - Total ]')
    print(f"loss : {loss_total} \naccuracy : {accuracy_total} \nrecall : {recall_total} \nprecision : {precision_total} \nf1_score : {f1_score_total}")
    
    return loss_list, evaluation, loss_total, accuracy_total, recall_total, precision_total, f1_score_total

### Q for문 안에 저 to device는 왜하는거?

In [ ]:
# 검증용 함수

def testing():
    myModel.eval()
    
    loss_list = []
    evaluation = {'accuracy_list':[], 'recall_list':[], 'precision_list':[], 'f1_score_list':[]}
    for (f, t) in train_loader:
        feature, target = f.to(device), t.to(device)
        
        # 학습
        train_predict = myModel(feature)
        
        # 손실 계산
        loss = loss_fn(train_predict, target)
        loss_list.append(loss)
        
        # 성능 평가
        accuracy = metrics.accuracy(train_predict, target, task = 'binary', num_classes = class_num)
        recall = metrics.recall(train_predict, target, task = 'binary', num_classes = class_num)
        precision = metrics.precision(train_predict, target, task = 'binary', num_classes = class_num)
        f1_score = metrics.f1_score(train_predict, target, task = 'binary', num_classes = class_num)
        
        evaluation['accuracy_list'].append(accuracy)
        evaluation['recall_list'].append(recall)
        evaluation['precision_list'].append(precision)
        evaluation['f1_score_list'].append(f1_score)
        
        
    loss_total = (sum(loss_list) / len(loss_list))
    accuracy_total= (sum(evaluation['accuracy_list']) / len(evaluation['accuracy_list']))
    recall_total = (sum(evaluation['recall_list'])/len(evaluation['recall_list']))
    precision_total = (sum(evaluation['precision_list'])/len(evaluation['precision_list']))
    f1_score_total = (sum(evaluation['f1_score_list'])/len(evaluation['f1_score_list']))
    
    print('[ TESTING - Total ]')
    print(f"loss : {loss_total} \naccuracy : {accuracy_total} \nrecall : {recall_total} \nprecision : {precision_total} \nf1_score : {f1_score_total}")
    
    return loss_list, evaluation, loss_total, accuracy_total, recall_total, precision_total, f1_score_total

In [ ]:
# 예측용 함수
def predicting():
    myModel.eval()
    
    loss_list = []
    evaluation = {'accuracy_list':[], 'recall_list':[], 'precision_list':[], 'f1_score_list':[]}
    for (f, t) in train_loader:
        feature, target = f.to(device), t.to(device)
        
        # 학습
        train_predict = myModel(feature)
        
        # 손실 계산
        loss = loss_fn(train_predict, target)
        loss_list.append(loss)
        
        # 성능 평가
        accuracy = metrics.accuracy(train_predict, target, task = 'binary', num_classes = class_num)
        recall = metrics.recall(train_predict, target, task = 'binary', num_classes = class_num)
        precision = metrics.precision(train_predict, target, task = 'binary', num_classes = class_num)
        f1_score = metrics.f1_score(train_predict, target, task = 'binary', num_classes = class_num)
        
        evaluation['accuracy_list'].append(accuracy)
        evaluation['recall_list'].append(recall)
        evaluation['precision_list'].append(precision)
        evaluation['f1_score_list'].append(f1_score)
        
        
    loss_total = (sum(loss_list) / len(loss_list))
    accuracy_total= (sum(evaluation['accuracy_list']) / len(evaluation['accuracy_list']))
    recall_total = (sum(evaluation['recall_list'])/len(evaluation['recall_list']))
    precision_total = (sum(evaluation['precision_list'])/len(evaluation['precision_list']))
    f1_score_total = (sum(evaluation['f1_score_list'])/len(evaluation['f1_score_list']))
    
    print('[ TESTING - Total ]')
    print(f"loss : {loss_total} \naccuracy : {accuracy_total} \nrecall : {recall_total} \nprecision : {precision_total} \nf1_score : {f1_score_total}")
    
    return loss_list, evaluation, loss_total, accuracy_total, recall_total, precision_total, f1_score_total

# 학습 에포크별 모델 저장

# 학습 진행 및 학습 결과 저장