In [1]:
# Mount Google Drive at /content/drive; the module directory and the .mat
# data files used by the following cells are addressed under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import sys
import os

# Absolute path of the Drive directory that contains the project modules
# (the notebook later imports `model_` from it).
module_dir = '/content/drive/MyDrive/pythondeep'

# Add the directory to the Python import path. The membership check keeps the
# cell idempotent: re-running it no longer appends duplicate sys.path entries.
if module_dir not in sys.path:
    sys.path.append(module_dir)

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from scipy.io import loadmat
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
from model_ import boReEsNet, ReEsNet # 외부 설정한 모델 불러옴

# Locations of the MATLAB .mat files on the mounted Drive.
# NOTE(review): these paths spell the Drive root "My Drive" while the module
# directory added to sys.path spells it "MyDrive" — confirm both resolve.
file_path_Xt = '/content/drive/My Drive/pythondeep/data/XTrain_RSRP.mat'
file_path_Yt = '/content/drive/My Drive/pythondeep/data/YTrain_RSRP.mat'
file_path_Xv = '/content/drive/My Drive/pythondeep/data/XValidation_RSRP.mat'
file_path_Yv = '/content/drive/My Drive/pythondeep/data/YValidation_RSRP.mat'

# Read every .mat file; loadmat returns a dict keyed by MATLAB variable name.
data_Xt, data_Yt, data_Xv, data_Yv = (
    loadmat(mat_path)
    for mat_path in (file_path_Xt, file_path_Yt, file_path_Xv, file_path_Yv)
)


def _as_float32_tensor(array):
    """Copy a loaded array into a float32 torch tensor."""
    return torch.tensor(array, dtype=torch.float32)


# Pull the named variable out of each file: training pair first, then validation.
X_train = _as_float32_tensor(data_Xt['XTrain_RSRP'])
Y_train = _as_float32_tensor(data_Yt['YTrain_RSRP'])

X_val = _as_float32_tensor(data_Xv['XValidation_RSRP'])
Y_val = _as_float32_tensor(data_Yv['YValidation_RSRP'])


In [4]:
# Re-order the axes of the loaded arrays with permute(3, 2, 0, 1): the last
# axis of the stored array (the sample index) moves to the front and the third
# axis (the channels, read later from shape[1]) moves to position 1.
#
# The tensors are rebuilt from the raw loadmat arrays instead of re-permuting
# X_train / Y_train in place. The previous self-referential form
# (`X_train = X_train.permute(...)`) permuted an already-permuted tensor when
# the cell was executed a second time, silently corrupting the layout.
# Cost: one extra float32 conversion of each array per run of this cell.
AXIS_ORDER = (3, 2, 0, 1)
X_train = torch.tensor(data_Xt['XTrain_RSRP'], dtype=torch.float32).permute(*AXIS_ORDER)
Y_train = torch.tensor(data_Yt['YTrain_RSRP'], dtype=torch.float32).permute(*AXIS_ORDER)
X_val = torch.tensor(data_Xv['XValidation_RSRP'], dtype=torch.float32).permute(*AXIS_ORDER)
Y_val = torch.tensor(data_Yv['YValidation_RSRP'], dtype=torch.float32).permute(*AXIS_ORDER)

# Inputs and targets are paired by index, so their sample counts must agree.
assert X_train.shape[0] == Y_train.shape[0], "train inputs/targets differ in sample count"
assert X_val.shape[0] == Y_val.shape[0], "validation inputs/targets differ in sample count"

# Show the resulting shapes as a sanity check.
print(X_train.shape)
print(Y_train.shape)
print(X_val.shape)
print(Y_val.shape)

class CustomDataset(Dataset):
    """Dataset that pairs each input sample with its target by index.

    Args:
        X: Sized, indexable collection of inputs (the notebook passes tensors
            whose first axis is the sample index).
        Y: Targets aligned with ``X``; ``Y[i]`` belongs to ``X[i]``.

    Raises:
        ValueError: If ``X`` and ``Y`` do not contain the same number of
            samples. Without this check a mismatch would only surface as an
            index error mid-epoch, or silently ignore surplus targets.
    """

    def __init__(self, X, Y):
        if len(X) != len(Y):
            raise ValueError(
                f"X and Y must have the same number of samples, got {len(X)} and {len(Y)}"
            )
        self.X = X
        self.Y = Y

    def __len__(self):
        """Return the number of (input, target) pairs."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.Y[idx]

# Wrap the tensors in datasets and batch them; only the training set is shuffled.
batch_size = 128
train_dataset, val_dataset = CustomDataset(X_train, Y_train), CustomDataset(X_val, Y_val)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

# Model configuration: channel counts are read from axis 1 of the data.
input_channels = X_train.shape[1]   # number of input channels
num_filters = 8                     # N_filter value
if Y_train.dim() > 1:
    num_classes = Y_train.shape[1]  # output size
else:
    num_classes = 1

# NOTE(review): the training cell further down builds its own models,
# criterion, optimizer and scheduler, so the objects created here look unused
# in the visible notebook — confirm before removing them.
model_kwargs = dict(
    input_channels=input_channels,
    num_filters=num_filters,
    num_classes=num_classes,
)
model = boReEsNet(**model_kwargs)

# Mean-squared-error loss; Adam's weight_decay plays the role of L2 regularisation.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-3)

# Halve the learning rate every 10 scheduler steps
# (LearnRateDropPeriod / LearnRateDropFactor).
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)

# Training length; reduced from the commented-out setting of 100 epochs.
num_epochs = 5

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = model.to(device)

# Per-epoch loss records.
train_loss_history, val_loss_history = [], []

torch.Size([124375, 2, 24, 2])
torch.Size([124375, 2, 72, 14])
torch.Size([625, 2, 24, 2])
torch.Size([625, 2, 72, 14])


In [5]:
def _mean_losses(loss_sums, n_samples):
    """Turn per-model sums of (batch loss * batch size) into per-sample means."""
    if n_samples == 0:
        raise ValueError("the data loader yielded no samples")
    return [total / n_samples for total in loss_sums]


def _train_one_epoch(models, loader, criterion, optimizer, device):
    """Train every model in ``models`` for one pass over ``loader``.

    All models receive the same batches. Their losses are added together for
    a single backward pass followed by a single ``optimizer.step()``.

    Args:
        models: Sequence of modules to train.
        loader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss callable returning a batch-mean scalar tensor.
        optimizer: Optimizer holding the parameters of all ``models``.
        device: Device the batches are moved to.

    Returns:
        list[float]: Mean training loss of each model over the epoch. Every
        batch is weighted by its size, so a smaller final batch does not
        skew the average.
    """
    for net in models:
        net.train()

    loss_sums = [0.0] * len(models)
    n_samples = 0
    for inputs, targets in loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass and loss of each model on the same batch.
        losses = [criterion(net(inputs), targets) for net in models]
        combined = losses[0]
        for extra in losses[1:]:
            combined = combined + extra

        # Backward pass and parameter update.
        optimizer.zero_grad()
        combined.backward()
        optimizer.step()

        batch = inputs.size(0)
        n_samples += batch
        for k, loss in enumerate(losses):
            loss_sums[k] += loss.item() * batch

    return _mean_losses(loss_sums, n_samples)


def _evaluate(models, loader, criterion, device):
    """Return each model's mean loss over ``loader`` without computing gradients.

    Args:
        models: Sequence of modules to evaluate (switched to eval mode).
        loader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss callable returning a batch-mean scalar tensor.
        device: Device the batches are moved to.

    Returns:
        list[float]: Mean loss of each model, weighted by batch size.
    """
    for net in models:
        net.eval()

    loss_sums = [0.0] * len(models)
    n_samples = 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            batch = inputs.size(0)
            n_samples += batch
            for k, net in enumerate(models):
                loss_sums[k] += criterion(net(inputs), targets).item() * batch

    return _mean_losses(loss_sums, n_samples)


# Build both models from the configuration computed in the setup cell
# (input_channels / num_filters / num_classes) instead of repeating the
# numbers as literals, so the architectures always match the loaded data.
model1 = boReEsNet(input_channels=input_channels, num_filters=num_filters, num_classes=num_classes).to(device)
model2 = ReEsNet(input_channels=input_channels, num_filters=num_filters, num_classes=num_classes).to(device)

# Loss function.
criterion = nn.MSELoss()

# One optimizer updates the parameters of both models together.
optimizer = torch.optim.Adam(
    list(model1.parameters()) + list(model2.parameters()),
    lr=1e-3,
    weight_decay=1e-4
)

# Learning-rate scheduler: multiply the rate by 0.5 every 10 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

# Per-epoch loss records for each model.
train_loss_history1, val_loss_history1 = [], []
train_loss_history2, val_loss_history2 = [], []

# Training loop.
for epoch in range(num_epochs):
    train_loss1, train_loss2 = _train_one_epoch(
        (model1, model2), train_loader, criterion, optimizer, device
    )
    train_loss_history1.append(train_loss1)
    train_loss_history2.append(train_loss2)

    val_loss1, val_loss2 = _evaluate((model1, model2), val_loader, criterion, device)
    val_loss_history1.append(val_loss1)
    val_loss_history2.append(val_loss2)

    # Report the results of this epoch.
    print(f"Epoch [{epoch+1}/{num_epochs}]")
    print(f"Model 1 - Train Loss: {train_loss1:.4f}, Validation Loss: {val_loss1:.4f}")
    print(f"Model 2 - Train Loss: {train_loss2:.4f}, Validation Loss: {val_loss2:.4f}")

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

# Save the trained weights.
# NOTE(review): these are relative paths, so the files are written to the
# current working directory rather than to the mounted Drive — confirm that
# this is the intended location.
torch.save(model1.state_dict(), 'boReEsNet12.pth')
torch.save(model2.state_dict(), 'ReEsNet12.pth')


Epoch [1/5]
Model 1 - Train Loss: 0.0144, Validation Loss: 0.0056
Model 2 - Train Loss: 0.0190, Validation Loss: 0.0174
Epoch [2/5]
Model 1 - Train Loss: 0.0057, Validation Loss: 0.0041
Model 2 - Train Loss: 0.0083, Validation Loss: 0.0145
Epoch [3/5]
Model 1 - Train Loss: 0.0050, Validation Loss: 0.0067
Model 2 - Train Loss: 0.0076, Validation Loss: 0.0233
Epoch [4/5]
Model 1 - Train Loss: 0.0049, Validation Loss: 0.0033
Model 2 - Train Loss: 0.0073, Validation Loss: 0.0189
Epoch [5/5]
Model 1 - Train Loss: 0.0048, Validation Loss: 0.0037
Model 2 - Train Loss: 0.0071, Validation Loss: 0.0150


- out of memory 오류가 났고, 그래서 데이터 shape를 일치시키는 데 우여곡절이 있었음
- 학습 로스가 에포크 초반에도 줄지 않아 학습이 안 되는 것처럼 보였음
- 데이터 shape이 MATLAB과 PyTorch에서 서로 달랐고 (permute(3,2,0,1)로 샘플 축을 맨 앞으로 옮겨서 맞춤)
- 채널 수가 2였는데 24로 넣었고
- dilated에서 패딩 세임(same)에서 2*dilation 수정
- transpose 쪽 stride도 수정
- 배치 사이즈 변경해 보고
- 배치 정규화 추가해 보고
- 에폭 줄이고
- 스텝 사이즈 줄이고
- 왜냐하면 오버피팅 같아서: 학습 로스는 줄어드는데 검증 로스는 늘어남