In [1]:
# ----------------------------------------------------------------------------------------------------
# 라이브러리 목록

# 기본 라이브러리 
import re
import pandas as pd

# torch 라이브러리 
import torch
from torch.utils.data import DataLoader, TensorDataset

# pandas 설정
pd.set_option('display.max_rows', None)  # 모든 행 출력
pd.set_option('display.max_columns', None)  # 모든 열 출력

# 개인 라이브러리  
from preparation_for_analysis.show_window import DataVisualizer
from preparation_for_analysis.model_3 import MLP

# preparation_for_analysis 설정
visualizer = DataVisualizer(line="=", length=100, start="#")
# ----------------------------------------------------------------------------------------------------

In [2]:
# ----------------------------------------------------------------------------------------------------
# best_learning_encoding 불러오기 
best_learning_encoding = pd.read_csv(
    "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\c_learning\\5_best_learning_encoding.csv",
    sep=",",
    header=0
    )

best_learning_encoding = best_learning_encoding.astype("float32")
visualizer.show_df_info(title="best_learning_encoding", df=best_learning_encoding)
# ----------------------------------------------------------------------------------------------------


# Title: best_learning_encoding


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5398 entries, 0 to 5397
Data columns (total 55 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   EV6         5398 non-null   float32
 1   ID4         5398 non-null   float32
 2   ION5        5398 non-null   float32
 3   ION6        5398 non-null   float32
 4   IONIQ       5398 non-null   float32
 5   KNE         5398 non-null   float32
 6   M3          5398 non-null   float32
 7   MS          5398 non-null   float32
 8   MX          5398 non-null   float32
 9   MY          5398 non-null   float32
 10  Niro        5398 non-null   float32
 11  Q4eT        5398 non-null   float32
 12  RSeTGT      5398 non-null   float32
 13  Soul        5398 non-null   float32
 14  Tay         5398 non-null   float32
 15  TayCT       5398 non-null   float32
 16  TayGTS      5398 non-null   float32
 17  eT          5398 non-null   float32
 18  i3     

In [3]:
# ----------------------------------------------------------------------------------------------------
# best_validation_encoding 불러오기 
best_validation_encoding = pd.read_csv(
    "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\d_validation\\4_best_validation_encoding.csv",
    sep=",",
    header=0
    )

best_validation_encoding = best_validation_encoding.astype("float32")
visualizer.show_df_info(title="best_validation_encoding", df=best_validation_encoding)
# ----------------------------------------------------------------------------------------------------


# Title: best_validation_encoding


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750 entries, 0 to 749
Data columns (total 55 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   EV6         750 non-null    float32
 1   ID4         750 non-null    float32
 2   ION5        750 non-null    float32
 3   ION6        750 non-null    float32
 4   IONIQ       750 non-null    float32
 5   KNE         750 non-null    float32
 6   M3          750 non-null    float32
 7   MS          750 non-null    float32
 8   MX          750 non-null    float32
 9   MY          750 non-null    float32
 10  Niro        750 non-null    float32
 11  Q4eT        750 non-null    float32
 12  RSeTGT      750 non-null    float32
 13  Soul        750 non-null    float32
 14  Tay         750 non-null    float32
 15  TayCT       750 non-null    float32
 16  TayGTS      750 non-null    float32
 17  eT          750 non-null    float32
 18  i3     

In [4]:
# ----------------------------------------------------------------------------------------------------
# 훈련 데이터 준비
x_train = best_learning_encoding.iloc[:,:-2]
y_train = best_learning_encoding.iloc[:,-1]
x_test = best_validation_encoding.iloc[:,:-2]
y_test = best_validation_encoding.iloc[:,-1]

# Tensor 변환
x_train_tensor = torch.tensor(x_train.to_numpy()).float()
y_train_tensor = torch.tensor(y_train.to_numpy()).float()
x_test_tensor = torch.tensor(x_test.to_numpy()).float()
y_test_tensor = torch.tensor(y_test.to_numpy()).float()

# y_train_tensor와 y_test_tensor를 (batch_size, 1) 크기로 변환
y_train_tensor = y_train_tensor.view(-1, 1)
y_test_tensor = y_test_tensor.view(-1, 1)

# 데이터셋 및 데이터로더
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)
# ----------------------------------------------------------------------------------------------------

In [5]:
# ----------------------------------------------------------------------------------------------------
# 모델 객체 선언
model = MLP(continue_logging=False, initalize_weights=True)

# 훈련
train_losses, val_losses, val_metrics = model.train_model(
    train_loader, test_loader, num_epochs=500
)
# ----------------------------------------------------------------------------------------------------

2025-02-25 15:50:36,694 - INFO - Epoch 1/500 - Training Loss: 3.2876, Validation Loss: 3.4601
2025-02-25 15:50:36,695 - INFO - Epoch 1/500 - Validation Metrics: {'r2_score': -49.04241180419922, 'rmse': 4.0178857}
2025-02-25 15:50:36,858 - INFO - Epoch 2/500 - Training Loss: 3.4198, Validation Loss: 3.3457
2025-02-25 15:50:36,858 - INFO - Epoch 2/500 - Validation Metrics: {'r2_score': -46.29267883300781, 'rmse': 3.9059386}
2025-02-25 15:50:36,973 - INFO - Epoch 3/500 - Training Loss: 3.2167, Validation Loss: 3.0041
2025-02-25 15:50:36,973 - INFO - Epoch 3/500 - Validation Metrics: {'r2_score': -38.561153411865234, 'rmse': 3.5724235}
2025-02-25 15:50:37,104 - INFO - Epoch 4/500 - Training Loss: 2.7074, Validation Loss: 2.2320
2025-02-25 15:50:37,105 - INFO - Epoch 4/500 - Validation Metrics: {'r2_score': -23.67421531677246, 'rmse': 2.8213055}
2025-02-25 15:50:37,222 - INFO - Epoch 5/500 - Training Loss: 1.5777, Validation Loss: 0.6168
2025-02-25 15:50:37,226 - INFO - Epoch 5/500 - Valida

In [6]:
# ----------------------------------------------------------------------------------------------------
# 모델 평가
test_loss, metrics = model.evaluate(test_loader)
print(f"Test Loss: {test_loss:.4f}")
print("Test Loss Metrics:", metrics)
# ----------------------------------------------------------------------------------------------------

                                                 

Test Loss: 0.0009
Test Loss Metrics: {'r2_score': 0.9948658347129822, 'rmse': 0.040697105}




In [7]:
# ----------------------------------------------------------------------------------------------------
# 모델 저장
model.save_model()
# ----------------------------------------------------------------------------------------------------

Model saved to C:\Users\ssalt\Documents\ev_price_predict_project\data\train\B_models\c_model_3\best_model.pth


In [8]:
# ----------------------------------------------------------------------------------------------------
# loss 데이터프레임 구성하기 

# 로그 파일 경로
log_file = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\B_models\\c_model_3\\training.log"

# 로그 파일 읽기
with open(log_file, 'r') as file:
    log_lines = file.readlines()

# 데이터를 저장할 리스트
log_data = []

# 로그 라인에서 epoch과 rmse 값을 추출하는 정규식 패턴
epoch_pattern = re.compile(r'Epoch (\d+)')
training_loss_pattern = re.compile(r'Training Loss: ([\d\.]+)')
validation_loss_pattern = re.compile(r'Validation Loss: ([\d\.]+)')

# 로그 라인에서 데이터 추출
epoch = None
for line in log_lines:
    epoch_match = epoch_pattern.search(line)
    training_loss_match = training_loss_pattern.search(line)
    validation_loss_match = validation_loss_pattern.search(line)
    
    if epoch_match:
        epoch = int(epoch_match.group(1))
    
    if training_loss_match:
        training_loss = float(training_loss_match.group(1))
        validation_loss = float(validation_loss_match.group(1))
        log_data.append({'Epoch': epoch, 'Training_Loss': training_loss, 'Validation_Loss': validation_loss})

# DataFrame으로 변환
loss = pd.DataFrame(log_data)

# DataFrame 출력
visualizer.show_data(title="loss", data=loss)
# ----------------------------------------------------------------------------------------------------


# Title: loss


# Data:

     Epoch  Training_Loss  Validation_Loss
0        1         3.2876           3.4601
1        2         3.4198           3.3457
2        3         3.2167           3.0041
3        4         2.7074           2.2320
4        5         1.5777           0.6168
5        6         0.4142           0.4160
6        7         0.2417           0.1697
7        8         0.1309           0.0720
8        9         0.0700           0.0449
9       10         0.0428           0.0311
10      11         0.0286           0.0218
11      12         0.0197           0.0155
12      13         0.0142           0.0116
13      14         0.0106           0.0091
14      15         0.0082           0.0075
15      16         0.0068           0.0065
16      17         0.0059           0.0058
17      18         0.0052           0.0053
18      19         0.0048           0.0050
19      20         0.0044           0.0047
20      21         0.0041           0.0045
21      22         0.0040   

In [9]:
# ----------------------------------------------------------------------------------------------------
# loss 저장
loss.to_csv(path_or_buf="C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\e_model\\model_3\\1_loss.csv",
            index=False)
# ----------------------------------------------------------------------------------------------------

In [10]:
# ----------------------------------------------------------------------------------------------------
# rmse 데이터프레임 구성하기 

# 로그 파일 경로
log_file = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\B_models\\c_model_3\\training.log"

# 로그 파일 읽기
with open(log_file, 'r') as file:
    log_lines = file.readlines()

# 데이터를 저장할 리스트
log_data = []

# 로그 라인에서 epoch과 rmse 값을 추출하는 정규식 패턴
epoch_pattern = re.compile(r'Epoch (\d+)')
rmse_pattern = re.compile(r'Metrics: \{.*?\'rmse\': ([\d\.]+)')

# 로그 라인에서 데이터 추출
epoch = None
for line in log_lines:
    epoch_match = epoch_pattern.search(line)
    rmse_match = rmse_pattern.search(line)
    
    if epoch_match:
        epoch = int(epoch_match.group(1))
    
    if rmse_match:
        rmse = float(rmse_match.group(1))
        log_data.append({'Epoch': epoch, 'RMSE': rmse})

# DataFrame으로 변환
rmse = pd.DataFrame(log_data)

# DataFrame 출력
visualizer.show_data(title="rmse", data=rmse)
# ----------------------------------------------------------------------------------------------------


# Title: rmse


# Data:

     Epoch      RMSE
0        1  4.017886
1        2  3.905939
2        3  3.572424
3        4  2.821305
4        5  1.230012
5        6  0.953770
6        7  0.599112
7        8  0.382327
8        9  0.301737
9       10  0.250716
10      11  0.209534
11      12  0.176773
12      13  0.152883
13      14  0.135059
14      15  0.122817
15      16  0.113801
16      17  0.107311
17      18  0.102392
18      19  0.098781
19      20  0.095673
20      21  0.093105
21      22  0.090804
22      23  0.088993
23      24  0.087078
24      25  0.085223
25      26  0.083725
26      27  0.082138
27      28  0.080567
28      29  0.079229
29      30  0.077826
30      31  0.076529
31      32  0.075473
32      33  0.074288
33      34  0.073197
34      35  0.072297
35      36  0.071410
36      37  0.070512
37      38  0.069660
38      39  0.068903
39      40  0.068558
40      41  0.067282
41      42  0.066649
42      43  0.066153
43      44  0.065429
44      45  0.064891
45      

In [11]:
# ----------------------------------------------------------------------------------------------------
# rmse 저장
rmse.to_csv(path_or_buf="C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\e_model\\model_3\\2_rmse.csv",
            index=False)
# ----------------------------------------------------------------------------------------------------