In [1]:
# ----------------------------------------------------------------------------------------------------
# Library imports

# General-purpose libraries
import re
import pandas as pd

# torch libraries
import torch
from torch.utils.data import DataLoader, TensorDataset

# pandas display options
pd.set_option('display.max_rows', None)  # print every row
pd.set_option('display.max_columns', None)  # print every column

# Project-local libraries
from preparation_for_analysis.show_window import DataVisualizer
from preparation_for_analysis.model_2 import MultiAttentionMLP

# preparation_for_analysis setup: shared visualizer used by the cells below
visualizer = DataVisualizer(line="=", length=100, start="#")
# ----------------------------------------------------------------------------------------------------

In [2]:
# ----------------------------------------------------------------------------------------------------
# Load new_learning_encoding

# Location of the encoded learning set (comma-separated, first row is the header)
learning_csv_path = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\c_learning\\3_new_learning_encoding.csv"

# Read the file and cast every column to float32 in one step
new_learning_encoding = pd.read_csv(learning_csv_path, sep=",", header=0).astype("float32")

# Show the frame summary through the shared visualizer
visualizer.show_df_info(title="new_learning_encoding", df=new_learning_encoding)
# ----------------------------------------------------------------------------------------------------


# Title: new_learning_encoding


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6747 entries, 0 to 6746
Data columns (total 55 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   EV6         6747 non-null   float32
 1   ID4         6747 non-null   float32
 2   ION5        6747 non-null   float32
 3   ION6        6747 non-null   float32
 4   IONIQ       6747 non-null   float32
 5   KNE         6747 non-null   float32
 6   M3          6747 non-null   float32
 7   MS          6747 non-null   float32
 8   MX          6747 non-null   float32
 9   MY          6747 non-null   float32
 10  Niro        6747 non-null   float32
 11  Q4eT        6747 non-null   float32
 12  RSeTGT      6747 non-null   float32
 13  Soul        6747 non-null   float32
 14  Tay         6747 non-null   float32
 15  TayCT       6747 non-null   float32
 16  TayGTS      6747 non-null   float32
 17  eT          6747 non-null   float32
 18  i3      

In [3]:
# ----------------------------------------------------------------------------------------------------
# Load new_validation_encoding

# Location of the encoded validation set (comma-separated, first row is the header)
validation_csv_path = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\d_validation\\3_new_validation_encoding.csv"

# Read the file and cast every column to float32 in one step
new_validation_encoding = pd.read_csv(validation_csv_path, sep=",", header=0).astype("float32")

# Show the frame summary through the shared visualizer
visualizer.show_df_info(title="new_validation_encoding", df=new_validation_encoding)
# ----------------------------------------------------------------------------------------------------


# Title: new_validation_encoding


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750 entries, 0 to 749
Data columns (total 55 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   EV6         750 non-null    float32
 1   ID4         750 non-null    float32
 2   ION5        750 non-null    float32
 3   ION6        750 non-null    float32
 4   IONIQ       750 non-null    float32
 5   KNE         750 non-null    float32
 6   M3          750 non-null    float32
 7   MS          750 non-null    float32
 8   MX          750 non-null    float32
 9   MY          750 non-null    float32
 10  Niro        750 non-null    float32
 11  Q4eT        750 non-null    float32
 12  RSeTGT      750 non-null    float32
 13  Soul        750 non-null    float32
 14  Tay         750 non-null    float32
 15  TayCT       750 non-null    float32
 16  TayGTS      750 non-null    float32
 17  eT          750 non-null    float32
 18  i3      

In [4]:
# ----------------------------------------------------------------------------------------------------
# Prepare the training / validation data
# Features are every column except the last three; the target is the final column.
x_train, y_train = new_learning_encoding.iloc[:, :-3], new_learning_encoding.iloc[:, -1]
x_test, y_test = new_validation_encoding.iloc[:, :-3], new_validation_encoding.iloc[:, -1]

# Convert to float tensors; targets are reshaped to a column of shape (N, 1)
x_train_tensor = torch.tensor(x_train.to_numpy()).float()
y_train_tensor = torch.tensor(y_train.to_numpy()).float().view(-1, 1)
x_test_tensor = torch.tensor(x_test.to_numpy()).float()
y_test_tensor = torch.tensor(y_test.to_numpy()).float().view(-1, 1)

# Datasets and data loaders (only the training loader is shuffled)
BATCH_SIZE = 512

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
# ----------------------------------------------------------------------------------------------------

In [5]:
# ----------------------------------------------------------------------------------------------------
# Reproducibility: seed torch's global RNG before the model is built and the
# shuffled training loader is iterated, so re-running this cell repeats the run.
# NOTE(review): assumes MultiAttentionMLP draws its initial weights from torch's
# global RNG - confirm. Some GPU kernels can remain non-deterministic regardless.
SEED = 42
torch.manual_seed(SEED)

# Instantiate the model
# (keyword names are kept exactly as this notebook already passes them; the class
# definition is not visible here, so the spelling is not altered)
model = MultiAttentionMLP(continue_logging=False, initalize_weights=True)

# Train for 200 epochs, validating against test_loader; the call returns the
# per-run training losses, validation losses and validation metrics
train_losses, val_losses, val_metrics = model.train_model(
    train_loader, test_loader, num_epochs=200
)
# ----------------------------------------------------------------------------------------------------

2025-02-26 02:40:21,236 - INFO - Epoch 1/200 - Training Loss: 3.5778, Validation Loss: 3.4801
2025-02-26 02:40:21,236 - INFO - Epoch 1/200 - Validation Metrics: {'r2_score': -49.48596954345703, 'rmse': 4.035653}
2025-02-26 02:40:21,433 - INFO - Epoch 2/200 - Training Loss: 3.4681, Validation Loss: 3.4536
2025-02-26 02:40:21,433 - INFO - Epoch 2/200 - Validation Metrics: {'r2_score': -48.835205078125, 'rmse': 4.009559}
2025-02-26 02:40:21,612 - INFO - Epoch 3/200 - Training Loss: 3.4487, Validation Loss: 3.4394
2025-02-26 02:40:21,612 - INFO - Epoch 3/200 - Validation Metrics: {'r2_score': -48.485286712646484, 'rmse': 3.9954576}
2025-02-26 02:40:21,795 - INFO - Epoch 4/200 - Training Loss: 3.4351, Validation Loss: 3.4250
2025-02-26 02:40:21,795 - INFO - Epoch 4/200 - Validation Metrics: {'r2_score': -48.13304138183594, 'rmse': 3.981212}
2025-02-26 02:40:21,978 - INFO - Epoch 5/200 - Training Loss: 3.4134, Validation Loss: 3.4105
2025-02-26 02:40:21,978 - INFO - Epoch 5/200 - Validation 

In [6]:
# ----------------------------------------------------------------------------------------------------
# Evaluate the trained model on the validation loader
evaluation = model.evaluate(test_loader)
test_loss, metrics = evaluation  # (loss, metrics) pair returned by evaluate

# Report the loss to four decimals, then the metrics object as-is
print(f"Test Loss: {test_loss:.4f}")
print("Test Loss Metrics:", metrics)
# ----------------------------------------------------------------------------------------------------

                                                 

Test Loss: 0.0012
Test Loss Metrics: {'r2_score': 0.9929386377334595, 'rmse': 0.04772787}




In [7]:
# ----------------------------------------------------------------------------------------------------
# Save the model
# No path is passed here, so the destination is chosen inside save_model itself
model.save_model()
# ----------------------------------------------------------------------------------------------------

Model saved to C:\Users\ssalt\Documents\ev_price_predict_project\data\train\B_models\b_model_2\best_model.pth


In [8]:
# ----------------------------------------------------------------------------------------------------
# Build the loss DataFrame from the training log

# Path to the training log
log_file = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\B_models\\b_model_2\\training.log"

# Read every line of the log
with open(log_file, 'r') as file:
    log_lines = file.readlines()

# Rows collected for the DataFrame
log_data = []

# Text accepted as a number: optional sign, decimal digits, optional exponent,
# or nan/inf. A plain digits-and-dots pattern would silently drop a diverged
# (nan/inf) or exponent-formatted loss instead of recording it.
number = r'[-+]?(?:nan|inf|(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'

# Patterns extracting the epoch index and the training / validation loss
epoch_pattern = re.compile(r'Epoch (\d+)')
training_loss_pattern = re.compile(r'Training Loss: (' + number + r')')
validation_loss_pattern = re.compile(r'Validation Loss: (' + number + r')')

# Walk the log; the most recently seen epoch number labels each loss row
epoch = None
for line in log_lines:
    epoch_match = epoch_pattern.search(line)
    training_loss_match = training_loss_pattern.search(line)
    validation_loss_match = validation_loss_pattern.search(line)

    if epoch_match:
        epoch = int(epoch_match.group(1))

    if training_loss_match:
        training_loss = float(training_loss_match.group(1))
        # A line may carry a training loss without a validation loss; record NaN
        # for the missing value rather than failing on a None match object.
        if validation_loss_match:
            validation_loss = float(validation_loss_match.group(1))
        else:
            validation_loss = float('nan')
        log_data.append({'Epoch': epoch, 'Training_Loss': training_loss, 'Validation_Loss': validation_loss})

# Convert to a DataFrame; explicit columns keep the schema even when no line matched
loss = pd.DataFrame(log_data, columns=['Epoch', 'Training_Loss', 'Validation_Loss'])

# Display the DataFrame
visualizer.show_data(title="loss", data=loss)
# ----------------------------------------------------------------------------------------------------


# Title: loss


# Data:

     Epoch  Training_Loss  Validation_Loss
0        1         3.5778           3.4801
1        2         3.4681           3.4536
2        3         3.4487           3.4394
3        4         3.4351           3.4250
4        5         3.4134           3.4105
5        6         3.3993           3.3959
6        7         3.3883           3.3811
7        8         3.3774           3.3661
8        9         3.3474           3.2956
9       10         3.1844           2.9281
10      11         2.0171           0.2970
11      12         0.4423           0.3547
12      13         0.2235           0.1445
13      14         0.1156           0.1014
14      15         0.0860           0.0793
15      16         0.0675           0.0648
16      17         0.0558           0.0542
17      18         0.0466           0.0461
18      19         0.0401           0.0398
19      20         0.0343           0.0348
20      21         0.0304           0.0307
21      22         0.0271   

In [9]:
# ----------------------------------------------------------------------------------------------------
# Save the loss table

# Destination CSV for the per-epoch losses
loss_csv_path = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\e_model\\model_2\\1_loss.csv"

# Write without the row index
loss.to_csv(loss_csv_path, index=False)
# ----------------------------------------------------------------------------------------------------

In [10]:
# ----------------------------------------------------------------------------------------------------
# Build the rmse DataFrame from the training log

# Path to the training log
log_file = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\B_models\\b_model_2\\training.log"

# Read every line of the log
with open(log_file, 'r') as file:
    log_lines = file.readlines()

# Rows collected for the DataFrame
log_data = []

# Text accepted as a number: optional sign, decimal digits, optional exponent,
# or nan/inf. Matching digits and dots only would read "9.5e-05" as 9.5.
number = r'[-+]?(?:nan|inf|(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'

# Patterns extracting the epoch index and the rmse entry of the metrics dict.
# NOTE(review): the logged rmse values look like NumPy float32 scalars - confirm.
# If so, newer NumPy prints them inside a dict as np.float32(...), so that
# wrapper is accepted (and skipped) as well.
epoch_pattern = re.compile(r'Epoch (\d+)')
rmse_pattern = re.compile(r"Metrics: \{.*?'rmse':\s*(?:np\.float\d+\()?(" + number + r")")

# Walk the log; the most recently seen epoch number labels each rmse row
epoch = None
for line in log_lines:
    epoch_match = epoch_pattern.search(line)
    rmse_match = rmse_pattern.search(line)

    if epoch_match:
        epoch = int(epoch_match.group(1))

    if rmse_match:
        # Separate name for the scalar so it does not share `rmse` with the DataFrame below
        rmse_value = float(rmse_match.group(1))
        log_data.append({'Epoch': epoch, 'RMSE': rmse_value})

# Convert to a DataFrame; explicit columns keep the schema even when no line matched
rmse = pd.DataFrame(log_data, columns=['Epoch', 'RMSE'])

# Display the DataFrame
visualizer.show_data(title="rmse", data=rmse)
# ----------------------------------------------------------------------------------------------------


# Title: rmse


# Data:

     Epoch      RMSE
0        1  4.035653
1        2  4.009559
2        3  3.995458
3        4  3.981212
4        5  3.966849
5        6  3.952398
6        7  3.937807
7        8  3.922921
8        9  3.853130
9       10  3.490633
10      11  0.804888
11      12  0.886664
12      13  0.526937
13      14  0.438081
14      15  0.388353
15      16  0.349752
16      17  0.318902
17      18  0.293585
18      19  0.272837
19      20  0.254947
20      21  0.239685
21      22  0.225895
22      23  0.212856
23      24  0.201385
24      25  0.191079
25      26  0.181821
26      27  0.173195
27      28  0.165407
28      29  0.158555
29      30  0.151882
30      31  0.146175
31      32  0.140174
32      33  0.134957
33      34  0.129904
34      35  0.125246
35      36  0.120930
36      37  0.117353
37      38  0.113416
38      39  0.110150
39      40  0.106739
40      41  0.103638
41      42  0.100801
42      43  0.098136
43      44  0.095708
44      45  0.094243
45      

In [11]:
# ----------------------------------------------------------------------------------------------------
# Save the rmse table

# Destination CSV for the per-epoch RMSE values
rmse_csv_path = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\e_model\\model_2\\2_rmse.csv"

# Write without the row index
rmse.to_csv(rmse_csv_path, index=False)
# ----------------------------------------------------------------------------------------------------