In [1]:
# ----------------------------------------------------------------------------------------------------
# Libraries

# Core libraries
import re
import pandas as pd

# torch
import torch
from torch.utils.data import DataLoader, TensorDataset

# pandas display options
pd.set_option('display.max_rows', None)  # show every row (no truncation)
pd.set_option('display.max_columns', None)  # show every column (no truncation)

# Project-local modules
from preparation_for_analysis.show_window import DataVisualizer
from preparation_for_analysis.model_1 import LinearRegression

# preparation_for_analysis setup: one shared visualizer instance, reused by the cells below
visualizer = DataVisualizer(line="=", length=100, start="#")
# ----------------------------------------------------------------------------------------------------

In [2]:
# ----------------------------------------------------------------------------------------------------
# Load new_learning_encoding
# Read the encoded training CSV (first row is the header) and cast every column to float32
# in a single chain, then print the frame summary.
new_learning_encoding = (
    pd.read_csv(
        "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\c_learning\\3_new_learning_encoding.csv",
        sep=",",
        header=0,
    )
    .astype("float32")
)

visualizer.show_df_info(df=new_learning_encoding, title="new_learning_encoding")
# ----------------------------------------------------------------------------------------------------



# Title: new_learning_encoding


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6747 entries, 0 to 6746
Data columns (total 54 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   EV6         6747 non-null   float32
 1   ID4         6747 non-null   float32
 2   ION5        6747 non-null   float32
 3   ION6        6747 non-null   float32
 4   IONIQ       6747 non-null   float32
 5   KNE         6747 non-null   float32
 6   M3          6747 non-null   float32
 7   MS          6747 non-null   float32
 8   MX          6747 non-null   float32
 9   MY          6747 non-null   float32
 10  Niro        6747 non-null   float32
 11  Q4eT        6747 non-null   float32
 12  RSeTGT      6747 non-null   float32
 13  Soul        6747 non-null   float32
 14  Tay         6747 non-null   float32
 15  TayCT       6747 non-null   float32
 16  TayGTS      6747 non-null   float32
 17  eT          6747 non-null   float32
 18  i3      

In [3]:
# ----------------------------------------------------------------------------------------------------
# Load new_validation_encoding
# Read the encoded validation CSV (first row is the header) and cast every column to float32
# in a single chain, then print the frame summary.
new_validation_encoding = (
    pd.read_csv(
        "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\d_validation\\3_new_validation_encoding.csv",
        sep=",",
        header=0,
    )
    .astype("float32")
)

visualizer.show_df_info(df=new_validation_encoding, title="new_validation_encoding")
# ----------------------------------------------------------------------------------------------------


# Title: new_validation_encoding


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750 entries, 0 to 749
Data columns (total 54 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   EV6         750 non-null    float32
 1   ID4         750 non-null    float32
 2   ION5        750 non-null    float32
 3   ION6        750 non-null    float32
 4   IONIQ       750 non-null    float32
 5   KNE         750 non-null    float32
 6   M3          750 non-null    float32
 7   MS          750 non-null    float32
 8   MX          750 non-null    float32
 9   MY          750 non-null    float32
 10  Niro        750 non-null    float32
 11  Q4eT        750 non-null    float32
 12  RSeTGT      750 non-null    float32
 13  Soul        750 non-null    float32
 14  Tay         750 non-null    float32
 15  TayCT       750 non-null    float32
 16  TayGTS      750 non-null    float32
 17  eT          750 non-null    float32
 18  i3      

In [4]:
# ----------------------------------------------------------------------------------------------------
# Prepare training / validation data
# The last column is taken as the target; every other column is a feature.
x_train, y_train = new_learning_encoding.iloc[:, :-1], new_learning_encoding.iloc[:, -1]
x_test, y_test = new_validation_encoding.iloc[:, :-1], new_validation_encoding.iloc[:, -1]

# Convert features to float tensors
x_train_tensor = torch.tensor(x_train.to_numpy()).float()
x_test_tensor = torch.tensor(x_test.to_numpy()).float()

# Convert targets to float tensors and reshape them to a single column: (n_samples, 1)
y_train_tensor = torch.tensor(y_train.to_numpy()).float().view(-1, 1)
y_test_tensor = torch.tensor(y_test.to_numpy()).float().view(-1, 1)

# Datasets and loaders; only the training loader shuffles
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)

test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)
# ----------------------------------------------------------------------------------------------------

In [5]:
# ----------------------------------------------------------------------------------------------------
# Build the model
# The input width is read from the feature tensor instead of being hard-coded, so the model
# stays in sync with the data if the set of encoded feature columns changes
# (it evaluates to 53 for the current 54-column frames).
# NOTE(review): the keyword is spelled 'initalize_weights' here; it has to match the
# LinearRegression signature, which is not visible in this notebook — confirm before renaming.
model = LinearRegression(input_dim=x_train_tensor.shape[1], output_dim=1,
                         continue_logging=False, initalize_weights=True)

# Train for 500 epochs; the second loader is the one built from the validation split
train_losses, val_losses, val_metrics = model.train_model(
    train_loader, test_loader, num_epochs=500
)
# ----------------------------------------------------------------------------------------------------

2025-02-23 18:48:26,648 - INFO - Epoch 1/500 - Training Loss: 2.6135, Validation Loss: 1.8883
2025-02-23 18:48:26,648 - INFO - Epoch 1/500 - Validation Metrics: {'r2_score': -18.574447631835938, 'rmse': 2.5128884}
2025-02-23 18:48:26,747 - INFO - Epoch 2/500 - Training Loss: 1.3597, Validation Loss: 0.7131
2025-02-23 18:48:26,747 - INFO - Epoch 2/500 - Validation Metrics: {'r2_score': -4.553180694580078, 'rmse': 1.3384415}
2025-02-23 18:48:26,847 - INFO - Epoch 3/500 - Training Loss: 0.4207, Validation Loss: 0.1836
2025-02-23 18:48:26,847 - INFO - Epoch 3/500 - Validation Metrics: {'r2_score': -0.16525304317474365, 'rmse': 0.61311024}
2025-02-23 18:48:26,947 - INFO - Epoch 4/500 - Training Loss: 0.1618, Validation Loss: 0.1477
2025-02-23 18:48:26,947 - INFO - Epoch 4/500 - Validation Metrics: {'r2_score': 0.11840164661407471, 'rmse': 0.5332905}
2025-02-23 18:48:27,047 - INFO - Epoch 5/500 - Training Loss: 0.1045, Validation Loss: 0.0701
2025-02-23 18:48:27,047 - INFO - Epoch 5/500 - Va

In [6]:
# ----------------------------------------------------------------------------------------------------
# Evaluate the model
# NOTE(review): test_loader is built from the validation split and is the same loader that was
# passed to train_model, so this "test" score is not measured on separate held-out data.
test_loss, metrics = model.evaluate(test_loader)
print(f"Test Loss: {test_loss:.4f}")
print("Test Loss Metrics:", metrics)
# ----------------------------------------------------------------------------------------------------

                                                 

Test Loss: 0.0013
Test Loss Metrics: {'r2_score': 0.9925359487533569, 'rmse': 0.04906991}




In [7]:
# ----------------------------------------------------------------------------------------------------
# Save the model
# No path is passed here; the destination is decided inside save_model (not visible in this notebook).
model.save_model()
# ----------------------------------------------------------------------------------------------------

Model saved to C:\Users\ssalt\Documents\ev_price_predict_project\data\train\B_models\a_model_1\best_model.pth


In [8]:
# ----------------------------------------------------------------------------------------------------
# Build the loss DataFrame from the training log

# Path of the training log file
log_file = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\B_models\\a_model_1\\training.log"

# Read the log file
with open(log_file, 'r') as file:
    log_lines = file.readlines()

# Patterns that extract the epoch number and the training / validation loss from a log line
epoch_pattern = re.compile(r'Epoch (\d+)')
training_loss_pattern = re.compile(r'Training Loss: ([\d\.]+)')
validation_loss_pattern = re.compile(r'Validation Loss: ([\d\.]+)')

# One record per log line that reports both losses
log_data = []
epoch = None  # most recent epoch number seen; reused for lines without an "Epoch N" tag
for line in log_lines:
    epoch_match = epoch_pattern.search(line)
    training_loss_match = training_loss_pattern.search(line)
    validation_loss_match = validation_loss_pattern.search(line)

    if epoch_match:
        epoch = int(epoch_match.group(1))

    # Both losses must be present: search() returns None when a pattern is absent, and
    # calling .group() on None would raise AttributeError for a partially written line.
    if training_loss_match and validation_loss_match:
        training_loss = float(training_loss_match.group(1))
        validation_loss = float(validation_loss_match.group(1))
        log_data.append({'Epoch': epoch, 'Training_Loss': training_loss, 'Validation_Loss': validation_loss})

# Convert to a DataFrame; explicit columns keep the schema stable even if no line matched
loss = pd.DataFrame(log_data, columns=['Epoch', 'Training_Loss', 'Validation_Loss'])

# Show the DataFrame
visualizer.show_data(title="loss", data=loss)
# ----------------------------------------------------------------------------------------------------


# Title: loss


# Data:

     Epoch  Training_Loss  Validation_Loss
0        1         2.6135           1.8883
1        2         1.3597           0.7131
2        3         0.4207           0.1836
3        4         0.1618           0.1477
4        5         0.1045           0.0701
5        6         0.0532           0.0397
6        7         0.0347           0.0258
7        8         0.0229           0.0185
8        9         0.0163           0.0136
9       10         0.0125           0.0106
10      11         0.0101           0.0088
11      12         0.0082           0.0075
12      13         0.0071           0.0067
13      14         0.0065           0.0062
14      15         0.0059           0.0058
15      16         0.0055           0.0054
16      17         0.0053           0.0052
17      18         0.0052           0.0050
18      19         0.0048           0.0048
19      20         0.0047           0.0047
20      21         0.0045           0.0045
21      22         0.0044   

In [9]:
# ----------------------------------------------------------------------------------------------------
# Save the loss table as CSV (index=False: the row index is not written to the file)
loss.to_csv(path_or_buf="C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\e_model\\model_1\\1_loss.csv",
            index=False)
# ----------------------------------------------------------------------------------------------------

In [12]:
# ----------------------------------------------------------------------------------------------------
# Build the RMSE DataFrame from the training log

# Path of the training log file
log_file = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\B_models\\a_model_1\\training.log"

# Read the log file
with open(log_file, 'r') as file:
    log_lines = file.readlines()

# Patterns that extract the epoch number and the rmse value from a log line.
# The number pattern accepts plain decimals (2.5128884), scientific notation (9.5e-05) and an
# optional "np.float32(" wrapper. The previous character class [\d\.]+ stopped at the 'e' of
# an exponent, so a value such as 9.5e-05 was silently recorded as 9.5.
epoch_pattern = re.compile(r'Epoch (\d+)')
rmse_pattern = re.compile(
    r"Metrics: \{.*?'rmse': (?:np\.float\d+\()?([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"
)

# One record per log line that reports an rmse
log_data = []
epoch = None  # most recent epoch number seen; reused for lines without an "Epoch N" tag
for line in log_lines:
    epoch_match = epoch_pattern.search(line)
    rmse_match = rmse_pattern.search(line)

    if epoch_match:
        epoch = int(epoch_match.group(1))

    if rmse_match:
        # Separate name for the scalar so `rmse` only ever refers to the final DataFrame
        rmse_value = float(rmse_match.group(1))
        log_data.append({'Epoch': epoch, 'RMSE': rmse_value})

# Convert to a DataFrame; explicit columns keep the schema stable even if no line matched
rmse = pd.DataFrame(log_data, columns=['Epoch', 'RMSE'])

# Show the DataFrame
visualizer.show_data(title="model_1_rmse", data=rmse)
# ----------------------------------------------------------------------------------------------------


# Title: model_1_rmse


# Data:

     Epoch      RMSE
0        1  2.512888
1        2  1.338442
2        3  0.613110
3        4  0.533291
4        5  0.366579
5        6  0.276875
6        7  0.222940
7        8  0.188964
8        9  0.162722
9       10  0.144496
10      11  0.132002
11      12  0.122770
12      13  0.116275
13      14  0.111441
14      15  0.108079
15      16  0.104901
16      17  0.102419
17      18  0.100357
18      19  0.098447
19      20  0.096998
20      21  0.095344
21      22  0.094075
22      23  0.092643
23      24  0.091542
24      25  0.090225
25      26  0.088698
26      27  0.087768
27      28  0.086533
28      29  0.085573
29      30  0.084574
30      31  0.083634
31      32  0.082672
32      33  0.081689
33      34  0.081053
34      35  0.080013
35      36  0.079358
36      37  0.078427
37      38  0.077716
38      39  0.077075
39      40  0.076373
40      41  0.075735
41      42  0.075161
42      43  0.074504
43      44  0.073909
44      45  0.073239


In [13]:
# ----------------------------------------------------------------------------------------------------
# Save the rmse table as CSV (index=False: the row index is not written to the file)
rmse.to_csv(path_or_buf="C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\e_model\\model_1\\2_rmse.csv",
            index=False)
# ----------------------------------------------------------------------------------------------------