In [2]:
# ----------------------------------------------------------------------------------------------------
# 라이브러리 목록

# 기본 라이브러리 
import math
import numpy as np 
import pandas as pd

# torch 라이브러리 
import torch
from torch.utils.data import DataLoader, TensorDataset

# pandas display options: never truncate printed frames
pd.options.display.max_rows = None  # show every row
pd.options.display.max_columns = None  # show every column

# 개인 라이브러리  
from preparation_for_analysis.show_window import DataVisualizer
from preparation_for_analysis.model_2 import MultiAttentionMLP

# preparation_for_analysis setup: shared DataVisualizer instance that the cells
# below use (via show_df_info) to print DataFrame summaries.
# NOTE(review): line/length/start presumably control the printed banner
# (separator character, width, line prefix) — confirm in show_window.py.
visualizer = DataVisualizer(line="=", length=100, start="#")
# ----------------------------------------------------------------------------------------------------

In [3]:
# ----------------------------------------------------------------------------------------------------
# Load new_learning from CSV and print its summary
new_learning_csv = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\c_learning\\2_new_learning.csv"
new_learning = pd.read_csv(new_learning_csv, sep=",", header=0)

visualizer.show_df_info(df=new_learning, title="new_learning.info")
# ----------------------------------------------------------------------------------------------------


# Title: new_learning.info


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6747 entries, 0 to 6746
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Model         6747 non-null   object 
 1   Manufacturer  6747 non-null   object 
 2   Model_year    6747 non-null   object 
 3   Drivetrain    6747 non-null   object 
 4   Warranty      6747 non-null   object 
 5   Accident      6747 non-null   object 
 6   Condition     6747 non-null   object 
 7   Battery       6747 non-null   float64
 8   Mileage       6747 non-null   int64  
 9   knn           6747 non-null   float64
 10  knn_2         6747 non-null   float64
 11  Price         6747 non-null   float64
dtypes: float64(4), int64(1), object(7)
memory usage: 632.7+ KB







In [4]:
# ----------------------------------------------------------------------------------------------------
# Load new_validation from CSV and print its summary
new_validation_csv = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\d_validation\\2_new_validation.csv"
new_validation = pd.read_csv(new_validation_csv, sep=",", header=0)

visualizer.show_df_info(df=new_validation, title="new_validation.info")
# ----------------------------------------------------------------------------------------------------


# Title: new_validation.info


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750 entries, 0 to 749
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Model         750 non-null    object 
 1   Manufacturer  750 non-null    object 
 2   Model_year    750 non-null    object 
 3   Drivetrain    750 non-null    object 
 4   Warranty      750 non-null    object 
 5   Accident      750 non-null    object 
 6   Condition     750 non-null    object 
 7   Battery       750 non-null    float64
 8   Mileage       750 non-null    int64  
 9   knn           750 non-null    float64
 10  knn_2         750 non-null    float64
 11  Price         750 non-null    float64
dtypes: float64(4), int64(1), object(7)
memory usage: 70.4+ KB







In [9]:
# ----------------------------------------------------------------------------------------------------
# Load new_validation_encoding from CSV, cast every column to float32, and print its summary
new_validation_encoding_csv = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\d_validation\\3_new_validation_encoding.csv"
new_validation_encoding = pd.read_csv(
    new_validation_encoding_csv, sep=",", header=0
).astype("float32")

visualizer.show_df_info(df=new_validation_encoding, title="new_validation_encoding")
# ----------------------------------------------------------------------------------------------------


# Title: new_validation_encoding


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750 entries, 0 to 749
Data columns (total 55 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   EV6         750 non-null    float32
 1   ID4         750 non-null    float32
 2   ION5        750 non-null    float32
 3   ION6        750 non-null    float32
 4   IONIQ       750 non-null    float32
 5   KNE         750 non-null    float32
 6   M3          750 non-null    float32
 7   MS          750 non-null    float32
 8   MX          750 non-null    float32
 9   MY          750 non-null    float32
 10  Niro        750 non-null    float32
 11  Q4eT        750 non-null    float32
 12  RSeTGT      750 non-null    float32
 13  Soul        750 non-null    float32
 14  Tay         750 non-null    float32
 15  TayCT       750 non-null    float32
 16  TayGTS      750 non-null    float32
 17  eT          750 non-null    float32
 18  i3      

In [6]:
# ----------------------------------------------------------------------------------------------------
# Load model_2: build a MultiAttentionMLP and restore its saved state.
# NOTE(review): "initalize_weights" (sic) is the keyword spelling this call uses;
# it is presumably the name declared in model_2.py — confirm before renaming.
model = MultiAttentionMLP(continue_logging=True, initalize_weights=False)

# The recorded output of this cell reports the model being loaded from
# ...\B_models\b_model_2\best_model.pth; the path is not passed here, so it is
# presumably configured inside the class — confirm in model_2.py.
model.load_model()
# ----------------------------------------------------------------------------------------------------

Model loaded from C:\Users\ssalt\Documents\ev_price_predict_project\data\train\B_models\b_model_2\best_model.pth.


In [10]:
# ----------------------------------------------------------------------------------------------------
# Predict validation prices with model_2.
# The np.expm1 step below undoes a log1p transform, i.e. this cell assumes the
# model was trained on a log1p-transformed "Price" target.

# Model input: every encoded column except the last three.
# NOTE(review): the last three columns are presumably knn, knn_2 and Price —
# confirm against 3_new_validation_encoding.csv.
# A separate name is used instead of overwriting new_validation_encoding, so
# re-running this cell does not strip three more columns on every execution.
validation_features = new_validation_encoding.iloc[:, :-3]

# Predictions are attached to new_validation by position, so both frames must
# describe the same rows.
assert len(validation_features) == len(new_validation), (
    "new_validation_encoding and new_validation must have the same number of rows"
)

# Wrap the features in a DataLoader that yields all rows, in order, as one batch
tensor_input = torch.tensor(validation_features.to_numpy(), dtype=torch.float32)
dataset_input = TensorDataset(tensor_input)
dataloader_input = DataLoader(
    dataset_input,
    batch_size=max(len(dataset_input), 1),  # whole set in one batch; DataLoader rejects 0
    shuffle=False,  # keep row order aligned with new_validation
)

# Run the model
predictions = model.predict(dataloader_input)

# Store the raw (log-scale) predictions in the "Model_2" column
new_validation["Model_2"] = predictions

# Convert back to the original price scale (inverse of log1p), vectorized
predicted_price = np.expm1(new_validation["Model_2"])

# Truncate (floor) to two decimal places; the final division is done in float64
new_validation["Model_2"] = np.floor(predicted_price * 100).astype("float64") / 100

# Reorder the columns (knn / knn_2 are dropped) and take an explicit copy so the
# result is an independent frame when this cell assigns to it again on re-run
new_validation = new_validation[
    ["Model", "Manufacturer", "Model_year", "Drivetrain",
     "Warranty", "Accident", "Condition", "Battery", "Mileage",
     "Model_2", "Price"]
].copy()

# Print the resulting DataFrame summary
visualizer.show_df_info(title="new_validation", df=new_validation)
# ----------------------------------------------------------------------------------------------------

                                                         


# Title: new_validation


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750 entries, 0 to 749
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Model         750 non-null    object 
 1   Manufacturer  750 non-null    object 
 2   Model_year    750 non-null    object 
 3   Drivetrain    750 non-null    object 
 4   Warranty      750 non-null    object 
 5   Accident      750 non-null    object 
 6   Condition     750 non-null    object 
 7   Battery       750 non-null    float64
 8   Mileage       750 non-null    int64  
 9   Model_2       750 non-null    float64
 10  Price         750 non-null    float64
dtypes: float64(3), int64(1), object(7)
memory usage: 64.6+ KB









In [11]:
# ----------------------------------------------------------------------------------------------------
# Save new_validation (with the Model_2 predictions) as new_validation_predict
new_validation_predict_csv = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\e_model\\model_2\\3_new_validation_predict.csv"
new_validation.to_csv(new_validation_predict_csv, index=False)
# ----------------------------------------------------------------------------------------------------