In [12]:
# ----------------------------------------------------------------------------------------------------
# 라이브러리 목록
# 기본 라이브러리 
import math
import numpy as np
import pandas as pd

# torch 라이브러리 
import torch
from torch.utils.data import DataLoader, TensorDataset

# pandas display settings: remove the row and column limits so that
# DataFrames are printed in full instead of being truncated.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

# 개인 라이브러리  
from preparation_for_analysis.show_window import DataVisualizer
from preparation_for_analysis.model_1 import LinearRegression

# preparation_for_analysis setup: helper instance that the cells below use
# (via show_df_info) to print a titled summary of each DataFrame.
visualizer = DataVisualizer(
    line="=",
    length=100,
    start="#",
)
# ----------------------------------------------------------------------------------------------------

In [17]:
# ----------------------------------------------------------------------------------------------------
# Load new_learning
# Location of the learning-set CSV (comma separated, first row is the header).
new_learning_csv = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\c_learning\\2_new_learning.csv"
new_learning = pd.read_csv(new_learning_csv, sep=",", header=0)

# Print the column / dtype summary of the loaded frame.
visualizer.show_df_info(df=new_learning, title="new_learning.info")
# ----------------------------------------------------------------------------------------------------


# Title: new_learning.info


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6372 entries, 0 to 6371
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Cluster       6372 non-null   int64  
 1   Model         6372 non-null   object 
 2   Manufacturer  6372 non-null   object 
 3   Model_year    6372 non-null   object 
 4   Drivetrain    6372 non-null   object 
 5   Warranty      6372 non-null   object 
 6   Accident      6372 non-null   object 
 7   Condition     6372 non-null   object 
 8   Battery       6372 non-null   float64
 9   Mileage       6372 non-null   int64  
 10  Price         6372 non-null   float64
dtypes: float64(2), int64(2), object(7)
memory usage: 547.7+ KB







In [18]:
# ----------------------------------------------------------------------------------------------------
# Load new_validation
# Location of the validation-set CSV (comma separated, first row is the header).
new_validation_csv = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\d_validation\\2_new_validation.csv"
new_validation = pd.read_csv(new_validation_csv, sep=",", header=0)

# Print the column / dtype summary of the loaded frame.
visualizer.show_df_info(df=new_validation, title="new_validation.info")
# ----------------------------------------------------------------------------------------------------


# Title: new_validation.info


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1125 entries, 0 to 1124
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Cluster       1125 non-null   int64  
 1   Model         1125 non-null   object 
 2   Manufacturer  1125 non-null   object 
 3   Model_year    1125 non-null   object 
 4   Drivetrain    1125 non-null   object 
 5   Warranty      1125 non-null   object 
 6   Accident      1125 non-null   object 
 7   Condition     1125 non-null   object 
 8   Battery       1125 non-null   float64
 9   Mileage       1125 non-null   int64  
 10  Price         1125 non-null   float64
dtypes: float64(2), int64(2), object(7)
memory usage: 96.8+ KB







In [19]:
# ----------------------------------------------------------------------------------------------------
# Load new_validation_encoding
# Location of the encoded validation-set CSV (comma separated, first row is the header).
new_validation_encoding_csv = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\d_validation\\3_new_validation_encoding.csv"

# Read the file, then cast every column to float32 (the dtype later used for the model input tensor).
new_validation_encoding = pd.read_csv(
    new_validation_encoding_csv, sep=",", header=0
).astype("float32")

# Print the column / dtype summary of the loaded frame.
visualizer.show_df_info(df=new_validation_encoding, title="new_validation_encoding")
# ----------------------------------------------------------------------------------------------------


# Title: new_validation_encoding


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1125 entries, 0 to 1124
Columns: 168 entries, c0 to Price
dtypes: float32(168)
memory usage: 738.4 KB







In [20]:
# ----------------------------------------------------------------------------------------------------
# Load model_1
# Build the project's LinearRegression wrapper with 167 inputs and a single output.
# NOTE(review): the keyword is spelled `initalize_weights` in this call; it is kept
# as-is because it must match the signature in preparation_for_analysis.model_1.
model = LinearRegression(
    input_dim=167,
    output_dim=1,
    continue_logging=True,
    initalize_weights=False,
)

# Restore the saved weights; the checkpoint location is chosen by load_model() itself.
model.load_model()
# ----------------------------------------------------------------------------------------------------

Model loaded from C:\Users\ssalt\Documents\ev_price_predict_project\data\train\B_models\a_model_1\best_model.pth.


In [21]:
# ----------------------------------------------------------------------------------------------------
# Predict validation prices with model_1.
# (Case where the "Price" column was log-transformed: the model output is
#  mapped back with expm1 below - presumably the target was log1p("Price"); confirm.)

# Prepare the data: keep only the feature columns.
# "Price" is dropped by name with errors="ignore" so that this cell is idempotent.
# A positional slice (iloc[:, :-1]) removes one more column - a real feature -
# every time the cell is re-run, silently corrupting the model input.
new_validation_encoding = new_validation_encoding.drop(columns=["Price"], errors="ignore")

# Predictions are attached to new_validation by position, so both frames must
# describe the same rows.
assert len(new_validation_encoding) == len(new_validation), (
    "new_validation_encoding and new_validation must have the same number of rows"
)

# Convert the encoded features to a float32 tensor and wrap it in a DataLoader.
# shuffle=False keeps the predictions in the same order as the DataFrame rows.
tensor_input = torch.tensor(new_validation_encoding.to_numpy(), dtype=torch.float32)
dataset_input = TensorDataset(tensor_input)
dataloader_input = DataLoader(dataset_input, batch_size=750, shuffle=False)

# Run the model.
predictions = model.predict(dataloader_input)

# Store the raw predictions in the "Model_1" column of new_validation.
new_validation["Model_1"] = predictions

# Convert back to the original price scale (vectorised, computed in float64).
restored_price = np.expm1(new_validation["Model_1"].astype("float64"))

# Truncate (floor) to two decimal places.
new_validation["Model_1"] = np.floor(restored_price * 100) / 100

# Reorder the columns so the prediction sits right next to the true "Price".
new_validation = new_validation[
    ["Cluster", "Model", "Manufacturer", "Model_year", "Drivetrain",
     "Warranty", "Accident", "Condition", "Battery", "Mileage",
     "Model_1", "Price"]
]

# Print the column / dtype summary of the resulting frame.
visualizer.show_df_info(title="new_validation", df=new_validation)
# ----------------------------------------------------------------------------------------------------

                                                         


# Title: new_validation


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1125 entries, 0 to 1124
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Cluster       1125 non-null   int64  
 1   Model         1125 non-null   object 
 2   Manufacturer  1125 non-null   object 
 3   Model_year    1125 non-null   object 
 4   Drivetrain    1125 non-null   object 
 5   Warranty      1125 non-null   object 
 6   Accident      1125 non-null   object 
 7   Condition     1125 non-null   object 
 8   Battery       1125 non-null   float64
 9   Mileage       1125 non-null   int64  
 10  Model_1       1125 non-null   float64
 11  Price         1125 non-null   float64
dtypes: float64(3), int64(2), object(7)
memory usage: 105.6+ KB









In [22]:
# ----------------------------------------------------------------------------------------------------
# Save as new_validation_predict
# Destination CSV for the validation frame (written without the index column).
new_validation_predict_csv = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\e_model\\model_1\\3_new_validation_predict.csv"
new_validation.to_csv(new_validation_predict_csv, index=False)
# ----------------------------------------------------------------------------------------------------