In [3]:
# ----------------------------------------------------------------------------------------------------
# 라이브러리 목록

# 기본 라이브러리 
import numpy as np
import pandas as pd

# sklearn 라이브러리
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsRegressor

# 개인 라이브러리 
from preparation_for_analysis.show_window import DataVisualizer
from preparation_for_analysis.encoding import DataFrameEncoder

# pandas display settings: passing None lifts the limit, so every row and
# every column is printed when a DataFrame is displayed.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

# preparation_for_analysis settings: shared helper object used by the later
# cells to print DataFrame summaries.
visualizer = DataVisualizer(
    line="=",
    length=100,
    start="#",
)
# ----------------------------------------------------------------------------------------------------

In [4]:
# ----------------------------------------------------------------------------------------------------
# Load new_learning from its CSV file (comma-separated, first row is the header)
new_learning_path = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\c_learning\\2_new_learning.csv"
new_learning = pd.read_csv(new_learning_path, sep=",", header=0)

# Print a summary of the loaded frame
visualizer.show_df_info(df=new_learning, title="new_learning.info")
# ----------------------------------------------------------------------------------------------------


# Title: new_learning.info


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6747 entries, 0 to 6746
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Model         6747 non-null   object 
 1   Manufacturer  6747 non-null   object 
 2   Model_year    6747 non-null   object 
 3   Drivetrain    6747 non-null   object 
 4   Warranty      6747 non-null   object 
 5   Accident      6747 non-null   object 
 6   Condition     6747 non-null   object 
 7   Battery       6747 non-null   float64
 8   Mileage       6747 non-null   int64  
 9   knn           6747 non-null   float64
 10  knn_2         6747 non-null   float64
 11  Price         6747 non-null   float64
dtypes: float64(4), int64(1), object(7)
memory usage: 632.7+ KB







In [5]:
# ----------------------------------------------------------------------------------------------------
# Build new_learning_encoding
# (the "Price" column is transformed with log(y + 1))

# Create the encoder for the categorical columns.
# ascending_order / sort_by_number are positional, one flag per entry in
# `columns`; sort_by_number is True only for "Model_year" and "Warranty".
# NOTE(review): the exact meaning of these flags is defined by
# DataFrameEncoder, which is not visible here -- confirm against that class.
new_learning_encoder = DataFrameEncoder(
    df=new_learning,
    columns=[
        "Model", "Manufacturer", "Model_year", "Drivetrain",
        "Warranty", "Accident", "Condition"
    ],
    ascending_order=[True, True, True,
                     True, True, True, True],
    sort_by_number=[False, False, True,
                    False, True, False, False]
)

# Apply one-hot encoding / create new_learning_encoding
new_learning_encoding = new_learning_encoder.fit_transform(encoding_type="onehot")

# Min-max normalization statistics, kept as named globals exactly as before.
battery_max = new_learning["Battery"].max()
battery_min = new_learning["Battery"].min()
mileage_max = new_learning["Mileage"].max()
mileage_min = new_learning["Mileage"].min()
knn_max = new_learning["knn"].max()
knn_min = new_learning["knn"].min()
knn_2_max = new_learning["knn_2"].max()
knn_2_min = new_learning["knn_2"].min()


def min_max_normalize(values, lower, upper):
    """Linearly rescale `values` so that `lower` maps to 0 and `upper` maps to 1.

    Parameters:
        values: a pandas Series (or anything supporting arithmetic with scalars).
        lower: the value that should map to 0.
        upper: the value that should map to 1.

    Returns:
        (values - lower) / (upper - lower), computed on the whole Series at
        once rather than element by element.

    There is no guard for upper == lower; that case divides by zero.
    """
    return (values - lower) / (upper - lower)


# Min-max normalization of the numeric feature columns (vectorized)
new_learning_encoding["Battery"] = min_max_normalize(new_learning["Battery"], battery_min, battery_max)
new_learning_encoding["Mileage"] = min_max_normalize(new_learning["Mileage"], mileage_min, mileage_max)
new_learning_encoding["knn"] = min_max_normalize(new_learning["knn"], knn_min, knn_max)
new_learning_encoding["knn_2"] = min_max_normalize(new_learning["knn_2"], knn_2_min, knn_2_max)

# log(y + 1) transform of the target; np.log1p operates on the whole Series
new_learning_encoding["Price"] = np.log1p(new_learning["Price"])

# Cast every column of new_learning_encoding to float32
new_learning_encoding = new_learning_encoding.astype("float32")

# Print a summary of new_learning_encoding
visualizer.show_df_info(title="new_learning_encoding.info", df=new_learning_encoding)
# ----------------------------------------------------------------------------------------------------


# Title: new_learning_encoding.info


# DataFrame Information:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6747 entries, 0 to 6746
Data columns (total 55 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   EV6         6747 non-null   float32
 1   ID4         6747 non-null   float32
 2   ION5        6747 non-null   float32
 3   ION6        6747 non-null   float32
 4   IONIQ       6747 non-null   float32
 5   KNE         6747 non-null   float32
 6   M3          6747 non-null   float32
 7   MS          6747 non-null   float32
 8   MX          6747 non-null   float32
 9   MY          6747 non-null   float32
 10  Niro        6747 non-null   float32
 11  Q4eT        6747 non-null   float32
 12  RSeTGT      6747 non-null   float32
 13  Soul        6747 non-null   float32
 14  Tay         6747 non-null   float32
 15  TayCT       6747 non-null   float32
 16  TayGTS      6747 non-null   float32
 17  eT          6747 non-null   float32
 18  i3 

In [6]:
# ----------------------------------------------------------------------------------------------------
# Save new_learning_encoding to CSV without writing the index column
new_learning_encoding_path = "C:\\Users\\ssalt\\Documents\\ev_price_predict_project\\data\\train\\A_df\\c_learning\\3_new_learning_encoding.csv"
new_learning_encoding.to_csv(new_learning_encoding_path, index=False)
# ----------------------------------------------------------------------------------------------------