In [7]:
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

In [9]:
# 경고 무시
warnings.filterwarnings('ignore')


In [11]:
# Load the data.
# The CSV location can be overridden with the CAR_EVALUATION_CSV environment
# variable; when it is not set, the original local path is used unchanged.
DATA_PATH = os.environ.get(
    "CAR_EVALUATION_CSV",
    "C:\\Users\\LG\\Downloads\\archive\\car_evaluation.csv",
)

# The file is read with header=None, so the column names are assigned here.
df = pd.read_csv(DATA_PATH, header=None)
df.columns = ['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety', 'output']

In [13]:
# Convert every column to integer codes (label encoding).
# One encoder per column is kept in `label_encoders` so the codes can be mapped
# back to the original labels later.
# NOTE(review): LabelEncoder numbers classes in sorted order, so the integer
# codes presumably do not follow the categories' natural ranking, and the
# 'output' target is encoded the same way — confirm this is intended for the
# regressors fitted below.
label_encoders = {column: LabelEncoder() for column in df.columns}
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

In [15]:
# Split the frame into the target (y) and the input features (X)
target_column = 'output'
y = df[target_column]  # dependent variable
X = df.drop(columns=[target_column])  # independent variables: every other column

In [17]:
# Split into training and test data (80:20); the fixed seed keeps the split repeatable
split_options = dict(test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [21]:
# Train the linear regression model (fit() returns the fitted estimator)
model = LinearRegression().fit(X_train, y_train)

# Predict on the test split
y_pred = model.predict(X_test)

# Evaluate the model: each metric compares the true test targets with the predictions
print("=== Linear Regression ===")
for label, metric in (
    ("MAE (Mean Absolute Error):", mean_absolute_error),
    ("MSE (Mean Squared Error):", mean_squared_error),
    # R² (coefficient of determination): how well the model explains the data
    ("R² Score:", r2_score),
):
    print(label, metric(y_test, y_pred))

=== Linear Regression ===
MAE (Mean Absolute Error): 0.6668909386175065
MSE (Mean Squared Error): 0.76434795515757
R² Score: 0.07533670372227508


In [23]:
# Train the random forest model (100 trees, fixed seed for repeatable results)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on the test split
y_pred = model.predict(X_test)

# Evaluate the model
print("=== Random Forest Regressor ===")
print("MAE (Mean Absolute Error):", mean_absolute_error(y_test, y_pred))
print("MSE (Mean Squared Error):", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))  # R² (coefficient of determination): how well the model explains the data

=== Random Forest Regressor ===
MAE (Mean Absolute Error): 0.12083815028901734
MSE (Mean Squared Error): 0.0849771676300578
R² Score: 0.8971996099434115


In [25]:
# Train the SVM model (including feature scaling)

# Standardize the features for the SVM model. The scaler is fitted on the
# training split only and then applied to the test split, so no test-set
# statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the model
model = SVR(kernel='rbf')
model.fit(X_train_scaled, y_train)

# Predict on the scaled test split
y_pred = model.predict(X_test_scaled)

# Evaluate the model
print("=== SVM Regressor ===")
print("MAE (Mean Absolute Error):", mean_absolute_error(y_test, y_pred))
print("MSE (Mean Squared Error):", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))  # R² (coefficient of determination): how well the model explains the data

=== SVM Regressor ===
MAE (Mean Absolute Error): 0.4683673592347672
MSE (Mean Squared Error): 0.5459926698466008
R² Score: 0.33949011253682637


In [27]:
# Train the decision tree model (fixed seed for repeatable results)
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict on the test split
y_pred = model.predict(X_test)

# Evaluate the model
print("=== Decision Tree Regressor ===")
print("MAE (Mean Absolute Error):", mean_absolute_error(y_test, y_pred))
print("MSE (Mean Squared Error):", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))  # R² (coefficient of determination): how well the model explains the data

=== Decision Tree Regressor ===
MAE (Mean Absolute Error): 0.10982658959537572
MSE (Mean Squared Error): 0.2658959537572254
R² Score: 0.6783346806790622
