In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix

file_path = "car_evaluation.csv"  # 로컬 파일 경로 지정

df = pd.read_csv(file_path)
print(df.columns)

# 결측치 확인 및 처리
df.fillna(method='ffill', inplace=True)

# 불필요한 컬럼 제거 (없음)

# 엔코딩 (모든 컬럼을 숫자로 변환)
label_encoders = {}
for column in df.columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# 변환 전 레이블 분포
before_encoding = df.iloc[:, -1].value_counts()

# 데이터 분할
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# 모델 리스트
models = {
    "Random Forest": RandomForestClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=200),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC()
}

# 모델 학습 및 평가
results = {}
conf_matrices = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    
    results[name] = accuracy
    conf_matrices[name] = conf_matrix

# 변환 후 레이블 분포
after_encoding = df.iloc[:, -1].value_counts()

# 결과 출력
print("Accuracy Results:", results)
print("Before Encoding Label Distribution:\n", before_encoding)
print("After Encoding Label Distribution:\n", after_encoding)
print("Confusion Matrices:\n", conf_matrices)

Index(['vhigh', 'vhigh.1', '2', '2.1', 'small', 'low', 'unacc'], dtype='object')
Accuracy Results: {'Random Forest': 0.9855491329479769, 'Decision Tree': 0.9913294797687862, 'Logistic Regression': 0.6820809248554913, 'K-Nearest Neighbors': 0.930635838150289, 'Support Vector Machine': 0.930635838150289}
Before Encoding Label Distribution:
 unacc
2    1209
0     384
1      69
3      65
Name: count, dtype: int64
After Encoding Label Distribution:
 unacc
2    1209
0     384
1      69
3      65
Name: count, dtype: int64
Confusion Matrices:
 {'Random Forest': array([[ 74,   1,   2,   0],
       [  0,  14,   0,   0],
       [  1,   0, 241,   0],
       [  1,   0,   0,  12]]), 'Decision Tree': array([[ 76,   1,   0,   0],
       [  0,  14,   0,   0],
       [  1,   0, 241,   0],
       [  1,   0,   0,  12]]), 'Logistic Regression': array([[  6,   0,  63,   8],
       [  1,   0,  13,   0],
       [ 13,   0, 228,   1],
       [  3,   0,   8,   2]]), 'K-Nearest Neighbors': array([[ 66,   0,  11, 

  df.fillna(method='ffill', inplace=True)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# 1️⃣ 데이터 로딩
df = pd.read_csv("abalone.csv")

# 2️⃣ 데이터 전처리 (성별 Encoding)
le = LabelEncoder()
df['Sex'] = le.fit_transform(df['Sex'])

# 3️⃣ 독립 변수(X)와 종속 변수(y) 설정
X = df.drop(columns=['Rings'])  # 입력 데이터
y = df['Rings']  # 예측할 타겟 값

# 4️⃣ Train-Test 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5️⃣ 여러 회귀 모델 선언
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(),
    "Support Vector Machine": SVR(kernel='linear')
}

# 6️⃣ 모델 학습 & 평가
for name, model in models.items():
    model.fit(X_train, y_train)  # 모델 학습
    y_pred = model.predict(X_test)  # 예측 수행
    
    # MSE 및 R² Score 계산
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f"{name} - MSE: {mse:.4f}, R² Score: {r2:.4f}")

Linear Regression - MSE: 4.9603, R² Score: 0.5418
Decision Tree - MSE: 6.6388, R² Score: 0.3867
Random Forest - MSE: 3.7356, R² Score: 0.6549
