# In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler

# Load the raw dataset from the CSV file in the working directory
machine_data = pd.read_csv('machine.data_update.csv')

# Drop the columns that are excluded from the analysis
unused_columns = ['VendorName', 'ModelName', 'ERP']
machine_data_processed = machine_data.drop(columns=unused_columns)

# Data exploration and visualisation:
# compute the pairwise correlations of the remaining columns, then draw them
# as an annotated heatmap on a 12x8 inch figure.
correlation_matrix = machine_data_processed.corr()

heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,       # write the coefficient inside every cell
    cmap='coolwarm',
    fmt='.2f',        # two decimals per annotation
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Matrix of CPU Performance Data')
plt.show()

# Separate the features from the target by column NAME rather than by
# position, so the code does not depend on 'PRP' being the last column.
raw_features = machine_data_processed.drop('PRP', axis=1)
y = machine_data_processed['PRP']

# Split the data BEFORE scaling. Fitting the scaler on the full dataset would
# let test-set statistics (mean / std) leak into the preprocessing step.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    raw_features, y, test_size=0.2, random_state=42
)

# Data scaling: learn mean / std from the training rows only
scaler = StandardScaler()
scaler.fit(X_train_raw)

# Scaled DataFrame of the whole dataset (uses the train-fitted scaler).
# The original index is passed explicitly so that the 'PRP' assignment below
# aligns row-for-row even when the index is not a default RangeIndex.
scaled_features = scaler.transform(raw_features)
scaled_features_df = pd.DataFrame(
    scaled_features, columns=raw_features.columns, index=raw_features.index
)
scaled_features_df['PRP'] = y

# Scaled feature matrices used for training and evaluation
X = scaled_features_df.drop('PRP', axis=1)
X_train = pd.DataFrame(
    scaler.transform(X_train_raw),
    columns=X_train_raw.columns,
    index=X_train_raw.index,
)
X_test = pd.DataFrame(
    scaler.transform(X_test_raw),
    columns=X_test_raw.columns,
    index=X_test_raw.index,
)

# Train the multiple linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Cross-validation: the scaler lives inside the pipeline so it is re-fitted on
# the training part of every fold and never sees that fold's validation rows.
cv_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('linear_regression', LinearRegression())
])
cv_scores = cross_val_score(cv_pipeline, X_train_raw, y_train, cv=5)

# Evaluate on the training data
y_train_pred = model.predict(X_train)

# Evaluate on the test data
y_test_pred = model.predict(X_test)

# Print the evaluation results.
# The train R^2 is reported next to the test R^2 so the two can be compared;
# a large gap between them is a sign of overfitting.
print(f'Cross-validation Mean Score: {cv_scores.mean():.4f}')
print(f'Train R^2 Score: {r2_score(y_train, y_train_pred):.4f}')
print(f'Test R^2 Score: {r2_score(y_test, y_test_pred):.4f}')
print(f'Test Mean Squared Error: {mean_squared_error(y_test, y_test_pred):.4f}')
print(f'Test Mean Absolute Error: {mean_absolute_error(y_test, y_test_pred):.4f}')

# Model improvement: polynomial regression.
# Expand the features into degree-2 polynomial terms, standardise the expanded
# terms (squares and products live on different scales), then fit ordinary
# least squares on them. Fitting everything through one pipeline means the
# same transformations are replayed on the test data at predict time.
pipeline = Pipeline([
    # include_bias=False: the constant column would have zero variance, be
    # turned into all zeros by the scaler, and LinearRegression already fits
    # its own intercept, so the column carries no information.
    ('poly_features', PolynomialFeatures(degree=2, include_bias=False)),
    ('scaler', StandardScaler()),
    ('linear_regression', LinearRegression())
])

pipeline.fit(X_train, y_train)

# Evaluate on the test data (polynomial regression)
y_test_pred_poly = pipeline.predict(X_test)

# Print the evaluation results of the improved model
print(f'Polynomial Test R^2 Score: {r2_score(y_test, y_test_pred_poly):.4f}')
print(f'Polynomial Test Mean Squared Error: {mean_squared_error(y_test, y_test_pred_poly):.4f}')
print(f'Polynomial Test Mean Absolute Error: {mean_absolute_error(y_test, y_test_pred_poly):.4f}')


# Output: ModuleNotFoundError: No module named 'seaborn'