<a href="https://colab.research.google.com/github/devCat404/python_projects/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

class LinearRegressionModel:
    """Small wrapper around scikit-learn's ``LinearRegression``.

    On construction the data is loaded (from a CSV path or a DataFrame),
    split into features/target, and divided into training and validation
    subsets with ``train_test_split``. ``fit``, ``evaluate``, ``predict`` and
    ``plot`` then operate on that prepared state.

    Attributes set during construction:
        df: the loaded DataFrame (a copy when a DataFrame was passed in).
        X, y: feature columns and target column of ``df``.
        X_train, X_val, y_train, y_val: the train/validation split.
        model: the underlying ``LinearRegression`` estimator.
    """

    def __init__(self, src, target_column, test_size=0.2, random_state=42):
        """Store the configuration and immediately prepare the data.

        Args:
            src: CSV path (``str`` or path-like object) or a ``DataFrame``.
            target_column: name of the column to predict.
            test_size: forwarded to ``train_test_split`` as ``test_size``.
            random_state: forwarded to ``train_test_split`` as
                ``random_state``.

        Raises:
            ValueError: if ``src`` is neither a path nor a DataFrame.
            KeyError: if ``target_column`` is not a column of the data.
        """
        self.src = src
        self.target_column = target_column
        self.test_size = test_size
        self.random_state = random_state
        self.model = LinearRegression()
        self._load_and_prepare_data()

    def _load_and_prepare_data(self):
        """Load ``src`` into ``self.df`` and build the train/validation split."""
        # Accept path-like objects (e.g. pathlib.Path) as well as plain strings.
        if isinstance(self.src, (str, os.PathLike)):
            self.df = pd.read_csv(self.src)
        elif isinstance(self.src, pd.DataFrame):
            # Copy so later changes to self.df never touch the caller's frame.
            self.df = self.src.copy()
        else:
            raise ValueError("지원하지 않는 src 형식입니다. CSV 경로 문자열 또는 DataFrame을 사용하세요.")

        # Fail early with a readable message; the exception type (KeyError)
        # is the same one DataFrame.drop would raise for a missing column.
        if self.target_column not in self.df.columns:
            raise KeyError(
                f"대상 컬럼 '{self.target_column}'을(를) 데이터에서 찾을 수 없습니다."
            )

        self.X = self.df.drop(columns=[self.target_column])
        self.y = self.df[self.target_column]

        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
            self.X, self.y, test_size=self.test_size, random_state=self.random_state
        )

    def fit(self):
        """Fit the estimator on the training split and print a confirmation."""
        self.model.fit(self.X_train, self.y_train)
        print("모델 훈련 완료")

    def evaluate(self):
        """Score the model on the validation split.

        Prints both metrics and returns them.

        Returns:
            tuple: ``(mse, r2)`` computed with ``mean_squared_error`` and
            ``r2_score`` on the validation data.
        """
        predictions = self.model.predict(self.X_val)
        mse = mean_squared_error(self.y_val, predictions)
        r2 = r2_score(self.y_val, predictions)
        print(f"검증 MSE: {mse:.4f}")
        print(f"R² Score: {r2:.4f}")
        return mse, r2

    def predict(self, input_data):
        """Predict the target for one sample or a batch of samples.

        Args:
            input_data: one of
                * ``list`` / ``np.ndarray`` holding a single sample (1-D), or
                  a 2-D batch whose second dimension equals the number of
                  feature columns (one prediction per row);
                * ``pd.Series`` holding a single sample. When its index
                  contains every feature name, values are picked by label;
                  otherwise they are taken positionally;
                * anything else (e.g. a ``DataFrame``) is handed to the
                  estimator unchanged.

        Returns:
            The array returned by the estimator's ``predict``.

        Raises:
            ValueError: if the number of supplied values does not match the
                number of feature columns.
        """
        feature_names = self.X.columns

        if isinstance(input_data, pd.Series):
            labelled = (
                input_data.index.is_unique
                and feature_names.isin(input_data.index).all()
            )
            if labelled:
                # Align by label so a differently ordered Series still works.
                input_data = input_data.reindex(feature_names)
            values = input_data.to_numpy()
        elif isinstance(input_data, (list, np.ndarray)):
            values = np.asarray(input_data)
        else:
            return self.model.predict(input_data)

        n_features = len(feature_names)
        # Keep a well-formed 2-D batch as is; every other shape is treated as
        # a single sample, matching the previous behaviour.
        if not (values.ndim == 2 and values.shape[1] == n_features):
            values = values.reshape(1, -1)
        if values.shape[1] != n_features:
            raise ValueError(
                f"입력 특성 수가 올바르지 않습니다: {n_features}개가 필요하지만 "
                f"{values.shape[1]}개가 주어졌습니다."
            )

        # Re-attach the column names: the estimator is fitted on a DataFrame,
        # so handing it the same column labels keeps the inputs consistent.
        frame = pd.DataFrame(values, columns=feature_names)
        return self.model.predict(frame)

    def plot(self):
        """Plot validation points and the fitted line (single-feature only).

        When the data has more than one feature column a message is printed
        and nothing is drawn.
        """
        if self.X.shape[1] != 1:
            print("입력 변수가 1개일 때만 시각화할 수 있습니다.")
            return
        plt.scatter(self.X_val, self.y_val, color='blue', label='Answer')
        plt.plot(self.X_val, self.model.predict(self.X_val), color='red', label='Predicted')
        plt.xlabel(self.X.columns[0])
        plt.ylabel(self.target_column)
        plt.title('Validation Data vs Prediction')
        plt.legend()
        plt.show()


In [None]:
# Demo: fit the wrapper on the numeric columns of seaborn's "tips" dataset,
# using every remaining numeric column to predict the tip amount.
df = sns.load_dataset("tips").select_dtypes(include=[np.number])
model = LinearRegressionModel(src=df, target_column='tip')

# Train, then report validation metrics (both calls print their own output).
model.fit()
model.evaluate()

# Predict for the first row's feature values (the target column removed).
input_example = df.drop(columns='tip').iloc[0]
print("예측값:", model.predict(input_example))

# Only draws when there is exactly one feature column; otherwise prints a note.
model.plot()

모델 훈련 완료
검증 MSE: 0.6486
R² Score: 0.4811
예측값: [2.64224322]
입력 변수가 1개일 때만 시각화할 수 있습니다.


