In [None]:
!pip install pytorch-tabnet

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
import xgboost as xgb
import lightgbm as lgb
from pytorch_tabnet.tab_model import TabNetRegressor
import pickle

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
import seaborn as sns

In [None]:
# Source data file: /content/drive/MyDrive/Colab Notebooks/vehicle_data/casim_output.xlsx

In [None]:
# Load the 'casim_output' workbook from the Colab Google Drive path into `carsim`.
carsim = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/vehicle_data/casim_output.xlsx')

In [None]:
# Load the 'matlab_output_compare_14dof' workbook from the same folder into `matlab`.
matlab = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/vehicle_data/matlab_output_compare_14dof - 최종본.xlsx')

In [None]:
# car_name values that occur in both frames (merge on the name column).
# Only membership is used below, so repeated names in the merge result do not matter.
common_rows = pd.merge(carsim[['car_name']], matlab[['car_name']], on='car_name')['car_name']
# Restrict each source to the vehicles that the other source also contains.
carsim_common = carsim[carsim['car_name'].isin(common_rows)]
matlab_common = matlab[matlab['car_name'].isin(common_rows)]

In [None]:
# Summary statistics of SSG for the shared vehicles in `carsim`.
carsim_common['SSG'].describe()

In [None]:
# Summary statistics of SSG for the shared vehicles in `matlab`.
matlab_common['SSG'].describe()

In [None]:
# Draw SSG from both sources on the same axes.
# NOTE(review): only one positional argument is given, so the values are
# presumably plotted against the row index — confirm that is the intended view.
sns.scatterplot(carsim_common['SSG'])
sns.scatterplot(matlab_common['SSG'])

In [None]:
# SSG difference between the two sources for the AVANTE rows.
# NOTE(review): the subtraction aligns on row labels, and the two Series come
# from different frames; rows without a matching label become NaN — confirm the
# indexes line up.
sns.scatterplot(carsim_common[carsim_common['car_name'] == 'AVANTE']['SSG'] - matlab_common[matlab_common['car_name'] == 'AVANTE']['SSG'])

In [None]:
# SSG difference between the two sources for the X5 rows.
# NOTE(review): the subtraction aligns on row labels, and the two Series come
# from different frames; rows without a matching label become NaN — confirm the
# indexes line up.
sns.scatterplot(carsim_common[carsim_common['car_name'] == 'X5']['SSG'] - matlab_common[matlab_common['car_name'] == 'X5']['SSG'])

In [None]:
 # SSG difference between the two sources for the rows named 'GRANDUER'.
 # NOTE(review): the subtraction aligns on row labels of two different frames
 # (unmatched labels become NaN); also confirm 'GRANDUER' is spelled as in the data.
 sns.scatterplot(carsim_common[carsim_common['car_name'] == 'GRANDUER']['SSG'] - matlab_common[matlab_common['car_name'] == 'GRANDUER']['SSG'])

In [None]:
# Distinct vehicle names that are present in both sources.
carsim_common['car_name'].unique()

In [None]:
# Hold out every vehicle whose sprung mass lies in [1500, 1600) as the test set.
# The training set is the exact complement of that mask: building the two
# filters separately (`< 1600` for test, `<= 1600` for train) dropped rows with
# a mass of exactly 1600 from both sets.
test_mass_mask = (carsim['vehicle_sprung_mass'] >= 1500) & (carsim['vehicle_sprung_mass'] < 1600)
carsim_test = carsim[test_mass_mask]
carsim_train = carsim[~test_mass_mask]

# The first three columns are used as targets, all remaining columns as features.
carsim_x_train = carsim_train.iloc[:, 3:]
carsim_y_train = carsim_train.iloc[:, :3]
carsim_x_test = carsim_test.iloc[:, 3:]
carsim_y_test = carsim_test.iloc[:, :3]

In [None]:
# NOTE(review): presumably the sprung masses of the five vehicles hand-picked
# as test rows in the following cells — confirm.
750, 1110, 1270, 1370, 1590

In [None]:
# First A_HATCHBACK row (position 0 within that group), returned as a Series.
carsim750 = carsim[(carsim['car_name'] == 'A_HATCHBACK')].iloc[0, :]

In [None]:
# One row per vehicle, chosen by position inside each car_name group.
# NOTE(review): the positions (1, 3, 4, 12) are hard-coded and raise IndexError
# if a group has fewer rows; presumably they match the masses in the variable names.
carsim1110 = carsim[(carsim['car_name'] == 'B_HATCHBACK')].iloc[1, :]
carsim1270 = carsim[(carsim['car_name'] == 'C_HATCHBACK')].iloc[3, :]
carsim1370 = carsim[(carsim['car_name'] == 'D_SEDAN')].iloc[4, :]
carsim1590 = carsim[(carsim['car_name'] == 'E_SUV')].iloc[12, :]

In [None]:
# Stack the five hand-picked rows into a test frame. Each Series keeps its
# original row label as its name, so the resulting index still refers to rows
# of `carsim`. Rows taken from a frame that mixes text and numeric columns are
# object-typed, which makes the transposed frame all-object; infer_objects()
# restores numeric dtypes for the columns that hold numbers.
carsim_testset = pd.concat([carsim750, carsim1110, carsim1270, carsim1370, carsim1590], axis = 1).T.infer_objects()

In [None]:
# Training set: every row of `carsim` except the row labels used by the test set.
carsim_trainset = carsim.drop(carsim_testset.index)

In [None]:
# (rows, columns) of the two sources, side by side.
carsim.shape, matlab.shape

In [None]:

class VehiclePerformancePredictor:
    """Train, score and pickle several regressors, one fit per target column.

    Typical use: ``preprocess_data()``, then ``train_models()`` and/or
    ``train_tabnet()``, then ``get_results()`` / ``get_best_model_predictions()``.

    Args:
        X: feature table.
        y: DataFrame of targets; every column is a separate regression target.

    State kept on the instance:
        scaler: StandardScaler fitted on the training features only.
        models: name -> base estimator. One instance is refitted for every
            target, so after training it holds the fit for the last target;
            the per-target fits are the pickle files written during training.
        results: '<Model>_<target>' -> {'R2', 'MSE', 'MAE'} on the test split.
        predictions: '<Model>_<target>' -> predictions for the test split.
        y_test_values: target -> true values of the test split.
    """

    # Candidate model names; earlier entries win R2 ties.
    _MODEL_ORDER = ('RandomForest', 'XGBoost', 'LightGBM', 'SVM', 'Stacking', 'TabNet')

    def __init__(self, X, y):
        """Store the data and create the unfitted scaler and base models."""
        self.X = X
        self.y = y
        self.scaler = StandardScaler()
        self.models = {
            'RandomForest': RandomForestRegressor(n_estimators=100, random_state=42),
            'XGBoost': xgb.XGBRegressor(objective='reg:squarederror', random_state=42),
            'LightGBM': lgb.LGBMRegressor(random_state=42),
            'SVM': SVR()
        }
        self.results = {}
        self.predictions = {}  # '<Model>_<target>' -> predicted values
        self.y_test_values = {}  # target -> true test values

    def preprocess_data(self):
        """Split X/y 80/20 (random_state=42) and standardise the features.

        The scaler is fitted on the training split and only applied to the
        test split. Afterwards ``X_train``/``X_test`` hold the scaler output.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.2, random_state=42)

        self.X_train = self.scaler.fit_transform(self.X_train)
        self.X_test = self.scaler.transform(self.X_test)

    def _evaluate(self, target, y_pred):
        """Return R2, MSE and MAE of ``y_pred`` against the test values of ``target``."""
        y_true = self.y_test[target]
        return {
            'R2': r2_score(y_true, y_pred),
            'MSE': mean_squared_error(y_true, y_pred),
            'MAE': mean_absolute_error(y_true, y_pred)
        }

    def _store(self, key, target, y_pred, metrics):
        """Record metrics, predictions and the true test values for one model/target."""
        # Recorded on every store so the getters work whichever train_* ran.
        self.y_test_values[target] = self.y_test[target]
        self.results[key] = metrics
        self.predictions[key] = y_pred

    @staticmethod
    def _pickle(model, path):
        """Write ``model`` to ``path`` with pickle."""
        with open(path, 'wb') as file:
            pickle.dump(model, file)

    def _fit_and_record(self, model, model_name, target):
        """Fit ``model`` on one target, score it on the test split, pickle it, store the outcome."""
        model.fit(self.X_train, self.y_train[target])
        y_pred = model.predict(self.X_test)
        metrics = self._evaluate(target, y_pred)
        self._pickle(model, f'{model_name}_{target}_model.pkl')
        self._store(f'{model_name}_{target}', target, y_pred, metrics)

    def train_models(self):
        """Fit every base model and then the stacking ensemble, once per target.

        Each fit is pickled to '<Model>_<target>_model.pkl' in the working
        directory and its test metrics/predictions are stored on the instance.
        """
        for target in self.y.columns:
            for model_name, model in self.models.items():
                self._fit_and_record(model, model_name, target)

        # The stacking ensemble is built from the same four base estimators
        # with a linear regression as final estimator.
        estimators = [
            ('rf', self.models['RandomForest']),
            ('xgb', self.models['XGBoost']),
            ('lgbm', self.models['LightGBM']),
            ('svm', self.models['SVM'])
        ]
        stacking_model = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())

        for target in self.y.columns:
            self._fit_and_record(stacking_model, 'Stacking', target)

    def train_tabnet(self):
        """Fit one multi-output TabNet on all targets and score it per target.

        The model is pickled to 'TabNet_model.pkl' (a single file, not one per target).
        """
        tabnet_model = TabNetRegressor()
        # NOTE(review): the test split is also passed as eval_set together with
        # patience=10, so the TabNet scores below are presumably optimistic.
        tabnet_model.fit(
            X_train=self.X_train, y_train=self.y_train.values,
            eval_set=[(self.X_test, self.y_test.values)],
            patience=10, max_epochs=100,
            eval_metric=['rmse'],
            batch_size=16, virtual_batch_size=8
        )

        self._pickle(tabnet_model, 'TabNet_model.pkl')

        # Column i of the prediction matrix belongs to the i-th target column.
        y_pred = tabnet_model.predict(self.X_test)
        for i, target in enumerate(self.y.columns):
            target_pred = y_pred[:, i]
            self._store(f'TabNet_{target}', target, target_pred,
                        self._evaluate(target, target_pred))

    def get_results(self):
        """Return the stored metrics as a DataFrame with one row per '<Model>_<target>'."""
        results_df = pd.DataFrame(self.results).T
        return results_df

    def get_predictions(self, model_name='TabNet'):
        """Return {target: DataFrame(y_test, Predicted)} for one model.

        Args:
            model_name: which model's predictions to return; defaults to
                'TabNet'. If that model has no stored predictions for a
                target, the 'Predicted' column is filled with 'N/A'.
        """
        predictions_df = {}
        for target in self.y.columns:
            predictions_df[target] = pd.DataFrame({
                'y_test': self.y_test_values[target],
                'Predicted': self.predictions.get(f'{model_name}_{target}', 'N/A')
            })
        return predictions_df

    def get_best_model_predictions(self):
        """Return, per target, the predictions of the model with the highest test R2.

        Returns:
            {target: DataFrame with columns 'y_test', 'Predicted', 'Best_Model'}.
            Targets without any stored result are left out.
        """
        best_model_predictions = {}
        for target in self.y.columns:
            best_r2 = -float('inf')
            best_model = None

            # Strict '>' keeps the earliest model on ties and skips NaN scores.
            for model_name in self._MODEL_ORDER:
                model_key = f'{model_name}_{target}'
                if model_key in self.results:
                    r2 = self.results[model_key]['R2']
                    if r2 > best_r2:
                        best_r2 = r2
                        best_model = model_key

            if best_model:
                best_model_predictions[target] = pd.DataFrame({
                    'y_test': self.y_test_values[target],
                    'Predicted': self.predictions[best_model],
                    'Best_Model': best_model
                })

        return best_model_predictions


In [None]:
class VehiclePerformancePredictor:
    """Train, score and pickle several regressors, one fit per target column.

    Typical use: ``preprocess_data()``, then ``train_models()`` and/or
    ``train_tabnet()``, then ``get_results()`` / ``get_best_model_predictions()``.

    Args:
        X: feature table.
        y: DataFrame of targets; every column is a separate regression target.

    State kept on the instance:
        scaler: StandardScaler fitted on the training features only.
        models: name -> base estimator. One instance is refitted for every
            target, so after training it holds the fit for the last target;
            the per-target fits are the pickle files written during training.
        results: '<Model>_<target>' -> {'R2', 'MSE', 'MAE'} on the test split.
        predictions: '<Model>_<target>' -> predictions for the test split.
        y_test_values: target -> true values of the test split.
    """

    # Candidate model names; earlier entries win R2 ties.
    _MODEL_ORDER = ('RandomForest', 'XGBoost', 'LightGBM', 'SVM', 'Stacking', 'TabNet')

    def __init__(self, X, y):
        """Store the data and create the unfitted scaler and base models."""
        self.X = X
        self.y = y
        self.scaler = StandardScaler()
        self.models = {
            'RandomForest': RandomForestRegressor(n_estimators=100, random_state=42),
            'XGBoost': xgb.XGBRegressor(objective='reg:squarederror', random_state=42),
            'LightGBM': lgb.LGBMRegressor(random_state=42),
            'SVM': SVR()
        }
        self.results = {}
        self.predictions = {}  # '<Model>_<target>' -> predicted values
        self.y_test_values = {}  # target -> true test values

    def preprocess_data(self):
        """Split X/y 80/20 (random_state=42) and standardise the features.

        The scaler is fitted on the training split and only applied to the
        test split. Afterwards ``X_train``/``X_test`` hold the scaler output.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.2, random_state=42)

        self.X_train = self.scaler.fit_transform(self.X_train)
        self.X_test = self.scaler.transform(self.X_test)

    def _evaluate(self, target, y_pred):
        """Return R2, MSE and MAE of ``y_pred`` against the test values of ``target``."""
        y_true = self.y_test[target]
        return {
            'R2': r2_score(y_true, y_pred),
            'MSE': mean_squared_error(y_true, y_pred),
            'MAE': mean_absolute_error(y_true, y_pred)
        }

    def _store(self, key, target, y_pred, metrics):
        """Record metrics, predictions and the true test values for one model/target."""
        # Recorded on every store so the getters work whichever train_* ran.
        self.y_test_values[target] = self.y_test[target]
        self.results[key] = metrics
        self.predictions[key] = y_pred

    @staticmethod
    def _pickle(model, path):
        """Write ``model`` to ``path`` with pickle."""
        with open(path, 'wb') as file:
            pickle.dump(model, file)

    def _fit_and_record(self, model, model_name, target):
        """Fit ``model`` on one target, score it on the test split, pickle it, store the outcome."""
        model.fit(self.X_train, self.y_train[target])
        y_pred = model.predict(self.X_test)
        metrics = self._evaluate(target, y_pred)
        self._pickle(model, f'{model_name}_{target}_model.pkl')
        self._store(f'{model_name}_{target}', target, y_pred, metrics)

    def train_models(self):
        """Fit every base model and then the stacking ensemble, once per target.

        Each fit is pickled to '<Model>_<target>_model.pkl' in the working
        directory and its test metrics/predictions are stored on the instance.
        """
        for target in self.y.columns:
            for model_name, model in self.models.items():
                self._fit_and_record(model, model_name, target)

        # The stacking ensemble is built from the same four base estimators
        # with a linear regression as final estimator.
        estimators = [
            ('rf', self.models['RandomForest']),
            ('xgb', self.models['XGBoost']),
            ('lgbm', self.models['LightGBM']),
            ('svm', self.models['SVM'])
        ]
        stacking_model = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())

        for target in self.y.columns:
            self._fit_and_record(stacking_model, 'Stacking', target)

    def train_tabnet(self):
        """Fit one multi-output TabNet on all targets and score it per target.

        The model is pickled to 'TabNet_model.pkl' (a single file, not one per target).
        """
        tabnet_model = TabNetRegressor()
        # NOTE(review): the test split is also passed as eval_set together with
        # patience=10, so the TabNet scores below are presumably optimistic.
        tabnet_model.fit(
            X_train=self.X_train, y_train=self.y_train.values,
            eval_set=[(self.X_test, self.y_test.values)],
            patience=10, max_epochs=100,
            eval_metric=['rmse'],
            batch_size=16, virtual_batch_size=8
        )

        self._pickle(tabnet_model, 'TabNet_model.pkl')

        # Column i of the prediction matrix belongs to the i-th target column.
        y_pred = tabnet_model.predict(self.X_test)
        for i, target in enumerate(self.y.columns):
            target_pred = y_pred[:, i]
            self._store(f'TabNet_{target}', target, target_pred,
                        self._evaluate(target, target_pred))

    def get_results(self):
        """Return the stored metrics as a DataFrame with one row per '<Model>_<target>'."""
        results_df = pd.DataFrame(self.results).T
        return results_df

    def get_predictions(self, model_name='TabNet'):
        """Return {target: DataFrame(y_test, Predicted)} for one model.

        Args:
            model_name: which model's predictions to return; defaults to
                'TabNet'. If that model has no stored predictions for a
                target, the 'Predicted' column is filled with 'N/A'.
        """
        predictions_df = {}
        for target in self.y.columns:
            predictions_df[target] = pd.DataFrame({
                'y_test': self.y_test_values[target],
                'Predicted': self.predictions.get(f'{model_name}_{target}', 'N/A')
            })
        return predictions_df

    def get_best_model_predictions(self):
        """Return, per target, the predictions of the model with the highest test R2.

        Returns:
            {target: DataFrame with columns 'y_test', 'Predicted', 'Best_Model'}.
            Targets without any stored result are left out.
        """
        best_model_predictions = {}
        for target in self.y.columns:
            best_r2 = -float('inf')
            best_model = None

            # Strict '>' keeps the earliest model on ties and skips NaN scores.
            for model_name in self._MODEL_ORDER:
                model_key = f'{model_name}_{target}'
                if model_key in self.results:
                    r2 = self.results[model_key]['R2']
                    if r2 > best_r2:
                        best_r2 = r2
                        best_model = model_key

            if best_model:
                best_model_predictions[target] = pd.DataFrame({
                    'y_test': self.y_test_values[target],
                    'Predicted': self.predictions[best_model],
                    'Best_Model': best_model
                })

        return best_model_predictions

In [None]:
class VehiclePerformancePredictor:
    """Train, score and pickle several regressors on a caller-defined split.

    Unlike a random split, this variant evaluates on a fixed train/test
    partition. Pass the partition explicitly via ``X_train``/``X_test``/
    ``y_train``/``y_test``; when none of them is given, ``preprocess_data``
    falls back to the notebook variables ``carsim_x_train2``,
    ``carsim_x_test2``, ``carsim_y_train`` and ``carsim_y_test``.

    Args:
        X: full feature table (kept for reference; not used for the split).
        y: DataFrame of targets; its column names define the targets.
        X_train, X_test, y_train, y_test: optional explicit partition. Either
            all four or none must be supplied.

    State kept on the instance:
        scaler: StandardScaler fitted on the training features only.
        models: name -> base estimator. One instance is refitted for every
            target, so after training it holds the fit for the last target;
            the per-target fits are the pickle files written during training.
        results: '<Model>_<target>' -> {'R2', 'MSE', 'MAE'} on the test split.
        predictions: '<Model>_<target>' -> predictions for the test split.
        y_test_values: target -> true values of the test split.
    """

    # Candidate model names; earlier entries win R2 ties.
    _MODEL_ORDER = ('RandomForest', 'XGBoost', 'LightGBM', 'SVM', 'Stacking', 'TabNet')

    def __init__(self, X, y, X_train=None, X_test=None, y_train=None, y_test=None):
        """Store the data and create the unfitted scaler and base models.

        Raises:
            ValueError: if only some of the four split arguments are given.
        """
        self.X = X
        self.y = y

        split = (X_train, X_test, y_train, y_test)
        if all(part is None for part in split):
            self._explicit_split = None
        elif any(part is None for part in split):
            raise ValueError('X_train, X_test, y_train and y_test must be given together')
        else:
            self._explicit_split = split

        self.scaler = StandardScaler()
        self.models = {
            'RandomForest': RandomForestRegressor(n_estimators=100, random_state=42),
            'XGBoost': xgb.XGBRegressor(objective='reg:squarederror', random_state=42),
            'LightGBM': lgb.LGBMRegressor(random_state=42),
            'SVM': SVR()
        }
        self.results = {}
        self.predictions = {}  # '<Model>_<target>' -> predicted values
        self.y_test_values = {}  # target -> true test values

    def preprocess_data(self):
        """Adopt the fixed train/test partition and standardise the features.

        The scaler is fitted on the training features and only applied to the
        test features.
        """
        explicit_split = getattr(self, '_explicit_split', None)
        if explicit_split is not None:
            X_train, X_test, y_train, y_test = explicit_split
        else:
            # Backward-compatible fallback: read the partition prepared in the
            # notebook's global namespace (NameError if those cells did not run).
            X_train, X_test = carsim_x_train2, carsim_x_test2
            y_train, y_test = carsim_y_train, carsim_y_test

        self.y_train = y_train
        self.y_test = y_test
        self.X_train = self.scaler.fit_transform(X_train)
        self.X_test = self.scaler.transform(X_test)

    def _evaluate(self, target, y_pred):
        """Return R2, MSE and MAE of ``y_pred`` against the test values of ``target``."""
        y_true = self.y_test[target]
        return {
            'R2': r2_score(y_true, y_pred),
            'MSE': mean_squared_error(y_true, y_pred),
            'MAE': mean_absolute_error(y_true, y_pred)
        }

    def _store(self, key, target, y_pred, metrics):
        """Record metrics, predictions and the true test values for one model/target."""
        # Recorded on every store so the getters work whichever train_* ran.
        self.y_test_values[target] = self.y_test[target]
        self.results[key] = metrics
        self.predictions[key] = y_pred

    @staticmethod
    def _pickle(model, path):
        """Write ``model`` to ``path`` with pickle."""
        with open(path, 'wb') as file:
            pickle.dump(model, file)

    def _fit_and_record(self, model, model_name, target):
        """Fit ``model`` on one target, score it on the test split, pickle it, store the outcome."""
        model.fit(self.X_train, self.y_train[target])
        y_pred = model.predict(self.X_test)
        metrics = self._evaluate(target, y_pred)
        self._pickle(model, f'{model_name}_{target}_model.pkl')
        self._store(f'{model_name}_{target}', target, y_pred, metrics)

    def train_models(self):
        """Fit every base model and then the stacking ensemble, once per target.

        Each fit is pickled to '<Model>_<target>_model.pkl' in the working
        directory and its test metrics/predictions are stored on the instance.
        """
        for target in self.y.columns:
            for model_name, model in self.models.items():
                self._fit_and_record(model, model_name, target)

        # The stacking ensemble is built from the same four base estimators
        # with a linear regression as final estimator.
        estimators = [
            ('rf', self.models['RandomForest']),
            ('xgb', self.models['XGBoost']),
            ('lgbm', self.models['LightGBM']),
            ('svm', self.models['SVM'])
        ]
        stacking_model = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())

        for target in self.y.columns:
            self._fit_and_record(stacking_model, 'Stacking', target)

    def train_tabnet(self):
        """Fit one multi-output TabNet on all targets and score it per target.

        The model is pickled to 'TabNet_model.pkl' (a single file, not one per target).
        """
        tabnet_model = TabNetRegressor()
        # NOTE(review): the test split is also passed as eval_set together with
        # patience=10, so the TabNet scores below are presumably optimistic.
        tabnet_model.fit(
            X_train=self.X_train, y_train=self.y_train.values,
            eval_set=[(self.X_test, self.y_test.values)],
            patience=10, max_epochs=100,
            eval_metric=['rmse'],
            batch_size=16, virtual_batch_size=8
        )

        self._pickle(tabnet_model, 'TabNet_model.pkl')

        # Column i of the prediction matrix belongs to the i-th target column.
        y_pred = tabnet_model.predict(self.X_test)
        for i, target in enumerate(self.y.columns):
            target_pred = y_pred[:, i]
            self._store(f'TabNet_{target}', target, target_pred,
                        self._evaluate(target, target_pred))

    def get_results(self):
        """Return the stored metrics as a DataFrame with one row per '<Model>_<target>'."""
        results_df = pd.DataFrame(self.results).T
        return results_df

    def get_predictions(self, model_name='TabNet'):
        """Return {target: DataFrame(y_test, Predicted)} for one model.

        Args:
            model_name: which model's predictions to return; defaults to
                'TabNet'. If that model has no stored predictions for a
                target, the 'Predicted' column is filled with 'N/A'.
        """
        predictions_df = {}
        for target in self.y.columns:
            predictions_df[target] = pd.DataFrame({
                'y_test': self.y_test_values[target],
                'Predicted': self.predictions.get(f'{model_name}_{target}', 'N/A')
            })
        return predictions_df

    def get_best_model_predictions(self):
        """Return, per target, the predictions of the model with the highest test R2.

        Returns:
            {target: DataFrame with columns 'y_test', 'Predicted', 'Best_Model'}.
            Targets without any stored result are left out.
        """
        best_model_predictions = {}
        for target in self.y.columns:
            best_r2 = -float('inf')
            best_model = None

            # Strict '>' keeps the earliest model on ties and skips NaN scores.
            for model_name in self._MODEL_ORDER:
                model_key = f'{model_name}_{target}'
                if model_key in self.results:
                    r2 = self.results[model_key]['R2']
                    if r2 > best_r2:
                        best_r2 = r2
                        best_model = model_key

            if best_model:
                best_model_predictions[target] = pd.DataFrame({
                    'y_test': self.y_test_values[target],
                    'Predicted': self.predictions[best_model],
                    'Best_Model': best_model
                })

        return best_model_predictions

In [None]:
import os
# Show the working directory; the '*_model.pkl' files are opened with relative paths.
os.getcwd()

In [None]:
class VehiclePerformancePredictor:
    """Load previously pickled per-target models and predict on new data.

    Args:
        X_new: feature table to predict on.
        scaler: optional scaler that was already fitted on the training
            features. When given, new data is only transformed with it. When
            omitted, a fresh StandardScaler is fitted on ``X_new`` itself,
            which does not reproduce the scaling the models were trained with.

    State kept on the instance:
        models: names of the models to look for on disk.
        loaded_models: '<Model>_<target>' -> unpickled model.
        predictions: target -> {model name -> predictions, or the
            "not loaded" placeholder string}.
    """

    # Placeholder stored in `predictions` when a model file was not loaded.
    _NOT_LOADED = '모델이 로드되지 않았습니다.'

    def __init__(self, X_new, scaler=None):
        self.X_new = X_new
        self._scaler_is_fitted = scaler is not None
        self.scaler = scaler if scaler is not None else StandardScaler()
        self.models = ['RandomForest', 'XGBoost', 'LightGBM', 'SVM', 'Stacking', 'TabNet']
        self.loaded_models = {}  # models read back from disk
        self.predictions = {}  # predictions per target and model

    def load_models(self, target_columns):
        """Unpickle '<Model>_<target>_model.pkl' for every model/target pair.

        Missing files are reported with a printed message and skipped.
        pickle.load can execute arbitrary code, so only load files that were
        written by this notebook.

        NOTE(review): the training classes in this notebook write TabNet to
        'TabNet_model.pkl', which does not match the per-target name looked
        up here, so TabNet is presumably never found.
        """
        for model_name in self.models:
            for target in target_columns:
                try:
                    with open(f'{model_name}_{target}_model.pkl', 'rb') as file:
                        self.loaded_models[f'{model_name}_{target}'] = pickle.load(file)
                except FileNotFoundError:
                    print(f"{model_name}_{target} 모델 파일을 찾을 수 없습니다.")

    def preprocess_data(self):
        """Scale ``X_new``.

        Uses the fitted scaler passed to the constructor when there is one;
        otherwise falls back to fitting on the new data and warns about it.
        """
        if getattr(self, '_scaler_is_fitted', False):
            self.X_new = self.scaler.transform(self.X_new)
            return

        import warnings
        warnings.warn(
            'No fitted scaler supplied: scaling X_new with statistics of X_new itself, '
            'which differs from the scaling used during training.',
            stacklevel=2)
        self.X_new = self.scaler.fit_transform(self.X_new)

    def predict_new_data(self, target_columns):
        """Predict every target with every loaded model.

        Returns:
            ``self.predictions``: target -> {model name -> predictions}; models
            that were not loaded map to the placeholder string.
        """
        for target in target_columns:
            self.predictions[target] = {}
            for model_name in self.models:
                model_key = f'{model_name}_{target}'
                if model_key in self.loaded_models:
                    model = self.loaded_models[model_key]
                    y_pred = model.predict(self.X_new)
                    self.predictions[target][model_name] = y_pred
                else:
                    self.predictions[target][model_name] = self._NOT_LOADED
        return self.predictions

    def get_best_predictions(self, results):
        """Pick, per target, the prediction of the best-scoring model that actually predicted.

        Args:
            results: target -> {model name -> {'R2': score}}.

        Returns:
            target -> predictions of the usable model with the highest R2. If
            no listed model produced predictions for a target, the placeholder
            string is returned for that target.
        """
        best_predictions = {}
        for target, models in self.predictions.items():
            # Ignore models that are unknown here or only hold the placeholder,
            # so a missing file can never be selected as "best".
            usable = [name for name in results[target]
                      if name in models and not isinstance(models[name], str)]
            if not usable:
                best_predictions[target] = self._NOT_LOADED
                continue
            best_model = max(usable, key=lambda name: results[target][name]['R2'])
            best_predictions[target] = models[best_model]
        return best_predictions

# Usage example: predict on new data with the stored models.
def predict_new_vehicle_performance(X_new, target_columns, results, scaler=None):
    """Load the stored models, scale ``X_new``, predict and return the best prediction per target.

    Args:
        X_new: feature table to predict on.
        target_columns: names of the targets whose models should be loaded.
        results: target -> {model name -> {'R2': score}} used to rank models.
        scaler: optional scaler fitted on the training features; forwarded to
            the predictor so new data is scaled like the training data.

    Returns:
        target -> predictions of the best-ranked model that could be loaded.
    """
    predictor = VehiclePerformancePredictor(X_new, scaler=scaler)

    predictor.load_models(target_columns)
    predictor.preprocess_data()
    predictor.predict_new_data(target_columns)

    return predictor.get_best_predictions(results)

# Example input for prediction.
# NOTE(review): `matlab2` is only assigned in a later cell, so this cell fails
# on a fresh top-to-bottom run.
X_new = matlab2

# Target names (the y column names used during training)
target_columns = ['USG', 'SSG', 'MaxAy']

# Per-target R2 of each model, used to decide whose prediction is returned.
# NOTE(review): these scores are literals typed into this cell, not values read
# from a fitted predictor — confirm they match the real evaluation.
results = {
    'USG': {
        'RandomForest': {'R2': 0.85},
        'XGBoost': {'R2': 0.88},
        'LightGBM': {'R2': 0.87},
        'SVM': {'R2': 0.80},
        'Stacking': {'R2': 0.90},
        'TabNet': {'R2': 0.89}
    },
    'SSG': {
        'RandomForest': {'R2': 0.82},
        'XGBoost': {'R2': 0.84},
        'LightGBM': {'R2': 0.83},
        'SVM': {'R2': 0.79},
        'Stacking': {'R2': 0.86},
        'TabNet': {'R2': 0.88}
    },
    'MaxAy': {
        'RandomForest': {'R2': 0.82},
        'XGBoost': {'R2': 0.84},
        'LightGBM': {'R2': 0.83},
        'SVM': {'R2': 0.79},
        'Stacking': {'R2': 0.86},
        'TabNet': {'R2': 0.88}
    }
}

# Run the prediction on the new data
best_predictions = predict_new_vehicle_performance(X_new, target_columns, results)

print(best_predictions)

In [None]:
# Features and targets taken from the `matlab` source.
# NOTE(review): `matlab2` is assigned in a later cell of this notebook.
mat_x = matlab2
mat_y = matlab.loc[:, ['USG', 'SSG', 'MaxAy']]

In [None]:
import joblib

In [None]:
# Read the stored LightGBM model for the USG target from the working directory.
# NOTE(review): the training class writes this file with pickle.dump; confirm
# joblib.load is the intended reader.
loaded_model = joblib.load('./LightGBM_USG_model.pkl')

In [None]:
# Targets and features of the `matlab` source side by side in one frame.
full_mat = pd.concat([mat_y, mat_x], axis = 1)

In [None]:
# 60/40 train/test split of the `matlab` data with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(mat_x, mat_y, test_size = 0.4, random_state = 42)
# Fit the scaler on the training features only, then apply it to the test features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit the loaded model on the first target column of the `matlab` training split.
# NOTE(review): calling fit presumably replaces what was learned before the
# model was pickled — confirm retraining (not evaluating the stored model) is intended.
loaded_model.fit(X_train, y_train.iloc[:, 0])

In [None]:
# Predictions of the refitted model for the test split.
lgbm_y_usg_pred = loaded_model.predict(X_test)

In [None]:
# Compare the shapes of the test targets (all columns) and the predictions.
y_test.shape, lgbm_y_usg_pred.shape

In [None]:
# r2_score expects (y_true, y_pred). R2 is not symmetric in its arguments, so
# the ground truth has to come first.
r2_score(y_test.iloc[:, 0], lgbm_y_usg_pred)

In [None]:
# MSE and MAE of the first-target predictions, passed in scikit-learn's
# (y_true, y_pred) order; both metrics are symmetric, so the values are unchanged.
print(mean_squared_error(y_test.iloc[:, 0], lgbm_y_usg_pred))
print(mean_absolute_error(y_test.iloc[:, 0], lgbm_y_usg_pred))

In [None]:
# Hold out every vehicle whose sprung mass lies in [1500, 1600) as the test set.
# The training set is the exact complement of that mask: building the two
# filters separately (`< 1600` for test, `<= 1600` for train) dropped rows with
# a mass of exactly 1600 from both sets.
test_mass_mask = (carsim['vehicle_sprung_mass'] >= 1500) & (carsim['vehicle_sprung_mass'] < 1600)
carsim_test = carsim[test_mass_mask]
carsim_train = carsim[~test_mass_mask]

# The first three columns are used as targets, all remaining columns as features.
carsim_x_train = carsim_train.iloc[:, 3:]
carsim_y_train = carsim_train.iloc[:, :3]
carsim_x_test = carsim_test.iloc[:, 3:]
carsim_y_test = carsim_test.iloc[:, :3]

In [None]:
# Rebuild the feature/target frames from the hand-picked train/test sets;
# this overwrites the variables of the same name produced by the mass-range split.
# The first three columns are used as targets, the remaining ones as features.
carsim_x_train = carsim_trainset.iloc[:, 3:]
carsim_y_train = carsim_trainset.iloc[:, :3]
carsim_x_test = carsim_testset.iloc[:, 3:]
carsim_y_test = carsim_testset.iloc[:, :3]

In [None]:
# Text columns that get an integer '<column>_code' companion column.
categorical_cols = ['car_name', 'car_cat', 'car_size', 'tire_cat']

# The feature frames are slices of other frames; copy them before adding
# columns so the assignment is unambiguous.
carsim_x_train = carsim_x_train.copy()
carsim_x_test = carsim_x_test.copy()

# One encoder per column, fitted on the labels of train and test together, so
# a given label maps to the same code in both frames and labels that only
# occur in the test set can still be encoded. Refitting a single encoder on
# each frame separately would assign codes independently per frame.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    le.fit(pd.concat([carsim_x_train[col], carsim_x_test[col]]))
    carsim_x_train[f'{col}_code'] = le.transform(carsim_x_train[col])
    carsim_x_test[f'{col}_code'] = le.transform(carsim_x_test[col])
    label_encoders[col] = le

In [None]:
# Text columns that get an integer '<column>_code' companion column.
categorical_cols = ['car_name', 'car_cat', 'car_size', 'tire_cat']

# The feature frames are slices of other frames; copy them before adding
# columns so the assignment is unambiguous.
carsim_x_train = carsim_x_train.copy()
carsim_x_test = carsim_x_test.copy()

# One encoder per column, fitted on the labels of train and test together, so
# a given label maps to the same code in both frames and labels that only
# occur in the test set can still be encoded. Refitting a single encoder on
# each frame separately would assign codes independently per frame.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    le.fit(pd.concat([carsim_x_train[col], carsim_x_test[col]]))
    carsim_x_train[f'{col}_code'] = le.transform(carsim_x_train[col])
    carsim_x_test[f'{col}_code'] = le.transform(carsim_x_test[col])
    label_encoders[col] = le

In [None]:
# Add integer code columns for the four text columns of `matlab` (modifies `matlab` in place).
# Relies on `le` created in an earlier cell.
# NOTE(review): the encoder is refitted on matlab's own labels for every column,
# so the codes are not guaranteed to match those assigned to the carsim frames.
matlab['car_name_code'] = le.fit_transform(matlab['car_name'])
matlab['car_cat_code'] = le.fit_transform(matlab['car_cat'])
matlab['car_size_code'] = le.fit_transform(matlab['car_size'])
matlab['tire_cat_code'] = le.fit_transform(matlab['tire_cat'])

In [None]:
# Feature columns of `carsim`: everything from the 'tire_number' column to the last column.
# NOTE(review): in the visible cells the '*_code' columns are added to
# carsim_x_train/carsim_x_test, not to `carsim` itself — confirm this slice holds
# the intended features.
carsim2 = carsim.loc[:, "tire_number":]

In [None]:
# Feature columns of `matlab`: everything from the 'tire_number' column to the last column.
matlab2 = matlab.loc[:, "tire_number":]

In [None]:
# Same column slice ('tire_number' onwards) for the train and test feature frames.
carsim_x_train2 = carsim_x_train.loc[:, "tire_number":]
carsim_x_test2 = carsim_x_test.loc[:, "tire_number":]

In [None]:
# Features and the three targets taken from `carsim`.
X = carsim2
y = carsim[['USG', 'SSG', 'MaxAy']]

# Run the training pipeline.
# NOTE(review): several cells define a class named VehiclePerformancePredictor;
# which one runs here depends on which definition cell was executed last.
predictor = VehiclePerformancePredictor(X, y)
predictor.preprocess_data()
predictor.train_models()
#predictor.train_tabnet()
results_df = predictor.get_results()
print(results_df)

In [None]:
# Predictions of the best-scoring model for each target
best_predictions = predictor.get_best_model_predictions()

# Print the per-target frames
for target, df in best_predictions.items():
    print(f"Best model predictions for {target}:")
    print(df)


In [None]:
# Look at USG and SSG of the `matlab` row labelled 549 (hard-coded row label).
matlab[['USG', 'SSG']].loc[549]

In [None]:
# Look at the `carsim` row labelled 609 (hard-coded row label).
carsim.loc[609]

In [None]:
# Display the whole `matlab` frame.
matlab

In [None]:
# Predicted and true USG against sprung mass, drawn on the same axes.
# NOTE(review): x comes from `carsim_test` while y comes from the predictor's
# test split; the points only pair up if both describe the same rows — confirm.
sns.scatterplot(x = carsim_test['vehicle_sprung_mass'], y = best_predictions['USG']['Predicted'])
sns.scatterplot(x = carsim_test['vehicle_sprung_mass'], y = best_predictions['USG']['y_test'])


In [None]:
# Predicted and true SSG against sprung mass, drawn on the same axes.
# NOTE(review): x comes from `carsim_test` while y comes from the predictor's
# test split; the points only pair up if both describe the same rows — confirm.
sns.scatterplot(x = carsim_test['vehicle_sprung_mass'], y = best_predictions['SSG']['Predicted'])
sns.scatterplot(x = carsim_test['vehicle_sprung_mass'], y = best_predictions['SSG']['y_test'])

In [None]:
# True vs. predicted USG for the best model.
sns.scatterplot(x = best_predictions['USG']['y_test'], y = best_predictions['USG']['Predicted'])

In [None]:
# True vs. predicted SSG for the best model.
sns.scatterplot(x = best_predictions['SSG']['y_test'], y = best_predictions['SSG']['Predicted'])

In [None]:
# True vs. predicted MaxAy for the best model.
sns.scatterplot(x = best_predictions['MaxAy']['y_test'], y = best_predictions['MaxAy']['Predicted'])

In [None]:
# Keep only the true/predicted columns of each best-model frame (drops 'Best_Model').
usg_best = best_predictions['USG'][['y_test', 'Predicted']]
ssg_best = best_predictions['SSG'][['y_test', 'Predicted']]
maxay_best = best_predictions['MaxAy'][['y_test', 'Predicted']]

In [None]:
# True vs. predicted USG from the trimmed frame.
sns.scatterplot(x = usg_best['y_test'], y = usg_best['Predicted'])

In [None]:
# True vs. predicted SSG from the trimmed frame.
sns.scatterplot(x = ssg_best['y_test'], y = ssg_best['Predicted'])

In [None]:
# True vs. predicted MaxAy from the trimmed frame.
sns.scatterplot(x = maxay_best['y_test'], y = maxay_best['Predicted'])

In [None]:
# Display the metrics table returned by the predictor.
results_df

In [None]:
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
import lightgbm as lgb
from sklearn.svm import SVR

# Create the base models
models = {
    'RandomForest': RandomForestRegressor(n_estimators=100, random_state=42),
    'XGBoost': xgb.XGBRegressor(objective='reg:squarederror', random_state=42),
    'LightGBM': lgb.LGBMRegressor(random_state=42),
    'SVM': SVR()
}

# Fit and evaluate every model for every target.
# NOTE(review): `y`, `X_train`, `X_test`, `y_train` and `y_test` are whatever
# earlier cells left in the kernel; confirm they belong to the same data set.
results = {}

for target in y.columns:
    y_train_target = y_train[target]
    y_test_target = y_test[target]

    # The same estimator instance is refitted for each target, so after the
    # loop every entry of `models` holds the fit for the last target.
    for model_name, model in models.items():
        model.fit(X_train, y_train_target)
        y_pred = model.predict(X_test)

        # Score on the test split
        r2 = r2_score(y_test_target, y_pred)
        mse = mean_squared_error(y_test_target, y_pred)
        mae = mean_absolute_error(y_test_target, y_pred)

        # Store the metrics under '<Model>_<target>'
        results[f'{model_name}_{target}'] = {
            'R2': r2,
            'MSE': mse,
            'MAE': mae
        }

# Print the metrics
for key, metrics in results.items():
    print(f"Model: {key}")
    print(f"R-squared: {metrics['R2']}")
    print(f"MSE: {metrics['MSE']}")
    print(f"MAE: {metrics['MAE']}")
    print("-" * 30)


In [None]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression

# Define the stacking model: four base regressors combined by a linear regression
estimators = [
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('xgb', xgb.XGBRegressor(objective='reg:squarederror', random_state=42)),
    ('lgbm', lgb.LGBMRegressor(random_state=42)),
    ('svm', SVR())
]

stacking_model = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())

# Fit and evaluate the stacking model for every target.
# Relies on `y`, `X_train`, `X_test`, `y_train`, `y_test` and `results` from earlier cells.
for target in y.columns:
    y_train_target = y_train[target]
    y_test_target = y_test[target]

    stacking_model.fit(X_train, y_train_target)
    y_pred = stacking_model.predict(X_test)

    # Score on the test split
    r2 = r2_score(y_test_target, y_pred)
    mse = mean_squared_error(y_test_target, y_pred)
    mae = mean_absolute_error(y_test_target, y_pred)

    # Add the metrics to the existing `results` dict
    results[f'Stacking_{target}'] = {
        'R2': r2,
        'MSE': mse,
        'MAE': mae
    }

# Print the metrics (includes the entries that were already in `results`)
for key, metrics in results.items():
    print(f"Model: {key}")
    print(f"R-squared: {metrics['R2']}")
    print(f"MSE: {metrics['MSE']}")
    print(f"MAE: {metrics['MAE']}")
    print("-" * 30)


In [None]:
import pickle
import xgboost as xgb
import lightgbm as lgb

# Save the estimators held in `models`, i.e. the ones created and fitted by the
# training cell earlier in the notebook. They are deliberately not re-created
# here: new instances would be unfitted, and the pickles would then fail at
# predict time after loading.
# Each estimator holds the fit for the last target of the training loop.
for model_name, model in models.items():
    with open(f'{model_name}_model.pkl', 'wb') as file:
        pickle.dump(model, file)


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the models back from '<Model>_model.pkl'
loaded_models = {}
for model_name in models.keys():
    with open(f'{model_name}_model.pkl', 'rb') as file:
        loaded_models[model_name] = pickle.load(file)

# New data set (made-up example values)
# NOTE(review): these four columns are an illustration; confirm they match the
# features the stored models were trained on.
new_data = {
    'vehicle_weight': [1550, 1650],
    'engine_power': [120, 180],
    'tire_width': [210, 220],
    'tire_diameter': [16, 17]
}
new_df = pd.DataFrame(new_data)

# Standardise the new data.
# NOTE(review): the original comment said to reuse the scaler from training,
# but a new StandardScaler is created and fitted on `new_df` here; this also
# overwrites the notebook-level `scaler`.
scaler = StandardScaler()
new_X = scaler.fit_transform(new_df)

# Predict with every loaded model
predictions = {}
for model_name, model in loaded_models.items():
    predictions[model_name] = model.predict(new_X)

# Print the predictions
for model_name, prediction in predictions.items():
    print(f"Predictions using {model_name}: {prediction}")


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Create and fit a random forest on the full `y_train` (all target columns at once)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict on the test split and report the MSE
y_pred_rf = rf_model.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f"Random Forest MSE: {mse_rf}")


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build a small fully connected network with one output unit per target
# column. A single output unit cannot match a multi-column y_train: the final
# mean_squared_error call would compare arrays with different numbers of outputs.
n_outputs = 1 if y_train.ndim == 1 else y_train.shape[1]
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(n_outputs)  # output layer: one unit per predicted target
])

# Compile with Adam and a mean-squared-error loss
model.compile(optimizer='adam', loss='mean_squared_error')

# Train; 20% of the training data is held back for validation
model.fit(X_train, y_train, epochs=50, batch_size=10, validation_split=0.2)

# Predict on the test split and report the MSE
y_pred_dl = model.predict(X_test)
mse_dl = mean_squared_error(y_test, y_pred_dl)
print(f"Deep Learning MSE: {mse_dl}")


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Build a small example frame (five made-up rows).
# NOTE(review): this cell overwrites the notebook-level X, y, X_train, X_test,
# y_train, y_test and scaler with the toy data.
data = {
    'vehicle_weight': [1500, 1600, 1700, 1800, 1900],
    'engine_power': [100, 150, 200, 250, 300],
    'tire_width': [205, 215, 225, 235, 245],
    'tire_diameter': [15, 16, 17, 18, 19],
    'performance_1': [10, 20, 30, 40, 50],  # target variable 1
    'performance_2': [20, 25, 35, 45, 55],  # target variable 2
    'performance_3': [30, 35, 40, 50, 60]   # target variable 3
}
df = pd.DataFrame(data)

# Separate input features and target columns
X = df.drop(['performance_1', 'performance_2', 'performance_3'], axis=1)
y = df[['performance_1', 'performance_2', 'performance_3']]

# Split into training and test data (20% test, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise: fit on the training features, apply to the test features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
from pytorch_tabnet.tab_model import TabNetRegressor

# Create the TabNet model
tabnet_model = TabNetRegressor()

# Train on all target columns at once.
# NOTE(review): the test split is also passed as eval_set together with
# patience=10, so the scores below are presumably optimistic.
tabnet_model.fit(
    X_train=X_train, y_train=y_train.values,
    eval_set=[(X_test, y_test.values)],
    patience=10, max_epochs=100,
    eval_metric=['rmse'],
    batch_size=32, virtual_batch_size=16
)

# Predict on the test split
y_pred = tabnet_model.predict(X_test)

# Metrics per target column (multioutput='raw_values' returns one value per column)
mse = mean_squared_error(y_test, y_pred, multioutput='raw_values')
mae = mean_absolute_error(y_test, y_pred, multioutput='raw_values')
r2 = r2_score(y_test, y_pred, multioutput='raw_values')

print(f"TabNet Model - MSE per target: {mse}")
print(f"TabNet Model - MAE per target: {mae}")
print(f"TabNet Model - R2 per target: {r2}")
