In [1]:
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Build a seeded synthetic regression problem (1000 samples, 20 features).
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    noise=0.1,
    random_state=42,
)

# Hold out 20% of the rows as the test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Construct a 100-tree random forest and fit it on the training portion.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows with mean squared error and report it.
y_pred = rf.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 7055.507694741972


In [2]:
import numpy as np  # added: used for the square root below
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Build a seeded synthetic regression problem (1000 samples, 20 features).
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    noise=0.1,
    random_state=42,
)

# Hold out 20% of the rows as the test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Construct a 100-tree random forest and fit it on the training portion.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out rows and compute the mean squared error.
y_pred = rf.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

# added: RMSE is the square root of the MSE computed above.
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error: {rmse}")

Root Mean Squared Error: 83.99706956044344


In [3]:
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error  # changed: MAE instead of MSE
from sklearn.model_selection import train_test_split

# Build a seeded synthetic regression problem (1000 samples, 20 features).
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    noise=0.1,
    random_state=42,
)

# Hold out 20% of the rows as the test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Construct a 100-tree random forest and fit it on the training portion.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# changed: score the held-out predictions with mean absolute error.
y_pred = rf.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print(f"Mean Absolute Error: {mae}")

Mean Absolute Error: 66.91350815502615


In [4]:
# Dataset example
import pandas as pd

# Wrap the NumPy arrays in pandas DataFrames; feature columns are named by
# their positional index, and the target gets a single 'Target' column.
# NOTE: X and y are not created in this cell; they must already be defined.
X_df = pd.DataFrame(
    X,
    columns=[f'Feature_{i}' for i in range(X.shape[1])],
)
y_df = pd.DataFrame(
    y,
    columns=['Target'],
)

# Show the first 5 rows of each DataFrame, each under its own heading line.
print("First 5 rows of X DataFrame:", X_df.head(), sep="\n")

print("\nFirst 5 rows of y DataFrame:", y_df.head(), sep="\n")

First 5 rows of X DataFrame:
   Feature_0  Feature_1  Feature_2  Feature_3  Feature_4  Feature_5  \
0   0.225842   1.551378  -0.107347   0.859695  -0.942963  -1.096625   
1   0.110836  -1.454615   0.263888  -1.654510   0.818549   0.482849   
2   0.458600  -0.081280  -0.698474   0.737528   0.860085   0.275249   
3  -1.795643  -0.453414  -0.423760   0.155325   0.487775   0.398147   
4  -1.180626   0.339530   0.328010  -0.224555   0.963951  -1.058450   

   Feature_6  Feature_7  Feature_8  Feature_9  Feature_10  Feature_11  \
0  -1.197167  -1.733767  -0.950042   1.274060    1.672990    1.477373   
1   0.358334  -0.061366   0.327626   1.270846   -2.128649   -0.546838   
2   0.333945  -1.076817   0.947821  -0.181406   -0.379908    0.379610   
3   0.732829   2.042178   0.733548   1.598322   -1.274232   -0.734233   
4   0.941256   0.968745  -0.002686   0.042907   -0.589144   -1.049914   

   Feature_12  Feature_13  Feature_14  Feature_15  Feature_16  Feature_17  \
0    0.335126   -0.621135   

In [5]:
import numpy as np
from sklearn.datasets import make_regression

# Generate the dataset (seeded, 1000 samples with 20 features).
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    noise=0.1,
    random_state=42,
)

# Show the first 5 samples of X (feature data) under a heading line.
print("First 5 samples of X:", X[:5], sep="\n")

# Show the first 5 values of y (target data), preceded by a blank line.
print("\nFirst 5 values of y:", y[:5], sep="\n")

First 5 samples of X:
[[ 0.22584183  1.55137772 -0.10734682  0.85969505 -0.94296337 -1.09662504
  -1.19716659 -1.73376709 -0.95004188  1.27405964  1.6729897   1.47737256
   0.33512613 -0.6211348  -1.52510564  1.01109701  1.47818394 -1.35154745
   0.3646018   0.13316649]
 [ 0.11083638 -1.45461475  0.26388849 -1.65451039  0.81854872  0.48284875
   0.35833407 -0.06136604  0.32762622  1.27084562 -2.12864912 -0.54683846
   2.67962869 -1.85158683  0.8720902  -0.98785807  1.74449555 -1.42646391
  -0.60451386 -0.16624207]
 [ 0.45860045 -0.08127955 -0.69847376  0.73752806  0.86008507  0.27524869
   0.33394505 -1.07681697  0.94782078 -0.18140603 -0.37990797  0.37961006
   1.50808285  0.55537966 -1.38301781 -0.83618711  0.60673     0.90822252
   0.51141526 -2.09317802]
 [-1.79564317 -0.45341411 -0.42375968  0.155325    0.48777482  0.39814727
   0.73282908  2.04217791  0.73354781  1.59832181 -1.27423212 -0.73423332
   0.07660907  1.04848265  0.00878365 -0.14152964 -2.10474951 -0.33009019
  -0.5117